python3解析XML文件

/

软硬件环境

  • Ubuntu 15.10 32bit
  • Python 3.5.1
  • PyQt 5.5.1

前言

Python解析XML的方法挺多,本文主要是利用ElementTree来完成。

实例讲解

解析XML

以如下的XML文件为例

  1. <root>
  2. <version>1.0.04</version>
  3. <mysqlhost>10.10.10.240</mysqlhost>
  4. <mysqlport>3306</mysqlport>
  5. <mysqluser>root</mysqluser>
  6. <mysqlpassword>123456</mysqlpassword>
  7. <mysqldatabase>longjingdb</mysqldatabase>
  8. <mysqltable>mac</mysqltable>
  9. <mysqlstbtype>L6000</mysqlstbtype>
  10. <irdetokeytype>1</irdetokeytype>
  11. <printerhost>192.168.1.51</printerhost>
  12. <printerport>4001</printerport>
  13. </root>

编写了一个类来解析该文件,并用一个字典存放解析结果(键为标签名,值为标签的文本内容)

  import xml.etree.ElementTree as ET


  class SYSXMLParser(object):
      """Read the direct children of an XML document's root into a dict."""

      def __init__(self, file):
          '''
          :param file: path of the XML file (or an open file object) to parse
          '''
          self.xmlFile = file
          self.sysXMLDict = {}

      def getSysXMLDict(self):
          '''
          Parse the XML file and map every child tag of the root element to
          its text content.

          :return: dict of {tag name: element text}; the same dict is also
                   kept on the instance as ``sysXMLDict``
          '''
          root = ET.parse(self.xmlFile).getroot()
          # Iterate the element itself: Element.getchildren() was deprecated
          # and then removed in Python 3.9.
          for child in root:
              # Only the text is stored; the former assignment of
              # child.attrib was overwritten immediately and had no effect.
              self.sysXMLDict[child.tag] = child.text
          return self.sysXMLDict

执行后打印的结果如下

  1. {'mysqlstbtype': 'L6000', 'mysqlpassword': '123456', 'version': '1.0.04', 'printerhost': '192.168.1.51', 'printerport': '4001', 'mysqltable': 'mac', 'mysqldatabase': 'longjingdb', 'mysqlport': '3306', 'mysqluser': 'root', 'mysqlhost': '10.10.10.240', 'irdetokeytype': '1'}

创建XML

手头上刚好有个实例:给定一个存放烧录镜像文件的文件夹,需要为它生成一个XML文件,其中记录每个镜像文件的名字、对应的烧录地址、镜像的路径以及md5值。

  1. # -*- coding: utf-8 -*-
  2. __author__ = '[email protected]'
  3. import os
  4. import sys
  5. import xml.etree.ElementTree as ET
  6. from common.constant import *
  7. from checksum.md5 import *
  # Image file name -> address constant for the images handled in the first
  # round. The *_ADDRESS names come from common.constant.
  FirstRoundImages = {
      'pmp.toc': PMP_ADDRESS,
      'secboot.toc': SECBOOT_ADDRESS,
      'secos.toc': SECOS_ADDRESS,
      'secosbak.toc': SECOS_BACK_ADDRESS,
      'u-boot.toc': UBOOT_ADDRESS,
      'u-bootbak.toc': UBOOT_BACK_ADDRESS,
      'splash.dat': SPLASH_ADDRESS,
  }

  # Image file name -> address constant for the images handled in the second
  # round.
  SecondRoundImages = {
      'factorytest.img': FACTORYTEST_ADDRESS,
      'boot.img': BOOT_ADDRESS,
      'system.img': SYSTEM_ADDRESS,
      'dvbdata.img': DVBDATA_ADDRESS,
      'userdata.img': USERDATA_ADDRESS,
      'cache.img': CACHE_ADDRESS,
      'otaloader.img': OTALOADER_ADDRESS,
      'iploader.img': IPLOADER_ADDRESS,
      'recovery.img': RECOVERY_ADDRESS,
  }
  class GenerateConfigXML(object):
      """Scan an images directory and write an XML file describing each image.

      Every known image becomes one element carrying its name, address,
      relative path and MD5 checksum, grouped under <FirstRound> or
      <SecondRound>.
      """

      # First-round images that are always emitted first, in this order.
      _FIRST_ROUND_ORDER = ('pmp.toc', 'secboot.toc', 'secos.toc', 'secosbak.toc')

      # Backup entry -> image file it is generated from. A backup entry gets
      # its own element and address but reuses the path and checksum of the
      # primary image.
      _BACKUP_IMAGES = {'secosbak.toc': 'secos.toc', 'u-bootbak.toc': 'u-boot.toc'}

      def __init__(self, path):
          '''
          :param path: images dir
          '''
          self.path = path
          # Per-instance dicts: as class attributes they were shared (and
          # mutated) across every instance.
          self.firstRoundImageDict = {}
          self.secondRoundImageDict = {}

      def buildConfigXML(self):
          '''
          Scan ``self.path`` and write the resulting XML tree to
          XML_CONFIG_FILE.

          :return:
          '''
          self.listDir(self.path)
          root = ET.Element("root")

          self.firstRound = ET.SubElement(root, "FirstRound")
          # Fixed-order images first, then whatever else was found.
          names = [n for n in self._FIRST_ROUND_ORDER if n in self.firstRoundImageDict]
          names += [n for n in self.firstRoundImageDict if n not in self._FIRST_ROUND_ORDER]
          for name in names:
              self._addImage(self.firstRound, name, self.firstRoundImageDict[name],
                             self._BACKUP_IMAGES.get(name))

          self.secondRound = ET.SubElement(root, "SecondRound")
          for name, address in self.secondRoundImageDict.items():
              self._addImage(self.secondRound, name, address)

          self.indent(root)
          # Write to the same file the old code removed beforehand; it used
          # to write a hard-coded "config.xml" instead. write() overwrites an
          # existing file, so no explicit removal is needed.
          ET.ElementTree(root).write(XML_CONFIG_FILE)

      def _addImage(self, parent, name, address, sourceName=None):
          '''
          Append one image element to ``parent``.

          :param parent: element that receives the new child
          :param name: element tag and value of the "name" attribute
          :param address: value of the "address" attribute
          :param sourceName: file to take path/md5 from when it differs from
                             ``name`` (backup entries)
          :return: the created element
          '''
          imagePath = os.path.join(self.path, sourceName or name)
          node = ET.SubElement(parent, name)
          node.set("name", name)
          node.set("address", address)
          node.set("path", os.path.relpath(imagePath))
          node.set("md5", CalcMD5.calcFileMd5(imagePath))
          return node

      def listDir(self, path):
          '''
          Walk ``path`` and record every known image in the round dicts.

          :param path: directory to scan
          '''
          # NOTE: os.walk also descends into sub-directories, but only the
          # bare file name is recorded and paths are later built relative to
          # self.path, so images are expected directly inside that directory.
          for _root, _dirs, files in os.walk(path):
              for file in files:
                  if file in FirstRoundImages:
                      print("firstRound: " + file)
                      self.firstRoundImageDict[file] = FirstRoundImages[file]
                      # Finding a primary image also registers its backup entry.
                      for backup, source in self._BACKUP_IMAGES.items():
                          if source == file:
                              self.firstRoundImageDict[backup] = FirstRoundImages[backup]
                  if file in SecondRoundImages:
                      print("secondRound: " + file)
                      self.secondRoundImageDict[file] = SecondRoundImages[file]

      def indent(self, elem, level=0):
          '''
          Insert newlines and one space per nesting level into the text and
          tail of ``elem`` and its descendants so the tree serializes in a
          readable form. Existing non-blank text or tails are left untouched.

          :param elem: element to format in place
          :param level: nesting depth of ``elem``
          :return: ``elem``
          '''
          i = "\n" + level*" "
          if len(elem):
              if not elem.text or not elem.text.strip():
                  elem.text = i + " "
              for child in elem:
                  self.indent(child, level+1)
              # The last child's tail precedes the closing tag of ``elem``,
              # so it is pulled back to the indentation of ``elem``.
              last = elem[-1]
              if not last.tail or not last.tail.strip():
                  last.tail = i
          if level and (not elem.tail or not elem.tail.strip()):
              elem.tail = i
          return elem
  if __name__ == '__main__':
      # The images directory is the single required command-line argument.
      if len(sys.argv) >= 2:
          GenerateConfigXML(sys.argv[1]).buildConfigXML()
      else:
          print("Usage: python3 generateConfigXml.py dirOfTheImages")
          sys.exit(1)

最后生成的config.xml内容如下:

  1. <root>
  1. <FirstRound />
  2. <SecondRound>
  1. <factorytest.img address="otaloaderbak" md5="2bca7c24acf471ad4126e63224b117c3" name="factorytest.img" path="factory20160411/factorytest.img" />
  2. <iploader.img address="iploader" md5="40d6bf4fe05bb4ce2d500464d48da5a4" name="iploader.img" path="factory20160411/iploader.img" />
  3. <boot.img address="boot" md5="b1937b921ee1122f1946dcb96811e69e" name="boot.img" path="factory20160411/boot.img" />
  4. <system.img address="system" md5="12fe7f8c8920c18a3cc9a815f201cca7" name="system.img" path="factory20160411/system.img" />
  5. <otaloader.img address="otaloader" md5="0831e96caf91482dbb9efde1ca29d3bd" name="otaloader.img" path="factory20160411/otaloader.img" />
  6. <userdata.img address="userdata" md5="536d084f75a46470f2373e3052d288dc" name="userdata.img" path="factory20160411/userdata.img" />
  1. </SecondRound>
  1. </root>

最后顺便提供下python3下的MD5计算方法,见下面这个类

  1. # -*- coding: utf-8 -*-
  2. __author__ = '[email protected]'
  3. import hashlib
  class CalcMD5(object):
      """MD5 helpers for files and strings."""

      # Number of bytes read from a file per iteration (1 MiB).
      _BLOCK_SIZE = 1048576

      def __init__(self):
          pass

      @classmethod
      def calcFileMd5(cls, filePath):
          '''
          Hash a file block by block so that large files are never held in
          memory as a whole.

          :param filePath: path of the file to hash
          :return: file checksum value (hex digest)
          '''
          md5 = hashlib.md5()
          # ``with`` closes the file even when a read fails.
          with open(filePath, 'rb') as fp:
              # read() returns b'' at end of file, which ends the iteration.
              for block in iter(lambda: fp.read(cls._BLOCK_SIZE), b''):
                  md5.update(block)
          return md5.hexdigest()

      @classmethod
      def calcStringMd5(cls, str):
          '''
          :param str: text to hash; it is encoded as UTF-8 first
          :return: string checksum value (hex digest)
          '''
          # The parameter name shadows the builtin but is kept so that
          # existing keyword callers keep working.
          return hashlib.md5(str.encode("utf-8")).hexdigest()

转载请注明作者和出处,并添加本页链接。
原文链接: http://xugaoxiang.com/post/22

给我留言