使用xml.etree.ElementTree 解析xml文檔
-
加載xml文件:1. 加載指定的字符串:ElementTree.fromstring(text)
2. 加載指定文件:ElementTree.parse('path') -
獲取element的方法:getiterator, getchildren, find方法(支持部分xpath),findall(支持部分xpath)(注意:getiterator 和 getchildren 已在 Python 3.9 中移除,請改用 iter() 和 list(element))
-
獲取值和屬性:.text、.attrib['category']、.get('category');設置屬性則使用 .set('category', value)
# coding=UTF-8
import xml.etree.ElementTree as ET
# Walkthrough: read ./lx_xml.xml and show the basic ElementTree lookups.
# Every print call takes a single argument so the output is the same on
# Python 2 and Python 3.

# Parse the XML file into an ElementTree.
tree = ET.parse('./lx_xml.xml')
# Get the root element of the document.
root = tree.getroot()
# Tag name of the root element.
tag_name = root.tag
print(tag_name)  # students

# Iterate over the direct children and show each tag with its attributes.
for child in root:
    # A single formatted string keeps tag and attributes on one line.
    print('%s %s' % (child.tag, child.attrib))
# student {'no': '2009081097'}
# student {'no': '2009081098'}
# student {'no': '2009081099'}

# findall() returns a list of the matching direct children; loop over it.
students = root.findall("student")
for i in students:
    # get("xx") reads a single attribute value of the element.
    attr = i.get('no')
    # find() returns the first matching child; .text is its text content.
    name = i.find('name').text
    print(attr)
    print(name)

# find() returns only the first matching child.
student = root.find("student")
no = student.get('no')
print(no)

# A subset of XPath is supported: every <name> below any <student>.
student1_name = root.findall(".//student/name")
for i in student1_name:
    print(i.text)
應用:
# Application: look up books in ./demo.xml.
# Parse the document and take its root element. The XPath expressions below
# are relative ('.//...'): a path starting with '/' triggers a FutureWarning
# on an ElementTree and is rejected outright on an Element.
tree = ET.parse('./demo.xml')
root = tree.getroot()
# Find every <book> element anywhere below the root.
books = root.findall('.//book')
# print(books)
# attrib is a dict such as {'category': 'cooking'}; read the second book's
# category from it.
attrib = books[1].attrib['category']
print(attrib)
# Price of the second book; the [2] position predicate is 1-based.
price = root.find('.//book[2]/price').text
print(price)
封裝
#coding=utf-8
import xml.etree.ElementTree as ET
import traceback
class ElementTreeXml:
    """Small best-effort wrapper around an XML file parsed with ElementTree.

    The constructor parses ``filePath``. On failure it prints a diagnostic
    and the traceback, then sets ``root`` to ``None`` instead of raising;
    the lookup methods then simply return ``None``.

    Attributes:
        filePath: the path handed to the constructor.
        root: the parsed ``ElementTree``, or ``None`` when parsing failed.
    """

    def __init__(self, filePath):
        """Parse ``filePath`` and keep the resulting tree in ``self.root``."""
        self.filePath = filePath
        try:
            self.root = ET.parse(self.filePath)
        except Exception:
            # traceback.print_exc() returns None, so it must not be
            # concatenated onto the message; print the two separately.
            print('File parse failed!')
            traceback.print_exc()
            # Always define root so later lookups can test it safely.
            self.root = None

    def _find(self, xpath):
        """Return the first element matching ``xpath``, or ``None``.

        Prints a diagnostic when the lookup fails or nothing matches.
        """
        if self.root is None:
            return None
        try:
            # ElementTree treats a leading '/' as './' but emits a
            # FutureWarning; normalise here so callers may keep using the
            # absolute form without the warning.
            if xpath.startswith('/'):
                xpath = '.' + xpath
            element = self.root.find(xpath)
        except Exception:
            print('Element is not found!')
            traceback.print_exc()
            return None
        if element is None:
            print('Element is not found!')
        return element

    def getElementText(self, xpath):
        """Return the text of the first element matching ``xpath``.

        Returns ``None`` when the file was not parsed or nothing matches.
        """
        element = self._find(xpath)
        if element is None:
            return None
        return element.text

    def getElementAttribute(self, xpath, name):
        """Return attribute ``name`` of the first element matching ``xpath``.

        Returns ``None`` when the file was not parsed, nothing matches, or
        the element has no such attribute.
        """
        element = self._find(xpath)
        if element is None:
            return None
        return element.get(name)
if __name__ == '__main__':
    # Demo: print the price and the category of the second <book> in
    # ./demo.xml. Relative './/' paths are used because ElementTree warns
    # about paths that start with '/'.
    doc = ElementTreeXml('./demo.xml')
    book2Text = doc.getElementText('.//book[2]/price')
    book2Attrib = doc.getElementAttribute('.//book[2]', 'category')
    print(book2Text)
    print(book2Attrib)