class WriteXmlError(Exception):
    """Raised by PtWriteXml.close() when the document cannot be written."""


class ReadXMLError(Exception):
    """Signals that items read back from XML do not match what was expected."""


class WriteBadKeyError(Exception):
    """Raised by PtWriteXml.addResItem() when the key is not a string."""


class WriteBadValueError(Exception):
    """Raised by PtWriteXml.addResItem() when the value is not a string."""


class ReadBadKeyError(Exception):
    """Reserved for bad keys found while reading (not raised in this module)."""


class ReadBadValueError(Exception):
    """Reserved for bad values found while reading (not raised in this module)."""


class InitXmlError(Exception):
    """Raised when the minidom document cannot be created or parsed."""


class CreateChildError(Exception):
    """Raised when an item element cannot be created or appended."""
class PtWriteXml:
    """Build a flat XML document of key/value items and write it to a file.

    Every item becomes one ``NODE`` element below the ``ROOT`` document
    element; its key and value are stored in the ``KEY`` and ``VALUE``
    attributes.  ``ROOT``, ``NODE``, ``KEY`` and ``VALUE`` are module-level
    constants defined outside this class.
    """

    def __init__(self, xmlFile):
        """Create an empty document whose root element is named ``ROOT``.

        xmlFile -- path that close() writes the document to.

        Raises InitXmlError if the minidom document cannot be created.
        """
        self.__ok = 0
        try:
            impl = xml.dom.minidom.getDOMImplementation()
            self.doc = impl.createDocument(None, ROOT, None)
            self.root = self.doc.documentElement
            self.xmlFile = xmlFile
        except Exception:
            raise InitXmlError("init xml.dom.minidom doc error")
        self.__ok = 1

    def close(self):
        """Write the document to ``self.xmlFile`` as tab-indented UTF-8 XML.

        Raises WriteXmlError if the file cannot be opened or written.
        """
        try:
            import codecs
            # Binary mode: the codec writer does the encoding itself.
            stream = open(self.xmlFile, 'wb')
            try:
                writer = codecs.getwriter('utf-8')(stream)
                self.doc.writexml(writer, indent='', addindent='\t',
                                  newl='\n', encoding='utf-8')
            finally:
                # Always release the handle, even when writing fails.
                stream.close()
        except Exception:
            raise WriteXmlError("write xml file error")

    def _text_or_empty(self, text):
        """Return *text*, or '' when it is falsy; raise if it is not a string."""
        if not text:
            return ''
        # Concatenating with a string raises for non-string values (e.g. 1).
        text + ''
        return text

    def addResItem(self, key, value):
        """Append one ``NODE`` element carrying *key* and *value* attributes.

        Falsy keys/values (``None``, ``''``) are stored as the empty string.
        Does nothing once an earlier error has marked the writer as not ok.

        Raises:
            CreateChildError   -- the element cannot be created or appended.
            WriteBadKeyError   -- *key* is not a string.
            WriteBadValueError -- *value* is not a string.
        """
        if not self.__ok:
            return
        try:
            item = self.doc.createElement(NODE)
        except Exception:
            self.__ok = 0
            raise CreateChildError("create xml child node error")
        try:
            item.setAttribute(KEY, self._text_or_empty(key))
        except Exception:
            self.__ok = 0
            raise WriteBadKeyError("key must be string, not other things")
        try:
            item.setAttribute(VALUE, self._text_or_empty(value))
        except Exception:
            self.__ok = 0
            raise WriteBadValueError("value must be string, not other things")
        try:
            self.root.appendChild(item)
        except Exception:
            self.__ok = 0
            raise CreateChildError("create xml child node error")
class PtReadXml:
    """Read the key/value items of an XML file through xml.dom.minidom.

    Items are the element children of the document element; each one carries
    its key and value in the ``KEY`` and ``VALUE`` attributes (module-level
    constants defined outside this class).
    """

    def __init__(self, xmlFile):
        """Parse *xmlFile*; raise InitXmlError if it cannot be parsed."""
        self.__ok = 0
        self.level = 0
        try:
            self.doc = xml.dom.minidom.parse(xmlFile)
            self.root = self.doc.documentElement
        except Exception:
            raise InitXmlError("init xml.dom.minidom doc error")
        self.__ok = 1

    def close(self):
        """Nothing to release; present for symmetry with the writer."""
        pass

    def getResItems(self):
        """Return a dict mapping each item's KEY attribute to its VALUE.

        Only element children of the root are considered, so whitespace text
        nodes between items are ignored whether or not the file is indented.
        Returns None when the reader is not usable or an error occurs while
        collecting the items (the reader is then marked as not ok).
        """
        if not self.__ok:
            return None
        items = {}
        key = None
        try:
            for node in self.root.childNodes:
                if node.nodeType != node.ELEMENT_NODE:
                    continue
                key = node.getAttribute(KEY)
                items[key] = node.getAttribute(VALUE)
        except Exception:
            # ``key`` is the last key handled (None if none was reached yet).
            print('get items error : %s' % key)
            self.__ok = 0
            return None
        return items

    def getLevel(self):
        """Return the nesting depth below the root: 1 (flat) or 2 (nested).

        The result is 2 as soon as any child of the root has a child of its
        own.  If the root cannot be inspected, the previous ``self.level`` is
        returned unchanged.
        """
        try:
            children = self.root.childNodes
        except AttributeError:
            return self.level
        self.level = 1
        for node in children:
            if getattr(node, 'firstChild', None) is not None:
                self.level = 2
                break
        return self.level
'''peter.lee : clfff.peter@...'''
import unittest
from RWXML import PtWriteXml
from RWXML import PtReadXml
import RWXML
import sys
# Path of the XML file that the tests below write with PtWriteXml and read
# back with PtReadXml.
FILENAME = r'd:/text.xml'
# Earlier tuple-based fixtures, disabled by wrapping them in a string literal.
'''
good_items = ( ('three', 'third'),
('1', 'one'),
('2', 'two'))
bad_items = ( (1, 'one'),
('two', 2),
(3,3))
'''
# Key/value pairs written by testW_XML and checked by testR_XML.
# The None value for '4' is expected to read back as the empty string.
good_items = { 'three' : 'third',
'1' : 'one',
'2' : 'two',
'4' : None}
# Non-string key and value; the bad-data tests expect addResItem to reject
# them with WriteBadKeyError / WriteBadValueError.
bad_key = 1
bad_value = 1
def testR_XML():
    """Read FILENAME back and check that it holds exactly ``good_items``.

    Falsy expected values (such as None) must read back as the empty string.

    Raises RWXML.ReadXMLError when nothing could be read or when the items
    differ from the expectation in any way (missing, extra or changed keys).
    """
    rx = PtReadXml(FILENAME)
    try:
        items = rx.getResItems()
        if items is None:
            # The reader gave up; previously this surfaced as a TypeError.
            raise RWXML.ReadXMLError
        print('----------items---------------')
        for key in items:
            print(key)
        # Compare against the full expectation so that items that were lost
        # on the way are detected too, not only the ones that came back.
        expected = dict((key, value if value else '')
                        for key, value in good_items.items())
        if dict(items) != expected:
            raise RWXML.ReadXMLError
    finally:
        rx.close()
def testW_XML():
    """Write every pair of ``good_items`` to FILENAME."""
    writer = PtWriteXml(FILENAME)
    for key, value in good_items.items():
        writer.addResItem(key, value)
    writer.close()
class NormalRW(unittest.TestCase):
    # Round trip: write the fixture items, then read them back and verify.

    def testRW_XML(self):
        testW_XML()
        testR_XML()
class PTWriteXmlBadStructur(NormalRW):
    # A file produced by the writer must be flat: items directly below the
    # root and nothing nested inside them.

    def testWriteBadStructur(self):
        testW_XML()
        reader = PtReadXml(FILENAME)
        depth = reader.getLevel()
        self.assertEqual(depth, 1)
        reader.close()
class PTWriteXmlBadData(unittest.TestCase):
    # The writer must reject keys and values that are not strings.

    def testWriteBadKey(self):
        writer = PtWriteXml(FILENAME)
        self.assertRaises(RWXML.WriteBadKeyError,
                          writer.addResItem, bad_key, 'temp value')
        writer.close()

    def testWriteBadValue(self):
        writer = PtWriteXml(FILENAME)
        self.assertRaises(RWXML.WriteBadValueError,
                          writer.addResItem, 'temp key', bad_value)
        writer.close()

    # Disabled tests, kept as an inert string literal.
    '''
    def testWriteNoneKey(self):
        wx = PtWriteXml(FILENAME)
        self.assertRaises(RWXML.WriteBadKeyError, wx.addResItem, None, 'temp value')
        wx.close()
    def testWriteBadItem(self):
        wx = PtWriteXml(FILENAME)
        self.assertRaises(RWXML.WriteBadKeyError, wx.addResItem, bad_items[2][0], bad_items[2][1])
        wx.close()
    '''
# Disabled test case, kept as an inert string literal (it is never executed).
# NOTE(review): the body calls wx.addResItem and indexes k_2_v, but only rx is
# created here and k_2_v is not defined in the visible code -- it would need
# rework before being re-enabled.
'''
class PTReadXmlBadData(unittest.TestCase):
def testWriteBadKey(self):
rx = PtReadXml(FILENAME)
self.assertRaises(RWXML.ReadBadKeyError, wx.addResItem, k_2_v[0][0], k_2_v[0][1])
rx.close()
def testWriteBadValue(self):
rx = PtReadXml(FILENAME)
self.assertRaises(RWXML.ReadBadValueError, wx.addResItem, k_2_v[1][0], k_2_v[1][1])
rx.close()
def testWriteBadItem(self):
rx = PtReadXml(FILENAME)
self.assertRaises(RWXML.ReadBadKeyError, wx.addResItem, k_2_v[2][0], k_2_v[2][1])
rx.close()
'''
# Run every TestCase in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
以前用Python中的minidom寫過生成XML文件的程序,現在需要讀取XML文件中的內容了,首先想到的還是minidom模塊.一番編寫測試後,如願掌握了其函數的使用方式,和AJAX中的DOM操作沒什麼區別.
以前就知道elementtree在處理XML文件時廣受Python程序員的歡迎,也安裝過elementtree的安裝包,現在使用的Python2.5中已將其收錄了.既然我要處理XML文件,當然也要學着使用更高效和易用的模塊了.自己摸索了半天,除了有關名字空間的函數沒有試用外,其它函數都試用過了.以後處理XML文件可以得心應手了。
下面是一個簡單的例子,通過它可以知道各個函數的使用方法:
from xml.etree.ElementTree import ElementTree
from xml.etree.ElementTree import Element
from xml.etree.ElementTree import SubElement
from xml.etree.ElementTree import dump
from xml.etree.ElementTree import Comment
from xml.etree.ElementTree import tostring

'''
<?xml version="1.0"?>
<PurchaseOrder>
  <account refnum="2390094"/>
  <item sku="33-993933" qty="4">
    <name>Potato Smasher</name>
    <description>Smash Potatoes like never before.</description>
  </item>
</PurchaseOrder>
'''

## Writing the content to xml document
book = ElementTree()
purchaseorder = Element('PurchaseOrder')
book._setroot(purchaseorder)

SubElement(purchaseorder, 'account', {'refnum' : "2390094"})

item = Element("item", {'sku' : '33-993933', 'qty' : '4'})
purchaseorder.append(item)
print(item.items())     # [('sku', '33-993933'), ('qty', '4')]
print(item.attrib)      # {'sku': '33-993933', 'qty': '4'}
print(item.get('sku'))  # 33-993933
SubElement(item, 'name').text = "Potato Smasher"
SubElement(item, 'description').text = "Smash Potatoes like never before."

#book.write('book.xml',"utf-8")
#print tostring(purchaseorder)
#import sys
#book.write(sys.stdout)
#dump(book)

## Displaying the content of the xml document
print(purchaseorder.find('account'))
print(purchaseorder.find('account').get('refnum'))
print(purchaseorder.findall('account')[0].get('refnum'))
print(purchaseorder.find('item/name'))
print(purchaseorder.find('item/name').text)

## How to use ElementTree([element,] [file])
## 1. From standard XML element, it becomes root element
print(ElementTree(item).getroot().find('name').text)
## 2. From XML file -- done in the __main__ section below, after book.xml
##    has actually been written.

## Create an iterator
# iter() replaces getiterator(), which no longer exists in current Python.
for element in purchaseorder.iter():
    print(element.tag)


## Get pretty look
def indent(elem, level=0):
    """Add newline/indent whitespace to *elem* in place and return it.

    Each nesting level is indented by one more space.  Only missing or
    whitespace-only ``text``/``tail`` values are replaced, so real text
    content is never altered.
    """
    i = "\n" + level*" "
    if len(elem):
        if not elem.text or not elem.text.strip():
            elem.text = i + " "
        for e in elem:
            indent(e, level+1)
        # ``e`` is now the last child: its tail closes the parent, so it is
        # indented like the parent rather than like the children.
        if not e.tail or not e.tail.strip():
            e.tail = i
    if level and (not elem.tail or not elem.tail.strip()):
        elem.tail = i
    return elem


if __name__=="__main__":
    dump(indent(purchaseorder))
    book.write('book.xml',"utf-8")
    # Read the file back only after it has been written.
    print(ElementTree(file='book.xml').getroot().find('item/description').text)
XML文件,我只用它作配置文件用。
由於Python有些問題曾經困擾得我很噁心,粘點代碼以表懷念!
一個用PYTHON操作XML文件的類。
功能:
可以讓Python輸出帶有換行與縮排、像一般XML文件那樣整齊美觀的內容。
暫定只能讀寫GB2312編碼的文件。
# -*- coding:cp936 -*-
# Author : PESoft
#__________________________________________________________
from xml.dom.minidom import parse, parseString
from xml.parsers.expat import ExpatError
import string
import re
#__________________________________________________________
# Read a file (the XML files handled here are GB2312-encoded)
def LoadFileContent( filename ):
    """Return the whole content of *filename*, or None if it cannot be read.

    An IOError while opening or reading is reported on stdout instead of
    being raised.  The file handle is always closed.
    """
    try:
        source = open(filename, 'r')
        try:
            return source.read()
        finally:
            # Release the handle even when read() fails.
            source.close()
    except IOError:
        print("%s %s %s" % ("Error : ", filename, "未找到。"))
        #raise IOError
        return None
#__________________________________________________________
# Write a file, encoding: GB2312
def SaveFileContent( filename, strbuf ):
    """Write *strbuf* to *filename*, preceded by a GB2312 XML declaration.

    *strbuf* is expected to be already encoded as the declaration states.
    An IOError while opening or writing is reported on stdout instead of
    being raised.  The file handle is always closed.
    """
    strbuf = '<?xml version="1.0" encoding="GB2312"?>\n\n' + strbuf
    try:
        target = open(filename,'w')
        try:
            target.write(strbuf)
        finally:
            # Release the handle even when write() fails.
            target.close()
    except IOError:
        print("%s %s %s" % ("Error : ", filename, "未找到。"))
        #raise IOError
#__________________________________________________________
class XMLParse:
    """ XML file helper built on xml.dom.minidom.

    Strings outside this class are GBK.
    Strings inside this class are UTF-8.
    Data returned from this class is always GBK.
    """

    # <tag> + whitespace + text without blanks + whitespace + </tag>:
    # used to pull such text back onto one line with its tags.
    _TEXT_ONLY_ELEMENT = re.compile(r'(<[^/<>\t\n]+>)\s*([^<>\s]+)\s*(</.+>)')
    # A run of tabs/newlines that ends in a newline, i.e. blank lines.
    _BLANK_LINES = re.compile(r'[\t\n]*\n')

    #__________________________________________________________
    def __init__(self, fname) :
        """Remember *fname*; nothing is read until readFile() is called."""
        self.document = 0
        self.__rootElm = 0
        self.__filename = fname
        self.__file = 0
        self.__strbuf = ''

    #__________________________________________________________
    def readFile(self):
        """Load and parse the file, filling ``document`` and the root element.

        The GBK content is converted to UTF-8 and a ``gb2312`` encoding named
        in the XML declaration is rewritten to ``utf-8`` before parsing.
        Parse and I/O errors are reported on stdout instead of being raised.
        """
        try:
            raw = LoadFileContent(self.__filename)
            if raw is None:
                # Nothing was read, so there is nothing to parse.
                return
            # Convert the GB2312/GBK bytes to text (already text on Python 3).
            if isinstance(raw, bytes):
                text = raw.decode('gbk')
            else:
                text = raw
            end = text.find('>')
            # Only touch a real XML declaration; otherwise the first tag of
            # the document itself would be lowercased and no longer match.
            if end != -1 and text[:5].lower() == '<?xml':
                head = text[:end].lower().replace( 'gb2312', 'utf-8' )
                text = head + text[end:]
            self.__strbuf = text.encode('UTF-8')
            self.document = parseString(self.__strbuf)
            self.__rootElm = self.document.documentElement
        except ExpatError:
            print("%s %s %s" % ("Error : ", "讀文件", self.__filename))
        except IOError:
            print("%s %s %s" % ("Error : ", self.__filename, "未找到。"))

    #__________________________________________________________
    def writeFile(self):
        """Pretty-print the document and save it GBK-encoded to the file.

        Elements holding only a single word of text are kept on one line and
        blank lines are removed from minidom's pretty-printed output.
        """
        pretty = self.document.documentElement.toprettyxml()
        pretty = self._TEXT_ONLY_ELEMENT.sub(r'\1\2\3', pretty)
        pretty = self._BLANK_LINES.sub('\n', pretty)
        self.__strbuf = pretty.encode('gbk')
        SaveFileContent(self.__filename, self.__strbuf)

    def FindRootElm( self ):
        """Return the document element (and remember it as the root)."""
        #self.__rootElm = self.document.firstChild
        self.__rootElm = self.document.documentElement
        return self.__rootElm

    def FindChildElm( self, element, rootelm = None ):
        """Return the first child named *element*, or None if there is none.

        The children of *rootelm* are searched, or those of the document
        root when *rootelm* is None.  A *rootelm* that is not an element node
        has no searchable children.
        """
        if rootelm is None:
            node = self.__rootElm.firstChild
        elif rootelm.nodeType == rootelm.ELEMENT_NODE:
            node = rootelm.firstChild
        else:
            node = None
        while node is not None:
            if node.nodeName == element:
                return node
            node = node.nextSibling
        return None

    def FindSibling( self, element, rootelm = None ):
        """Return the next sibling element of *rootelm* named *element*.

        Returns None when there is no such sibling, or when *rootelm* is None
        (no starting point was given).
        """
        if rootelm is None:
            return None
        node = rootelm.nextSibling
        while node is not None:
            if node.nodeType == node.ELEMENT_NODE and node.nodeName == element:
                return node
            node = node.nextSibling
        return None

    def GetElementData( self, element ) :
        """Return the stripped, GBK-encoded data of the first text child.

        Text and CDATA children count as text.  Returns None when *element*
        is not an element node or has no such child.
        """
        if element.nodeType != element.ELEMENT_NODE:
            return None
        for node in element.childNodes:
            if node.nodeType in (node.TEXT_NODE, node.CDATA_SECTION_NODE):
                return node.data.encode("gbk").strip()
        return None