python 解析 xml

在一個 html 頁面中有如下 xml 代碼:

<table border="1" class="bodyTable" cellpadding="0" cellspacing="0">

<tr class="a">

<th>
Tests</th>

<th>
Errors </th>

<th>
Failures</th>

<th>
Skipped</th>

<th>
Success Rate</th>

<th>
Time</th>
</tr>

<tr class="b">

<td>
499</td>

<td>
9</td>

<td>
49</td>

<td>
4</td>

<td>
87.575%</td>

<td>
17.632</td>
</tr>
</table>
我們要獲得每個 key(Tests、Errors、Failures 等)對應的 value,可以用如下代碼段實現。

from xml.sax.handler import ContentHandler
from xml.sax import parse
from optparse import OptionParser

class HeadlineHandler(ContentHandler):
    """SAX handler that collects the text of every <th> and <td> element.

    The text of each ``<th>`` is appended to ``k_list`` and the text of each
    ``<td>`` is appended to ``v_list``, in document order.  Both lists are
    supplied by the caller and are mutated in place, so the caller reads the
    results from them after parsing.

    Collected text is whitespace-normalised: leading/trailing whitespace is
    removed and internal runs of whitespace collapse to a single space, so
    ``"\\nErrors "`` is stored as ``"Errors"``.
    """

    # Class-level defaults for the "currently inside <th>/<td>" flags;
    # startElement/endElement override them on the instance.
    th = False
    td = False

    def __init__(self, k_list, v_list):
        """Store the caller's output lists and create empty text buffers.

        k_list -- list that receives one string per <th> element.
        v_list -- list that receives one string per <td> element.
        """
        ContentHandler.__init__(self)
        self.k_list = k_list
        self.v_list = v_list
        self.data_k = []
        self.data_v = []

    def startElement(self, name, attrs):
        """Mark that character data now belongs to a <th> or <td>."""
        if name == "th":
            self.th = True
        if name == "td":
            self.td = True

    def endElement(self, name):
        """Flush the buffered text of a finished <th>/<td> to its list."""
        if name == "th":
            self.k_list.append(self._flush(self.data_k))
            self.data_k = []
            self.th = False
        if name == "td":
            self.v_list.append(self._flush(self.data_v))
            self.data_v = []
            self.td = False

    def characters(self, content):
        """Buffer raw character data while inside a <th> or <td>.

        The parser may deliver one text node in several chunks, so nothing
        is stripped here; cleaning a chunk in isolation could glue together
        words that were separated by a line break.
        """
        if self.th:
            self.data_k.append(content)
        if self.td:
            self.data_v.append(content)

    @staticmethod
    def _flush(chunks):
        """Join buffered chunks and normalise their whitespace."""
        # split() with no argument drops leading/trailing whitespace and
        # treats any whitespace run (spaces, tabs, newlines) as a separator.
        return ' '.join(''.join(chunks).split())

def parse_xml(xml_path):
    """Print the report's summary table and tell whether the run was clean.

    xml_path -- path of the XML file to parse (passed straight to
                ``xml.sax.parse``).

    The <th> texts are printed as a header row and the <td> texts as a value
    row, column-aligned, between two separator lines.

    Returns True only when the second and third values (the "Errors" and
    "Failures" columns of the expected table layout) are both the number 0.
    Returns False when either is non-zero, missing, or not an integer,
    because a clean run cannot be confirmed in those cases.
    """
    # Local import keeps this function self-contained; sys.stdout.write is
    # used instead of print so the code runs on both Python 2 and Python 3.
    import sys

    k_list = []
    v_list = []
    parse(xml_path, HeadlineHandler(k_list, v_list))

    # Trim surrounding whitespace so it cannot disturb the column alignment.
    keys = [k.strip() for k in k_list]
    values = [v.strip() for v in v_list]

    # Pad the shorter row so every column has both a header and a value cell.
    columns = max(len(keys), len(values))
    keys += [""] * (columns - len(keys))
    values += [""] * (columns - len(values))

    widths = [max(len(k), len(v)) for k, v in zip(keys, values)]
    header_row = "    ".join(k.ljust(w) for k, w in zip(keys, widths)).rstrip()
    value_row = "    ".join(v.ljust(w) for v, w in zip(values, widths)).rstrip()

    rule = "---------------------------------------------------------------"
    sys.stdout.write("\n".join([rule, header_row, "", value_row, "", rule]) + "\n")

    # The parsed cells are strings, so they must be converted before being
    # compared with 0; comparing a string with the integer 0 is never equal.
    try:
        errors = int(values[1])
        failures = int(values[2])
    except (IndexError, ValueError):
        return False
    return errors == 0 and failures == 0



發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章