python解析xml轉化爲csv

1、測試的xml(test.xml)

<?xml version="1.0" encoding="UTF-8"?>
<BS_TCX>
<ServiceArea>area</ServiceArea>
<Version>1D</Version>
<IPVersion>6</IPVersion>
<MLDVersion>2</MLDVersion>
<NumOfTS>6</NumOfTS>
<TSList>

    <TS>
    <TSId>40F1</TSId>
    <MediaPortNumber>55101</MediaPortNumber>
    <GroupAddress>ff3e::9800:0</GroupAddress>
    <SourceAddress>2404:1a8:ffff:0:ff5b::192</SourceAddress>
    <CaBroadcasterGroupId>01</CaBroadcasterGroupId>
    <NumOfFEC>1</NumOfFEC>
    <FECList>
        <FEC>
            <FECMode>01</FECMode>
            <FECModeInfo>0A0A</FECModeInfo>
        </FEC>
    </FECList>
    <MaximumTSBitRate>10</MaximumTSBitRate>
    <NumOfService>2</NumOfService>
    <ServiceList>
        <Service>
            <ServiceName>hello</ServiceName>
            <ServiceId>0065</ServiceId>
            <LicenseId>0103000400000000</LicenseId>
            <TierBitMask>8000000000000000</TierBitMask>
            <ServiceType>01</ServiceType>
            <RemoteControlKeyId>1</RemoteControlKeyId>
            <StreamType>1b</StreamType>
            <RenderingObligation>92F0</RenderingObligation>
            <RenderingObligationRa>92F0</RenderingObligationRa>
        </Service>
        <Service>
            <ServiceName>EngService</ServiceName>
            <ServiceId>03A1</ServiceId>
            <ServiceType>A4</ServiceType>
        </Service>
    </ServiceList>
    </TS>

    <TS>
    <TSId>40D0</TSId>
    <MediaPortNumber>55102</MediaPortNumber>
    <GroupAddress>ff3e::9800:0</GroupAddress>
    <SourceAddress>2404:1a8:ffff:0:ff5b::193</SourceAddress>
    <CaBroadcasterGroupId>01</CaBroadcasterGroupId>
    <NumOfFEC>1</NumOfFEC>
    <FECList>
        <FEC>
            <FECMode>01</FECMode>
            <FECModeInfo>0A0A</FECModeInfo>
        </FEC>
    </FECList>
    <MaximumTSBitRate>10</MaximumTSBitRate>
    <NumOfService>2</NumOfService>
    <ServiceList>
        <Service>
            <ServiceName>hello</ServiceName>
            <ServiceId>008D</ServiceId>
            <LicenseId>0103000400000000</LicenseId>
            <TierBitMask>8000000000000000</TierBitMask>
            <ServiceType>01</ServiceType>
            <RemoteControlKeyId>4</RemoteControlKeyId>
            <StreamType>1b</StreamType>
            <RenderingObligation>92F0</RenderingObligation>
            <RenderingObligationRa>92F0</RenderingObligationRa>
        </Service>
        <Service>
            <ServiceName>hello</ServiceName>
            <!-- 142 -->
            <ServiceId>008E</ServiceId>
            <LicenseId>0103000400000000</LicenseId>
            <TierBitMask>8000000000000000</TierBitMask>
            <ServiceType>01</ServiceType>
            <StreamType>1b</StreamType>
            <RenderingObligation>DEF0</RenderingObligation>
            <RenderingObligationRa>DEF0</RenderingObligationRa>
        </Service>
    </ServiceList>
    </TS>

</TSList>
<SIMulticast>
    <MediaPortNumber>65010</MediaPortNumber>
    <GroupAddress>ff3e::9800:0</GroupAddress>
    <SourceAddress>2404:1a8:ffff:0:ff5b::10d</SourceAddress>
</SIMulticast>
</BS_TCX>

2、python腳本(Xml_To_CSV.py)

#!/usr/bin/python
# -*- coding: utf-8 -*- 

import sys 
import httplib
import datetime
import csv
import os


try: 
    import xml.etree.cElementTree as ET 
except ImportError: 
    import xml.etree.ElementTree as ET 

def printf(strings):
    """Print a timestamped log line and return the printed text.

    Args:
        strings: The message to log. Any object is accepted; it is rendered
            with ``%s`` formatting, so exceptions and numbers can be passed
            directly (plain concatenation raised TypeError for non-strings).

    Returns:
        The formatted line, ``"[YYYY-mm-dd_HH:MM:SS] <message>"``.
    """
    now_string = datetime.datetime.now().strftime('%Y-%m-%d_%H:%M:%S')
    line = "[%s] %s" % (now_string, strings)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(line)
    return line

def getXml(outputfile,host,url):
    """Download ``url`` from ``host`` over HTTP and save the body to a file.

    The request is a plain GET to port 80 with a 100 second timeout. The
    response status line is logged, and the body is written to ``outputfile``
    regardless of the status code.

    This is best-effort: any exception is logged through ``printf`` and
    swallowed, so the caller is not told whether the download succeeded.

    Args:
        outputfile: Path of the file to (over)write with the response body.
        host: Host name to connect to.
        url: Request path sent in the GET line.
    """
    conn = None
    try:
        printf("downloading start!")
        conn = httplib.HTTPConnection(host, 80, timeout=100)
        conn.request('GET', url)

        # response is an HTTPResponse object
        response = conn.getresponse()
        printf(str(response.status) + " " + response.reason)

        printf("downloading,pls wait......")
        # Binary mode keeps the payload byte-for-byte (text mode rewrites
        # newlines on Windows); "with" closes the file even if read/write fails.
        with open(outputfile, "wb") as f:
            f.write(response.read())
        printf("downloading complete!")

    except Exception as e:
        # Convert explicitly: printf builds its line by string concatenation.
        printf(str(e))
    finally:
        if conn:
            conn.close()

def _child_text(parent, tag):
    """Return the text of the first ``tag`` child of ``parent``, or '' if absent."""
    if parent is None:
        return ''
    return parent.findtext(tag, '')


def _open_csv_for_write(path):
    """Open ``path`` for csv output in the mode the running interpreter needs."""
    if sys.version_info[0] < 3:
        # Python 2's csv module writes byte strings and wants a binary file.
        return open(path, 'wb')
    # Python 3's csv module wants a text file with newline translation off.
    return open(path, 'w', newline='', encoding='utf-8')


def getCSV_BS(inputfile, outputfile):
    """Flatten a BS_TCX XML file into a CSV file with one row per <TS>.

    Every row repeats the document-level values (ServiceArea, Version,
    IPVersion, MLDVersion, NumOfTS and the SIMulticast block) next to the
    values of one <TS> element. Only the first <FEC> and the first <Service>
    of each <TS> are exported; further entries are ignored. Any element that
    is missing yields an empty cell.

    The output is written only after the whole document has been converted,
    so a failure does not leave a partially written CSV behind.

    Args:
        inputfile: Path of the XML file to read.
        outputfile: Path of the CSV file to (over)write.

    Raises:
        SystemExit: With exit code 1 when ``inputfile`` cannot be parsed.
        ValueError: When a <TSId> value is not a hexadecimal number.
    """
    # CSV header; the order here is the column order of the output.
    FIELDS = ['ServiceArea','Version', 'IPVersion', 'MLDVersion', 'NumOfTS',
              'TSId','MediaPortNumber','GroupAddress','SourceAddress','CaBroadcasterGroupId','LinkageDescriptorURL','NumOfFEC','FECMode','FECModeInfo','MaximumTSBitRate',
              'NumOfService','ServiceName','ServiceId','LicenseId','TierBitMask','ServiceType','StreamType','KeyPuID','ContractCrid','UncontractCrid','RemoteControlKeyId',
              'RenderingObligation','RenderingObligationRa','SIMulticast_MediaPortNumber','SIMulticast_GroupAddress','SIMulticast_SourceAddress']

    try:
        root = ET.parse(inputfile).getroot()
    except Exception as e:
        print("Error:cannot parse file:%s." % inputfile)
        print(e)
        sys.exit(1)

    # Values that are identical on every row.
    shared = {}
    for tag in ('ServiceArea', 'Version', 'IPVersion', 'MLDVersion', 'NumOfTS'):
        shared[tag] = _child_text(root, tag)
    si_multicast = root.find('SIMulticast')
    for tag in ('MediaPortNumber', 'GroupAddress', 'SourceAddress'):
        shared['SIMulticast_' + tag] = _child_text(si_multicast, tag)

    ts_tags = ('MediaPortNumber', 'GroupAddress', 'SourceAddress',
               'CaBroadcasterGroupId', 'LinkageDescriptorURL', 'NumOfFEC',
               'MaximumTSBitRate', 'NumOfService')
    fec_tags = ('FECMode', 'FECModeInfo')
    service_tags = ('ServiceName', 'ServiceId', 'LicenseId', 'TierBitMask',
                    'ServiceType', 'KeyPuID', 'ContractCrid', 'UncontractCrid',
                    'RemoteControlKeyId', 'StreamType', 'RenderingObligation',
                    'RenderingObligationRa')

    rows = []
    ts_list = root.find('TSList')
    ts_nodes = ts_list.findall('TS') if ts_list is not None else []
    for ts in ts_nodes:
        row = dict(shared)

        # TSId is hexadecimal in the XML and decimal in the CSV. It is
        # re-evaluated for every TS so a missing TSId gives an empty cell
        # instead of reusing the previous row's value.
        ts_id_hex = _child_text(ts, 'TSId').strip()
        row['TSId'] = int(ts_id_hex, 16) if ts_id_hex else ''

        for tag in ts_tags:
            row[tag] = _child_text(ts, tag)

        fec_list = ts.find('FECList')
        first_fec = fec_list.find('FEC') if fec_list is not None else None
        for tag in fec_tags:
            row[tag] = _child_text(first_fec, tag)

        service_list = ts.find('ServiceList')
        first_service = (service_list.find('Service')
                         if service_list is not None else None)
        for tag in service_tags:
            row[tag] = _child_text(first_service, tag)

        rows.append(row)

    # Write the CSV in one pass instead of reopening the file for every row.
    with _open_csv_for_write(outputfile) as f1:
        writer = csv.DictWriter(f1, fieldnames=FIELDS)
        writer.writeheader()
        writer.writerows(rows)


if __name__ == '__main__':
    # Python 2 only: make utf-8 the default codec for implicit str/unicode
    # conversions. Neither reload() nor sys.setdefaultencoding() is available
    # on Python 3, so the hack is skipped there.
    if sys.version_info[0] < 3:
        reload(sys)
        sys.setdefaultencoding('utf-8') #@UndefinedVariable

    file_bs = "test.xml"
    outputfile_bs = "test.csv"
    try:
        printf("get test.xml from internet")
        # Download step is disabled; test.xml must already exist locally.
        #getXml(file_bs,"tspstbvod1.plala.iptvf.jp","/bs_stblab/ngn-e/bs/extended/bs_tcx.xml")
        getCSV_BS(file_bs, outputfile_bs)
        printf("test.xml success")
    except Exception as e:
        print(e)
        printf("test.xml fail")

    # "pause" is a Windows shell builtin that keeps the console window open
    # after a double-click launch; skip it on other platforms.
    if os.name == 'nt':
        os.system("pause")

電腦配置了 Python 2 的運行環境後,將 test.xml 與 Xml_To_CSV.py 放在同一目錄,雙擊 py 文件即可在該目錄下生成 test.csv。

發佈了108 篇原創文章 · 獲贊 39 · 訪問量 9萬+
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章