Python格式化xml並轉爲字典

xml處理

一、格式化xml

import requests  # imported here so that this snippet runs on its own

# Unformatted, single-line sample document that the service should pretty-print.
xml_text = '<?xml version="1.0" encoding="ISO-8859-1"?><note><to>George</to><from>John</from><heading>Reminder</heading><body>Do not forget the meeting!</body></note>'
url = "http://web.chacuo.net/formatxml"
# Headers copied from a browser session against the formatter page.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36",
    "Host": "web.chacuo.net",
    "X-Requested-With": "XMLHttpRequest",
    "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
}
# The XML travels in the "data" form field; "type" selects the format action.
form_data = {"data": xml_text, "type": "format", "beforeSend": "undefined"}
resp = requests.post(url, data=form_data, headers=headers, timeout=20)
# Fail with a clear HTTP error instead of trying to decode an error page as JSON.
resp.raise_for_status()
# The reply is JSON; the first entry of its "data" list is printed as the result.
print(resp.json()['data'][0])

二、將xml轉爲字典(import xmltodict)

  • xmltodict.parse()方法可以將xml字符串轉換爲字典
  • xmltodict.unparse()方法可以將字典轉換爲xml字符串
import xmltodict
# Single-line sample document (the same note used in the formatting example).
format_ed_xml = '<?xml version="1.0" encoding="ISO-8859-1"?><note><to>George</to><from>John</from><heading>Reminder</heading><body>Do not forget the meeting!</body></note>'
# parse() builds a nested mapping: each element name becomes a key and its
# text content the value, as the recorded output below shows.
dict_xml = xmltodict.parse(format_ed_xml)
print(dict_xml)
# Output recorded by the original article:
# OrderedDict([('note', OrderedDict([('to', 'George'), ('from', 'John'), ('heading', 'Reminder'), ('body', 'Do not forget the meeting!')]))])
# NOTE(review): newer xmltodict releases may return a plain dict, in which case
# the printed form is {'note': {...}} instead -- confirm against the installed version.

三、 xml.parsers.expat.ExpatError: XML or text declaration not at start of entity報錯解決方法

  • 該錯誤通常是因爲 <?xml ...?> 聲明之前存在空白或其他字符(xml聲明必須位於文檔的最開頭);可按步驟一的方式,先將xml字符串格式化,然後再轉字典,或者先去掉字符串開頭的空白(例如 text.lstrip())再解析;

四、完整代碼如下

  • 格式化xml>xml轉字典>保存爲xml文件
import requests
import xmltodict


def pretty_xml(text: str) -> str:
    """
    Pretty-print an XML string through the web.chacuo.net online formatter.

    The text is POSTed as form data to the formatter endpoint; the first entry
    of the ``data`` list in the JSON reply is printed and returned.

    :param text: the unformatted XML string
    :return: the formatted string taken from the service's reply
    :raises requests.HTTPError: if the service answers with an HTTP error status
    """
    url = "http://web.chacuo.net/formatxml"
    # Headers copied from a browser session against the formatter page.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36",
        "Host": "web.chacuo.net",
        "X-Requested-With": "XMLHttpRequest",
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
    }
    # The XML travels in the "data" form field; "type" selects the format action.
    form_data = {"data": text, "type": "format", "beforeSend": "undefined"}
    resp = requests.post(url, data=form_data, headers=headers, timeout=20)
    # Surface HTTP failures directly rather than as a confusing JSON decode error.
    resp.raise_for_status()
    # Decode the body once and reuse the value for both the print and the return.
    formatted = resp.json()['data'][0]
    print(formatted)
    return formatted


def save_xml(pretty_xml_str: str, path: str = "test.xml") -> None:
    """
    Write an XML string to a file as UTF-8 text.

    :param pretty_xml_str: the XML text to store
    :param path: destination file; defaults to ``test.xml`` in the current
        working directory, which is where the function always wrote before
        this parameter existed
    """
    # Mode "w" truncates an existing file, so every call replaces its content.
    with open(path, "w", encoding="utf-8") as fp:
        fp.write(pretty_xml_str)


def xml_to_dict(format_ed_xml: str) -> dict:
    """
    Parse an XML string into a dictionary, print the note body, and return the dictionary.

    The print step expects a ``<note>`` root element with a ``<body>`` child;
    documents of any other shape fail at that lookup.

    :param format_ed_xml: the XML text to parse
    :return: the mapping produced by ``xmltodict.parse``
    """
    # An XML declaration is only legal at the very start of the document, so
    # leading whitespace would make expat raise "XML or text declaration not
    # at start of entity"; strip it before parsing.
    dict_xml = xmltodict.parse(format_ed_xml.lstrip())
    print(f"\n>>>>{dict_xml['note']['body']}")
    # Hand the parsed mapping back so callers can actually use the conversion.
    return dict_xml


if __name__ == "__main__":
    # Demo pipeline: format the raw sample, then feed the formatted text to
    # the dictionary conversion and finally to the file writer.
    raw_note = '<?xml version="1.0" encoding="ISO-8859-1"?><note><to>George</to><from>John</from><heading>Reminder</heading><body>Do not forget the meeting!</body></note>'
    formatted_note = pretty_xml(raw_note)
    # Order matters for the console output: convert first, save second.
    for consume in (xml_to_dict, save_xml):
        consume(formatted_note)
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章