【pandas 小记】pandas 读写xml文件


import xml.etree.ElementTree as ET
from xml.sax.saxutils import escape

import pandas as pd

def iter_records(records):
    """
    Generate one dictionary per record element.

    For every child of each record, the child's ``var_name`` attribute is
    used as the key and the child's text content as the value.

    :param records: iterable of record elements (e.g. the XML root element)
    :return: generator yielding a ``{var_name: text}`` dict for each record
    """
    for node in records:
        # Build the whole key/value mapping for this record in one pass
        yield {child.attrib['var_name']: child.text for child in node}


def read_xml(xmlFileName):
    """
    Read an XML file and return its records as a DataFrame.

    Every child of the root element is treated as one record (one row); the
    record's children are turned into columns by ``iter_records``.

    :param xmlFileName: path of the XML file to read
    :return: ``pandas.DataFrame`` with one row per record element
    """
    # Give the path to the parser instead of a text-mode file object: the
    # parser then reads raw bytes and honours the encoding named in the XML
    # declaration, rather than whatever the platform's default text encoding
    # happens to be.
    tree = ET.parse(xmlFileName)
    root = tree.getroot()
    # Walk the records under the root and collect them into a DataFrame
    return pd.DataFrame(list(iter_records(root)))


def xml_encode(row):
    """
    Convert one row of data into a ``<record>`` XML fragment.

    Each field of the row becomes a ``<var var_name="FIELD">VALUE</var>``
    line. Field names and values are XML-escaped so that characters such as
    ``&``, ``<``, ``>`` and ``"`` cannot produce malformed markup.

    :param row: row to serialize; its index supplies the field names
    :return: the XML fragment as a single newline-joined string
    """
    # Opening <record> tag; the tag name can be changed as needed
    xmlItem = ['  <record>']
    for field in row.index:
        # The name goes inside a double-quoted attribute, so quotes must be
        # escaped in addition to the standard &, < and >.
        name = escape(str(field), {'"': '&quot;'})
        value = escape(str(row[field]))
        xmlItem.append('  <var var_name="{0}">{1}</var>'.format(name, value))

    # Closing </record> tag
    xmlItem.append('  </record>')

    return '\n'.join(xmlItem)


def write_xml(xmlFileName, data):
    """
    Write a DataFrame to an XML file.

    The output consists of an XML declaration, a ``<records>`` root element
    and one fragment per row produced by ``xml_encode``.

    :param xmlFileName: path of the XML file to create or overwrite
    :param data: DataFrame whose rows are written as records
    :return: None
    """
    # The header below declares UTF-8, so the file must actually be written
    # as UTF-8 instead of the platform's default text encoding.
    with open(xmlFileName, 'w', encoding='utf-8') as xmlfile:
        # Write the header
        xmlfile.write(
            '<?xml version="1.0" encoding="UTF-8"?>'
        )
        xmlfile.write('<records>\n')   # root tag name can be changed as needed

        # Write the data, one encoded fragment per row
        xmlfile.write(
            '\n'.join(data.apply(xml_encode, axis=1))
        )
        xmlfile.write('\n</records>')   # must match the root tag opened above


# Example usage: read a sample XML file into a DataFrame and print it.
# NOTE(review): "xml_filenane" looks like a typo for "xml_filename"; it is
# left unchanged here because it is a module-level name.
xml_filenane = r'E:\data\realEstate_trans.xml'
new_filename = r'E:\data\realEstate_trans_output.xml'
xml_df = read_xml(xml_filenane)    # load the XML file into a DataFrame
print(xml_df)
# write_xml(new_filename, xml_df)  # uncomment to write the DataFrame to a new XML file

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章