import xml.etree.ElementTree as ET
import pandas as pd
def iter_records(records):
    """
    Lazily turn each record element into a plain dict.

    Every child of a record contributes one entry: the child's
    ``var_name`` attribute becomes the key and the child's text becomes
    the value.

    :param records: iterable of record elements; each record must itself
        be iterable over children exposing ``attrib`` and ``text``
    :return: generator yielding one ``{var_name: text}`` dict per record
    """
    for node in records:
        # If a var_name repeats within one record, the last child wins.
        yield {child.attrib['var_name']: child.text for child in node}
def read_xml(xmlFileName):
    """
    Parse an XML file and return its records as a DataFrame.

    The children of the document root are passed to ``iter_records``;
    each dict it yields becomes one row of the result.

    :param xmlFileName: path of the XML file to read
    :return: ``pandas.DataFrame`` built from the yielded dicts
    """
    # Open in binary mode so the XML parser decodes the bytes itself and
    # honours the document's encoding declaration. Text mode would decode
    # with the platform default encoding first, which corrupts or rejects
    # non-ASCII content whenever that default differs from the file's
    # actual encoding.
    with open(xmlFileName, 'rb') as xml_file:
        tree = ET.parse(xml_file)
    # Start from the root element and collect every record into the frame.
    root = tree.getroot()
    return pd.DataFrame(list(iter_records(root)))
def xml_encode(row):
    """
    Serialize one row as a ``<record>`` XML fragment.

    Each label in ``row.index`` produces one
    ``<var var_name="LABEL">VALUE</var>`` line. Labels and values are
    XML-escaped so that characters such as ``&``, ``<``, ``>`` (and ``"``
    inside the attribute) cannot produce malformed output.

    :param row: a ``pandas.Series`` (e.g. one DataFrame row) whose index
        holds the field names
    :return: the multi-line XML fragment as a single string
    """
    # Function-scope import keeps the block self-contained.
    from xml.sax.saxutils import escape

    # Opening <record> tag; the tag name can be changed as needed.
    parts = [' <record>']
    for field in row.index:
        # format() reproduces exactly the text str.format() would insert,
        # so output is unchanged for data without special characters.
        name = escape(format(field), {'"': '&quot;'})
        value = escape(format(row[field]))
        parts.append(' <var var_name="{0}">{1}</var>'.format(name, value))
    # Closing </record> tag.
    parts.append(' </record>')
    return '\n'.join(parts)
def write_xml(xmlFileName, data):
    """
    Write a DataFrame to an XML file.

    The output consists of an XML declaration, an opening ``<records>``
    tag, one fragment per row produced by ``xml_encode``, and the closing
    ``</records>`` tag.

    :param xmlFileName: path of the file to create or overwrite
    :param data: ``pandas.DataFrame`` whose rows are serialized
    :return: None
    """
    # The declaration written below states UTF-8, so the file must really
    # be encoded as UTF-8 rather than the platform default encoding.
    with open(xmlFileName, 'w', encoding='utf-8') as xmlfile:
        # Header.
        xmlfile.write(
            '<?xml version="1.0" encoding="UTF-8"?>'
        )
        xmlfile.write('<records>\n')  # root tag name can be changed as needed
        # Body: one encoded fragment per row, separated by newlines.
        xmlfile.write(
            '\n'.join(data.apply(xml_encode, axis=1))
        )
        xmlfile.write('\n</records>')  # must match the opening root tag
# Example driver: runs at import time and uses hard-coded Windows paths.
# NOTE(review): "xml_filenane" is spelled this way in the original; it is
# left unchanged because it is a module-level name.
xml_filenane = r'E:\data\realEstate_trans.xml'
new_filename = r'E:\data\realEstate_trans_output.xml'
xml_df = read_xml(xml_filenane) # load the XML file into a DataFrame
print(xml_df)
# write_xml(new_filename, xml_df) # write the DataFrame to a new XML file
【pandas 小记】pandas 读写xml文件
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.