from xml.dom.minidom import parse
import xml.etree.ElementTree as ET
import re,os,datetime
from loguru import logger
import pymysql
# Module-level MySQL connection and cursor used by readXML() for its updates.
conn = pymysql.connect(
    host='localhost',
    user='root',
    password='',
    db='....',
    charset='utf8',
)
cursor = conn.cursor()

# Log file name carries the start date; rotation is requested at "00:00".
logger.add(".\\log\\xml{}.log".format(datetime.date.today()), rotation="00:00")
def _node_text(parent, tag_name, index=0):
    """Return the data of the first child of the ``index``-th ``tag_name`` element.

    Raises:
        IndexError: the element, or its first child node, does not exist.
        AttributeError: the first child node has no ``data`` attribute.
    """
    return parent.getElementsByTagName(tag_name)[index].childNodes[0].data


def _extract_doi(article):
    """Return the DOI candidate of one ``PubmedArticle`` element, or ''.

    The first ``ELocationID`` wins.  Otherwise the second ``ArticleId`` is
    used, or the third one when the second does not start with ``'10.'``.
    """
    # NOTE(review): the first ELocationID is taken without checking what kind
    # of identifier it holds - confirm it is always a DOI in the input files.
    try:
        return _node_text(article, "ELocationID")
    except (IndexError, AttributeError):
        pass
    try:
        doi = _node_text(article, "ArticleId", 1)
        if not doi.startswith('10.'):
            doi = _node_text(article, "ArticleId", 2)
        return doi
    except (IndexError, AttributeError):
        return ''


def readXML(file_name_path):
    """Parse one XML file and update ``pmid_doi`` with each article's path and DOI.

    For every ``PubmedArticle`` element that yields a DOI, the row whose
    ``pmid`` matches is updated through the module-level ``cursor``/``conn``.
    Articles without a DOI are skipped.

    Args:
        file_name_path: Path of the XML file to read.

    Returns:
        False if at least one article had a DOI but no PMID, True otherwise.
        A failed database update is printed and logged but does not change
        the return value.

    Raises:
        Whatever ``xml.dom.minidom.parse`` raises for an unreadable or
        malformed file.
    """
    print(file_name_path)
    root = parse(file_name_path).documentElement

    # Values are passed as query parameters so the driver escapes them; this
    # replaces the hand-rolled backslash doubling and string-built SQL.
    sql = "update `pmid_doi` set path=%s,doi=%s where pmid=%s"
    stored_path = file_name_path.replace('duqu-duqu', '')

    missing_pmid = 0
    for article in root.getElementsByTagName("PubmedArticle"):
        try:
            pmid = _node_text(article, "PMID")
        except (IndexError, AttributeError):
            pmid = ''

        doi = _extract_doi(article)
        if not doi:
            continue

        print(f"文件路徑{file_name_path},pmid:{pmid},doi:{doi}")
        params = (stored_path, doi, pmid)
        print(sql, params)

        if not pmid:
            # Nothing to match the row on; remember it so the caller is told.
            missing_pmid += 1
            continue

        try:
            cursor.execute(sql, params)
            conn.commit()
            logger.warning(f'數據庫修改成功{sql} {params}')
        except Exception as e:
            print('存庫失敗', e)
            logger.warning(f'數據庫插入失敗{sql} {params}')

    return missing_pmid == 0
def file_names():
    """Return full paths of the not-yet-processed XML files in ``./xml``.

    A file counts as processed when its name contains the ``duqu-duqu``
    marker.  Only names ending in ``.xml`` are returned: other entries cannot
    be parsed as XML and could never receive the marker, which is inserted
    in front of ``.xml``.

    Returns:
        A list of paths built from the current working directory, in the
        order ``os.listdir`` reports them.
    """
    xml_dir = os.path.join(os.getcwd(), 'xml')
    return [
        os.path.join(xml_dir, xml_name)
        for xml_name in os.listdir(xml_dir)
        if xml_name.endswith('.xml') and 'duqu-duqu' not in xml_name
    ]
def rename(file_name):
    """Mark a file as processed by inserting ``duqu-duqu`` before ``.xml``.

    Only the base name is rewritten; directory components that happen to
    contain ``.xml`` are left alone so the target stays in the same folder.
    A failed rename is printed and otherwise ignored.

    Args:
        file_name: Path of the file to rename.
    """
    directory, base_name = os.path.split(file_name)
    target = os.path.join(directory, base_name.replace('.xml', 'duqu-duqu.xml'))
    try:
        os.rename(file_name, target)
    except OSError as e:
        print('修改失敗', e)
if __name__ == '__main__':
    # Paths already tried in this run.  A file that readXML() rejects is never
    # renamed, so file_names() keeps returning it; without this set the loop
    # below would retry it forever.
    attempted = set()
    while True:
        pending = [path for path in file_names() if path not in attempted]
        if not pending:
            break
        for file_path in pending:
            attempted.add(file_path)
            # Only mark the file as processed when every article was handled.
            if readXML(file_path):
                rename(file_path)
    print('程序結束運行')
python讀取xml文檔插入數據庫並重命名xml進行判斷是否存庫
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.