Python 讀取 XML 文檔並更新數據庫,再通過重命名 XML 文件來標記是否已存庫

from xml.dom.minidom import parse
import xml.etree.ElementTree as ET
import re,os,datetime
from loguru import logger
import pymysql
# Module-level MySQL connection and cursor; readXML() uses both directly.
conn = pymysql.connect(host='localhost',
                       user='root',
                       password='',
                       db='....',
                       charset='utf8')
cursor = conn.cursor()
# One log file per start date; rotation="00:00" asks loguru to roll it over at midnight.
logger.add(".\\log\\xml{}.log".format(datetime.date.today()), rotation="00:00")
def _node_text(node):
	"""Return the data of *node*'s first child, or '' when there is no text child."""
	children = node.childNodes
	if not children:
		return ''
	# Element children carry no ``data`` attribute; treat that as "no text".
	return getattr(children[0], 'data', '')


def _inside_reference(node):
	"""Return True when *node* has a <Reference>/<ReferenceList> ancestor."""
	ancestor = node.parentNode
	while ancestor is not None:
		if ancestor.nodeName in ('Reference', 'ReferenceList'):
			return True
		ancestor = ancestor.parentNode
	return False


def _extract_doi(article):
	"""Return the DOI of one <PubmedArticle> element, or '' when none is found.

	<ELocationID> elements are examined first, then <ArticleId> elements.
	An element qualifies when its type attribute (EIdType / IdType) equals
	"doi"; if the attribute is absent, the text must start with "10.".
	Identifiers nested under <Reference> are skipped because they describe
	cited works, not the article itself.
	"""
	for tag, type_attr in (("ELocationID", "EIdType"), ("ArticleId", "IdType")):
		for node in article.getElementsByTagName(tag):
			if _inside_reference(node):
				continue
			text = _node_text(node)
			if not text:
				continue
			id_type = node.getAttribute(type_attr)  # '' when the attribute is missing
			if id_type:
				if id_type.lower() == 'doi':
					return text
			elif text.startswith('10.'):
				return text
	return ''


def readXML(file_name_path):
	"""Update the `pmid_doi` table from every <PubmedArticle> in an XML file.

	For each article that has a DOI, the row whose ``pmid`` matches is updated
	with the file path and the DOI, using the module-level ``cursor`` and
	``conn``. Articles without a DOI are skipped.

	Args:
		file_name_path: path of the XML file to parse.

	Returns:
		False if at least one article had a DOI but no PMID, otherwise True.

	Raises:
		Whatever ``xml.dom.minidom.parse`` raises for an unreadable or
		malformed file; database errors are caught and logged.
	"""
	print(file_name_path)
	root = parse(file_name_path).documentElement
	# The stored path never carries the "already processed" marker.
	stored_path = file_name_path.replace('duqu-duqu', '')
	# Placeholders let the driver quote the values (DOIs may contain quotes).
	sql = "update `pmid_doi` set path=%s,doi=%s where pmid=%s"
	missing_pmid = 0
	for article in root.getElementsByTagName("PubmedArticle"):
		pmid_nodes = article.getElementsByTagName("PMID")
		pmid = _node_text(pmid_nodes[0]) if pmid_nodes else ''
		doi = _extract_doi(article)
		if not doi:
			continue
		print(f"文件路徑{file_name_path},pmid:{pmid},doi:{doi}")
		params = (stored_path, doi, pmid)
		print(sql, params)
		if not pmid:
			# Nothing to match the row on; report the file as incomplete.
			missing_pmid += 1
			continue
		try:
			cursor.execute(sql, params)
			conn.commit()
		except Exception as e:
			# NOTE(review): a failed update is only logged and does not affect
			# the return value, so the caller still treats the file as handled.
			print('存庫失敗', e)
			logger.warning(f'數據庫插入失敗{sql} {params} {e}')
		else:
			logger.warning(f'數據庫修改成功{sql} {params}')
	return missing_pmid == 0

def file_names():
	"""Return the full paths of XML files in ./xml that are not yet marked.

	A file counts as unprocessed when its name does not contain the
	'duqu-duqu' marker. Only regular files ending in '.xml' are returned, so
	stray entries (sub-directories, other file types) never reach the parser.
	Paths are returned in sorted order.

	Raises:
		OSError: if the 'xml' directory under the current working directory
			cannot be listed.
	"""
	xml_dir = os.path.join(os.getcwd(), 'xml')
	pending = []
	for entry in sorted(os.listdir(xml_dir)):
		if 'duqu-duqu' in entry or not entry.endswith('.xml'):
			continue
		full_path = os.path.join(xml_dir, entry)
		if os.path.isfile(full_path):
			pending.append(full_path)
	return pending

def rename(file_name):
	"""Mark a file as processed by inserting 'duqu-duqu' before its extension.

	Only the final path component is altered, so directory names that happen
	to contain '.xml' are left untouched. A failed rename is reported on
	stdout and otherwise ignored.

	Args:
		file_name: path of the file to mark, e.g. '.../a.xml' becomes
			'.../aduqu-duqu.xml'.
	"""
	stem, extension = os.path.splitext(file_name)
	marked_name = stem + 'duqu-duqu' + extension
	try:
		os.rename(file_name, marked_name)
	except OSError as e:
		print('修改失敗', e)


if __name__ == '__main__':
	# Paths already handed to readXML in this run. A file that is not renamed
	# (readXML returned False, or the rename failed) keeps showing up in
	# file_names(); remembering it here stops the loop from retrying forever.
	attempted = set()
	while True:
		# Re-list on every pass so files added while running are picked up.
		pending = [path for path in file_names() if path not in attempted]
		if not pending:
			break
		for file_path in pending:
			attempted.add(file_path)
			if readXML(file_path):
				rename(file_path)
	print('程序結束運行')
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章