# 類似快遞填單子,自動識別姓名手機號等複雜地址文本信息:
# 注意文本塊之間必須有空格等分隔符,只能多不能少,如果是混成一坨的密密麻麻文本塊,那得上AI加分詞,沒得玩。
# 作者:MC石頭 @ CSDN
import re
import datetime
import time
import sys
from openpyxl import Workbook
from tkinter import *
# coding:utf-8
# Pattern that matches any single character OUTSIDE the CJK range
# U+4E00-U+9FA5.  A string in which this pattern finds nothing therefore
# contains only Chinese characters.  (The non-negated class would instead
# detect the presence of a Chinese character.)
zhmodel = re.compile(u'[^\u4e00-\u9fa5]')

# Region keywords (provinces, municipalities and a few cities); is_Address
# treats any token containing one of these as an address.
uu = [
    "遼寧", "吉林", "黑龍江",
    "江蘇", "浙江", "安徽", "福建", "江西", "山東",
    "內蒙", "新疆", "西藏", "臺灣",
    "河南", "河北", "山西", "湖北", "湖南",
    "廣東", "廣西", "海南",
    "四川", "貴州", "雲南",
    "陝西", "甘肅", "寧夏", "青海",
    "北京", "上海", "重慶", "天津", "深圳",
]
# Check whether a string is made up entirely of Chinese characters
def is_Chinese(word):
    """Return True if *word* is non-empty and contains only Chinese characters.

    Relies on the module-level ``zhmodel`` pattern, which matches any
    character outside U+4E00-U+9FA5; the word qualifies when that pattern
    finds nothing.

    Args:
        word: The text fragment to test.

    Returns:
        True when every character of a non-empty *word* is in the range
        U+4E00-U+9FA5, otherwise False.
    """
    # An empty string has no offending character, so the search alone would
    # report it as Chinese; reject it explicitly.
    if not word:
        return False
    return zhmodel.search(word) is None
# Check whether a text fragment looks like an address
def is_Address(adstr):
    """Return True if *adstr* contains any region keyword listed in ``uu``.

    Args:
        adstr: The text fragment to test.

    Returns:
        True when at least one entry of the module-level ``uu`` list occurs
        as a substring of *adstr*, otherwise False.
    """
    return any(region in adstr for region in uu)
# Pull the Chinese name, mobile number and address out of a list of tokens
def extract_info(list_value):
    """Classify text tokens into name, mobile number, address and leftovers.

    Each token is tested in this order and assigned to the first category
    that accepts it:

    1. mobile  - exactly 11 decimal digits;
    2. name    - 2 or 3 characters, all Chinese (see ``is_Chinese``);
    3. address - contains a known region keyword (see ``is_Address``);
    4. anything else is collected as leftover text.

    When several tokens fall into the same one of the first three
    categories, the last one wins and earlier ones are discarded.

    Args:
        list_value: Iterable of string tokens, typically the result of
            splitting pasted text on whitespace and commas.

    Returns:
        A tuple ``(name, mobile, address, rest)`` of strings.  Categories
        with no matching token are ``''``; ``rest`` is the leftover tokens
        joined by single spaces.
    """
    name = mobile = address = ''
    leftovers = []
    for token in list_value:
        # Splitting text that starts or ends with a separator produces empty
        # tokens; they carry no information and would only add stray spaces.
        if not token:
            continue
        # isdecimal() rather than isnumeric(): the latter also accepts CJK
        # numerals such as "一二三", fractions and circled digits.
        if token.isdecimal() and len(token) == 11:
            mobile = token
        elif len(token) in (2, 3) and is_Chinese(token):
            name = token
        elif is_Address(token):
            address = token
        else:
            leftovers.append(token)
    # Joining once at the end avoids the leading space produced by repeatedly
    # joining onto an initially empty string.
    return name, mobile, address, ' '.join(leftovers)
# Read the clipboard as text without letting an empty clipboard kill the script
def _read_clipboard(root):
    """Return the clipboard text, or '' when no text is available.

    ``clipboard_get`` raises ``TclError`` when the clipboard is empty or
    holds non-text data (for example an image); that case is reported as
    an empty string so the polling loop simply skips it.
    """
    try:
        return root.clipboard_get()
    except TclError:
        return ''


# Prepare the Excel workbook
wb = Workbook()  # create the workbook object
ws = wb.active  # grab the active (first) worksheet
# First row: title plus the time the sheet was generated
ws.append(['銷售整理表', '整理於:', time.strftime("%Y年%m月%d日 %H時%M分%S秒", time.localtime())])
# Second row: column headers
ws.append(['姓名', '手機', '地址'])
# Date used in the output file name; taken once at start-up
a = datetime.date.today()

# Prepare clipboard monitoring; the Tk root is only used for clipboard access
r = Tk()
last_string = _read_clipboard(r)
while True:
    # Polling interval
    time.sleep(0.3)
    string = _read_clipboard(r)
    # Only react when the clipboard holds new, non-empty text
    if string != last_string and string != '':
        address = string
        # Split on runs of whitespace, full-width commas and ASCII commas
        # (raw string so that \s is not treated as an invalid str escape)
        delivery_address = re.split(r'[\s,,]+', address)
        result = extract_info(delivery_address)
        ws.append([result[0], result[1], result[2], result[3]])
        # Save after every new row to an Excel file named after the date
        wb.save("d:\\工作整理記錄 " + a.strftime('%Y-%m-%d') + ".xlsx")
        print('添加excel內容完成:', result)
        last_string = string
##address = "18119990001 劉上奇 北京市北京市東城區靜寧路昌運大廈4樓401號"
##delivery_address = re.sub('[\s,,]+', ',', address).split(",")
##result = extract_info(delivery_address)
##print(result)
### Data can be assigned directly to cells
##ws['A1'] = result[0] #寫入數字
##ws['B1'] = result[1]
##ws['C1'] = result[2]
##ws['E1'] = "你好"+"automation test" #寫入中文(unicode中文也可)
### Python types will automatically be converted
##import datetime
##import time
##ws['A2'] = datetime.datetime.now() #寫入一個當前時間
###寫入一個自定義的時間格式
##ws['A3'] =time.strftime("%Y年%m月%d日 %H時%M分%S秒",time.localtime())
# 作者:MC石頭 @ CSDN