python自動識別地址從入門到放棄到砸鍵盤

類似快遞填單:從一段複雜的地址文本中,自動識別出姓名、手機號和地址等信息。

注意:文本塊之間必須有空格等分隔符,分隔符只能多不能少。如果所有文本密密麻麻混成一坨、中間沒有任何分隔,那就得上 AI 加分詞才能處理,這個腳本沒得玩。

作者:MC石頭 @ CSDN

import re
import datetime
import time
import sys
from openpyxl import Workbook
from tkinter import *

# coding:utf-8

# Pattern that matches any single character OUTSIDE the U+4E00..U+9FA5 range.
# A string counts as "all Chinese" when this pattern finds nothing in it.
# (The non-negated class, [\u4e00-\u9fa5], would instead look for a Chinese
# character.)
zhmodel = re.compile('[^\u4e00-\u9fa5]')

# Region keywords; a text fragment containing any of them is treated as an
# address.
uu = [
    "遼寧", "吉林", "黑龍江",
    "江蘇", "浙江", "安徽", "福建", "江西", "山東",
    "內蒙", "新疆", "西藏", "臺灣",
    "河南", "河北", "山西", "湖北", "湖南",
    "廣東", "廣西", "海南",
    "四川", "貴州", "雲南",
    "陝西", "甘肅", "寧夏", "青海",
    "北京", "上海", "重慶", "天津", "深圳",
]


def is_Chinese(word):
    """Return True when *word* is non-empty and consists only of Chinese characters.

    "Chinese" means whatever the module-level ``zhmodel`` pattern does not
    reject: ``zhmodel`` matches any character outside the accepted range, so
    a string is all-Chinese exactly when the pattern finds nothing in it.

    Args:
        word: the text to check.

    Returns:
        bool: True for a non-empty, all-Chinese string; False otherwise
        (including the empty string).
    """
    # An empty string contains no offending character, so the regex test
    # alone would vacuously accept it; it is not a Chinese word.
    if not word:
        return False
    return zhmodel.search(word) is None

def is_Address(adstr):
    """Return True when *adstr* contains at least one region keyword from ``uu``.

    Args:
        adstr: the text fragment to inspect.

    Returns:
        bool: True if any entry of the module-level ``uu`` list occurs as a
        substring of *adstr*, False otherwise.
    """
    return any(keyword in adstr for keyword in uu)
        
    

def extract_info(list_value):
    """Pick a name, a mobile number and an address out of text fragments.

    Every non-empty fragment is classified by the first rule it satisfies:

    1. exactly 11 decimal digits          -> mobile number
    2. two or three Chinese characters    -> name (see ``is_Chinese``)
    3. contains a region keyword          -> address (see ``is_Address``)
    4. anything else                      -> leftover text

    When several fragments satisfy the same rule, the last one wins.

    Args:
        list_value: iterable of string fragments, e.g. the result of
            splitting pasted text on whitespace and commas.

    Returns:
        tuple: ``(name, mobile, address, rest)``, all strings. A field that
        was not found is ``''``; ``rest`` is the leftover fragments joined
        by single spaces.
    """
    name = mobile = address = ''
    leftovers = []
    for fragment in list_value:
        # Text that starts or ends with a separator splits into empty
        # fragments; they carry no information and would only pad `rest`.
        if not fragment:
            continue
        # isdecimal() rather than isnumeric(): the latter also accepts
        # Chinese numerals and fraction characters, which are not phone
        # digits.
        if fragment.isdecimal() and len(fragment) == 11:
            mobile = fragment
        # NOTE: this rule runs before the address rule, so a short
        # all-Chinese fragment such as a bare city name is taken as a name.
        elif is_Chinese(fragment) and len(fragment) in (2, 3):
            name = fragment
        elif is_Address(fragment):
            address = fragment
        else:
            leftovers.append(fragment)
    return name, mobile, address, ' '.join(leftovers)

# Prepare the Excel workbook that collects the recognised records.
wb = Workbook()
ws = wb.active     # the first (active) worksheet
# Row 1: title plus the time this sheet was created.
ws.append(['銷售整理表', '整理於:', time.strftime("%Y年%m月%d日 %H時%M分%S秒",time.localtime())])
# Row 2: column headers. NOTE: data rows carry a fourth, unlabelled column
# holding the leftover text returned by extract_info.
ws.append(['姓名', '手機', '地址'])
# Date used in the output file name; fixed at start-up.
a = datetime.date.today()


def _read_clipboard(root):
    """Return the clipboard text, or '' when it is empty or holds no text."""
    try:
        return root.clipboard_get()
    except TclError:
        # Tk raises TclError when the clipboard is empty or its content
        # cannot be converted to a string (e.g. an image was copied).
        return ''


# Clipboard monitoring. Tk is used for clipboard access only, so the empty
# root window is hidden.
r = Tk()
r.withdraw()
last_string = _read_clipboard(r)

while True:
    # Polling interval.
    time.sleep(0.3)
    string = _read_clipboard(r)
    # Only act when the clipboard holds new, non-empty text.
    if string != last_string and string != '':
        # Split on runs of whitespace and commas. The original character
        # class listed the ASCII comma twice; presumably one of them was a
        # full-width comma lost in transcription, so both are accepted.
        delivery_address = re.split(r'[\s,,]+', string)
        result = extract_info(delivery_address)

        ws.append([result[0], result[1], result[2], result[3]])

        # Save after every record to a workbook named after the start date.
        wb.save("d:\\工作整理記錄 "+ a.strftime('%Y-%m-%d') +".xlsx")
        print('添加excel內容完成:', result)

        last_string = string




##address = "18119990001  劉上奇     北京市北京市東城區靜寧路昌運大廈4樓401號"
##delivery_address = re.sub('[\s,,]+', ',', address).split(",")
##result = extract_info(delivery_address)
##print(result)




### Data can be assigned directly to cells
##ws['A1'] = result[0]      #寫入數字
##ws['B1'] = result[1]
##ws['C1'] = result[2]
##ws['E1'] = "你好"+"automation test" #寫入中文(unicode中文也可)

### Python types will automatically be converted
##import datetime
##import time
##ws['A2'] = datetime.datetime.now()    #寫入一個當前時間
###寫入一個自定義的時間格式
##ws['A3'] =time.strftime("%Y年%m月%d日 %H時%M分%S秒",time.localtime())







 

作者:MC石頭 @ CSDN

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章