import pandas as pd
import requests
import urllib.parse
#調用接口示例 'https://restapi.amap.com/v3/geocode/geo?address=北京市朝陽區阜通東大街6號&output=XML&key=<用戶的key>'
#結構化地址信息:國家、省份、城市、區縣、城鎮、鄉村、街道、門牌號碼、屋邨、大廈。
#如果需要解析多個地址的話,請用"|"進行間隔,並且將 batch 參數設置爲 true,最多支持 10 個地址進行"|"分割形式的請求。
#中文名 adcode citycode 深圳市 440300 0755;龍崗區 440307 0755
def get_addresses(filename, batch_size=10):
    """Read addresses from a header-less CSV and group them into batches.

    The fourth column (index 3) is used as the address column. Missing
    values are dropped and duplicates are removed, keeping the first
    occurrence and the original order. The remaining addresses are joined
    with ``|`` in groups of at most ``batch_size``.

    Args:
        filename: Path of the CSV file to read; it must have no header row
            and at least four columns.
        batch_size: Maximum number of addresses per batch. Defaults to 10,
            the per-request limit of the batch geocoding endpoint this
            script calls.

    Returns:
        A list of strings, each holding up to ``batch_size`` addresses
        separated by ``|``. Empty when the column has no usable values.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError('batch_size must be at least 1')

    # dtype=str keeps every cell as text, so '|'.join never meets a number
    # and values such as '007' keep their leading zeros.
    df = pd.read_csv(filename, header=None, dtype=str)

    # Drop missing cells before de-duplicating; a NaN left in the list would
    # make '|'.join raise TypeError.
    unique_addresses = df[3].dropna().drop_duplicates().tolist()

    return [
        '|'.join(unique_addresses[start:start + batch_size])
        for start in range(0, len(unique_addresses), batch_size)
    ]
def _geocode_field(geocode, name):
    """Return ``geocode[name]`` as text, mapping blank values to ''.

    A missing key, ``None`` or an empty list all become the empty string;
    anything else is converted with ``str`` so it can be joined into a row.
    """
    value = geocode.get(name)
    if value is None or value == []:
        return ''
    return str(value)


def search_store(address, filename,
                 key='86e677ba7863ae87c9b4a7be8fa0199f', timeout=10):
    """Geocode a batch of addresses and append the results to a TSV file.

    Sends one batch request to the AMap geocoding endpoint and, when the
    response status is ``'1'``, appends one tab-separated line per returned
    geocode to ``filename``. Each line holds: the queried address, then
    formatted_address, province, city, district, street, number, location
    and level.

    Failures are best-effort: any exception is printed and the function
    returns normally so a caller looping over batches keeps going.

    Args:
        address: One or more addresses separated by ``|``.
        filename: Output file, opened in append mode as UTF-8.
        key: AMap web-service API key.
            NOTE(review): the default is a credential embedded in source;
            consider loading it from configuration instead.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        None.
    """
    queried = address.split('|')
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36'
    }
    url = 'https://restapi.amap.com/v3/geocode/geo?address={}&key={}&batch=True'.format(
        urllib.parse.quote(address), urllib.parse.quote(key, safe=''))
    field_names = ('formatted_address', 'province', 'city', 'district',
                   'street', 'number', 'location', 'level')

    try:
        # The context manager closes the file on every path, including when
        # open() itself fails (nothing to close in that case).
        with open(filename, 'a', encoding='utf-8') as out:
            # Without a timeout a stalled connection would hang the whole run.
            response = requests.get(url, headers=headers, timeout=timeout)
            payload = response.json()

            if payload['status'] != '1':
                # Report the rejection instead of skipping the batch silently.
                print('geocode request failed for {!r}: {}'.format(
                    address, payload.get('info')))
                return

            for index, geocode in enumerate(payload['geocodes']):
                # Results are paired with queried addresses by position; fall
                # back to '' if more results come back than were asked for.
                source = queried[index] if index < len(queried) else ''
                row = [source]
                row.extend(_geocode_field(geocode, name) for name in field_names)
                out.write('\t'.join(row) + '\n')
    except Exception as e:
        # Deliberately broad: one failed batch must not stop the caller's loop.
        print(e)
if __name__ == '__main__':
    # Script entry point: read the address batches from the input file and
    # geocode them one batch at a time, appending results to the output file.
    input_path = r'C:\Users\szu\Desktop\企業\.txt'
    output_path = r'C:\Users\szu\Desktop\poi\.txt'

    # Each batch is counted as 10 addresses, so the progress figure is printed
    # after every second batch (20, 40, 60, ...).
    for batch_number, batch in enumerate(get_addresses(input_path), start=1):
        search_store(batch, output_path)
        processed = batch_number * 10
        if processed % 20 == 0:
            print(processed)
# 如需幫忙抓取數據,請私聊我