python_爬蟲爬取高德地理編碼

import pandas as pd
import requests
import urllib.parse

#調用接口示例 'https://restapi.amap.com/v3/geocode/geo?address=北京市朝陽區阜通東大街6號&output=XML&key=<用戶的key>'
#結構化地址信息:國家、省份、城市、區縣、城鎮、鄉村、街道、門牌號碼、屋邨、大廈。
#如果需要解析多個地址的話,請用"|"進行間隔,並且將 batch 參數設置爲 true,最多支持 10 個地址進行"|"分割形式的請求。
#中文名	adcode citycode    深圳市 440300 0755;龍崗區 440307 0755


def get_addresses(filename, batch_size=10):
    """Read addresses from a CSV file and group them into '|'-joined batches.

    The file is read without a header row and the addresses are taken from
    the column at position 3.  Missing cells are skipped, every remaining
    value is converted to ``str`` (so joining cannot fail on NaN or numeric
    cells), and duplicates are removed while keeping first-seen order.

    Args:
        filename: Path of the CSV file to read.
        batch_size: Maximum number of addresses per batch (default 10).

    Returns:
        A list of strings, each holding up to ``batch_size`` addresses
        separated by ``'|'``.  The list is empty when the column holds no
        usable values.
    """
    df = pd.read_csv(filename, header=None)
    # dropna() must come before astype(str); otherwise NaN would turn into
    # the literal text 'nan' and be sent out as an address.
    unique = df[3].dropna().astype(str).drop_duplicates().tolist()
    return [
        '|'.join(unique[start:start + batch_size])
        for start in range(0, len(unique), batch_size)
    ]
        
def search_store(address, filename,
                 key='86e677ba7863ae87c9b4a7be8fa0199f', timeout=10):
    """Geocode a batch of addresses and append the results to a file.

    Sends one GET request to the AMap ``/v3/geocode/geo`` endpoint for the
    whole ``'|'``-separated batch and appends one tab-separated line per
    returned geocode to ``filename``:

        query, formatted_address, province, city, district, street,
        number, location, level

    Fields the service returns as an empty list are written as empty
    strings.  This is a best-effort step: any error is printed, not raised,
    so that the caller can carry on with its next batch.

    Args:
        address: One address, or several joined with ``'|'``.
        filename: Output file, opened in append mode as UTF-8.
        key: API key sent with the request.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        None.
    """
    # NOTE(review): the default ``key`` is a credential committed to source;
    # prefer passing it in explicitly (e.g. from an environment variable).
    fields = ('formatted_address', 'province', 'city', 'district',
              'street', 'number', 'location', 'level')
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36'
    }
    try:
        queries = address.split('|')
        # NOTE(review): the header comments of this file describe the flag
        # as ``batch=true``; the capitalised ``True`` is kept unchanged here
        # -- confirm the API treats it the same way.
        url = 'https://restapi.amap.com/v3/geocode/geo?address={}&key={}&batch=True'.format(
            urllib.parse.quote(address), key)
        # The context manager closes the file on every path, including when
        # the request or the JSON decoding fails.
        with open(filename, 'a', encoding='utf-8') as writer:
            response = requests.get(url, headers=headers, timeout=timeout)
            payload = response.json()
            if payload['status'] != '1':
                # Report the rejection instead of skipping the batch silently.
                print('geocode request failed: {}'.format(payload.get('info')))
                return
            geocodes = payload['geocodes']
            if len(geocodes) != len(queries):
                # Rows are paired by position, so a count mismatch means the
                # query column may not line up with the results.
                print('warning: {} addresses sent but {} geocodes returned'.format(
                    len(queries), len(geocodes)))
            for query, geocode in zip(queries, geocodes):
                values = ['' if geocode[name] == [] else geocode[name]
                          for name in fields]
                writer.write('\t'.join([query] + values) + '\n')
    except Exception as e:
        print(e)
    
if __name__ == '__main__':
    # Script entry point: read the address batches from the input CSV and
    # geocode them one batch at a time, appending results to the output file.
    source_path = r'C:\Users\szu\Desktop\企業\.txt'
    output_path = r'C:\Users\szu\Desktop\poi\.txt'
    # Numbering batches from 1 lets the running total be derived directly;
    # every batch is counted as a nominal 10 addresses.
    for batch_number, batch in enumerate(get_addresses(source_path), start=1):
        search_store(batch, output_path)
        processed = batch_number * 10
        # Progress is reported after every second batch.
        if processed % 20 == 0:
            print(processed)

如需幫忙抓取數據,請私聊我

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章