2020 最新 Python3.8 爬國家統計局區域、省、市、區、街道鄉鎮代碼


import urllib.request
import time
from bs4 import BeautifulSoup
# Crawl the 2019 administrative-division code pages under `url` and print the
# province -> city -> county entries to stdout.
indexs = 'index.html'
url = 'http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2019/'

# urlopen() interprets `timeout` in seconds. The script used to pass 5000
# (roughly 83 minutes, presumably meant as milliseconds) and gave the first
# request no timeout at all, so a stalled connection could hang the crawl.
REQUEST_TIMEOUT = 10
# Seconds to pause before each follow-up request.
REQUEST_DELAY = 1


def fetch_links(page):
    """Download ``url + page`` and return its ``<a>`` tags minus the last one.

    The response body is decoded as GBK and parsed with ``html.parser``.
    The final anchor is discarded, as the script has always done --
    presumably it is a footer link rather than a data row (TODO confirm
    against the live page). Slicing is used instead of ``pop()`` so a page
    without any anchors yields an empty list rather than raising IndexError.

    Args:
        page: Path of the page relative to ``url``.

    Returns:
        A list of the anchor tags found on the page, without the last one.
    """
    # The context manager closes the HTTP response even if reading or
    # decoding fails.
    with urllib.request.urlopen(url + page, timeout=REQUEST_TIMEOUT) as resp:
        html = resp.read().decode('gbk')
    return BeautifulSoup(html, 'html.parser').find_all('a')[:-1]


def code_name_pairs(links):
    """Group consecutive anchors into ``(code_link, name_link)`` tuples.

    The script reads the anchors of a listing page as alternating
    code / name links. Stepping through them two at a time replaces the
    earlier "skip when the text equals the previous name" check, which
    depended on a code never matching a name. A trailing unpaired anchor
    is ignored.

    Args:
        links: Sequence of anchor tags in document order.

    Returns:
        A list of ``(code_link, name_link)`` tuples.
    """
    return list(zip(links[0::2], links[1::2]))


for province in fetch_links(indexs):
    # The first two characters of the province href are printed as its code.
    print("========" + province['href'][0:2] + "," + province.text + "========")
    time.sleep(REQUEST_DELAY)
    for city_code, city_name in code_name_pairs(fetch_links(province['href'])):
        city_page = city_code['href']
        print(city_page + "," + city_code.text + "," + city_name.text)
        time.sleep(REQUEST_DELAY)
        county_links = fetch_links(city_page)
        # Kept as true division so the printed value matches the previous
        # output format (e.g. "16.0").
        print("----->>>>> " + str(len(county_links) / 2) + " <<<<<<------")
        for county_code, county_name in code_name_pairs(county_links):
            print("   [" + county_code.text + "," + county_name.text + "]")






 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章