Python crawler for Baidu Map migration (Qianxi): inflow sources and outflow destinations

The Baidu Map migration (Qianxi) service is at https://qianxi.baidu.com/.
It is worth scraping the data as early as possible, since the service may be taken offline later.

The Python crawler code:

import os
import random
import time
from urllib import request
import re

import xlwt

# custom helpers: readTXT reads a text file into a list of lines,
# writeOneJSON writes one JSON/text payload to a file
from utils.read_write import readTXT, writeOneJSON

# all raw JSON responses are saved under this directory
os.chdir(r'D:\data\人口數據\百度遷徙大數據\全國城市省份市內流入流出\json')

def set_style(name, height, bold=False):
    style = xlwt.XFStyle()  # initialise the cell style
    font = xlwt.Font()  # create a font for the style
    font.name = name  # e.g. 'Times New Roman'
    font.bold = bold
    font.color_index = 4
    font.height = height
    style.font = font
    return style

f = xlwt.Workbook()
sheet2 = f.add_sheet(u'sheet2', cell_overwrite_ok=True)  # create sheet2
# header: inflow city, home city, share; outflow city, home city, share;
# inflow province, home city, share; outflow province, home city, share
row0 = [u'遷入城市', u'所在城市', u'佔比', u'遷出城市', u'所在城市', u'佔比',
        u'遷入省份', u'所在城市', u'佔比', u'遷出省份', u'所在城市', u'佔比']
# write the header row
for i in range(0, len(row0)):
    sheet2.write(0, i, row0[i], set_style('Times New Roman', 200, True))

headers = {"User-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 "
                         "(KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0"}
# install a global opener so that every urlopen() call sends the User-agent header
opener = request.build_opener()
opener.addheaders = list(headers.items())
request.install_opener(opener)


default = set_style('Times New Roman', 220)
date_list = []
# city code / city name mapping, one "code,name" entry per line
lines = readTXT(r'D:\project\jianguiyuan\data\BaiduMap_cityCode_1102.txt')


# build the date list: 2020-01-03 to 2020-01-31 and 2020-02-01 to 2020-02-25
for riqi in range(20200103, 20200132):
    date_list.append(str(riqi))
for riqi in range(20200201, 20200226):
    date_list.append(str(riqi))
for riqi in date_list:
    print(riqi)
    for i in range(1, 389):  # iterate over the city list (line 0 is skipped)
        print(i)
        obj = lines[i].split(',')  # obj[0] is the Baidu city code, obj[1] is the city name
        print(obj[0])
        print(obj[1])
        firsturl = "http://huiyan.baidu.com/migration/cityrank.jsonp?dt=country&id=" + obj[0] + "&type=move_in&date=" + riqi + "&callback=jsonp"
        randint_data = random.randint(3, 6)
        time.sleep(randint_data)
        data = request.urlopen(firsturl).read().decode("utf-8")
        data = data.encode("utf-8").decode("unicode_escape")
        writeOneJSON(data, "城市遷入_" +obj[1] + "_" +riqi + ".json")
        # 對Unicode編碼進行改造
        pat = '{"city_name":"(.*?)","province_name":".*?","value":.*?}'
        pat1 = '{"city_name":".*?","province_name":".*?","value":(.*?)}'
        result = re.compile(pat).findall(str(data))
        result1 = re.compile(pat1).findall(str(data))
        column0 = result
        column1 = result1
        column2 = obj[1]
        for i1 in range(0, len(column0)):
            sheet2.write(i1 + len(column0) * i + 1, 0, column0[i1], default)
        for i1 in range(0, len(column0)):
            sheet2.write(i1 + len(column0) * i + 1, 1, column2, default)
        for i1 in range(0, len(column1)):
            sheet2.write(i1 + len(column0) * i + 1, 2, column1[i1], default)

        firsturl = "http://huiyan.baidu.com/migration/cityrank.jsonp?dt=country&" \
                   "id="+obj[0]+"&type=move_out&date="+ riqi+"&callback=jsonp"
        randint_data = random.randint(3, 7)
        time.sleep(randint_data)
        data2 = request.urlopen(firsturl).read().decode("utf-8")
        data2 = data2.encode("utf-8").decode("unicode_escape")  #
        writeOneJSON(data2, "城市遷出_" + obj[1] + "_" + riqi + ".json")
        #對Unicode編碼進行改造
        pat = '{"city_name":"(.*?)","province_name":".*?","value":.*?}'
        pat1 = '{"city_name":".*?","province_name":".*?","value":(.*?)}'
        result2 = re.compile(pat).findall(str(data2))
        result12 = re.compile(pat1).findall(str(data2))
        column0 = result2
        column1 = result12
        column2 = obj[1]
        for i1 in range(0, len(column0)):
            sheet2.write(i1 + len(column0) * i + 1, 3, column0[i1], default)
        for i1 in range(0, len(column0)):
            sheet2.write(i1 + len(column0) * i + 1, 4, column2, default)
        for i1 in range(0, len(column1)):
            sheet2.write(i1 + len(column0) * i + 1, 5, column1[i1], default)

        firsturl = "http://huiyan.baidu.com/migration/provincerank.jsonp?dt=country&id=" +obj[0] + "&type=move_in&date=" +  riqi + "&callback=jsonp"
        randint_data = random.randint(3, 8)
        time.sleep(randint_data)
        data = request.urlopen(firsturl).read().decode("utf-8")
        data = data.encode("utf-8").decode("unicode_escape")
        writeOneJSON(data, "省份遷入_" + obj[1] + "_" + riqi + ".json")
        # 對Unicode編碼進行改造
        pat = '{"province_name":(.*?),"value":.*?}'
        pat1 = '{"province_name":".*?","value":(.*?)}'
        result = re.compile(pat).findall(str(data))
        result1 = re.compile(pat1).findall(str(data))
        column0 = result
        column1 = result1
        column2 = obj[1]
        for i1 in range(0, len(column0)):
            sheet2.write(i1 + len(column0) * i + 1, 6, column0[i1], default)
        for i1 in range(0, len(column0)):
            sheet2.write(i1 + len(column0) * i + 1, 7, column2, default)
        for i1 in range(0, len(column1)):
            sheet2.write(i1 + len(column0) * i + 1, 8, column1[i1], default)

        firsturl = "http://huiyan.baidu.com/migration/provincerank.jsonp?dt=country&" \
                   "id="+obj[0]+"&type=move_out&date="+ riqi+"&callback=jsonp"
        randint_data = random.randint(3, 6)
        time.sleep(randint_data)
        data2 = request.urlopen(firsturl).read().decode("utf-8")
        data2 = data2.encode("utf-8").decode("unicode_escape")  #
        writeOneJSON(data2, "省份遷出_" + obj[1] + "_" + riqi + ".json")
        #對Unicode編碼進行改造
        pat = '{"province_name":(.*?),"value":.*?}'
        pat1 = '{"province_name":".*?","value":(.*?)}'
        result2 = re.compile(pat).findall(str(data2))
        result12 = re.compile(pat1).findall(str(data2))
        column0 = result2
        column1 = result12
        column2 = obj[1]
        for i1 in range(0, len(column0)):
            sheet2.write(i1 + len(column0) * i + 1, 9, column0[i1], default)
        for i1 in range(0, len(column0)):
            sheet2.write(i1 + len(column0) * i + 1, 10, column2, default)
        for i1 in range(0, len(column1)):
            sheet2.write(i1 + len(column0) * i + 1, 11, column1[i1], default)

    print("大吉大利,今晚喫雞啊!")
    filename = 'D:\data\人口數據\百度遷徙大數據\全國城市省份市內流入流出\\'+riqi+'.xls'
    f.save(filename)
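
For reference, the jsonp responses can also be parsed without regular expressions by stripping the callback wrapper and handing the remainder to the standard json module. This is only a minimal sketch: the exact key layout (errno / data / list) is an assumption inferred from the fields the regexes above match, and fetch_rank is a hypothetical helper, not part of the original script.

import json
from urllib import request

def fetch_rank(city_id, move_type, date):
    # same cityrank endpoint as above; the raw response looks like jsonp({...})
    url = ("http://huiyan.baidu.com/migration/cityrank.jsonp?dt=country"
           "&id=" + city_id + "&type=" + move_type + "&date=" + date + "&callback=jsonp")
    raw = request.urlopen(url).read().decode("utf-8")
    body = raw[raw.find("(") + 1:raw.rfind(")")]  # strip the jsonp( ... ) wrapper
    obj = json.loads(body)
    # assumed layout: {"errno": 0, "data": {"list": [{"city_name": ..., "province_name": ..., "value": ...}, ...]}}
    return obj.get("data", {}).get("list", [])

# illustrative usage (the city code and date are only examples):
# for item in fetch_rank("420100", "move_in", "20200125"):
#     print(item.get("city_name"), item.get("value"))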

The files used above can be downloaded from my resources page:
read_write.py (the Python file read/write helpers imported as utils.read_write)
Baidu city code file (BaiduMap_cityCode_1102.txt)
Data sample
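
Judging from obj = lines[i].split(',') in the script, each data line of the city code file is expected to hold at least the Baidu city code followed by the city name, separated by a comma. A purely illustrative line (not taken from the actual file) would be:

420100,武漢市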
If you need intra-city travel intensity data, inflow sources, or outflow destinations for every city in the country, message me privately for contact details.
