Python 爬虫:抓取百度地图迁徙数据的总体思路:
1、获取百度地图的城市编号与城市名的对应关系
2、确定想要抓取的日期范围
3、使用 urllib.request 向迁徙接口发送请求
4、将返回的数据写入 JSON 文件
遗憾的是百度地图只有每个城市迁入迁出占比,没有具体人数
import os
from datetime import datetime
from urllib import request
from utils.read_write import readTXT, writeOneJSON
from utils.time_change import getBetweenDay
# All output files are written relative to this working directory.
os.chdir(r'D:\data\据\json_328_0602\\')

# Column headers: city move-in / city move-out / province move-in /
# province move-out, each followed by the current city and the share.
row0 = [
    '迁入城市', '所在城市', '占比',
    '迁出城市', '所在城市', '占比',
    '迁入省份', '所在城市', '占比',
    '迁出省份', '所在城市', '占比',
]

# Read the txt file into a list of strings.
# The path is a raw string so that the backslashes are never parsed as
# escape sequences (the non-raw form relied on deprecated invalid escapes).
lines = readTXT(r'D:\project\jianguiyuan\data\BaiduMap_cityCode_1102.txt')
# Send the request
def requerts_url(url, i, riqi, *, retries=3, delay=1.0, timeout=30):
    """Download ``url`` and return the raw response body.

    A failed attempt is logged (timestamp, city index, URL) and retried in
    place. The crawl is deliberately NOT restarted from here: doing so through
    ``city_range``/``date_change`` recursed without bound, repeated downloads
    that had already succeeded and still handed ``None`` back to the caller.

    Args:
        url: Address to fetch.
        i: Index of the city being processed; only used in the failure log.
        riqi: Date of the request; accepted for call compatibility, the
            download itself does not need it.
        retries: Total number of attempts before giving up (at least one).
        delay: Seconds to wait between two attempts.
        timeout: Timeout in seconds passed to ``urlopen`` for each attempt.

    Returns:
        The response body as ``bytes``, or ``None`` when every attempt failed.
    """
    import time
    from http.client import HTTPException

    attempts = max(1, retries)
    for attempt in range(1, attempts + 1):
        try:
            # The context manager closes the connection even if read() fails.
            with request.urlopen(url, timeout=timeout) as response:
                return response.read()
        except (OSError, HTTPException):
            # URLError, HTTPError and socket timeouts are all OSError
            # subclasses; HTTPException covers truncated/invalid responses.
            print(datetime.now())
            print(i)
            print(url)
            if attempt < attempts and delay > 0:
                time.sleep(delay)
    return None
# Download the data as JSON files first
_MIGRATION_CALLBACK = "jsonp_1584195671576_1286958"

# Exclusive upper bound of the city indexes to crawl (kept from the original
# hard-coded loop limit).
_CITY_INDEX_STOP = 327

# One entry per downloaded ranking:
# (endpoint, direction, output file prefix, decode "\uXXXX" escapes to text?)
_MIGRATION_RANKINGS = (
    ("cityrank", "move_in", "城市迁入_", False),
    ("cityrank", "move_out", "城市迁出_", False),
    ("provincerank", "move_in", "省份迁入_", True),
    ("provincerank", "move_out", "省份迁出_", True),
)


def _migration_url(endpoint, city_id, direction, riqi):
    """Build the ranking URL for one city, direction and date."""
    return (
        "http://huiyan.baidu.com/migration/" + endpoint + ".jsonp?dt=city&id="
        + city_id + "&type=" + direction + "&date=" + riqi
        + "&callback=" + _MIGRATION_CALLBACK
    )


def _as_utf8_bytes(payload):
    """Return ``payload`` as bytes, UTF-8 encoding it only when it is text."""
    return payload if isinstance(payload, bytes) else payload.encode("utf-8")


def city_range(n, riqi):
    """Download the four migration rankings for every city from index ``n``.

    Each entry of the module-level ``lines`` is split on ``,`` into a city id
    and a city name. For every city the move-in/move-out rankings by city and
    by province are fetched with ``requerts_url`` and stored with
    ``writeOneJSON`` under ``<prefix><city name>_<riqi>.json``.

    Args:
        n: Index in ``lines`` of the first city to process.
        riqi: Date in ``YYYYMMDD`` form, used in the URL and the file names.
    """
    # Never index past the end of the city list.
    stop = min(_CITY_INDEX_STOP, len(lines))
    for i in range(n, stop):
        print(i)
        # Separate the city id from the city name.
        obj = lines[i].split(',')
        if len(obj) < 2:
            # Blank or malformed line: there is no city to query.
            continue
        print(obj[1])
        for endpoint, direction, prefix, unescape in _MIGRATION_RANKINGS:
            url = _migration_url(endpoint, obj[0], direction, riqi)
            data = requerts_url(url, i, riqi)
            if data is None:
                # The download failed; skip this file instead of crashing.
                continue
            # urlopen().read() yields bytes, which have no .encode(); normalise
            # to UTF-8 bytes whether the payload arrived as bytes or as text.
            data = _as_utf8_bytes(data)
            if unescape:
                # Province rankings are written as text with the "\uXXXX"
                # escapes turned into the actual characters.
                data = data.decode("unicode_escape")
            # NOTE(review): city rankings are passed on as UTF-8 bytes and
            # province rankings as str, as in the original code; confirm that
            # writeOneJSON accepts both.
            writeOneJSON(data, prefix + obj[1] + "_" + riqi + ".json")
def date_change(date):
    """Crawl every city for each day produced by ``getBetweenDay(date)``.

    Each day has its dashes removed before being printed and handed to
    ``city_range``, which always starts at city index 1. A closing message is
    printed once all days have been processed.

    Args:
        date: Start date passed unchanged to ``getBetweenDay``.
    """
    for day in getBetweenDay(date):
        compact_day = day.replace('-', '')
        print(compact_day)
        city_range(1, compact_day)
    print("大吉大利,今晚吃鸡啊!")
if __name__ == '__main__':
    # Script entry point: start crawling from this date.
    start_date = '2020-04-01'
    date_change(start_date)
我已经有从今年1月到6月的所有数据,还有矩阵形式的,如需数据请私聊我……
遗憾的是百度地图只有每个城市迁入迁出占比,没有具体人数
如需帮忙爬取全国省份城市迁移的具体人数,请私聊我,我有办法……