python_將爬取的百度地圖遷徙json數據寫成csv矩陣

爬取百度地圖遷徙數據的方法請參考這篇文章:
python_爬蟲_百度地圖遷徙_遷入地來源_遷出目的地

將json數據處理成excel請參考這篇文章:
python_將爬取的百度地圖遷徙json數據寫入到excel

原始數據格式:

"jsonp_1584195671576_1286958({"errno":0,"errmsg":"SUCCESS","data":{"list":[{"province_name":"山東省","value":42.64},{"province_name":"河南省","value":24.15},
……(中間省略)……
{"province_name":"青海省","value":0.02},{"province_name":"新疆維吾爾自治區","value":0.01}]}})"

處理成功的數據格式,矩陣的格式可用於機器學習研究
矩陣數據參考格式

import os
import re

from utils.read_write import readTXT, writeOneJSON, eachFile, writeOneCSV

# All relative file names used later (the per-city JSON dumps) are resolved
# against this directory.
os.chdir(r'D:\data\百度遷徙大數據\最新城市省份流入流出數據\json')

# Read the city-code text file; the code below indexes the result as a
# sequence of comma-separated strings (presumably one line per city -- confirm
# against utils.read_write.readTXT).
# Raw string: the original literal relied on invalid escapes (\p, \j, \d, \B)
# being passed through unchanged, which newer Python versions warn about.
lines = readTXT(r'D:\project\jianguiyuan\data\BaiduMap_cityCode_1102.txt')

# Header row of the output matrices: a placeholder 0 in the corner cell,
# followed by the second comma-separated field of entries 1..326 of `lines`
# (entry 0 is skipped).
title = [0] + [lines[i].split(',')[1] for i in range(1, 327)]

def writeTitle(riqi):
    """Pass the shared ``title`` row to ``writeOneCSV`` for both city matrices.

    Args:
        riqi: Date string that becomes part of the output file names.

    The target files are ``<dir>各城市遷入矩陣_<riqi>.csv`` and
    ``<dir>各城市遷出矩陣_<riqi>.csv``, where ``dir`` is the module-level
    output directory prefix.
    """
    suffix = "_" + riqi + '.csv'
    for matrix_name in ('各城市遷入矩陣', '各城市遷出矩陣'):
        writeOneCSV(title, dir + matrix_name + suffix)
    # The province-level matrices (各省份遷入矩陣 / 各省份遷出矩陣) are
    # intentionally not written here.

# 先將數據下載爲json文件
# One record of the migration list; captures (city_name, value) together so a
# value can never be paired with the wrong city.
_CITY_RECORD = re.compile(
    r'\{"city_name":"(.*?)","province_name":".*?","value":(.*?)\}'
)


def _matrix_row(city, raw_json, column_of, width):
    """Build one matrix row: ``[city, v_1, ..., v_(width-1)]``.

    Args:
        city: Label stored in the first cell of the row.
        raw_json: Raw response text; backslashes are stripped before matching.
        column_of: Mapping from city name to its column index in the header.
        width: Total row length (label cell plus one cell per header city).

    Returns:
        A list whose cells default to ``0`` and hold ``float(value)`` for
        every record whose city name has a column.
    """
    row = [city] + [0] * (width - 1)
    filled = set()
    for name, value in _CITY_RECORD.findall(raw_json.replace('\\', '')):
        col = column_of.get(name)
        # Records for cities without a column are skipped; if a city occurs
        # more than once, the first record wins.
        if col is None or col in filled:
            continue
        filled.add(col)
        row[col] = float(value)
    return row


def city_range(n, riqi):
    """Append the inflow and outflow matrix rows for every city from ``n`` on.

    For each entry ``i`` in ``n .. len(title) - 1`` of the module-level
    ``lines``, the city name is taken from the second comma-separated field,
    the files ``城市遷入_<city>_<riqi>.json`` and ``城市遷出_<city>_<riqi>.json``
    are read from the current working directory, and one row per file is
    passed to ``writeOneCSV`` for the matching matrix file under ``dir``.

    Args:
        n: Index into ``lines`` of the first city to process.
        riqi: Date string used in both the input and output file names.

    Note:
        The earlier implementation looked values up by their position among
        the *matched* cities only, so a single record whose city had no
        column shifted every following value into the wrong column. Name and
        value are now extracted as a pair.
    """
    width = len(title)
    # Column index of each header city; setdefault keeps the first occurrence,
    # matching what list.index() would return.
    column_of = {}
    for col in range(1, width):
        column_of.setdefault(title[col], col)

    for i in range(n, width):
        # Each entry is "<id>,<name>,..."; only the name is needed here.
        city = lines[i].split(',')[1]

        # Only the first element returned by readTXT is parsed (presumably the
        # whole response sits on one line -- confirm against the crawler).
        inflow = readTXT("城市遷入_" + city + "_" + riqi + ".json")
        qianru = _matrix_row(city, inflow[0], column_of, width)

        outflow = readTXT("城市遷出_" + city + "_" + riqi + ".json")
        qianchu = _matrix_row(city, outflow[0], column_of, width)

        # Write only after both inputs were read and parsed successfully.
        writeOneCSV(qianru, dir + '各城市遷入矩陣' + "_" + riqi + '.csv')
        writeOneCSV(qianchu, dir + '各城市遷出矩陣' + "_" + riqi + '.csv')


def date_change(date, end=20200327):
    """Build the matrices for every calendar day from ``date`` through ``end``.

    Args:
        date: First day to process, as a ``YYYYMMDD`` integer or string.
        end: Last day to process (inclusive), same format. Defaults to
            20200327, the last day the previous hard-coded ranges produced.

    Raises:
        ValueError: If ``date`` or ``end`` is not a valid ``YYYYMMDD`` date.

    Note:
        Days are generated with real calendar arithmetic. The previous
        integer ranges skipped 20200131 (exclusive upper bound) and always
        processed all of February and March even when ``date`` was later.
    """
    # Function-scope import so this block does not depend on the file header.
    from datetime import datetime, timedelta

    day = datetime.strptime(str(date), '%Y%m%d')
    last = datetime.strptime(str(end), '%Y%m%d')
    one_day = timedelta(days=1)

    date_list = []
    while day <= last:
        date_list.append(day.strftime('%Y%m%d'))
        day += one_day

    for riqi in date_list:
        print(riqi)
        writeTitle(riqi)
        city_range(1, riqi)
        print("大吉大利,今晚喫雞啊!")


if __name__ == '__main__':
    # Output directory prefix, read as a module-level global by writeTitle and
    # city_range; it must end with a path separator because file names are
    # appended by plain string concatenation.
    # Backslashes are escaped explicitly: the previous literal relied on
    # invalid escape sequences (e.g. "\d") being passed through unchanged.
    dir = 'D:\\data\\人口數據\\百度遷徙大數據\\最新城市省份流入流出數據\\矩陣\\'
    date_change(20200101)

其中的參考文件請移步到我的下載
我的下載

如需幫忙處理數據和爬取數據請私聊我。。。

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章