ofo數據獲取&座標轉換&生成shp數據

隨着共享單車數量的不斷增加及其重要性的日益提升,很多人開始通過共享單車大數據對城市生活進行研究與分析。前段時間我做ofo數據分析時,一直被數據獲取問題所困擾,於是通過學習與借鑑其他學者的研究,利用Python對ofo單車數據進行了爬取。

相對於其他爬取程序,該程序有以下特點:
1. 考慮到arcpy模塊的版本兼容問題,本程序使用Python 2.7編寫;
2. 由於爬取的是高德地圖上面的ofo定位數據,所以又將火星座標系下的定位數據轉換爲WGS84座標系下的定位數據;
3. 同時將轉換後的數據導出爲shp點數據,方便操作與研究。

首先呢,該ofo定位數據爬取程序是通過登錄ofo網頁爬取數據的,登錄地址:https://common.ofo.so/newdist/?Journey。進行爬取前必須獲取認證信息token,獲取方式如下圖所示:
獲取token
在下面的程序代碼中用的是我的token,爲了長久使用,大家可以自己登錄獲取token。

同時,該程序是通過定義矩形區域進行爬取的,所以要事先查詢要爬取區域的左上角經緯度與右下角經緯度,調用start函數時按(左上角經度, 左上角緯度, 右下角經度, 右下角緯度)的順序填寫。

詳細代碼如下:

#!coding=utf-8
from __future__ import division
import requests
import datetime
import threading
import json
import os
import pandas as pd
import numpy as np
import time
import sqlite3
import math
import arcpy
from requests.packages.urllib3.exceptions import InsecureRequestWarning
from requests_toolbelt.multipart.encoder import MultipartEncoder
from concurrent.futures import ThreadPoolExecutor
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

class Crawler:
    """Crawl ofo bike positions over a rectangular area and export them as a shapefile.

    The area is scanned on a regular lat/lng grid. Each grid point is sent to
    the ofo "nearby cars" endpoint, the returned GCJ-02 coordinates are
    converted to WGS84, de-duplicated in an SQLite table and finally written
    to a point shapefile through arcpy.
    """

    # Authentication token sent with every request. Override on the class or
    # on an instance to use your own token.
    token = "e923c290-0d27-11e7-b9d2-b5e857d3318f"

    def __init__(self):
        self.start_time = datetime.datetime.now()
        self.db_name = "file:database?mode=memory&cache=shared"
        self.save_path = "./data/" + datetime.datetime.now().strftime("%Y%m%d")
        self.file_name = datetime.datetime.now().strftime("%Y%m%d_%H%M") + "_ofo"
        self.lock = threading.Lock()
        self.total = 0        # number of grid points scheduled
        self.done = 0         # number of grid points answered so far
        self.bikes_count = 0  # bikes seen so far, duplicates included
        self.x_pi = 3.14159265358979324 * 3000.0 / 180.0
        self.pi = 3.1415926535897932384626  # pi
        self.a = 6378245.0  # semi-major axis
        self.ee = 0.00669342162296594323  # eccentricity squared
        self.message = os.path.isdir(self.save_path)

    def get_nearby_bikes(self, args):
        """Query the bikes around one grid point; errors are printed, not raised.

        Args:
            args: ``(lat, lng)`` tuple of the grid point to query.
        """
        try:
            url = "https://san.ofo.so/ofo/Api/nearbyofoCar"

            headers = {
                'charset': "utf-8",
                'Accept': '*/*',
                'Accept-Encoding': 'gzip, deflate',
                'Accept-Language': 'zh-CN',
                'Content-Length': '516',
                'Content-Type': 'multipart/form-data; boundary=----ofo-boundary-MC4wOTk1ODUy',
                'Host': 'san.ofo.so',
                'Origin': 'https://common.ofo.so',
                'Referer': 'https://common.ofo.so/newdist/',
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36 Edge/14.14393'
            }

            self.request(headers, args, url)
        except Exception as ex:
            # Best effort: one failed grid point must not stop the crawl.
            print(ex)

    def request(self, headers, args, url):
        """POST one grid point to the API and store the returned bikes.

        Args:
            headers: HTTP headers for the request.
            args: ``(lat, lng)`` tuple of the grid point.
            url: Endpoint URL.
        """
        multipart_encoder = MultipartEncoder(
            fields={
                "token": self.token,
                "source": "0",
                "source-version": "9999",
                "lat": str(args[0]),
                "lng": str(args[1])
            },
            boundary='----ofo-boundary-MC4wOTk1ODUy'
        )
        response = requests.request(
            "POST", url, headers=headers,
            timeout=30,
            verify=False,
            data=multipart_encoder
        )

        # Counters and database writes are shared between worker threads.
        with self.lock:
            with self.connect_db() as c:
                try:
                    cars = json.loads(response.text)['values']['info']['cars']
                    self.done += 1
                    self.bikes_count += self._insert_bikes(c, cars)

                    elapsed = (datetime.datetime.now() - self.start_time).total_seconds()
                    if self.total and elapsed > 0:
                        percent = self.done / float(self.total)
                        total = elapsed / percent
                        print("位置 %s, 未去重單車數量 %s, 進度 %0.2f%%, 速度 %0.2f個/分鐘, 總時間 %s, 剩餘時間 %s" % (
                            args, self.bikes_count, percent * 100, self.done / elapsed * 60, total,
                            total - elapsed))
                except Exception as ex:
                    # Best effort: an unexpected payload is reported and skipped.
                    print(ex)

    def _insert_bikes(self, conn, cars):
        """Convert bikes from GCJ-02 to WGS84 and insert them into table ``ofo``.

        Rows whose (bikeId, lat, lng) already exist are ignored by the table's
        primary key. Values are bound as parameters rather than formatted into
        the SQL text.

        Args:
            conn: Open sqlite3 connection holding the ``ofo`` table.
            cars: Iterable of dicts with ``carno``, ``lng`` and ``lat`` keys.

        Returns:
            Number of bikes processed (duplicates included).
        """
        rows = []
        for car in cars:
            gcj_lng = float(car['lng'])
            gcj_lat = float(car['lat'])
            rows.append((
                car['carno'],
                self.gcj02_to_wgs84_lat(gcj_lng, gcj_lat),
                self.gcj02_to_wgs84_lng(gcj_lng, gcj_lat),
            ))
        conn.executemany(
            "INSERT OR IGNORE INTO ofo (bikeId, lat, lng) VALUES (?, ?, ?)", rows)
        return len(rows)

    def connect_db(self):
        """Return a new sqlite3 connection to ``self.db_name``."""
        return sqlite3.connect(self.db_name)

    def generate_create_table_sql(self, brand):
        """Return the CREATE TABLE statement for the table named ``brand``."""
        return '''CREATE TABLE {0}
                (
                    "bikeId" VARCHAR(12),
                    lat DOUBLE,
                    lng DOUBLE,
                    CONSTRAINT "{0}_bikeId_lat_lon_pk"
                        PRIMARY KEY (bikeId, lat, lng)
                );'''.format(brand)

    # Create the point shapefile
    def CreateFeaturclass(self, savepath, featurename, spatial):
        """Create ``<savepath>\\<featurename>.shp`` as a POINT feature class if it is missing."""
        if not arcpy.Exists(savepath + '\\' + featurename + '.shp'):
            arcpy.CreateFeatureclass_management(savepath, featurename, 'POINT', '', '', '', spatial)

    # Add the attribute fields
    def AddField(self, savepath, featurename):
        """Add the TEXT fields ``bikeid``, ``lon`` and ``lat`` to the shapefile."""
        shp_path = savepath + '\\' + featurename + '.shp'
        for field_name in ('bikeid', 'lon', 'lat'):
            arcpy.AddField_management(shp_path, field_name, 'TEXT')

    # Write one point per record together with its attribute values
    def InsertRow(self, savepath, featurename, data):
        """Insert every record of ``data`` into the shapefile as a point.

        Args:
            savepath: Directory containing the shapefile.
            featurename: Shapefile name without extension.
            data: DataFrame with ``bikeId``, ``lat`` and ``lng`` columns.
        """
        cursor = arcpy.InsertCursor(savepath + '\\' + featurename + '.shp')
        row = None
        try:
            # Walk the columns once instead of slicing the frame for each field.
            records = zip(data['bikeId'], data['lat'], data['lng'])
            for row_id, (bike_id, lat, lng) in enumerate(records, 1):
                point = arcpy.Point()
                point.X = float(lng)
                point.Y = float(lat)
                row = cursor.newRow()
                row.setValue('Id', row_id)
                row.setValue('bikeid', bike_id)
                row.setValue('lon', lng)
                row.setValue('lat', lat)
                row.shape = arcpy.PointGeometry(point)
                cursor.insertRow(row)
        finally:
            # Drop the cursor and row references so the shapefile is released.
            del row, cursor

    def _ellipsoid_terms(self, lat1):
        """Return ``(radlat, magic, sqrtmagic)`` used by both conversions."""
        radlat = lat1 / 180.0 * self.pi
        magic = math.sin(radlat)
        magic = 1 - self.ee * magic * magic
        return radlat, magic, math.sqrt(magic)

    def gcj02_to_wgs84_lng(self, lng1, lat1):
        """Return the WGS84 longitude for a GCJ-02 point.

        Points outside China are not offset, so ``lng1`` is returned unchanged.
        """
        if self.out_of_china(lng1, lat1):
            return lng1
        dlng = self.transformlng(lng1 - 105.0, lat1 - 35.0)
        radlat, _, sqrtmagic = self._ellipsoid_terms(lat1)
        dlng = (dlng * 180.0) / (self.a / sqrtmagic * math.cos(radlat) * self.pi)
        mglng = lng1 + dlng
        return lng1 * 2 - mglng

    def gcj02_to_wgs84_lat(self, lng1, lat1):
        """Return the WGS84 latitude for a GCJ-02 point.

        Points outside China are not offset, so ``lat1`` is returned unchanged.
        """
        if self.out_of_china(lng1, lat1):
            return lat1
        dlat = self.transformlat(lng1 - 105.0, lat1 - 35.0)
        _, magic, sqrtmagic = self._ellipsoid_terms(lat1)
        dlat = (dlat * 180.0) / ((self.a * (1 - self.ee)) / (magic * sqrtmagic) * self.pi)
        mglat = lat1 + dlat
        return lat1 * 2 - mglat

    def transformlat(self, lng1, lat1):
        """Return the raw latitude offset term for the shifted point ``(lng1, lat1)``."""
        ret = -100.0 + 2.0 * lng1 + 3.0 * lat1 + 0.2 * lat1 * lat1 + \
              0.1 * lng1 * lat1 + 0.2 * math.sqrt(math.fabs(lng1))
        ret += (20.0 * math.sin(6.0 * lng1 * self.pi) + 20.0 *
                math.sin(2.0 * lng1 * self.pi)) * 2.0 / 3.0
        ret += (20.0 * math.sin(lat1 * self.pi) + 40.0 *
                math.sin(lat1 / 3.0 * self.pi)) * 2.0 / 3.0
        ret += (160.0 * math.sin(lat1 / 12.0 * self.pi) + 320 *
                math.sin(lat1 * self.pi / 30.0)) * 2.0 / 3.0
        return ret

    def transformlng(self, lng1, lat1):
        """Return the raw longitude offset term for the shifted point ``(lng1, lat1)``."""
        ret = 300.0 + lng1 + 2.0 * lat1 + 0.1 * lng1 * lng1 + \
              0.1 * lng1 * lat1 + 0.1 * math.sqrt(math.fabs(lng1))
        ret += (20.0 * math.sin(6.0 * lng1 * self.pi) + 20.0 *
                math.sin(2.0 * lng1 * self.pi)) * 2.0 / 3.0
        ret += (20.0 * math.sin(lng1 * self.pi) + 40.0 *
                math.sin(lng1 / 3.0 * self.pi)) * 2.0 / 3.0
        ret += (150.0 * math.sin(lng1 / 12.0 * self.pi) + 300.0 *
                math.sin(lng1 / 30.0 * self.pi)) * 2.0 / 3.0
        return ret

    def out_of_china(self, lng1, lat1):
        """Return True when the point lies outside the lng/lat box used for China."""
        return not (lng1 > 73.66 and lng1 < 135.05 and lat1 > 3.86 and lat1 < 53.55)

    def group_data(self):
        """Export the collected ``ofo`` table to a shapefile."""
        print("正在導出數據")
        conn = self.connect_db()
        try:
            self.export_to_shp(conn, "ofo")
        finally:
            conn.close()

    def export_to_shp(self, conn, brand):
        """Read table ``brand`` from ``conn`` and write it as a WGS84 point shapefile."""
        spRef = arcpy.SpatialReference(4326)
        df = pd.read_sql_query("SELECT * FROM %s" % brand, conn, parse_dates=True)
        self.CreateFeaturclass(self.save_path, self.file_name, spRef)
        self.AddField(self.save_path, self.file_name)
        self.InsertRow(self.save_path, self.file_name, df)

        print(brand)
        print("去重後數量")
        print(len(df))

    def start(self, top_lng, top_lat, bottom_lng, bottom_lat,
              offset=0.002, always_run=False, waittime=1):
        """Crawl the rectangle and export the result.

        Args:
            top_lng: Longitude of the top-left corner.
            top_lat: Latitude of the top-left corner.
            bottom_lng: Longitude of the bottom-right corner.
            bottom_lat: Latitude of the bottom-right corner.
            offset: Grid spacing in degrees.
            always_run: When true, repeat the crawl forever.
            waittime: Minutes to wait between two crawls when ``always_run``.
        """
        while True:
            self.__init__()
            if not os.path.isdir(self.save_path):
                os.makedirs(self.save_path)

            # NOTE(review): this connection is deliberately left open for the
            # whole crawl. If db_name is honoured as a shared in-memory URI,
            # the database presumably lives only while a connection is open -
            # confirm before closing it here.
            try:
                with self.connect_db() as c:
                    c.execute(self.generate_create_table_sql('ofo'))
            except sqlite3.Error as ex:
                # Expected on repeated runs, where the table already exists.
                print(ex)

            print("Start")

            lat_range = np.arange(float(top_lat), float(bottom_lat), -offset)
            lng_range = np.arange(float(top_lng), float(bottom_lng), offset)
            points = [(lat, lon) for lat in lat_range for lon in lng_range]
            # Fix the total before any worker starts so progress is meaningful.
            self.total = len(points)

            executor = ThreadPoolExecutor(max_workers=100)
            try:
                for point in points:
                    executor.submit(self.get_nearby_bikes, point)
            finally:
                executor.shutdown()  # waits for all submitted requests

            self.group_data()

            if not always_run:
                break

            print("等待%s分鐘後繼續運行" % waittime)
            time.sleep(waittime * 60)

if __name__ == '__main__':
    # Crawl area: longitude/latitude of the top-left corner, then of the
    # bottom-right corner.
    crawler = Crawler()
    crawler.start(103.686592, 36.114191, 103.741781, 36.091515)
    print("完成")

最後,將導出的shp數據在ArcGIS中展示,如下圖所示:


將導出的數據在ArcGIS中展示
屬性表

參考文獻
https://github.com/SilverBooker/ofoSpider

本文僅供參考學習,有不到之處,望大家諒解

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章