#!/usr/bin/env python
# -*- coding:utf-8 -*-
'''
@author:yann
@datetime:2019/1/29 3:50 PM
'''
import urllib2
import json
import hashlib
import MySQLdb
import sys
import multiprocessing
from datetime import datetime
# Python-2-only hack: force the process-wide default string encoding to UTF-8
# so implicit str<->unicode conversions of the Chinese API payloads do not
# raise UnicodeDecodeError. reload() is required because site.py removes
# sys.setdefaultencoding from the module at interpreter startup.
reload(sys)
sys.setdefaultencoding('utf-8')
'''MD5 hashing helper'''
def md5(s):
    """Return the hexadecimal MD5 digest of the byte string *s*."""
    return hashlib.md5(s).hexdigest()
'''POST JSON to the HTTP API and return the raw response body'''
def http_post(url, data_json):
    """Serialize *data_json* to JSON, POST it to *url*, and return the
    raw response body as a byte string."""
    payload = json.dumps(data_json)
    request = urllib2.Request(url, payload)
    return urllib2.urlopen(request).read()
'''
Query how many records exist in the given time range; with 1000 records
per page, return how many pages are needed to fetch them all.
'''
def get_pages(startTime, endTime):
url = "******"
ts = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
appId = "5c1ca7b478d11522e8743fb3"
appSecret = md5(md5(appId + ts) + ts)
cert = {"ts": ts, "appId": appId, "appSecret": appSecret}
page = 1
pageSize = 1000
data_json = {"certificate": cert, 'startTime': startTime, 'endTime': endTime, 'page': page, 'pageSize': pageSize}
resp = http_post(url, data_json)
res = json.loads(resp)
totalCount = res['totalCount']
pages = totalCount / 1000 + 1
print startTime, '---', endTime, '總共有', res['totalCount'], '條數據', '共分爲', str(pages), '頁來進行數據同步'
return pages
'''Fetch one page of records from the API'''
def get_api_data(startTime, endTime, page):
    """Fetch page *page* (1000 rows per page) of the records in
    [startTime, endTime] and return the list under the response's
    'data' key."""
    url = "******"
    now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    app_id = "5c1ca7b478d11522e8743fb3"
    # Per-request credential: appSecret = md5(md5(appId + ts) + ts).
    credentials = {
        "ts": now,
        "appId": app_id,
        "appSecret": md5(md5(app_id + now) + now),
    }
    payload = {
        "certificate": credentials,
        'startTime': startTime,
        'endTime': endTime,
        'page': page,
        'pageSize': 1000,
    }
    response_body = http_post(url, payload)
    return json.loads(response_body)['data']
'''Persist one record to MySQL'''
def save_mysql(data):
    """Insert one survey record (a dict from the API) into evl_survey.

    Uses INSERT IGNORE so re-syncing the same rows is idempotent (duplicate
    keys are skipped, not errors).

    Fix: the connection was never closed, and neither the cursor nor the
    connection were released when execute/commit raised; both are now
    closed via try/finally.

    NOTE(review): opening a fresh connection per row is expensive for bulk
    sync — consider one connection per worker process.
    """
    db = MySQLdb.connect(host="******", port=3306, user="******", passwd="******", db="******", charset='utf8mb4')
    try:
        cursor = db.cursor()
        try:
            sql = "insert ignore into evl_survey (cId,cTime,cStatus,appId,appName,pId,pName,sId,sName,sHead,scenarioId,scenarioName,custId,customer) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
            params = (data['cId'], data['cTime'], data['cStatus'], data['appId'], data['appName'],
                      data['pId'], data['pName'], data['sId'], data['sName'], data['sHead'],
                      data['scenarioId'], data['scenarioName'], data['custId'], data['customer'])
            cursor.execute(sql, params)
            db.commit()
        finally:
            cursor.close()
    finally:
        db.close()
'''
Fetch the data 1000 rows at a time and save every row to MySQL
'''
def get_page_data(start_ym, end_ym):
startTime = start_ym + '-01 00:00:00'
endTime = end_ym + '-01 00:00:00'
pages = get_pages(startTime, endTime)
for page in range(1, pages + 1):
list_data = get_api_data(startTime, endTime, page)
for dict_data in list_data:
save_mysql(dict_data)
print start_ym, '-', end_ym, ': sync 第', str(page), '頁 success', str(len(list_data))
def main():
    """Fan the month-range sync out over a pool of 10 worker processes.

    Each consecutive pair from the month list becomes one (start, end)
    window handled by get_page_data in its own process.

    NOTE(review): apply_async discards worker exceptions unless the
    AsyncResult is .get()-ed — a failed window will go unreported.
    """
    months = ['2017-01', '2018-01', '2018-04', '2018-07', '2018-09',
              '2018-10', '2018-11', '2018-12', '2019-01', '2019-03']
    pool = multiprocessing.Pool(processes=10)
    for window_start, window_end in zip(months[:-1], months[1:]):
        pool.apply_async(get_page_data, (window_start, window_end))
    pool.close()
    pool.join()


if __name__ == '__main__':
    main()