獲取實時數據
# -*- coding: utf-8 -*-
import requests,os
import re
import xlwt
import time
import json
class get_yq_info:
    """Scrape per-province / per-city case counts from the DXY page into .xls files.

    The page embeds its statistics as a JavaScript assignment
    (``window.getAreaStat = [...]``).  The JSON array is pulled out with a
    regular expression, flattened into spreadsheet rows and written with xlwt.
    """

    # Raw string: the original non-raw literal relied on the invalid escape
    # sequence "\(" , which newer Python versions warn about.
    _AREA_STAT_PATTERN = re.compile(r'try { window.getAreaStat = (.*?)}catch\(e\)')

    # Single source of truth for the output directory, shared by make_dir()
    # and save_data_to_excle() so both always refer to the same location.
    _DATA_DIR = 'E:/人數採集3.0/'

    # Sheet layout: row 0 and columns 0-1 are intentionally left empty.
    _HEADER_ROW = 1
    _FIRST_COLUMN = 2
    _HEADER_TITLES = (
        '省份名稱',
        '省份簡稱或城市名稱',
        '確診人數',
        '疑似人數',
        '治癒人數',
        '死亡人數',
        '地區ID編碼',
    )

    # JSON keys copied into the sheet, in column order.
    _PROVINCE_KEYS = (
        'provinceName',
        'provinceShortName',
        'confirmedCount',
        'suspectedCount',
        'curedCount',
        'deadCount',
        'locationId',
    )
    _CITY_KEYS = (
        'cityName',
        'confirmedCount',
        'suspectedCount',
        'curedCount',
        'deadCount',
        'locationId',
    )

    def get_data_html(self):
        """Download the statistics page and return its body decoded as UTF-8.

        Returns:
            The page HTML as a ``str``.

        Raises:
            requests.exceptions.RequestException: on connection errors or when
                the 3 second timeout expires.
        """
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1'}
        response = requests.get(
            'https://ncov.dxy.cn/ncovh5/view/pneumonia?from=timeline&isappinstalled=0',
            headers=headers,
            timeout=3,
        )
        # Decode the raw bytes explicitly as UTF-8 so the Chinese text is
        # not mangled by a guessed response encoding.
        return response.content.decode('utf-8')

    @classmethod
    def _parse_area_stat(cls, html):
        """Extract and JSON-decode the ``window.getAreaStat`` payload from *html*."""
        matches = cls._AREA_STAT_PATTERN.findall(html)
        if not matches:
            # Still an IndexError (what indexing the empty match list raised
            # before), but now with a message explaining what went wrong.
            raise IndexError('window.getAreaStat payload not found in the page HTML')
        return json.loads(matches[0])

    def get_data_dictype(self):
        """Fetch the page and return the area statistics decoded from JSON.

        Returns:
            The decoded JSON value embedded in the page (iterated as a
            sequence of province dicts by ``save_data_to_excle``).

        Raises:
            IndexError: if the page does not contain the expected payload.
            json.JSONDecodeError: if the payload is not valid JSON.
        """
        return self._parse_area_stat(self.get_data_html())

    @classmethod
    def _flatten_area_rows(cls, areas):
        """Flatten provinces and their cities into ``(start_column, values)`` rows.

        Each province row is followed directly by the rows of its cities.
        City rows start one column further right, which leaves the
        province-name column empty for them.
        """
        rows = []
        for province in areas:
            rows.append((cls._FIRST_COLUMN, [province[key] for key in cls._PROVINCE_KEYS]))
            # Tolerate a province entry without a "cities" list.
            for city in province.get('cities') or ():
                rows.append((cls._FIRST_COLUMN + 1, [city[key] for key in cls._CITY_KEYS]))
        return rows

    def save_data_to_excle(self):
        """Scrape the current statistics and save them to a timestamped .xls file.

        The file is written into the data directory (created on demand) as
        ``實時採集v3.0-<YYYY-mm-dd-HH-MM-SS>.xls`` with a single sheet
        named ``all_data``.
        """
        # Make sure the output directory exists before doing any work.
        self.make_dir()

        workbook = xlwt.Workbook()
        sheet = workbook.add_sheet('all_data')

        # Column titles.
        for offset, title in enumerate(self._HEADER_TITLES):
            sheet.write(self._HEADER_ROW, self._FIRST_COLUMN + offset, title)

        # Data rows start on the line right below the titles.
        rows = self._flatten_area_rows(self.get_data_dictype())
        for row_index, (start_column, values) in enumerate(rows, start=self._HEADER_ROW + 1):
            for offset, value in enumerate(values):
                sheet.write(row_index, start_column + offset, value)

        current_time = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
        # Build the path from the same directory make_dir() created instead of
        # a second hard-coded literal full of backslash escapes.
        workbook.save(os.path.join(self._DATA_DIR, '實時採集v3.0-%s.xls' % current_time))
        print('======數據爬取成功======')

    def make_dir(self):
        """Create the data directory if it is missing and report where data goes."""
        file_path = self._DATA_DIR
        if not os.path.exists(file_path):
            # exist_ok guards against the directory appearing between the
            # existence check and the creation call.
            os.makedirs(file_path, exist_ok=True)
            print('======數據文件夾不存在=======')
            print('======數據文件夾創建成功======')
            print('======創建目錄爲%s======' % (file_path))
        else:
            print('======數據保存在目錄:%s======' % (file_path))

    def exe_task(self):
        """Interactively run the collection a number of times at a fixed interval.

        Prompts on stdin for the number of runs and for the pause between
        runs in minutes (rounded to one decimal place).

        Raises:
            ValueError: if either answer cannot be parsed as a number.
        """
        times = int(input('執行採集次數:'))
        interval_minutes = round(float(input('每次執行間隔時間(分鐘)')), 1)
        interval_seconds = interval_minutes * 60
        for run in range(times):
            # Pause only between runs: there is nothing to wait for after the
            # last collection has finished.
            if run:
                time.sleep(interval_seconds)
            self.save_data_to_excle()
# Start the interactive collection task only when this file is run as a
# script, so importing the module does not prompt for input or hit the network.
if __name__ == '__main__':
    get_yq_info().exe_task()
執行後自定義要求: