首先,我是模仿这位大佬的博客写的,因为水平有限有些代码看不太懂,所以做了改动
https://blog.csdn.net/xufive/article/details/104093197
然后就是我没有系统地学过 Python,所以代码比较凌乱,如有错误请指正,谢谢!
数据说明 数据来源:全部数据来源于国家卫健委、各省市区卫健委、各省市区政府以及港澳台官方渠道。 实时数据方面,腾讯新闻的统计方法如下: 1.国家卫健委公布数据时,全国总数与国家卫健委保持一致。 2.各省卫健委陆续公布数据,如果各省数据总和已经超过之前国家卫健委总数,则直接使用各省数据总和作为全国总数。(“疑似病例”仅使用国家卫健委每天公布的共有疑似病例总数,而不做新增累计) 3.“全国确诊”、“治愈人数”和“死亡人数”的“较上日”是指每两天间的新增数值,由当前的全国总数减去国家卫健委前一天公布的数据得到。这个值会随着全国总数的变动而实时变化。 4.疑似病例的“较上日”数据取自国家卫健委每日最新公布的“新增疑似病例数”。点击【疑似病例】可查看较上日现有疑似病例数的绝对值差额。 5.各省卫健委公布数据的发布时间和统计时间段各不相同,故而会在部分时段出现国家总数不等于分省数据之和。
目标网站是:https://news.qq.com/zt2020/page/feiyan.htm?from=timeline&isappinstalled=0#charts
首先,我想要获取的数据是全国确诊总人数、疑似病例人数、治愈人数和死亡人数,如下图。
还有各个省份的各个城市的相关数据。
整体思路就是:获取 → 解析 → 保存 → 发送。
1. 获取
def get_html_page(url: str, timeout: float = 10):
    """Download the JSONP feed at ``url`` and pass its body to ``get_data``.

    Args:
        url: Full request URL, including the ``callback`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the script forever.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.
    """
    header = {
        "Host": "view.inews.qq.com",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:71.0) Gecko/20100101 Firefox/71.0",
        "Accept": "*/*",
        "Accept-Language": "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2",
        # NOTE(review): "br" is advertised here; confirm the installed
        # requests/urllib3 can decode Brotli, otherwise drop it.
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
        "Referer": "https://news.qq.com/zt2020/page/feiyan.htm?from=timeline&isappinstalled=0",
        # Placeholder value: replace with your own cookies before running.
        "Cookie": "此处粘贴自己的cookies",
        "TE": "Trailers"
    }
    response = requests.get(url=url, headers=header, timeout=timeout)
    # Fail here on an HTTP error instead of handing an error page to the parser.
    response.raise_for_status()
    get_data(response.text)
2. 解析并保存
解析出来的数据包含各个省份和各个城市的数据,分别保存到两个文件:一个文件(province_data.json)只保存各个省份的数据,另一个文件(data_1.json)保存各个城市和各个省份的数据。两个文件都包含全国汇总数据。
def get_data(request: str):
    """Parse the JSONP response text and save nationwide, province and city totals.

    Two files are written through ``save_data``:

    * ``data_1.json`` - nationwide summary plus every city and province entry.
    * ``province_data.json`` - nationwide summary plus province entries only.

    Args:
        request: Raw response body of the form ``callback({...})``.

    Raises:
        IndexError: If ``request`` contains no parenthesised payload (same
            exception type the previous ``findall(...)[0]`` lookup produced).
        json.JSONDecodeError: If the payload is not valid JSON.
        KeyError: If an expected top-level key is missing.
    """
    # Greedy match from the first "(" to the LAST ")": a non-greedy match
    # would cut the payload short at the first ")" found inside the data.
    match = re.search(r"[(](.*)[)]", request, re.S)
    if match is None:
        raise IndexError("no JSONP payload found in response text")

    # The "data" field is itself a JSON-encoded string, so decode it once more.
    # Decoding happens a single time and the result is reused below.
    payload = json.loads(json.loads(match.group(1))["data"])

    china_total = dict(payload["chinaTotal"])
    total_data = {
        "total_confirm": china_total.get("confirm"),
        "total_suspect": china_total.get("suspect"),
        "total_dead": china_total.get("dead"),
        "total_heal": china_total.get("heal"),
        "lastUpdateTime": payload["lastUpdateTime"]
    }

    # The first entry of "areaTree" holds the provinces as its children;
    # each province in turn lists its cities under "children".
    country = dict(payload["areaTree"][0])
    list_data_all_province = []
    list_data_all_city = []
    for province in country.get("children") or []:
        list_data_all_province.append(_region_totals(province))
        for city in province.get("children") or []:
            list_data_all_city.append(_region_totals(city))

    data = {
        "total": total_data,
        "else": list_data_all_city + list_data_all_province
    }
    province_data = {
        "total": total_data,
        "else": list_data_all_province
    }
    save_data(data, "data_1.json")
    save_data(province_data, "province_data.json")


def _region_totals(region):
    """Return the name and the confirm/dead/heal values under ``region["total"]``."""
    totals = region.get("total") or {}
    return {
        "name": region.get("name"),
        "confirm": totals.get("confirm"),
        "dead": totals.get("dead"),
        "heal": totals.get("heal")
    }
def save_data(data, file_path):
    """Serialize ``data`` as JSON into ``file_path``, overwriting any existing file.

    Args:
        data: JSON-serializable object to store.
        file_path: Destination path; opened in text mode with UTF-8 encoding.
    """
    # ``with`` closes the handle even if json.dump raises part-way through.
    with open(file_path, "w", encoding="utf-8") as f:
        json.dump(data, f)
3. 发送数据
这里发送数据的方式有两种:发送所有数据(各个省份和各个城市),或者只发送各个省份的数据。
def read_file(path):
    """Load and return the JSON document stored at ``path``.

    Args:
        path: File to read; opened in text mode with UTF-8 encoding.

    Returns:
        The decoded JSON value.
    """
    # ``with`` closes the handle even if json.load raises on malformed content.
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)
def send_province_data():
    """Send the nationwide summary, then one message per province, via itchat.

    Reads ``province_data.json`` through ``read_file`` and sends every message
    to the first friend returned by ``itchat.search_friends("T@")``.
    """
    # Look the recipient up by nickname.
    # NOTE(review): indexing [0] fails if the search returns no match.
    userfinfo = itchat.search_friends("T@")
    userid = userfinfo[0]["UserName"]  # user id used as the send target

    data = dict(read_file("province_data.json"))
    total = data.get("total")
    total_confirm = total.get("total_confirm")
    total_suspect = total.get("total_suspect")
    # Fixed: this previously read "total_heal", so deaths showed the healed count.
    total_dead = total.get("total_dead")
    total_heal = total.get("total_heal")
    total_last_update_time = total.get("lastUpdateTime")
    itchat.send(
        "全国确诊:{}\n全国疑似:{}\n全国死亡:{}\n全国治愈:{}\n最后更新时间:{}".format(
            total_confirm, total_suspect, total_dead, total_heal, total_last_update_time),
        userid)  # send the summary to that user id

    for province_data in list(data.get("else")):
        # Pause between messages (presumably to avoid sending too fast - confirm).
        time.sleep(0.9)
        province_data = dict(province_data)
        itchat.send(
            "地区:{}\n确诊:{}\n死亡:{}\n治愈:{}".format(
                province_data.get("name"), province_data.get("confirm"),
                province_data.get("dead"), province_data.get("heal")),
            userid)
def send_all_data():
    """Send the nationwide summary, then one message per city and province, via itchat.

    Reads ``data_1.json`` (the combined city + province file written by
    ``get_data``) through ``read_file`` and sends every message to the first
    friend returned by ``itchat.search_friends("虎子1")``.
    """
    # Look the recipient up by nickname.
    # NOTE(review): indexing [0] fails if the search returns no match.
    userfinfo = itchat.search_friends("虎子1")
    userid = userfinfo[0]["UserName"]  # user id used as the send target

    # Fixed: read_file() was called without its required path argument.
    data = dict(read_file("data_1.json"))
    total = data.get("total")
    total_confirm = total.get("total_confirm")
    total_suspect = total.get("total_suspect")
    # Fixed: this previously read "total_heal", so deaths showed the healed count.
    total_dead = total.get("total_dead")
    total_heal = total.get("total_heal")
    total_last_update_time = total.get("lastUpdateTime")
    itchat.send(
        "全国确诊:{}\n全国疑似:{}\n全国死亡:{}\n全国治愈:{}\n最后更新时间:{}".format(
            total_confirm, total_suspect, total_dead, total_heal, total_last_update_time),
        userid)  # send the summary to that user id

    for city_data in list(data.get("else")):
        # Pause between messages (presumably to avoid sending too fast - confirm).
        time.sleep(0.1)
        city_data = dict(city_data)
        itchat.send(
            "名称:{}\n确诊:{}\n死亡:{}\n治愈:{}".format(
                city_data.get("name"), city_data.get("confirm"),
                city_data.get("dead"), city_data.get("heal")),
            userid)
4. 主函数
if __name__ == '__main__':
    # Script pipeline: log in, fetch/parse/save the feed, then send province data.
    itchat.auto_login(hotReload=True)

    # The current time in milliseconds is appended as the "_" query parameter.
    millis = int(time.time() * 1000)
    feed_url = "https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5&callback=jQuery34106528980082724699_1581051212376&_=" + str(millis)
    get_html_page(feed_url)

    send_province_data()
最后献上今日壁纸