該項目是浙江大學地理空間數據庫課程作業8:空間分析中,使用 flask + pyecharts 搭建的簡單新冠肺炎疫情數據可視化交互分析平臺的一部分,完整的實現包含疫情數據獲取、態勢感知、預測分析、輿情監測等任務;
包含完整代碼、數據集和實現的github地址:
https://github.com/yunwei37/COVID-19-NLP-vis
項目分析報告已部署到網頁端,可點擊 http://flask.yunwei123.tech/ 進行查看,數據已更新到 6 月 17 日
本項目採用flask作爲後端,使用pyecharts進行數據可視化,通過ajax實現動態交互可視化效果;
疫情數據曲線圖、日曆圖
疫情數據曲線圖:可選擇國家
疫情新增確診病例日曆圖:
pyecharts 代碼實現:
import time, json
import pandas as pd
import pyecharts.options as opts
from pyecharts.charts import Line
from pyecharts.commons.utils import JsCode
# Country used to filter the CSV rows; calendar_base() reads this module-level
# name directly, while render_lines() takes its own parameter of the same name.
country_name = '中國'
def render_lines(country_name):
    """Build a smoothed area-line chart of one country's epidemic counts.

    Reads ``dataSets/countrydata.csv``, keeps the rows whose ``countryName``
    equals *country_name*, and plots four series against ``dateId``:
    ``confirmedCount``, ``currentConfirmedCount``, ``deadCount`` and
    ``curedCount``.

    Args:
        country_name: Value matched against the ``countryName`` column; it
            is also embedded in the chart title.

    Returns:
        The configured pyecharts ``Line`` chart (not yet rendered).
    """
    # ---------------------------------------------------------------------
    # Step 1: load the data.
    # ---------------------------------------------------------------------
    # Forward slashes resolve on Windows as well as POSIX; the previous
    # backslash-separated literal only worked on Windows.
    data = pd.read_csv("dataSets/countrydata.csv")
    data = data[data['countryName'] == country_name]

    # The x axis is categorical, so the date ids are passed as strings.
    date_list = [str(date_id) for date_id in data['dateId']]
    print(len(date_list))

    # (legend label, CSV column) for every plotted series, in drawing order.
    series_columns = (
        ('確診數據', 'confirmedCount'),
        ('現存確診數據', 'currentConfirmedCount'),
        ('死亡數據', 'deadCount'),
        ('治癒數據', 'curedCount'),
    )

    # ---------------------------------------------------------------------
    # Step 2: draw the area-line chart.
    # ---------------------------------------------------------------------
    line = Line().add_xaxis(date_list)
    for label, column in series_columns:
        # Each series is smoothed and marks its maximum and minimum point.
        line.add_yaxis(
            label,
            list(data[column]),
            is_smooth=True,
            markpoint_opts=opts.MarkPointOpts(data=[
                opts.MarkPointItem(type_="max"),
                opts.MarkPointItem(type_="min"),
            ]),
        )

    # Hide the per-point value labels and fill the area under each line.
    line.set_series_opts(
        areastyle_opts=opts.AreaStyleOpts(opacity=0.5),
        label_opts=opts.LabelOpts(is_show=False),
    )
    line.set_global_opts(
        # Rotate the x-axis labels so the date strings do not overlap.
        xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),
        # NOTE(review): the y axis starts at 3 rather than 0, as in the
        # original code - confirm this is intended.
        yaxis_opts=opts.AxisOpts(name='人數', min_=3),
        title_opts=opts.TitleOpts(title='2019-nCoV'+country_name+'疫情數據曲線圖'),
    )
    return line
import datetime
from pyecharts import options as opts
from pyecharts.charts import Calendar
def calendar_base() -> Calendar:
    """Build a calendar heat map of daily new confirmed cases.

    Reads ``dataSets/countrydata.csv``, keeps the rows whose ``countryName``
    equals the module-level ``country_name``, and pairs each value of the
    ``confirmedIncr`` column with consecutive calendar days starting at
    2020-01-19.

    NOTE(review): values are matched to dates purely by row position, so
    this assumes the CSV holds one row per day in ascending order starting
    on the begin date - confirm against the data set.

    Returns:
        The configured pyecharts ``Calendar`` chart (not yet rendered).
    """
    begin = datetime.date(2020, 1, 19)  # first day shown
    end = datetime.date(2020, 6, 17)    # nominal last day of the data

    # Forward slashes resolve on Windows as well as POSIX; the previous
    # backslash-separated literal only worked on Windows.
    frame = pd.read_csv("dataSets/countrydata.csv")
    frame = frame[frame['countryName'] == country_name]
    confirm_list = list(frame['confirmedIncr'])

    # NOTE(review): the window stops three days short of `end`, as in the
    # original code - presumably because the CSV ends earlier; confirm.
    day_count = (end - begin).days - 3
    # zip() also stops at the end of the data, so a country with fewer rows
    # than the window no longer raises IndexError.
    daily = [
        [str(begin + datetime.timedelta(days=offset)), increment]
        for offset, increment in zip(range(day_count), confirm_list)
    ]
    print(len(daily))

    c = (
        Calendar()
        # Show January through June 2020.
        .add('', daily, calendar_opts=opts.CalendarOpts(range_=['2020-1', '2020-6']))
        .set_global_opts(
            title_opts=opts.TitleOpts(title='全國疫情每日新增確診病例日曆圖', subtitle='From Weix'),
            # Piecewise legend explaining the colour of each value band.
            visualmap_opts=opts.VisualMapOpts(
                pieces=[
                    # Open-ended top band. It starts right above the next
                    # band so that 10001-12999 is no longer left unmapped.
                    {'min': 10001, 'color': '#7f1818'},
                    {'min': 1000, 'max': 10000},
                    {'min': 500, 'max': 999},
                    {'min': 100, 'max': 499},
                    {'min': 10, 'max': 99},
                    {'min': 0, 'max': 9},
                ],
                orient='vertical',   # stack the legend entries vertically
                pos_top='230px',
                pos_left='100px',
                is_piecewise=True,   # discrete bands instead of a continuous bar
            ),
        )
    )
    return c
if __name__ == "__main__":
    # When run as a script, build the calendar chart and write it out as a
    # standalone HTML page.
    calendar_chart = calendar_base()
    calendar_chart.render('全國疫情每日新增確診病例日曆圖.html')
前端html:
<!-- Country picker: one <option> is emitted per entry of the `cates` template variable. -->
<label>請選擇國家: </label>
<select id="selectCountrys">
{% for cate in cates %}
<option value="{{cate}}" >{{cate}}</option>
{% endfor %}
</select>
<!-- Two 500x300 inline-block chart containers. -->
<!-- NOTE(review): "lines" is presumably where the line chart is mounted - confirm against the page script. -->
<div id="lines" style="width:500px; height:300px;display: inline-block;"></div>
<!-- NOTE(review): the hex id looks like an auto-generated chart id, presumably for the calendar chart - confirm. -->
<div id="04cd225d9bb642288bef9788ed998f30" class="chart-container" style="width:500px; height:300px;display: inline-block;"></div>
疫情數據分析詞雲圖:
pyecharts 代碼實現:
# coding=utf-8
import jieba
import re
import time
from collections import Counter
import pandas as pd
import datetime
#------------------------------------中文分詞------------------------------------
#截取該日期前後的10%文章
#percent = 0-90
def generatewordData(percent):
    """Count the most frequent words in a 10% window of the news articles.

    The window starts ``percent`` percent of the way into the CSV rows and
    spans one tenth of all rows. Each article body is segmented with jieba
    and tokens longer than one character are counted.

    Args:
        percent: Start of the window, as a percentage of the row count
            (the generator script calls this with 0 through 90).

    Returns:
        A tuple ``(words, first_time, last_time)`` where ``words`` is a list
        of ``(word, count)`` pairs and the two times are the first and last
        values of the ``時間`` column inside the window.

    Raises:
        IndexError: If the selected window contains no rows.
    """
    # Forward slashes resolve on Windows as well as POSIX; the previous
    # backslash-separated literal only worked on Windows.
    data = pd.read_csv('dataSets/中國社會組織_疫情防控-5_21.csv')

    # One window is a tenth of the rows; `percent / 10` windows are skipped.
    window = data.shape[0] / 10
    start = window * (percent / 10)
    data = data.iloc[int(start):int(start + window)]

    times = list(data['時間'])
    print(data.shape[0])
    print(times[0])
    print(times[-1])

    counts = Counter()
    for text in data['正文內容']:
        # Tokens are counted article by article. The previous code appended
        # every article's space-joined tokens to one string with no
        # separator, which fused the last word of one article with the first
        # word of the next and skewed the counts.
        tokens = " ".join(jieba.cut(str(text), cut_all=False)).split()
        # split() already discards whitespace-only tokens; single-character
        # tokens are skipped as well.
        counts.update(token for token in tokens if len(token) > 1)

    # NOTE(review): the single most frequent word is dropped, as in the
    # original code - presumably a keyword that dominates every window;
    # confirm.
    words = counts.most_common(50)[1:]
    return words, times[0], times[-1]
# 渲染圖
from pyecharts import options as opts
from pyecharts.charts import WordCloud
from pyecharts.globals import SymbolType
#import wordData
# percent 0-90
def render_wordcloud(percent=0) -> WordCloud:
    """Build the news word cloud for one precomputed window.

    Args:
        percent: Index into the precomputed ``date_data`` list imported from
            ``scripts.wordData``; it is converted with ``int()`` first.

    Returns:
        The configured pyecharts ``WordCloud`` chart (not yet rendered).
    """
    # Imported lazily, inside the function, exactly as the original did.
    from scripts.wordData import date_data

    # Each entry holds the word list followed by the two strings shown in
    # the title.
    entry = date_data[int(percent)]
    words = entry[0]
    title = '全國新型冠狀病毒疫情新聞詞雲圖' + ' ' + entry[1] + ' - ' + entry[2]

    cloud = WordCloud()
    cloud.add("", words, word_size_range=[20, 100], shape=SymbolType.ROUND_RECT)
    cloud.set_global_opts(title_opts=opts.TitleOpts(title=title))
    return cloud
# 生成圖
if __name__ == "__main__":
    # Precompute the word statistics for every window start (0 through 90)
    # and dump them as an importable Python module defining `date_data`.
    date_words = []
    for step in range(91):
        print(step)
        date_words.append(list(generatewordData(step)))

    # The `with` block closes the file; no explicit close() is needed.
    with open("wordData.py", "w", encoding="utf-8") as out_file:
        out_file.write("date_data=" + str(date_words))
微博情感分析曲線圖
pyecharts 代碼實現(微博每日主題詞詞雲圖):
# dateId: 0-50
def weiboWordcloud(dateId):
    """Build the Weibo daily topic word cloud for one precomputed day.

    Args:
        dateId: Index into the precomputed ``date_data`` list imported from
            ``scripts.weiboWordData``; it is converted with ``int()`` first.

    Returns:
        The configured pyecharts ``WordCloud`` chart (not yet rendered).
    """
    # Imported lazily, inside the function, exactly as the original did.
    from scripts.weiboWordData import date_data

    # Each entry stores the date first and the word list second.
    entry = date_data[int(dateId)]
    words = entry[1]
    date = entry[0]

    cloud = WordCloud()
    cloud.add("", words, word_size_range=[20, 100], shape=SymbolType.ROUND_RECT)
    cloud.set_global_opts(
        title_opts=opts.TitleOpts(title='全國新型冠狀病毒疫情微博每日主題詞詞雲圖 '+str(date))
    )
    return cloud