這個例子主要是用來熟悉:1、數據提取 2、數據正則表達式匹配 3、數據展現
不足之處:1、沒有數據處理過程 2、數據展現本可以寫成一個方法,這裏卻直接拷貝了上一個例子裏的代碼
依據李剛的教程撰寫的例子:
#天氣數據獲取
import urllib.request,re
import datetime
import pygal
#1、下載提取數據
def get_html(city, year, month):
    """Download one monthly history page from m.tianqi.com as text.

    Args:
        city: City segment of the URL path (the script passes ``'jinan'``).
        year: Year part of the page name; formatted with ``%s``.
        month: Month part of the page name; formatted with ``%s`` and placed
            directly after ``year`` with no separator.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        urllib.error.URLError: If the request cannot be completed
            (raised by ``urllib.request.urlopen``).
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    url = 'https://m.tianqi.com/lishi/%s/%s%s.html' % (city, year, month)
    request = urllib.request.Request(url)
    # Send a browser-like User-Agent instead of urllib's default one.
    request.add_header('User-Agent', 'Mozilla/5.0')
    # The context manager closes the HTTP response (and its socket) as soon
    # as the body has been read, instead of leaving that to the garbage
    # collector.
    with urllib.request.urlopen(request) as response:
        return response.read().decode('utf-8')
#上面get_html函數在另一臺機器上編譯總是有問題
#print(get_html('jinan','2019','08'))
# Accumulators for the chart: one entry per scraped day. The three lists are
# only ever appended to together, so they stay index-aligned.
dates, highs, lows = [], [], []
city = 'jinan'
year = '2018'
# Zero-padded month strings '01' .. '12'.
months = ['%02d' % i for i in range(1, 13)]
# NOTE(review): `prev` is not read anywhere in the visible code; kept in case
# something outside this view relies on it.
prev = datetime.datetime(2017, 12, 31)

# Compile every pattern once, up front, instead of on each loop iteration.
# They are applied to page text from which ALL whitespace has been removed,
# which is why tag names and attributes appear fused (e.g. ``<divclass=``).
pattern = re.compile(r'<divclass="weatherbox">(.*?)</div><divclass="clearline1">')
# One <dl class="table_day15"> element per day.
pattern_dl = re.compile(r'<dlclass="table_day15">(.*?)</dl>')
# Date cell of a day entry.
date_pattern = re.compile(r'<ddclass="date">(.*?)</dd>')
# Temperature cell of a day entry.
temp_pattern = re.compile(r'<ddclass="txt2">(.*?)</dd>')
# Low temperature: everything before the '~' separator.
low_pattern = re.compile(r'^(.*?)~')
# High temperature: the contents of the <b> element.
high_pattern = re.compile(r'<b>(.*?)</b>')

for month in months:
    html = get_html(city, year, month)
    # Strip all whitespace so the patterns need not account for layout.
    nospace_text = ''.join(html.split())
    div_list = pattern.findall(nospace_text)
    if not div_list:
        # Without the weather box there is nothing to parse for this month;
        # report it and move on rather than failing on div_list[0].
        print('No weather table found for %s-%s; skipping month' % (year, month))
        continue
    dls = pattern_dl.findall(div_list[0])
    # Walk the per-day entries of this month.
    for dl in dls:
        date_dd = date_pattern.findall(dl)
        temp_dd = temp_pattern.findall(dl)
        if not date_dd or not temp_dd:
            print('Incomplete day entry in %s-%s; skipping' % (year, month))
            continue
        low_temp = low_pattern.findall(temp_dd[0])
        high_temp = high_pattern.findall(temp_dd[0])
        if not low_temp or not high_temp:
            print('No temperature range for %s in %s-%s; skipping'
                  % (date_dd[0], year, month))
            continue
        # Convert both readings BEFORE appending anything, so a bad value
        # cannot leave the three lists with different lengths.
        try:
            low_value = int(low_temp[0])
            high_value = int(high_temp[0])
        except ValueError:
            print('Non-numeric temperature for %s in %s-%s; skipping'
                  % (date_dd[0], year, month))
            continue
        # Label is the year plus the first five characters of the date cell
        # (presumably 'MM-DD' -- confirm against the live page).
        d_str = year + '/' + date_dd[0][0:5]
        # 2. Data cleaning is limited to the skips above. Original author's
        # note: the site appears to omit the 31st of each month (not
        # verified here), so gaps in the series are expected.
        dates.append(d_str)
        lows.append(low_value)
        highs.append(high_value)
# 3. Plot the collected temperatures with pygal. The variable is called
# `bar`, but the chart object created here is a pygal.Line.
bar = pygal.Line()

# Two series: daily lows and daily highs.
for series_name, series_values in (('低溫度', lows), ('高溫度', highs)):
    bar.add(series_name, series_values)

# Chart configuration, applied as plain attribute assignments on the chart.
chart_settings = {
    'title': '2018年天氣-溫度',
    'x_labels': dates,
    # Major ticks: every 30th date label.
    'x_labels_major': dates[::30],
    # Hide the minor (non-major) x labels.
    'show_minor_x_labels': False,
    'x_title': '日期',
    'y_title': '溫度',
    'x_label_rotation': 45,
    'legend_at_bottom': True,
    'show_y_guides': True,
    'show_x_guides': False,
}
for option_name, option_value in chart_settings.items():
    setattr(bar, option_name, option_value)

# Write the chart out as an SVG file.
bar.render_to_file('Temperture.svg')