# 这个例子主要用来熟悉:1、数据提取;2、用正则表达式匹配数据;3、数据展现。
# 不足之处:1、没有数据处理(清洗)过程;2、数据展现部分可以封装成一个方法,这里直接拷贝了上一个例子里的代码。
# 本例依据李刚的教程编写。
#天气数据获取
import urllib.request,re
import datetime
import pygal
#1、下载提取数据
def get_html(city, year, month, timeout=30):
    """Download one month of historical weather and return the page as text.

    Args:
        city: City identifier placed in the URL path (e.g. 'jinan').
        year: Year as a string (e.g. '2018').
        month: Month as a string; it is concatenated directly after the
            year in the URL, so callers pass it zero-padded (e.g. '08').
        timeout: Seconds to wait on the network before giving up, so a
            stalled server cannot hang the script indefinitely.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        urllib.error.URLError: If the request fails or times out.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    url = 'https://m.tianqi.com/lishi/%s/%s%s.html' % (city, year, month)
    request = urllib.request.Request(url)
    # Send a browser-like User-Agent instead of urllib's default one
    # (presumably the site rejects the default -- TODO confirm).
    request.add_header('User-Agent', 'Mozilla/5.0')
    # The context manager closes the HTTP response even if read() or
    # decode() raises, so the connection is never leaked.
    with urllib.request.urlopen(request, timeout=timeout) as response:
        return response.read().decode('utf-8')
#上面get_html函数在另一台机器上编译总是有问题
#print(get_html('jinan','2019','08'))
# Initialise the result lists and the query parameters.
dates, highs, lows = [], [], []
city = 'jinan'
year = '2018'
months = ['%02d' % i for i in range(1, 13)]
# Not referenced anywhere else in the visible script; kept so the
# module-level name stays defined.
prev = datetime.datetime(2017, 12, 31)

# Compile every pattern once, outside the loops. All of them are applied to
# HTML from which every whitespace character has been removed, which is why
# the tags read '<divclass=' rather than '<div class='.
pattern = re.compile(r'<divclass="weatherbox">(.*?)</div><divclass="clearline1">')
pattern_dl = re.compile(r'<dlclass="table_day15">(.*?)</dl>')
date_pattern = re.compile(r'<ddclass="date">(.*?)</dd>')
temp_pattern = re.compile(r'<ddclass="txt2">(.*?)</dd>')
low_pattern = re.compile(r'^(.*?)~')        # text before the '~' separator
high_pattern = re.compile(r'<b>(.*?)</b>')  # text inside the <b> element

# 1. Download each month's page and extract the per-day records.
for month in months:
    html = get_html(city, year, month)
    # Strip all whitespace so the patterns need not account for layout.
    nospace_text = ''.join(html.split())
    div_list = pattern.findall(nospace_text)
    if not div_list:
        # Fail with context instead of an opaque IndexError on div_list[0].
        raise ValueError(
            'no weatherbox block found for %s %s-%s; '
            'the page layout may have changed' % (city, year, month))
    # Each <dl class="table_day15"> element holds one day's data.
    dls = pattern_dl.findall(div_list[0])
    for dl in dls:
        date_dd = date_pattern.findall(dl)
        temp_dd = temp_pattern.findall(dl)
        if not date_dd or not temp_dd:
            # Incomplete day entry: skip it rather than crash on [0].
            continue
        low_temp = low_pattern.findall(temp_dd[0])
        high_temp = high_pattern.findall(temp_dd[0])
        if not low_temp or not high_temp:
            continue
        # Convert before appending so the three lists can never get out of
        # step if a temperature turns out not to be an integer.
        low_value = int(low_temp[0])
        high_value = int(high_temp[0])
        # Only the first five characters of the date cell are used; they
        # are appended to the year to form the x-axis label.
        d_str = year + '/' + date_dd[0][0:5]
        # 2. Data cleaning is otherwise not done here. Original author's
        # note: this site's data lacks the 31st of each month.
        dates.append(d_str)
        lows.append(low_value)
        highs.append(high_value)

# 3. Plot the data. Note that `bar` is a pygal line chart despite its name.
bar = pygal.Line()
# Derive the title from `year` so it cannot disagree with the data fetched.
bar.title = '%s年天气-温度' % year
# Add the two data series.
bar.add('低温度', lows)
bar.add('高温度', highs)
# Axis configuration.
bar.x_labels = dates
bar.x_labels_major = dates[::30]  # every 30th date is a major label
bar.show_minor_x_labels = False   # hide the minor labels
bar.x_title = '日期'
bar.y_title = '温度'
bar.x_label_rotation = 45
bar.legend_at_bottom = True
bar.show_y_guides = True
bar.show_x_guides = False
# Write the chart to an SVG file (file name kept exactly as in the original
# so anything that reads the output still finds it).
bar.render_to_file('Temperture.svg')