Python實戰---使用BeautifulSoup和pyecharts爬取中國天氣網並展示

使用BeautifulSoup和pyecharts爬取中國天氣網並展示

1、使用requests進行頁面抓取
2、使用BeautifulSoup進行頁面分析
3、使用pyecharts進行數據展示(柱狀圖)
爬取的頁面爲中國天氣網的分區文字版天氣預報頁面,例如 http://www.weather.com.cn/textFC/hb.shtml
(原文此處爲該頁面的截圖)
爬取的數據字段:
城市 city
最低溫度 min_temperature
在這裏插入圖片描述
實現代碼:

'''
@Description: 爬取中國天氣網
@Author: sikaozhifu
@Date: 2020-06-08 13:53:15
@LastEditTime: 2020-06-08 16:17:11
@LastEditors: Please set LastEditors
'''
import requests
from bs4 import BeautifulSoup
from pyecharts.charts import Bar
from pyecharts import options 

# Accumulates one {'city': ..., 'min_temperature': ...} dict per scraped row;
# appended to by parse_weather() and sorted/read by show_weather().
city_data = []


def _first_text(cell):
    """Return the first non-blank text fragment inside ``cell``, or None."""
    return next(cell.stripped_strings, None)


def parse_weather(url):
    """Scrape one regional forecast page and append its rows to ``city_data``.

    Downloads ``url``, parses it with the html5lib parser and walks every
    table inside the first ``div.conMidtab`` element. For each row after the
    first two, the city name and the value of the second-to-last cell (stored
    as the minimum temperature) are appended to the module-level
    ``city_data`` list as ``{'city': str, 'min_temperature': int}``.

    Rows with too few cells, no text in the needed cells, or a temperature
    that is not an integer are skipped so that one malformed row does not
    abort the whole scrape.

    Args:
        url: Address of the forecast page to fetch.

    Raises:
        requests.RequestException: On timeout, connection failure or an HTTP
            error status.
        ValueError: If the page has no ``div.conMidtab`` container.
    """
    headers = {
        'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36'
    }
    # A timeout keeps a stalled connection from hanging the script forever.
    response = requests.get(url, headers=headers, timeout=10)
    # Fail fast on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()
    text = response.content.decode('utf-8')

    # html5lib is used instead of lxml because the Hong Kong / Macao / Taiwan
    # weather page is not well-formed HTML.
    soup = BeautifulSoup(text, 'html5lib')
    container = soup.find('div', class_='conMidtab')
    if container is None:
        raise ValueError(
            "no <div class='conMidtab'> found in page: {}".format(url))

    for table in container.find_all('table'):
        # The first two rows are skipped (presumably table headers).
        for index, tr in enumerate(table.find_all('tr')[2:]):
            cells = tr.find_all('td')
            if len(cells) < 2:
                continue
            # The first remaining row keeps the city in the second cell
            # (presumably because a leading province cell spans the rows);
            # every later row has it in the first cell.
            city = _first_text(cells[1] if index == 0 else cells[0])
            raw_temperature = _first_text(cells[-2])
            if city is None or raw_temperature is None:
                continue
            try:
                min_temperature = int(raw_temperature)
            except ValueError:
                # Non-numeric placeholder in the temperature cell.
                continue
            city_data.append({
                'city': city,
                'min_temperature': min_temperature
            })


def weather_spider_main():
    """Run parse_weather() once for each regional forecast page listed below."""
    region_pages = (
        'http://www.weather.com.cn/textFC/hb.shtml',
        'http://www.weather.com.cn/textFC/db.shtml',
        'http://www.weather.com.cn/textFC/hd.shtml',
        'http://www.weather.com.cn/textFC/hz.shtml',
        'http://www.weather.com.cn/textFC/hn.shtml',
        'http://www.weather.com.cn/textFC/xb.shtml',
        'http://www.weather.com.cn/textFC/xn.shtml',
        'http://www.weather.com.cn/textFC/gat.shtml',
    )
    # Pages are processed in the order listed; results accumulate in city_data.
    for page_url in region_pages:
        parse_weather(page_url)


def show_weather():
    """Render a bar chart of the ten entries with the lowest minimum temperature.

    Sorts the module-level ``city_data`` list in place by ``min_temperature``
    (ascending), takes the first ten entries and writes the chart to
    ``bar_min_temperature.html``.
    """
    city_data.sort(key=lambda entry: entry['min_temperature'])
    coldest = city_data[:10]

    # Split the selected entries into parallel x-axis / y-axis sequences.
    cities = [entry['city'] for entry in coldest]
    min_temperatures = [entry['min_temperature'] for entry in coldest]

    chart_title = options.TitleOpts(title='中國溫度最低的十大城市')
    bar = Bar()
    bar.add_xaxis(cities)
    bar.add_yaxis('溫度', min_temperatures)
    bar.set_global_opts(title_opts=chart_title)
    bar.render('bar_min_temperature.html')


# Script entry point: scrape every region page first, then render the chart.
if __name__ == '__main__':
    weather_spider_main()
    show_weather()

效果圖展示:
(原文此處爲效果圖:執行程式後會產生 bar_min_temperature.html,以柱狀圖顯示最低溫度最低的十個城市)

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章