Python爬取PM2.5數據併入mysql庫

爬取網站:http://www.pm25.in/jiangyin 和 http://www.pm25.in/suzhou

完整代碼:

# 導入模塊
import datetime

from bs4 import BeautifulSoup
import requests
import pymysql

# Open the database connection and create a cursor object with cursor().
# `password` and `database` are PyMySQL's canonical keyword names; the
# original `passwd` / `db` spellings are deprecated compatibility aliases.
# NOTE(review): the credentials are hard-coded and the target schema is the
# server's own `mysql` system database -- confirm this is intended.
conn = pymysql.connect(
    host='localhost',
    user='root',
    password='root',
    database='mysql',
    port=3306,
    charset='utf8',
)

cursor = conn.cursor()

# Download a city page with a browser-like User-Agent header and store the
# rows of its data table in the database.
def get_temperature(url):
    """Scrape the data table at ``url`` and insert one ``aqidata`` row per ``<tr>``.

    Despite the name, the values stored are the air-quality columns named in
    the INSERT statement below, not temperatures.

    The page is fetched with ``requests``, parsed with BeautifulSoup/lxml, and
    every ``<tr>`` inside every ``<tbody>`` of the ``div.table`` element is
    written through the module-level ``cursor``. Nothing is committed here;
    the caller is responsible for calling ``conn.commit()``.

    Args:
        url: Address of the city page to download.

    Raises:
        requests.exceptions.RequestException: If the download fails or does
            not complete within the timeout.
        AttributeError: If the ``table``, ``live_data_time`` or ``city_name``
            div (or the ``<p>`` inside ``live_data_time``) is missing.
        IndexError: If a table row has fewer than 11 ``<td>`` cells.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
    }  # Browser-like request header.

    # A timeout keeps an unresponsive server from blocking the script forever.
    response = requests.get(url, headers=headers, timeout=30).content
    soup = BeautifulSoup(response, "lxml")

    table_div = soup.find('div', class_='table')
    time_div = soup.find('div', class_='live_data_time')
    city_div = soup.find('div', class_='city_name')

    # Both values are identical for every row of the page, so read them once.
    # Characters 7..25 of the paragraph are kept (19 characters); presumably
    # this is the timestamp that follows a fixed-length label -- TODO confirm
    # against the live page.
    date = time_div.find('p').text[7:26]
    city_name = city_div.text

    # Placeholders are filled in by the driver, which escapes each value.
    # Building the statement with the % operator (as before) broke on any
    # value containing a quote and allowed scraped text to inject SQL.
    sql = (
        "INSERT INTO aqidata(POSITION, DATE, AQI, GRADE, PM25,PM10,SO2,CO,NO2,O3_8h,CITY) "
        "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    )

    for tbody in table_div.find_all('tbody'):
        for tr in tbody.find_all('tr'):
            cells = [td.text.replace('\n', '') for td in tr.find_all('td')]
            print(date)  # One line of progress output per row.

            # Cells 3 and 8 are not stored; SO2 is read from cell 10.
            params = (
                cells[0],   # POSITION
                date,       # DATE
                cells[1],   # AQI
                cells[2],   # GRADE
                cells[4],   # PM25
                cells[5],   # PM10
                cells[10],  # SO2
                cells[6],   # CO
                cells[7],   # NO2
                cells[9],   # O3_8h
                city_name,  # CITY
            )
            cursor.execute(sql, params)


if __name__ == '__main__':
    # City pages to scrape.
    urls = ['http://www.pm25.in/jiangyin', 'http://www.pm25.in/suzhou']

    try:
        for url in urls:
            get_temperature(url)
        # Commit once, after every page has been processed.
        conn.commit()
    finally:
        # Release the cursor and the connection even when scraping fails;
        # previously they were left open until the interpreter exited.
        cursor.close()
        conn.close()
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章