用python爬取時光網top100電視劇存入mysql數據庫

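大家好,我是天空之城,今天給大家帶來用python爬取時光網top100電視劇存入mysql數據庫。注意代碼裏有個if條件判斷語句:有的電視劇沒有簡介,此時 find 找不到對應的標籤會返回 None,直接取 text 會報錯,所以在取 text 之前加了一個判斷,沒有簡介時存入空字符串。另外,示例代碼中的 range(2,5) 只爬取第2~4頁,如需完整的top100,請自行調整頁碼範圍。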
首先需要自己在數據庫中建立表格(表名爲 movies7),建表代碼如下:


import pymysql

# One-off setup script: creates the `movies7` table that the scraper
# writes into.
# The password and database name below are placeholders and must be replaced
# with real values before running.
conn = pymysql.connect(host='127.0.0.1', port=3306, user='root', passwd='你的數據庫密碼', db='你的數據庫名稱', charset='utf8')

# Columns: auto-increment id, title, director (daoyan), lead actors (zhuyan)
# and synopsis (jianjie). Director and synopsis are nullable.
sql = """create table movies7(
        m_id int primary key auto_increment,
        movie_name varchar (100) not null,
        movie_daoyan varchar(200) null,
        movie_zhuyan varchar(110) not null,
        movie_jianjie varchar(200) null
        )"""

try:
    # The cursor context manager closes the cursor even when execute() raises
    # (for example because the table already exists).
    with conn.cursor() as cursor:
        cursor.execute(sql)
finally:
    # Always release the connection, on success and on failure alike.
    conn.close()


以下是爬取和存儲數據部分



import requests,pymysql
from bs4 import BeautifulSoup

# Scrape the mtime.com TV top-100 list pages and store one row per show in
# the `movies7` table.

# Connect to the database. The password and database name are placeholders
# and must be replaced with real values before running.
conn = pymysql.connect(host='127.0.0.1', port=3306, user='root', passwd='你的數據庫密碼', db='你的數據庫名稱', charset='utf8')

# Request headers sent with every page fetch.
# NOTE(review): the Cookie value looks like it was copied from a browser
# session and has probably expired - confirm and refresh it if requests fail.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:75.0) Gecko/20100101 Firefox/75.0',
           'Referer': 'http://www.mtime.com/top/tv/top100/',
           'Cookie': '_userCode_=202052221483601; _userIdentity_=202052221485994; DefaultCity-CookieKey=627; DefaultDistrict-CookieKey=0; _tt_=6A0D5B7802C889B92ADDF4B9DA5330DC; Hm_lvt_6dd1e3b818c756974fb222f0eae5512e=1588428109,1588460637; __utma=196937584.392826187.1588428110.1588463461.1588470412.4; __utmz=196937584.1588428110.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); waf_cookie=14c7c021-c3bd-4e2a998ff60044124738825863a8e64fcc61; Hm_lpvt_6dd1e3b818c756974fb222f0eae5512e=1588470414; __utmc=196937584; _ydclearance=e11e8c3d48a0aef51a6ece4e-dd11-4511-ae6f-98e6a44ceddd-1588477611; __utmb=196937584.4.10.1588470412; __utmt=1; __utmt_~1=1'
           }

# Parameterized statement: the driver escapes every value, so a title or
# synopsis containing quotes or backslashes can no longer break the SQL or
# inject into it.
insert_sql = 'insert into movies7 (movie_name, movie_daoyan, movie_zhuyan,movie_jianjie) values (%s,%s,%s,%s)'

try:
    with conn.cursor() as cursor:
        # NOTE(review): only list pages 2-4 are fetched here, not the whole
        # top 100. Widen the range (and handle page 1, whose URL presumably
        # differs) if the complete list is wanted - verify against the site.
        for i in range(2, 5):
            res = requests.get(headers=headers, url='http://www.mtime.com/top/tv/top100/index-{}.html'.format(str(i)), timeout=10)
            # Fail with a clear HTTP error instead of parsing an error page.
            res.raise_for_status()
            # Make res.text decode the body as UTF-8. The previous
            # res.content.decode('utf-8') call discarded its result and so
            # had no effect on res.text.
            res.encoding = 'utf-8'
            soup = BeautifulSoup(res.text, 'html.parser')

            items = soup.find('div', class_='top_list')
            if items is None:
                # Page layout differs from what is expected (for example an
                # anti-crawler page): report it and move on to the next page.
                print('page {}: no "top_list" container found, skipped'.format(i))
                continue

            for film in items.find_all('li'):
                title_tag = film.find('h2')
                paragraphs = film.find_all('p')
                # Skip <li> elements that do not carry a title plus the two
                # expected <p> lines, rather than crashing on them.
                if title_tag is None or len(paragraphs) < 2:
                    continue

                name1 = title_tag.text                 # title
                daoyan = paragraphs[0].text.strip()    # director
                zhuyan = paragraphs[1].text.strip()    # lead actors

                # Some shows have no synopsis: find() then returns None, so
                # check before reading .text and fall back to ''.
                jianjie_tag = film.find('p', class_="mt3")
                jianjie1 = jianjie_tag.text.strip() if jianjie_tag is not None else ''

                cursor.execute(insert_sql, (name1, daoyan, zhuyan, jianjie1))

    # Commit once, after every page has been processed successfully.
    conn.commit()
finally:
    # Always release the connection, on success and on failure alike.
    conn.close()



看下截圖
在這裏插入圖片描述

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章