大家好,我是天空之城。今天給大家帶來:用 Python 爬取時光網 Top100 電視劇,並存入 MySQL 數據庫。注意爬取代碼裏有一個 if 條件判斷語句:因爲有的電視劇沒有簡介(查找結果是空值),所以要先判斷,再取文本。
請先自己在數據庫中建立表格,建表代碼如下:
import pymysql
# One-off setup script: creates the `movies7` table that the scraper below
# inserts into. Replace the placeholder password and database name first.
conn = pymysql.connect(host='127.0.0.1', port=3306, user='root', passwd='你的數據庫密碼', db='你的數據庫名稱', charset='utf8')

# Columns: auto-increment id, series name, director line, cast line and an
# optional synopsis (nullable, because some entries have none).
sql = """create table movies7(
m_id int primary key auto_increment,
movie_name varchar (100) not null,
movie_daoyan varchar(200) null,
movie_zhuyan varchar(110) not null,
movie_jianjie varchar(200) null
)"""

try:
    # The cursor context manager closes the cursor even if execute() raises
    # (e.g. when the table already exists).
    with conn.cursor() as cursor:
        cursor.execute(sql)
finally:
    # Always release the connection, on success or failure.
    conn.close()
以下是爬取和存儲數據的部分:
import requests,pymysql
from bs4 import BeautifulSoup
# Connect to the database (replace the placeholder password / database name).
conn = pymysql.connect(host='127.0.0.1', port=3306, user='root', passwd='你的數據庫密碼', db='你的數據庫名稱', charset='utf8')
cursor = conn.cursor()

# Request headers copied from a browser session.
# NOTE(review): the Cookie value looks session-specific and has presumably
# expired; refresh it from your own browser if the site rejects the request.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:75.0) Gecko/20100101 Firefox/75.0',
           'Referer': 'http://www.mtime.com/top/tv/top100/',
           'Cookie': '_userCode_=202052221483601; _userIdentity_=202052221485994; DefaultCity-CookieKey=627; DefaultDistrict-CookieKey=0; _tt_=6A0D5B7802C889B92ADDF4B9DA5330DC; Hm_lvt_6dd1e3b818c756974fb222f0eae5512e=1588428109,1588460637; __utma=196937584.392826187.1588428110.1588463461.1588470412.4; __utmz=196937584.1588428110.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); waf_cookie=14c7c021-c3bd-4e2a998ff60044124738825863a8e64fcc61; Hm_lpvt_6dd1e3b818c756974fb222f0eae5512e=1588470414; __utmc=196937584; _ydclearance=e11e8c3d48a0aef51a6ece4e-dd11-4511-ae6f-98e6a44ceddd-1588477611; __utmb=196937584.4.10.1588470412; __utmt=1; __utmt_~1=1'
           }

# Parameterized INSERT: the driver escapes the values, so scraped text that
# contains quotes can neither break the statement nor inject SQL.
sql = 'insert into movies7 (movie_name, movie_daoyan, movie_zhuyan,movie_jianjie) values (%s,%s,%s,%s)'


def _text_or_empty(tag):
    """Return the stripped text of a parsed tag, or '' when the tag is None."""
    return tag.text.strip() if tag is not None else ''


try:
    # NOTE(review): this fetches only index-2 .. index-4; confirm the page
    # range against the site if all 100 entries are wanted.
    for i in range(2, 5):
        res = requests.get(
            url='http://www.mtime.com/top/tv/top100/index-{}.html'.format(str(i)),
            headers=headers,
            timeout=10,  # do not hang forever on an unresponsive server
        )
        if not res.ok:
            print('page {}: HTTP {}, skipped'.format(i, res.status_code))
            continue
        # Make res.text decode as UTF-8 instead of the guessed encoding.
        res.encoding = 'utf-8'
        soup = BeautifulSoup(res.text, 'html.parser')
        items = soup.find('div', class_='top_list')
        if items is None:
            # Layout changed or an anti-bot page was served: nothing to parse.
            print('page {}: no top_list container found, skipped'.format(i))
            continue
        for film in items.find_all('li'):
            title = film.find('h2')
            if title is None:
                # movie_name is NOT NULL, so an entry without a title is skipped.
                continue
            name1 = title.text  # series name
            paragraphs = film.find_all('p')
            # First <p> is the director line, second the cast line; fall back
            # to '' when an entry has fewer paragraphs.
            daoyan = _text_or_empty(paragraphs[0] if len(paragraphs) > 0 else None)
            zhuyan = _text_or_empty(paragraphs[1] if len(paragraphs) > 1 else None)
            # The synopsis paragraph is missing for some entries.
            jianjie1 = _text_or_empty(film.find('p', class_="mt3"))
            cursor.execute(sql, (name1, daoyan, zhuyan, jianjie1))
        # Persist each page once it has been fully inserted.
        conn.commit()
finally:
    # Release database resources even when a request or insert fails.
    cursor.close()
    conn.close()
運行結果請看下面的截圖。