import random
import sys

import lxml
import requests
from bs4 import BeautifulSoup
'''
https://movie.douban.com/top250
https://movie.douban.com/top250?start=25
https://movie.douban.com/top250?start=50&filter=
'''
# Host header sent with every request.
_DOUBAN_HOST = "movie.douban.com"

# One User-Agent string per browser identity the scraper can present.
_USER_AGENTS = (
    # Chrome 78
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
    # Edge 18 (the string carries an "Edge/18.18362" token)
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
    " (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.18362",
    # Firefox 71
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:71.0) Gecko/20100101 Firefox/71.0",
)

# Each header set is its own dict: User-Agent first, then Host.
header1, header2, header3 = (
    {"User-Agent": agent, "Host": _DOUBAN_HOST} for agent in _USER_AGENTS
)

# Pool of header sets; one is chosen for each page request.
header_list = [header1, header2, header3]
# Every scraped movie record, keyed by title.
datas = {}
# Records whose score reaches _MIN_GENRE_SCORE, grouped by genre and keyed by title.
comedy = {}     # comedy
love = {}       # romance
sci_fi = {}     # science fiction
thriller = {}   # thriller
crime = {}      # crime
animation = {}  # animation

_PAGE_COUNT = 10        # number of list pages to fetch
_PAGE_SIZE = 25         # the "start" offset advances by this much per page
_MIN_GENRE_SCORE = 9.0  # minimum score for a movie to enter a genre dict
_REQUEST_TIMEOUT = 10   # seconds; without a timeout a stalled request hangs forever
# Characters dropped from the front of the first credits token.
# Presumably this is a three-character label such as "导演:" -- confirm against the page.
_LABEL_LENGTH = 3

# Genre keywords and the dict each one feeds.
# NOTE(review): the page text is presumably Simplified Chinese, so both the
# Traditional and the Simplified spelling are accepted -- confirm against the page.
_GENRE_BUCKETS = (
    (('喜劇', '喜剧'), comedy),
    (('愛情', '爱情'), love),
    (('科幻',), sci_fi),
    (('驚悚', '惊悚'), thriller),
    (('犯罪',), crime),
    (('動畫', '动画'), animation),
)


def _parse_details(text):
    """Extract (director, year, genres) from the text of a credits paragraph.

    ``text`` is expected to hold a credits line followed by a line of
    "/"-separated fields whose first field is the year and whose last field
    is the genre list (assumed layout -- confirm against the page).

    Returns ``None`` when fewer than two non-empty lines are present.
    """
    lines = [line.strip() for line in text.splitlines() if line.strip()]
    if len(lines) < 2:
        return None
    # ASCII spaces are removed so a name or genre list collapses into one
    # token; split() then breaks on the remaining whitespace, which includes
    # non-breaking spaces.
    credit_tokens = lines[0].replace(' ', '').split()
    # Split on "/" instead of relying on fixed token positions, which shift
    # whenever the credits line has a different number of tokens.
    fields = [field.strip() for field in lines[-1].replace(' ', '').split('/')]
    director = credit_tokens[0][_LABEL_LENGTH:]
    return director, fields[0], fields[-1]


for page in range(_PAGE_COUNT):
    offset = page * _PAGE_SIZE
    # The first page has no query string; later pages are addressed by offset.
    if offset == 0:
        url = "https://movie.douban.com/top250"
    else:
        url = 'https://movie.douban.com/top250?start=%d&filter=' % offset

    header = random.choice(header_list)
    try:
        response = requests.get(url, headers=header, timeout=_REQUEST_TIMEOUT)
        # Surface HTTP error responses instead of parsing an error page.
        response.raise_for_status()
    except requests.RequestException as exc:
        print('Skipping %s: %s' % (url, exc), file=sys.stderr)
        continue

    page_soup = BeautifulSoup(response.text, 'lxml')
    for item in page_soup.find_all('div', class_='info'):
        try:
            movie_name = item.find('a').find('span').string
            rating_tag = item.find('div', class_='star').find('span', class_='rating_num')
            score = float(rating_tag.string)
            # A newline separator keeps the text before and after <br/> on
            # separate lines regardless of the source formatting.
            details_text = item.find('div', class_='bd').find('p').get_text('\n')
        except (AttributeError, TypeError, ValueError):
            # A tag was missing or the score was not numeric.
            movie_name = None
            details_text = ''

        details = _parse_details(details_text)
        if movie_name is None or details is None:
            print('Skipping unparseable entry on %s' % url, file=sys.stderr)
            continue

        director, year, genres = details
        data = {
            'name': movie_name,
            'director': director,
            'type': genres,
            'time': year,
            'score': score,
        }
        datas[movie_name] = data

        if score >= _MIN_GENRE_SCORE:
            for keywords, bucket in _GENRE_BUCKETS:
                if any(keyword in genres for keyword in keywords):
                    bucket[movie_name] = data
# Row template: field 0 is centred and field 1 left-aligned, each in a
# 10-character column, with field 2 used as the fill character.
tplt = "{0:{2}^10}\t\t\t{1:{2}<10}"


def _print_ranking(heading, movies, limit=10):
    """Print the highest-scored entries of ``movies`` under ``heading``.

    Args:
        heading: Title text; ten '>' characters are appended when printed.
        movies: Dict mapping a title to a record with "name" and "score" keys.
        limit: Maximum number of rows to print.

    Returns:
        A list of (title, record) pairs sorted by score, highest first.
    """
    print(heading + '>' * 10)
    ranked = sorted(movies.items(), key=lambda pair: pair[1]['score'], reverse=True)
    # chr(12288) is U+3000, the full-width space, used as the padding character.
    fill = chr(12288)
    print(tplt.format("電影名稱", "評分", fill))
    for _, movie in ranked[:limit]:
        print(tplt.format(movie["name"], movie["score"], fill))
    print()
    return ranked


# Each name is rebound from its dict to the sorted list of (title, record) pairs.
# Overall top list
datas = _print_ranking("豆瓣評分最高", datas)
# Comedy
comedy = _print_ranking("喜劇電影推薦", comedy)
# Romance
love = _print_ranking("愛情電影推薦", love)
# Science fiction
sci_fi = _print_ranking("科幻電影推薦", sci_fi)
# Thriller
thriller = _print_ranking("驚悚電影推薦", thriller)
# Crime
crime = _print_ranking("犯罪電影推薦", crime)
# Animation
animation = _print_ranking("動畫電影推薦", animation)