import requests
import time
from bs4 import BeautifulSoup
import re
# Accumulates one [name, category, release date] list per movie scraped so far,
# across all result pages.
movie = []
def search(url, retries=3, timeout=10):
    """Download *url* and return the response body as text.

    The request is sent with a browser-like ``user-agent`` header. When the
    request fails it is retried a bounded number of times, with a short pause
    between attempts.

    Args:
        url: Address to fetch.
        retries: How many additional attempts to make after the first
            failure. Values below zero are treated as zero.
        timeout: Seconds to wait for the server before an attempt is
            considered failed.

    Returns:
        The decoded body of the response (``Response.text``).

    Raises:
        requests.RequestException: if every attempt failed; the exception of
            the final attempt is propagated.
    """
    headers = {'user-agent': 'Mozilla/5.0'}
    attempts = max(1, retries + 1)
    for attempt in range(attempts):
        try:
            response = requests.get(url, headers=headers, timeout=timeout)
        except requests.RequestException:
            # Only network/HTTP-layer errors are retried, so Ctrl-C and
            # programming errors are no longer swallowed.
            if attempt == attempts - 1:
                raise
            time.sleep(1)  # brief pause so a struggling server is not hammered
        else:
            return response.text
def parse_page(url, k):
    """Scrape one search-result page and print the movies found on it.

    The page is fetched with ``search``. Titles, categories and release dates
    are extracted with regular expressions, and one
    ``[name, category, release_date]`` list per title is appended to the
    module-level ``movie`` list. Afterwards entries ``k + 1`` through
    ``len(movie)`` (1-based) are printed.

    When the page contains no titles, the total number of collected movies is
    printed and the process terminates.

    Args:
        url: Address of the result page to scrape.
        k: Number of entries at the front of ``movie`` to skip when printing;
            output starts at 1-based position ``k + 1``.

    Raises:
        RuntimeError: if ``search`` returned no HTML for the page.
        SystemExit: when the page contains no movie titles.
    """
    # Local import: ``sys.exit`` replaces the ``exit`` helper injected by the
    # ``site`` module, which is not guaranteed to exist in every interpreter.
    import sys

    html = search(url)
    if html is None:
        # Without this guard a failed download surfaces as an opaque
        # TypeError inside re.findall.
        raise RuntimeError('failed to download ' + url)

    name_pattern = re.compile('class="channel-detail movie-item-title" title="(.*?)">', re.S)
    category_pattern = re.compile('class="movie-item-cat">(.*?)</div>')
    release_pattern = re.compile('class="movie-item-pub">(.*?)</div>')
    names = name_pattern.findall(html)
    categories = category_pattern.findall(html)
    release_dates = release_pattern.findall(html)

    if not names:
        print('爬取結束')
        print('共'+str(len(movie))+'部')
        sys.exit()

    for index, name in enumerate(names):
        # The three patterns are matched independently, so a result that
        # lacks a category or date block leaves those lists shorter than
        # ``names``; fall back to the placeholder instead of raising.
        category = categories[index] if index < len(categories) else '暫無'
        release = release_dates[index] if index < len(release_dates) else ''
        movie.append([name, category, release or '暫無'])

    for position in range(k + 1, len(movie) + 1):
        entry = movie[position - 1]
        print('第' + str(position) + '部:')
        print('電影名稱:'+entry[0]+' 電影類型:'+entry[1]+' 上映時間:'+entry[2])
        print('----------------------------------------------------------------------------')
def main(k, sname, offset):
    """Build the search URL for one result page and scrape it.

    Args:
        k: Passed through to ``parse_page`` as the number of already-printed
            entries.
        sname: Search keyword as typed by the user.
        offset: Value of the ``offset`` query parameter for the result page.
    """
    # Local import keeps the block self-contained.
    from urllib.parse import quote

    # Percent-encode the keyword so characters such as '&', '#', '%' or
    # spaces in the user's input cannot alter the query string.
    keyword = quote(sname, safe='')
    url = 'https://maoyan.com/query?kw=' + keyword + '&type=0&offset=' + str(offset)
    parse_page(url, k)
if __name__ == '__main__':
    # Script entry point: ask for a keyword, then walk the result pages.
    PAGE_SIZE = 20   # step between consecutive page offsets
    PAGE_COUNT = 20  # upper bound on the number of pages requested

    sname = input("請輸入電影信息:")
    print('-----------------------------開始爬取---------------------------------')
    time.sleep(1)
    for start in range(0, PAGE_COUNT * PAGE_SIZE, PAGE_SIZE):
        # The same value serves as the print offset and the URL offset.
        main(start, sname, offset=start)
        time.sleep(1)  # pause between page requests