Python爬蟲示例1:獲取豆瓣電影正在熱播的電影信息

這是我個人的第一個Python爬蟲程序,如有錯誤,請指正。

程序用於抓取豆瓣電影正在熱播的電影信息,詳細代碼如下所示:

 

#!/usr/bin/python
# -*- coding: UTF-8 -*-
import os,requests
from bs4 import BeautifulSoup

def get_url_page(url,headers,timeout=10):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.
        headers: HTTP request headers handed to ``requests.get``.
        timeout: Seconds to wait for the server before giving up; handed to
            ``requests.get`` so a stalled connection cannot block forever.

    Returns:
        The response text when the status code is 200, or ``None`` when
        ``requests`` raises a ``RequestException`` (connection error,
        timeout, invalid URL, ...).

    Raises:
        Exception: If the server answers with a status code other than 200.
    """
    # Only the network call sits inside the try block, so the status-code
    # error raised below is never swallowed by the handler.
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.exceptions.RequestException:
        # The original referenced a bare ``RequestException`` that was never
        # imported, which turned every failure into a NameError.
        return None
    if response.status_code == 200:
        return response.text
    raise Exception('獲取頁面信息異常,response_stauts = %s '%response.status_code)

if __name__ == '__main__':
    # Script entry point: download the Douban movie home page, save its HTML
    # next to this script, then create one folder per movie found in the
    # "screening" section containing the poster image and a text summary.
    print('採集豆瓣電影信息')

    # Fetch the page.
    url = 'https://movie.douban.com/'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:47.0) Gecko/20100101 Firefox/47.0'}
    page_content = get_url_page(url,headers)
    if page_content is None:
        # get_url_page returns None when the request itself failed; there is
        # nothing to save or parse, so stop instead of crashing on write().
        raise SystemExit('獲取頁面信息失敗')

    # All output is written relative to the directory holding this script.
    # Absolute paths are used throughout, so no os.chdir() is needed.
    folder_path,file_name = os.path.split(os.path.realpath(__file__))

    # Store the raw HTML as "<script name>.html".
    html_file = os.path.join(folder_path, os.path.splitext(file_name)[0] + '.html')
    with open(html_file,'w',encoding='utf-8') as html_out:
        html_out.write(page_content)

    # Parse the text that was just saved (same content as the file, without
    # leaving an unclosed file handle behind).
    soup = BeautifulSoup(page_content,features='html.parser')

    # Characters that cannot appear in a file name on Windows (and '/' on
    # POSIX) are replaced by '_' before a title is used as a path component.
    unsafe_chars = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})

    for section in soup.find_all('div',attrs = {"id":"screening"}):
        for item in section.find_all('li',attrs = {"class":"ui-slide-item"}):
            # Only list items carrying a data-director attribute are handled.
            if 'data-director' not in item.attrs:
                continue

            poster = item.ul.li.a.img
            movie_name = poster['alt'].translate(unsafe_chars)

            # One directory per movie, named after the poster's alt text.
            movies_path = os.path.join(folder_path,movie_name)
            os.makedirs(movies_path,exist_ok=True)

            # Download the poster image into the movie directory.
            img = requests.get(poster['src'])
            if img.status_code == 200:
                with open(os.path.join(movies_path,'1.jpg'),'wb') as img_out:
                    img_out.write(img.content)
            else:
                print('圖片不存在')

            # Write the movie details read from the item's data-* attributes.
            info_path = os.path.join(movies_path,movie_name + '.txt')
            with open(info_path,'w',encoding='utf-8') as fo:
                fo.write('電影標題:' + item['data-title'] + '\n')
                fo.write('導演:' + item['data-director'] + '\n')
                fo.write('時長:' + item['data-duration'] + '\n')
                fo.write('國家:' + item['data-region'] + '\n')
                fo.write('評分:' + item['data-rate'] + '\n')
             

代碼運行效果如下所示:

 

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章