爬蟲(四)下載視頻

1.相關網址和庫

參考文章:視頻下載,那些事兒!
網址

http://www.jisudhw.com/

在這裏插入圖片描述
需要用到的庫
requests、beautifulsoup4(搭配 lxml 解析器)、ffmpy3(需先安裝 ffmpeg 並加入 PATH)

2.代碼實現

import os
import requests
import ffmpy3
from bs4 import BeautifulSoup
from multiprocessing.dummy import Pool as ThreadPool


class VideoSpider(object):
    """Search jisudhw.com for a title and download its episodes with ffmpeg.

    Workflow (see ``run``): POST the search form, keep the first hit, fetch
    its detail page, collect the per-episode stream URLs and hand them to
    ffmpeg through ``ffmpy3``.

    Attributes:
        res_dict: ``{'name': ..., 'url': ...}`` of the first search hit.
        down_list: list of ``{'chapter': ..., 'url': ...}`` episode entries.
        video_save_dir: directory the episodes are written to (set by ``run``).
    """

    # Output names already ending in one of these keep their extension;
    # anything else gets '.mp4' appended so ffmpeg can pick a container.
    _VIDEO_SUFFIXES = ('.mp4', '.mkv', '.flv', '.ts')

    def __init__(self, search_keyword='越獄第一季', timeout=10):
        """Prepare request metadata.

        Args:
            search_keyword: title to search for on the site.
            timeout: per-request timeout in seconds for the HTTP calls.
        """
        self.server = 'http://www.jisudhw.com'
        self.search_url = 'http://www.jisudhw.com/index.php'
        self.search_keyword = search_keyword
        self.timeout = timeout
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36 Edg/83.0.478.45',
            'Referer': 'http://www.jisudhw.com/',
            'Origin': 'http://www.jisudhw.com',
            'Host': 'www.jisudhw.com'
        }
        self.search_params = {'m': 'vod-search'}
        self.form_data = {
            'wd': self.search_keyword,
            'submit': 'search'
        }

        self.res_dict = {}  # only the first search hit is kept
        self.down_list = []

        self.video_save_dir = ''

    # 1. Send requests
    def post_request(self, url):
        """POST the search form to *url* and return the body decoded as UTF-8.

        Raises:
            requests.RequestException: on timeout, connection failure or an
                HTTP error status.
        """
        response = requests.post(url, params=self.search_params,
                                 headers=self.headers, data=self.form_data,
                                 timeout=self.timeout)
        # Fail here rather than parsing an error page further down.
        response.raise_for_status()
        return response.content.decode('utf-8')

    def get_response(self, url):
        """GET *url* and return the body decoded as UTF-8.

        Raises:
            requests.RequestException: on timeout, connection failure or an
                HTTP error status.
        """
        response = requests.get(url, headers=self.headers,
                                timeout=self.timeout)
        response.raise_for_status()
        return response.content.decode('utf-8')

    # 2. Parse data
    # 2.1 Parse the search result page
    def parse_list_data(self, data):
        """Store the first search hit's name and absolute URL in ``res_dict``.

        Hits are the ``<span class="xing_vb4">`` elements; spans without a
        usable link are skipped. ``res_dict`` is left untouched when the
        page contains no hit.
        """
        soup = BeautifulSoup(data, 'lxml')
        for span in soup.find_all('span', attrs={'class': 'xing_vb4'}):
            link = span.a
            href = link.get('href') if link is not None else None
            if not href:
                continue
            self.res_dict['name'] = link.get_text(strip=True)
            self.res_dict['url'] = self.server + href
            # Stop at the first hit; looping on would keep the last one.
            break

    # 2.2 Parse the detail page and collect the download links
    def parse_detail_data(self, video_data):
        """Append one ``{'chapter', 'url'}`` dict per episode to ``down_list``.

        Episodes are the ``<li>`` items inside ``<div id="1">`` (presumably
        the m3u8 source list -- confirm against the live page). The chapter
        label is the item text before the first ``'$'``; the URL is the
        ``value`` of the item's ``<input>``. Items without a URL are skipped,
        and nothing is appended when the container is missing.
        """
        soup = BeautifulSoup(video_data, 'lxml')
        container = soup.select_one('div[id="1"]')
        if container is None:
            return
        for item in container.select('li'):
            field = item.input
            url = field.get('value') if field is not None else None
            if not url:
                continue
            self.down_list.append({
                'chapter': item.text.split('$')[0],
                'url': url,
            })

    @classmethod
    def _episode_filename(cls, chapter):
        """Turn a scraped chapter label into a safe file name with extension."""
        # The label comes from a remote page: keep it a single path component.
        name = chapter.strip().replace('/', '_').replace('\\', '_') or 'video'
        if not name.lower().endswith(cls._VIDEO_SUFFIXES):
            # ffmpeg infers the output container from the file extension.
            name += '.mp4'
        return name

    # 3. Download a video
    def downVideo(self, down_data):
        """Download one episode into ``video_save_dir`` using ffmpeg.

        Args:
            down_data: dict with ``'url'`` (stream URL) and ``'chapter'``
                (label used as the file name).
        """
        url = down_data['url']
        target = os.path.join(self.video_save_dir,
                              self._episode_filename(down_data['chapter']))
        ffmpy3.FFmpeg(inputs={url: None}, outputs={target: None}).run()

    def run(self, download_all=False, workers=8):
        """Search, resolve the episode list and download.

        Args:
            download_all: when true, download every episode with a thread
                pool; otherwise only the first episode (the default).
            workers: thread-pool size used when *download_all* is true.

        Raises:
            LookupError: if the search returns no hit or the detail page
                lists no episodes.
        """
        data = self.post_request(self.search_url)
        self.parse_list_data(data)
        if 'url' not in self.res_dict:
            raise LookupError(
                'no search result for %r' % (self.search_keyword,))

        video_data = self.get_response(self.res_dict['url'])
        self.parse_detail_data(video_data)
        if not self.down_list:
            raise LookupError(
                'no downloadable episodes found at %s' % self.res_dict['url'])

        video_name = self.res_dict['name']
        self.video_save_dir = os.path.join('./output', video_name)
        # makedirs also creates the missing './output' parent.
        os.makedirs(self.video_save_dir, exist_ok=True)

        if download_all:
            # Each download mostly waits on ffmpeg, so threads overlap well.
            pool = ThreadPool(workers)
            try:
                pool.map(self.downVideo, self.down_list)
            finally:
                pool.close()
                pool.join()
        else:
            self.downVideo(self.down_list[0])


if __name__ == '__main__':
    # Start the crawl only when executed as a script, not on import.
    spider = VideoSpider()
    spider.run()

3.小結

  • 利用 ffmpeg 下載視頻
  • 多線程下載
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章