1.相關網址和庫
參考文章:視頻下載,那些事兒!
網址
http://www.jisudhw.com/
需要用到的庫
requests、beautifulsoup4(bs4,搭配 lxml 解析器)、ffmpy3(需要本機已安裝 ffmpeg)
2.代碼實現
import os
import requests
import ffmpy3
from bs4 import BeautifulSoup
from multiprocessing.dummy import Pool as ThreadPool
class VideoSpider(object):
    """Search jisudhw.com for a video and download its episodes via ffmpeg.

    Workflow (see ``run``): POST the search form, take the first search hit,
    fetch its detail page, collect the per-episode stream URLs, then hand
    each URL to ffmpeg through ``ffmpy3``.

    Relies on the file-level imports ``os``, ``requests``, ``ffmpy3``,
    ``BeautifulSoup`` and ``ThreadPool``.
    """

    def __init__(self, search_keyword='越獄第一季'):
        """Set up request parameters and empty result containers.

        Args:
            search_keyword: Title to search for. Defaults to the keyword the
                script was originally hard-coded with.
        """
        self.server = 'http://www.jisudhw.com'
        self.search_url = 'http://www.jisudhw.com/index.php'
        self.search_keyword = search_keyword
        self.headers = {
            'User-Agent': (
                'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                'AppleWebKit/537.36 (KHTML, like Gecko) '
                'Chrome/83.0.4103.97 Safari/537.36 Edg/83.0.478.45'
            ),
            'Referer': 'http://www.jisudhw.com/',
            'Origin': 'http://www.jisudhw.com',
            'Host': 'www.jisudhw.com',
        }
        self.search_params = {'m': 'vod-search'}
        self.form_data = {
            'wd': self.search_keyword,
            'submit': 'search',
        }
        # Only the first search result is kept: {'name': ..., 'url': ...}.
        self.res_dict = {}
        # One {'chapter': ..., 'url': ...} dict per downloadable episode.
        self.down_list = []
        self.video_save_dir = ''
        # Seconds to wait on each HTTP request; without a timeout a stalled
        # server would hang the script forever.
        self.timeout = 30

    # 1. Send requests
    def post_request(self, url):
        """POST the search form to ``url`` and return the body as text.

        Raises:
            requests.RequestException: On network failure, timeout, or an
                HTTP error status.
        """
        response = requests.post(
            url,
            params=self.search_params,
            headers=self.headers,
            data=self.form_data,
            timeout=self.timeout,
        )
        # Fail early instead of parsing an error page as search results.
        response.raise_for_status()
        return response.content.decode('utf-8')

    def get_response(self, url):
        """GET ``url`` and return the body as text.

        Raises:
            requests.RequestException: On network failure, timeout, or an
                HTTP error status.
        """
        response = requests.get(url, headers=self.headers,
                                timeout=self.timeout)
        response.raise_for_status()
        return response.content.decode('utf-8')

    # 2. Parse data
    # 2.1 Parse the search results
    def parse_list_data(self, data):
        """Store the first search hit's name and absolute URL in ``res_dict``.

        ``res_dict`` is left untouched when the page contains no usable hit.
        """
        soup = BeautifulSoup(data, 'lxml')
        for span in soup.find_all('span', attrs={'class': 'xing_vb4'}):
            link = span.a
            href = link.get('href') if link is not None else None
            if not href:
                continue
            self.res_dict['name'] = link.get_text(strip=True)
            self.res_dict['url'] = self.server + href
            # Stop at the first hit; continuing would overwrite it with
            # later (less relevant) results.
            break

    # 2.2 Parse the detail page and collect the download links
    def parse_detail_data(self, video_data):
        """Append one ``{'chapter', 'url'}`` dict per episode to ``down_list``.

        Episodes are read from the ``<li>`` items inside ``div[id="1"]``;
        items without an ``<input value=...>`` are skipped. Nothing is
        appended when that ``div`` is absent.
        """
        soup = BeautifulSoup(video_data, 'lxml')
        container = soup.select_one('div[id="1"]')
        if container is None:
            return
        for item in container.select('li'):
            field = item.input
            link = field.get('value') if field is not None else None
            if not link:
                continue
            self.down_list.append({
                # Item text looks like "<chapter>$<url>"; keep the chapter.
                'chapter': item.text.split('$')[0].strip(),
                'url': link,
            })

    # 3. Download a video
    def downVideo(self, down_data):
        """Download one episode with ffmpeg into ``video_save_dir``.

        Args:
            down_data: Dict with ``'url'`` (stream URL) and ``'chapter'``
                (used as the output file name).
        """
        url = down_data['url']
        target = os.path.join(self.video_save_dir, down_data['chapter'])
        # ffmpeg infers the container from the output extension; a bare
        # chapter name has none, so default to MP4.
        if not target.lower().endswith('.mp4'):
            target += '.mp4'
        ffmpy3.FFmpeg(inputs={url: None}, outputs={target: None}).run()

    def run(self, download_all=False, workers=8):
        """Search, resolve the episode list, and download.

        Args:
            download_all: When true, download every episode using a thread
                pool; otherwise download only the first episode (the
                original default behaviour).
            workers: Thread-pool size used when ``download_all`` is true.

        Raises:
            KeyError: The search returned no usable result.
            IndexError: The detail page listed no download links.
        """
        data = self.post_request(self.search_url)
        self.parse_list_data(data)
        if 'url' not in self.res_dict:
            raise KeyError(
                'no search result for %r' % (self.search_keyword,))

        video_data = self.get_response(self.res_dict['url'])
        self.parse_detail_data(video_data)
        if not self.down_list:
            raise IndexError(
                'no download links found at %s' % self.res_dict['url'])

        video_name = self.res_dict['name']
        self.video_save_dir = os.path.join('./output', video_name)
        # makedirs also creates the parent './output' on a first run.
        os.makedirs(self.video_save_dir, exist_ok=True)

        if download_all:
            # Download all episodes concurrently.
            pool = ThreadPool(workers)
            try:
                pool.map(self.downVideo, self.down_list)
            finally:
                pool.close()
                pool.join()
        else:
            # Download only the first episode.
            self.downVideo(self.down_list[0])
if __name__ == '__main__':
    # Script entry point: build a spider with its default settings and run
    # the search-and-download workflow.
    spider = VideoSpider()
    spider.run()
3.小結
- 利用 ffmpeg 下載視頻
- 多線程下載(可選;示例預設只下載第一集)