賊快
代碼:
def thread(url):
    """Download ``url`` into ``2.mp4`` with several ranged requests in parallel.

    A first request reads the ``content-length`` header, the output file is
    pre-allocated to that size, and the byte span is split into contiguous,
    non-overlapping ranges that are fetched by ``Handler`` worker threads.
    The elapsed wall-clock time is printed once every worker has finished.

    Args:
        url: Direct link to the file to download.

    Raises:
        KeyError: If the response carries no ``content-length`` header.
    """
    # Probe request: only the headers are needed, so the body is never read
    # and the connection is released as soon as the size is known.
    r = requests.get(url, headers=None, stream=True, timeout=30)
    try:
        # print(r.status_code, r.headers)
        file_size = int(r.headers['content-length'])
    finally:
        r.close()

    # Pre-allocate the target so each worker can seek to its own offset.
    with open('2.mp4', 'wb') as fp:
        fp.truncate(file_size)
    if not file_size:
        # An empty body has no valid byte range to request.
        return
    print('視頻大小:' + str(int(file_size / 1024 / 1024)) + "MB")

    # Each worker is responsible for roughly 5 MB ...
    size = 5242880
    all_thread = 1
    if file_size > size:
        # ... but never use more than 10 workers.
        all_thread = min(file_size // size, 10)
    part = file_size // all_thread

    threads = []
    starttime = datetime.datetime.now().replace(microsecond=0)
    for i in range(all_thread):
        # HTTP byte ranges are inclusive on both ends, so a chunk stops one
        # byte before the next one starts; the last chunk absorbs the
        # remainder and ends on the final byte (file_size - 1).
        start = part * i
        if i == all_thread - 1:
            end = file_size - 1
        else:
            end = start + part - 1
        # A fresh dict per worker so no thread shares header state.
        headers = {'Range': "bytes=%s-%s" % (start, end)}
        t = threading.Thread(
            target=Handler,
            name='th-' + str(i),
            kwargs={'start': start, 'end': end, 'url': url,
                    'filename': '2.mp4', 'headers': headers},
        )
        t.daemon = True
        threads.append(t)

    # Start the workers slightly staggered.
    for t in threads:
        time.sleep(0.2)
        t.start()
    # Wait for every worker to finish.
    for t in threads:
        t.join()

    endtime = datetime.datetime.now().replace(microsecond=0)
    print('用時:%s' % (endtime - starttime))
def Handler(start, end, url, filename, headers=None):
    """Fetch one piece of ``url`` and write it into ``filename`` at ``start``.

    Args:
        start: Byte offset in ``filename`` where the received data is written.
        end: Last byte of the requested range; used only for the progress line.
        url: Address to download from.
        filename: Existing file to write into (opened in ``'r+b'`` mode).
        headers: Request headers, normally carrying the ``Range`` header that
            matches ``start``/``end``. ``None`` sends no extra headers.
    """
    tt_name = threading.current_thread().name
    print(tt_name + ' is begin')
    # The timeout keeps a stalled server from blocking this worker forever.
    r = requests.get(url, headers=headers, stream=True, timeout=30)
    try:
        # An error page must not be written into the file, and a non-206
        # reply starts at byte 0, so storing it at a non-zero offset would
        # corrupt the output. Only the piece at offset 0 can accept it.
        if r.status_code >= 400 or (start > 0 and r.status_code != 206):
            print('%s skipped: unexpected HTTP status %s' % (tt_name, r.status_code))
            return

        # Approximate span of this piece, used only for the progress display.
        total_size = end - start
        downsize = 0
        startTime = time.time()
        with open(filename, 'r+b') as fp:
            fp.seek(start)
            for chunk in r.iter_content(204800):
                if chunk:
                    fp.write(chunk)
                    downsize += len(chunk)
                    # Guard against a zero interval on coarse clocks.
                    elapsed = max(time.time() - startTime, 1e-6)
                    line = tt_name + '-downloading %d KB/s - %.2f MB, 共 %.2f MB'
                    line = line % (
                        downsize / 1024 / elapsed, downsize / 1024 / 1024,
                        total_size / 1024 / 1024)
                    print(line, end='\r')
    finally:
        # Release the connection even if writing fails part-way.
        r.close()
if __name__ == '__main__':
    # Ask for the direct media URL and hand it straight to the downloader.
    thread(input('輸入視頻鏈接(請輸入視頻原鏈):'))
效果:
可以看見,38MB,一秒下完。
唯一的缺點就是,要有視頻原鏈,而一般這個視頻原鏈都是不會輕易被找到的,這就叫反爬。
要找視頻原鏈,就得靠爬蟲去抓取;視頻爬蟲只是爬蟲的一種。
可以根據視頻大小改變線程數:代碼中每個線程負責約 5MB,線程數上限爲 10,按需調整這兩個值即可。