大文件分片下載併發

import requests
from concurrent.futures import ThreadPoolExecutor
import os
from pathlib import Path
from loguru import logger
import traceback
from tqdm import tqdm
from threading import Lock
from functools import partial

# Alternative (large .tar) target kept from the original for reference:
# url = "https://scontent.xx.fbcdn.net/m1/v/t6/An_YmP5OIPXun-vu3hkckAZZ2s4lPYoVkiyvCcWiVY21mu1Ng5_1HeCa2CWiSTsskj8HQ8bN013HxNpYDdSC_7jWQq_svcg.tar?ccb=10-5&oh=00_AfBn8XrMhiHu6w1KuS1X8rkuLzzZJnRs8B9jFMvVRfQnfg&oe=64659C28&_nc_sid=fb0754"
# tar_path = Path("/Users/chennan/Desktop/sa_000020.tar")
# fetching_path = Path(f"{tar_path.as_posix()}.fetch")

lock = Lock()
downloaded = 0
url = "https://images.pexels.com/photos/15983035/pexels-photo-15983035.jpeg"
# Destination file; it is pre-allocated and then filled in place by the workers.
tar_path = Path("/Users/chennan/Desktop/pexels-photo-15983035.jpeg")
# Marker file that exists while a download is in progress / incomplete.
fetching_path = Path(f"{tar_path.as_posix()}.fetch")
# One tqdm bar per downloaded part, collected so the total can be summed later.
pbar_threads = []


def insert_data(info):
    """Download one byte range of ``url`` and write it into ``tar_path``.

    Args:
        info: A ``(headers, start, end)`` tuple as produced by ``fetch_one``.
            ``headers`` carries the HTTP ``Range`` header; ``start`` and
            ``end`` are the inclusive byte offsets of the part.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        RuntimeError: If the server does not answer ``206 Partial Content``,
            i.e. it ignored the ``Range`` header and sent the whole body.
    """
    headers, start, end = info
    # Both offsets are inclusive, so the part holds end - start + 1 bytes.
    pbar_thread = tqdm(total=end - start + 1)
    pbar_threads.append(pbar_thread)
    try:
        with requests.get(url, stream=True, headers=headers) as response:
            response.raise_for_status()
            if response.status_code != 206:
                # Writing a full (200) body at offset ``start`` would
                # overwrite neighbouring parts and corrupt the file.
                raise RuntimeError(
                    f"server ignored Range request {headers['Range']!r}: "
                    f"HTTP {response.status_code}"
                )
            with tar_path.open('rb+') as f:
                # Write this part at its own offset inside the target file.
                f.seek(start)
                for chunk in response.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)
                        pbar_thread.update(len(chunk))
    finally:
        # Close the bar even when the request fails part-way through.
        pbar_thread.close()


def get_file_length():
    """Return the size in bytes of the resource at ``url``.

    The size is read from the ``Content-Length`` response header; the body is
    not downloaded and the connection is released before returning.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        KeyError: If the response has no ``Content-Length`` header.
    """
    with requests.get(url, stream=True) as req:
        req.raise_for_status()
        return int(req.headers['content-Length'])


def fetch_one(content_length, all_thread=64):
    """Yield ``(headers, start, end)`` for each part of the download.

    The parts are contiguous, non-overlapping and together cover bytes
    ``0 .. content_length - 1``. ``start`` and ``end`` are inclusive, matching
    the semantics of the HTTP ``Range`` header.

    Args:
        content_length: Total size of the resource in bytes.
        all_thread: Desired number of parts. It is capped at
            ``content_length`` so that no part is empty.

    Yields:
        Tuples ``(headers, start, end)`` where ``headers`` is a dict holding
        the ``Range`` header for that part.
    """
    if content_length <= 0:
        return
    # Never create more parts than there are bytes, otherwise part == 0 and
    # the generated ranges would be invalid (e.g. "bytes=1-0").
    all_thread = max(1, min(all_thread, content_length))
    part = content_length // all_thread  # size of every part but the last
    for i in range(all_thread):
        start = part * i
        if i == all_thread - 1:
            # The last part absorbs the remainder of the integer division.
            end = content_length - 1
        else:
            end = start + part - 1
        headers = {
            'Range': f'bytes={start}-{end}',
        }
        yield headers, start, end


def main():
    """Download ``url`` into ``tar_path`` using concurrent range requests."""
    content_length = get_file_length()
    if not tar_path.exists():
        # Pre-allocate the target so workers can seek to their own offsets.
        with tar_path.open('wb') as f:
            if content_length > 0:
                f.seek(content_length - 1)
                f.write(b'\0')
    if not fetching_path.exists():
        with fetching_path.open("wb") as fs:
            fs.write(b"\0")
    try:
        with ThreadPoolExecutor(max_workers=16) as pool:
            # Consume the iterator: Executor.map only re-raises a worker's
            # exception when its result is retrieved. Without this, failed
            # parts would go unnoticed and the download reported as complete.
            list(pool.map(insert_data, fetch_one(content_length)))
        with tqdm(total=content_length) as pbar:
            progress = sum(pbar_thread.n for pbar_thread in pbar_threads)
            pbar.update(progress)
        # Only reached when every part succeeded; on failure the marker file
        # is left in place to signal an incomplete download.
        fetching_path.unlink()
        logger.info("下載完成")
    except Exception:
        logger.error(f"{traceback.format_exc()}")


if __name__ == '__main__':
    main()
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章