大文件分片下载并发

import requests
from concurrent.futures import ThreadPoolExecutor
import os
from pathlib import Path
from loguru import logger
import traceback
from tqdm import tqdm
from threading import Lock
from functools import partial

# Previously used target (kept for reference):
# url = "https://scontent.xx.fbcdn.net/m1/v/t6/An_YmP5OIPXun-vu3hkckAZZ2s4lPYoVkiyvCcWiVY21mu1Ng5_1HeCa2CWiSTsskj8HQ8bN013HxNpYDdSC_7jWQq_svcg.tar?ccb=10-5&oh=00_AfBn8XrMhiHu6w1KuS1X8rkuLzzZJnRs8B9jFMvVRfQnfg&oe=64659C28&_nc_sid=fb0754"
# tar_path = Path("/Users/chennan/Desktop/sa_000020.tar")
# fetching_path = Path(f"{tar_path.as_posix()}.fetch")

lock = Lock()
downloaded = 0

url = "https://images.pexels.com/photos/15983035/pexels-photo-15983035.jpeg"
tar_path = Path("/Users/chennan/Desktop/pexels-photo-15983035.jpeg")
# Marker file: present while a download is in flight, removed on success.
fetching_path = Path(f"{tar_path.as_posix()}.fetch")

# Per-worker progress bars, collected so the main thread can sum them later.
pbar_threads = []


def insert_data(info):
    """Download one byte range and write it into the target file at its offset.

    Args:
        info: tuple ``(headers, start, end)`` where ``headers`` carries the
            HTTP ``Range`` header and ``[start, end]`` is the inclusive byte
            span this worker is responsible for.
    """
    headers, start, end = info
    # An inclusive Range delivers end - start + 1 bytes; the original bar
    # total of end - start was off by one.
    pbar_thread = tqdm(total=end - start + 1)
    pbar_threads.append(pbar_thread)
    try:
        with requests.get(url, stream=True, headers=headers) as response:
            # Surface HTTP errors instead of writing an error page into the file.
            response.raise_for_status()
            with tar_path.open('rb+') as f:
                # Seek to this chunk's offset and stream the body in place.
                f.seek(start)
                for chunk in response.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)
                        pbar_thread.update(len(chunk))
    finally:
        # Close the bar even if the request fails mid-stream.
        pbar_thread.close()


def get_file_length():
    """Return the remote file size in bytes from the Content-Length header."""
    # Close the response explicitly: with stream=True and the body never
    # consumed, the original leaked the connection.
    with requests.get(url, stream=True) as response:
        return int(response.headers['Content-Length'])


def fetch_one(content_length):
    """Yield ``(headers, start, end)`` work items that partition the file.

    Splits ``content_length`` bytes into 64 contiguous inclusive ranges;
    the last range absorbs the remainder.
    """
    all_thread = 64                        # number of chunks / worker tasks
    part = content_length // all_thread    # nominal bytes per chunk
    for i in range(all_thread):
        start = part * i
        if i == all_thread - 1:
            # Last valid byte index is content_length - 1 (Range is
            # inclusive); the original requested one byte past the end.
            end = content_length - 1
        else:
            end = start + part
        if i > 0:
            start += 1  # skip the previous chunk's final (inclusive) byte
        headers = {
            'Range': f'bytes={start}-{end}',
        }
        yield headers, start, end


if __name__ == '__main__':
    content_length = get_file_length()
    if not tar_path.exists():
        # Pre-allocate the full file so every worker can seek-and-write
        # its own range concurrently.
        with tar_path.open('wb') as f:
            f.seek(content_length - 1)
            f.write(b'\0')
    if not fetching_path.exists():
        with fetching_path.open("wb") as fs:
            fs.write(b"\0")
    try:
        with ThreadPoolExecutor(max_workers=16) as pool:
            # Consume the map iterator: Executor.map re-raises worker
            # exceptions only when its results are iterated. The original
            # discarded the iterator, so failed chunks were silently
            # swallowed and the download was reported complete anyway.
            list(pool.map(insert_data, fetch_one(content_length)))
        with tqdm(total=content_length) as pbar:
            progress = sum(pbar_thread.n for pbar_thread in pbar_threads)
            pbar.update(progress)
        fetching_path.unlink()
        logger.info("下载完成")
    except Exception:
        logger.error(f"{traceback.format_exc()}")
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章