Python 多進程圖像下載和壓縮腳本

圖像下載腳本:

#!/usr/bin/env python
# -- coding: utf-8 --
"""
Copyright (c) 2018. All rights reserved.
Created by C. L. Wang on 2018/7/9
"""

import argparse
import os
import shutil
from datetime import datetime
from multiprocessing.pool import Pool

import requests


def get_current_time_str():
    """
    Format the current local time as a compact, digits-only timestamp.

    :return: timestamp in ``YYYYmmddHHMMSS`` form, e.g. 20170718113705
    """
    now = datetime.now()
    return now.strftime('%Y%m%d%H%M%S')


logfile = 'download_log_{}.log'.format(get_current_time_str())  # log file name; the timestamp is taken once, when this module is loaded


def download_img(img_url, out_folder, imgs_names, img_name=None, timeout=30):
    """
    Download a single image and store it in ``out_folder``.

    A failed request (connection error, timeout, malformed URL or a non-2xx
    HTTP status) is logged and skipped, so an error page is never written to
    disk as if it were an image.

    :param img_url: URL of the image
    :param out_folder: directory the image is written to
    :param imgs_names: collection of file names that already exist; if the
        target name is in it the download is skipped
    :param img_name: file name to save under; defaults to the last
        ``/``-separated segment of ``img_url``
    :param timeout: seconds passed to ``requests.get`` as its timeout, so a
        stalled server cannot block a worker forever
    :return: None
    """
    if not img_name:
        img_name = img_url.split('/')[-1]  # derive the file name from the URL

    if img_name in imgs_names:
        print_info('圖片已存在: %s' % img_name)
        return

    try:
        response = requests.get(img_url, timeout=timeout)
        response.raise_for_status()  # do not save 4xx/5xx bodies as images
    except requests.RequestException as e:
        print_info('圖片下載失敗: %s, %s' % (img_url, e))
        return

    out_file = os.path.join(out_folder, img_name)
    with open(out_file, 'wb') as hl:
        hl.write(response.content)
    # Report success only after the file has been written and closed.
    print_info('圖片已下載: %s' % img_name)


def download_imgs_for_mp(img_file, out_folder, n_prc=10, prefix=None):
    """
    Download every URL listed in ``img_file`` using a pool of worker processes.

    :param img_file: text file with one image URL per line
    :param out_folder: directory the images are written to
    :param n_prc: number of worker processes, 10 by default; a numeric string
        is accepted and converted
    :param prefix: optional file-name prefix; when given, images are saved as
        ``<prefix>_<line index>.jpg`` instead of the name taken from the URL
    :return: None
    """
    # Pool() rejects a string process count, and the CLI may hand one over.
    n_prc = int(n_prc)
    print_info('進程總數: %s' % n_prc)
    pool = Pool(processes=n_prc)
    paths_list = read_file(img_file)
    print_info('文件數: %s' % len(paths_list))

    _, imgs_names = traverse_dir_files(out_folder)
    imgs_names = set(imgs_names)  # constant-time "already downloaded" checks

    def report_failure(error):
        # apply_async discards worker exceptions unless a callback is given.
        print_info('下載失敗: %s' % error)

    for index, path in enumerate(paths_list):
        if not path:  # blank line in the URL file; index numbering is kept
            continue
        task_args = (path, out_folder, imgs_names)
        if prefix:
            task_args += (prefix + '_' + str(index) + '.jpg',)
        pool.apply_async(download_img, task_args, error_callback=report_failure)

    pool.close()
    pool.join()

    print_info('全部下載完成')


def parse_args():
    """
    Parse the command-line options of the download script.

    ``-i`` is the file listing the image URLs, ``-o`` the output folder and
    ``-p`` the number of worker processes (default 20).

    :return: tuple ``(img_file, out_folder, n_prc)``; ``n_prc`` is an int
    """
    parser = argparse.ArgumentParser(description='下載數據腳本')
    parser.add_argument('-i', dest='img_file', required=True, help='文件路徑', type=str)
    parser.add_argument('-o', dest='out_folder', required=True, help='輸出文件夾', type=str)
    # type=int: the value is handed to multiprocessing.Pool, which needs a number.
    parser.add_argument('-p', dest='n_prc', required=False, default=20, help='進程數', type=int)

    args = parser.parse_args()

    arg_img = args.img_file
    print_info("文件路徑:%s" % arg_img)

    arg_out = args.out_folder
    print_info("輸出文件夾:%s" % arg_out)

    arg_npc = args.n_prc
    print_info("進程數:%s" % arg_npc)
    return arg_img, arg_out, arg_npc


def write_line(file_name, line):
    """
    Append one line of text to a file.

    :param file_name: path of the file to append to; an empty string makes the
        call a no-op
    :param line: the data to write; a tuple or list is written as its items
        joined by ``", "``, anything else is written via ``%s`` formatting
    :return: None
    """
    if file_name == "":
        return
    # isinstance with a tuple of types: ``(tuple or list)`` is just ``tuple``,
    # which made lists fall through and be written as their repr.
    if isinstance(line, (tuple, list)):
        line = ", ".join(str(item) for item in line)
    with open(file_name, "a", encoding='utf8') as fs:
        fs.write("%s\n" % line)


def print_info(log_str):
    """
    Emit an info-level message to both the log file and standard output.

    The message is prefixed with ``[Info <timestamp>]``.

    :param log_str: the message; converted with ``str`` before formatting
    :return: None
    """
    stamp = get_current_time_str()
    message = '[Info ' + stamp + '] ' + str(log_str)
    write_line(logfile, message)
    print(message)


def mkdir_if_not_exist(dir_name, is_delete=False):
    """
    Make sure a directory exists, optionally wiping it first.

    :param dir_name: directory path; missing parent directories are created too
    :param is_delete: when true, an existing directory is removed together
        with its contents before being re-created
    :return: True on success, False if any exception was raised (the
        exception is printed, not propagated)
    """
    try:
        if is_delete and os.path.exists(dir_name):
            shutil.rmtree(dir_name)
            print('[Info] 文件夾 "%s" 存在, 刪除文件夾.' % dir_name)

        missing = not os.path.exists(dir_name)
        if missing:
            os.makedirs(dir_name)
            print('[Info] 文件夾 "%s" 不存在, 創建文件夾.' % dir_name)
    except Exception as err:
        print('[Exception] %s' % err)
        return False
    else:
        return True


def traverse_dir_files(root_dir, ext=None):
    """
    Recursively list the files below a directory, sorted by path.

    Files whose name starts with ``.`` are skipped.

    :param root_dir: directory to walk
    :param ext: optional suffix filter; either a single suffix string such as
        ``'.jpg'`` or an iterable of suffixes. Falsy means "no filter".
    :return: ``(paths_list, names_list)``, two parallel lists holding the full
        path and the bare file name of every match; both empty if nothing
        matched
    """
    if not ext:
        suffixes = None
    elif isinstance(ext, str):
        # tuple('.jpg') would explode into ('.', 'j', 'p', 'g') and match on
        # single characters, so wrap a lone string instead.
        suffixes = (ext,)
    else:
        suffixes = tuple(ext)

    found = []  # (path, name) pairs, so both lists stay aligned when sorted
    for parent, _, file_names in os.walk(root_dir):
        for name in file_names:
            if name.startswith('.'):  # hidden file
                continue
            if suffixes is not None and not name.endswith(suffixes):
                continue
            found.append((os.path.join(parent, name), name))

    found.sort()
    paths_list = [path for path, _ in found]
    names_list = [name for _, name in found]
    return paths_list, names_list


def sort_two_list(list1, list2):
    """
    Sort two parallel lists together, ordering by ``list1`` then ``list2``.

    Elements are paired by position, the pairs are sorted, and the result is
    split back into two lists. If the inputs differ in length the extra
    elements of the longer one are dropped (``zip`` semantics).

    :param list1: first list (primary sort key)
    :param list2: second list (tie-breaker)
    :return: the two sorted lists; two empty lists for empty input
    """
    pairs = sorted(zip(list1, list2))
    # Build the outputs directly: unpacking zip(*pairs) fails when pairs is empty.
    sorted1 = [first for first, _ in pairs]
    sorted2 = [second for _, second in pairs]
    return sorted1, sorted2


def read_file(data_file, mode='more'):
    """
    Read a text file either as one string or as a list of stripped lines.

    :param data_file: path of the file to read
    :param mode: ``'one'`` returns the whole content as a single string,
        ``'more'`` returns a list with every line stripped of surrounding
        whitespace; any other value yields an empty list
    :return: string or list as described above; an empty list if the file
        cannot be opened or read (``IOError``)
    """
    try:
        with open(data_file, 'r') as handle:
            if mode == 'one':
                return handle.read()
            if mode == 'more':
                return [row.strip() for row in handle.readlines()]
            return []
    except IOError:
        return []


def main():
    """
    Script entry point: parse the arguments, make sure the output folder
    exists, then start the download.
    """
    img_file, out_folder, n_prc = parse_args()
    mkdir_if_not_exist(out_folder)  # create the output folder when missing
    download_imgs_for_mp(img_file, out_folder, n_prc)


# Start the download only when this file is executed as a script.
if __name__ == '__main__':
    main()

圖像壓縮腳本:

#!/usr/bin/env python
# -- coding: utf-8 --
"""
Copyright (c) 2018. All rights reserved.
Created by C. L. Wang on 2018/8/8
"""

import os
import shutil
import argparse

from multiprocessing import Pool
from PIL import Image


def sort_two_list(list1, list2):
    """
    Sort two parallel lists together, ordering by ``list1`` then ``list2``.

    Elements are paired by position, the pairs are sorted, and the result is
    split back into two lists. If the inputs differ in length the extra
    elements of the longer one are dropped (``zip`` semantics).

    :param list1: first list (primary sort key)
    :param list2: second list (tie-breaker)
    :return: the two sorted lists; two empty lists for empty input
    """
    pairs = sorted(zip(list1, list2))
    # Build the outputs directly: unpacking zip(*pairs) fails when pairs is empty.
    sorted1 = [first for first, _ in pairs]
    sorted2 = [second for _, second in pairs]
    return sorted1, sorted2


def mkdir_if_not_exist(dir_name, is_delete=False):
    """
    Make sure a directory exists, optionally wiping it first.

    :param dir_name: directory path; missing parent directories are created too
    :param is_delete: when true, an existing directory is removed together
        with its contents before being re-created
    :return: True on success, False if any exception was raised (the
        exception is printed, not propagated)
    """
    try:
        if is_delete and os.path.exists(dir_name):
            shutil.rmtree(dir_name)
            print('[Info] 文件夾 "%s" 存在, 刪除文件夾.' % dir_name)

        missing = not os.path.exists(dir_name)
        if missing:
            os.makedirs(dir_name)
            print('[Info] 文件夾 "%s" 不存在, 創建文件夾.' % dir_name)
    except Exception as err:
        print('[Exception] %s' % err)
        return False
    else:
        return True


def traverse_dir_files(root_dir, ext=None):
    """
    Recursively list the files below a directory, sorted by path.

    Files whose name starts with ``.`` are skipped.

    :param root_dir: directory to walk
    :param ext: optional suffix filter; either a single suffix string such as
        ``'.jpg'`` or an iterable of suffixes. Falsy means "no filter".
    :return: ``(paths_list, names_list)``, two parallel lists holding the full
        path and the bare file name of every match; both empty if nothing
        matched
    """
    if not ext:
        suffixes = None
    elif isinstance(ext, str):
        # tuple('.jpg') would explode into ('.', 'j', 'p', 'g') and match on
        # single characters, so wrap a lone string instead.
        suffixes = (ext,)
    else:
        suffixes = tuple(ext)

    found = []  # (path, name) pairs, so both lists stay aligned when sorted
    for parent, _, file_names in os.walk(root_dir):
        for name in file_names:
            if name.startswith('.'):  # hidden file
                continue
            if suffixes is not None and not name.endswith(suffixes):
                continue
            found.append((os.path.join(parent, name), name))

    found.sort()
    paths_list = [path for path, _ in found]
    names_list = [name for _, name in found]
    return paths_list, names_list


def compress_img(in_path, out_path, size=1024):
    """
    Shrink one image so that it fits into a ``size`` x ``size`` box and save it.

    The resize is done with ``Image.thumbnail`` on the opened image, and the
    result is written to ``out_path``.

    :param in_path: path of the source image
    :param out_path: path the resized image is saved to
    :param size: edge length, in pixels, of the bounding box passed to
        ``thumbnail``
    :return: None
    """
    # The context manager closes the underlying file once the image is saved;
    # without it every processed image leaves an open handle behind.
    with Image.open(in_path) as img:
        img.thumbnail((size, size))
        img.save(out_path)
    print('Processed: {}'.format(out_path))


def process_folder(in_folder, out_folder, size=1024, n_prc=20):
    """
    Compress every image found below ``in_folder`` using a process pool.

    The input folder is walked recursively, but every result is written
    directly into ``out_folder`` under its bare file name, so files that share
    a name in different sub-folders end up at the same output path.

    :param in_folder: folder containing the source images
    :param out_folder: folder receiving the compressed images; created if missing
    :param size: bounding-box edge length handed to ``compress_img``
    :param n_prc: number of worker processes
    :return: None
    """
    mkdir_if_not_exist(out_folder)
    path_list, name_list = traverse_dir_files(in_folder)
    pool = Pool(processes=n_prc)  # one compression task per image

    for in_path, name in zip(path_list, name_list):
        out_path = os.path.join(out_folder, name)

        # apply_async drops worker exceptions unless an error callback is
        # given; report them so unreadable images do not vanish silently.
        pool.apply_async(
            compress_img,
            (in_path, out_path, size),
            error_callback=lambda error, path=in_path: print(
                '[Exception] %s: %s' % (path, error)),
        )

    pool.close()
    pool.join()
    print('全部處理完成')


def parse_args():
    """
    Parse the command-line options of the compression script.

    ``-i`` is the input folder, ``-o`` the output folder, ``-s`` the image
    size (default 1024) and ``-p`` the number of worker processes (default
    20). Non-numeric ``-s``/``-p`` values are rejected by argparse with a
    usage error.

    :return: tuple ``(in_folder, out_folder, size, n_prc)``; the last two are ints
    """
    parser = argparse.ArgumentParser(description='壓縮圖片腳本')
    parser.add_argument('-i', dest='in_folder', required=True, help='輸入文件夾', type=str)
    parser.add_argument('-o', dest='out_folder', required=True, help='輸出文件夾', type=str)
    # type=int lets argparse validate the numbers instead of a later int()
    # call failing with a bare ValueError traceback.
    parser.add_argument('-s', dest='size', required=False, default=1024, help='最長邊', type=int)
    parser.add_argument('-p', dest='n_prc', required=False, default=20, help='進程數', type=int)
    args = parser.parse_args()

    in_folder = args.in_folder
    print("文件路徑:%s" % in_folder)

    out_folder = args.out_folder
    print("輸出文件夾:%s" % out_folder)
    size = args.size
    n_prc = args.n_prc

    print('圖片尺寸: {}, 進程數: {}'.format(size, n_prc))

    return in_folder, out_folder, size, n_prc


def main():
    """
    Script entry point: parse the arguments, make sure the output folder
    exists, then compress the images of the input folder.
    """
    in_folder, out_folder, img_size, workers = parse_args()
    mkdir_if_not_exist(out_folder)  # create the output folder when missing
    process_folder(in_folder, out_folder, img_size, workers)


# Start the compression only when this file is executed as a script.
if __name__ == '__main__':
    main()
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章