圖像下載腳本:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Copyright (c) 2018. All rights reserved.
Created by C. L. Wang on 2018/7/9
"""
import argparse
import os
import shutil
from datetime import datetime
from multiprocessing.pool import Pool
import requests
def get_current_time_str():
    """
    Format the current local time as a compact timestamp.

    :return: a 14-digit string laid out as YYYYmmddHHMMSS, e.g. 20170718113705
    """
    timestamp_format = '%Y%m%d%H%M%S'
    now = datetime.now()
    return now.strftime(timestamp_format)
# Name of the log file that print_info appends to; the timestamp in the name
# is taken once, when this statement runs.
logfile = 'download_log_{}.log'.format(get_current_time_str()) # log file
def download_img(img_url, out_folder, imgs_names, img_name=None, timeout=30):
    """
    Download a single image and store it in ``out_folder``.

    A failed download (network error, HTTP error status, unwritable file) is
    logged and then re-raised, so the failure is visible in the log even when
    the function runs inside a worker pool that never inspects its result.

    :param img_url: image URL
    :param out_folder: output folder
    :param imgs_names: names of the images that already exist; a URL whose
        file name is in this collection is skipped
    :param img_name: file name to save under; defaults to the last
        '/'-separated segment of the URL
    :param timeout: seconds to wait for the server before giving up
    :return: None
    """
    if not img_name:
        img_name = img_url.split('/')[-1]  # last URL segment is the file name
    if img_name in imgs_names:
        print_info('圖片已存在: %s' % img_name)
        return

    out_file = os.path.join(out_folder, img_name)
    try:
        # Without a timeout a stalled server would block this worker forever.
        response = requests.get(img_url, timeout=timeout)
        # Refuse to save an HTTP error page under an image file name.
        response.raise_for_status()
        with open(out_file, 'wb') as handle:
            handle.write(response.content)
    except (requests.RequestException, OSError) as err:
        print_info('圖片下載失敗: %s (%s)' % (img_url, err))
        raise
    print_info('圖片已下載: %s' % img_name)
def download_imgs_for_mp(img_file, out_folder, n_prc=10, prefix=None):
    """
    Download every URL listed in ``img_file`` with a pool of worker processes.

    :param img_file: text file whose lines are the image URLs
    :param out_folder: output folder
    :param n_prc: number of worker processes, 10 by default; a numeric string
        is accepted and converted
    :param prefix: optional file name prefix; when given, images are saved as
        <prefix>_<index>.jpg instead of the name taken from the URL
    :return: None
    """
    n_prc = int(n_prc)  # the command line may hand this over as a string
    print_info('進程總數: %s' % n_prc)
    pool = Pool(processes=n_prc)  # process pool, one download per task
    paths_list = read_file(img_file)
    print_info('文件數: %s' % len(paths_list))
    _, imgs_names = traverse_dir_files(out_folder)
    imgs_names = set(imgs_names)  # constant-time "already downloaded" checks

    pending = []
    for (index, path) in enumerate(paths_list):
        task_args = (path, out_folder, imgs_names)
        if prefix:
            task_args += (prefix + '_' + str(index) + '.jpg',)
        pending.append((path, pool.apply_async(download_img, task_args)))
    pool.close()
    pool.join()

    # apply_async stores worker exceptions in the result object; without
    # calling get() they would disappear without a trace.
    for path, result in pending:
        try:
            result.get()
        except Exception as err:
            print_info('圖片下載失敗: %s (%s)' % (path, err))
    print_info('全部下載完成')
def parse_args(argv=None):
    """
    Parse the command-line options of the download script.

    -i  path of the file that lists the image URLs (required)
    -o  output folder (required)
    -p  number of worker processes (optional, 20 by default)

    :param argv: list of arguments to parse; None means the process
        command line
    :return: (img_file, out_folder, n_prc), with n_prc as an int
    """
    parser = argparse.ArgumentParser(description='下載數據腳本')
    parser.add_argument('-i', dest='img_file', required=True, help='文件路徑', type=str)
    parser.add_argument('-o', dest='out_folder', required=True, help='輸出文件夾', type=str)
    # type=int: the value is used as a process count, a string would break the pool.
    parser.add_argument('-p', dest='n_prc', required=False, default=20, help='進程數', type=int)
    args = parser.parse_args(argv)
    arg_img = args.img_file
    print_info("文件路徑:%s" % arg_img)
    arg_out = args.out_folder
    print_info("輸出文件夾:%s" % arg_out)
    arg_npc = args.n_prc
    print_info("進程數:%s" % arg_npc)
    return arg_img, arg_out, arg_npc
def write_line(file_name, line):
    """
    Append one line of text to a file (UTF-8 encoded).

    :param file_name: target file; an empty string turns the call into a no-op
    :param line: the value to write; a tuple or list is written as its items
        joined with ", ", anything else is formatted with %s
    :return: None
    """
    if file_name == "":
        return
    if isinstance(line, (tuple, list)):
        # isinstance covers both sequence types; items are stringified so
        # that non-string items do not break the join.
        line = ", ".join(str(item) for item in line)
    with open(file_name, "a", encoding='utf8') as fs:
        fs.write("%s\n" % line)
def print_info(log_str):
    """
    Emit an info message: append it to the log file and print it.

    The message is prefixed with "[Info <timestamp>]" before being written.

    :param log_str: message, converted with str()
    :return: None
    """
    stamp = get_current_time_str()
    message = u'[Info {}] {}'.format(stamp, str(log_str))
    write_line(logfile, message)
    print(message)
def mkdir_if_not_exist(dir_name, is_delete=False):
    """
    Make sure a folder exists, optionally wiping it first.

    :param dir_name: folder path
    :param is_delete: when true, an existing folder is removed (with its
        contents) before being created again
    :return: True on success, False when any exception was raised
    """
    try:
        if is_delete and os.path.exists(dir_name):
            shutil.rmtree(dir_name)
            print('[Info] 文件夾 "%s" 存在, 刪除文件夾.' % dir_name)
        if os.path.exists(dir_name):
            return True
        os.makedirs(dir_name)
        print('[Info] 文件夾 "%s" 不存在, 創建文件夾.' % dir_name)
    except Exception as err:
        print('[Exception] %s' % err)
        return False
    return True
def traverse_dir_files(root_dir, ext=None):
    """
    Recursively list the files below a folder, skipping hidden files.

    :param root_dir: root folder
    :param ext: optional suffix filter, either a single suffix string
        (e.g. '.jpg') or an iterable of suffixes; falsy means no filtering
    :return: (paths_list, names_list), two parallel lists ordered by path
    """
    if isinstance(ext, str):
        # tuple('.jpg') would split the suffix into single characters and
        # match nearly every file name, so wrap a lone string instead.
        ext = (ext,)
    suffixes = tuple(ext) if ext else None

    found = []
    for parent, _, file_names in os.walk(root_dir):
        for name in file_names:
            if name.startswith('.'):  # skip hidden files
                continue
            if suffixes and not name.endswith(suffixes):
                continue
            found.append((os.path.join(parent, name), name))

    found.sort()  # order by path, keeping each name next to its path
    paths_list = [path for path, _ in found]
    names_list = [name for _, name in found]
    return paths_list, names_list
def sort_two_list(list1, list2):
    """
    Sort two parallel lists together.

    The pairs (list1[i], list2[i]) are sorted, so the order is driven by
    list1 and ties are broken by list2. Empty input yields two empty lists.

    :param list1: first list, the primary sort key
    :param list2: second list, reordered alongside list1
    :return: the two reordered lists
    """
    pairs = sorted(zip(list1, list2))
    # Splitting with comprehensions also works for zero pairs, where
    # unpacking zip(*pairs) would fail.
    firsts = [first for first, _ in pairs]
    seconds = [second for _, second in pairs]
    return firsts, seconds
def read_file(data_file, mode='more'):
    """
    Read a text file either as one string or as a list of stripped lines.

    :param data_file: path of the file to read
    :param mode: 'one' returns the whole content as a single string,
        'more' returns a list with one stripped entry per line
    :return: the content as described above; an empty list for any other
        mode or when the file cannot be read
    """
    try:
        with open(data_file, 'r') as handle:
            if mode == 'more':
                return [row.strip() for row in handle]
            if mode == 'one':
                return handle.read()
    except IOError:
        return list()
    return list()
def main():
    """
    Script entry point: read the command-line options, make sure the output
    folder exists, then download the images.
    """
    img_file, out_folder, n_prc = parse_args()
    mkdir_if_not_exist(out_folder)  # create the output folder if needed
    download_imgs_for_mp(img_file, out_folder, n_prc)
# Run the downloader only when this file is executed as a script.
if __name__ == '__main__':
    main()
圖像壓縮腳本:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Copyright (c) 2018. All rights reserved.
Created by C. L. Wang on 2018/8/8
"""
import os
import shutil
import argparse
from multiprocessing import Pool
from PIL import Image
def sort_two_list(list1, list2):
    """
    Sort two parallel lists together.

    The pairs (list1[i], list2[i]) are sorted, so the order is driven by
    list1 and ties are broken by list2. Empty input yields two empty lists.

    :param list1: first list, the primary sort key
    :param list2: second list, reordered alongside list1
    :return: the two reordered lists
    """
    pairs = sorted(zip(list1, list2))
    # Splitting with comprehensions also works for zero pairs, where
    # unpacking zip(*pairs) would fail.
    firsts = [first for first, _ in pairs]
    seconds = [second for _, second in pairs]
    return firsts, seconds
def mkdir_if_not_exist(dir_name, is_delete=False):
    """
    Make sure a folder exists, optionally wiping it first.

    :param dir_name: folder path
    :param is_delete: when true, an existing folder is removed (with its
        contents) before being created again
    :return: True on success, False when any exception was raised
    """
    try:
        if is_delete and os.path.exists(dir_name):
            shutil.rmtree(dir_name)
            print('[Info] 文件夾 "%s" 存在, 刪除文件夾.' % dir_name)
        if os.path.exists(dir_name):
            return True
        os.makedirs(dir_name)
        print('[Info] 文件夾 "%s" 不存在, 創建文件夾.' % dir_name)
    except Exception as err:
        print('[Exception] %s' % err)
        return False
    return True
def traverse_dir_files(root_dir, ext=None):
    """
    Recursively list the files below a folder, skipping hidden files.

    :param root_dir: root folder
    :param ext: optional suffix filter, either a single suffix string
        (e.g. '.jpg') or an iterable of suffixes; falsy means no filtering
    :return: (paths_list, names_list), two parallel lists ordered by path
    """
    if isinstance(ext, str):
        # tuple('.jpg') would split the suffix into single characters and
        # match nearly every file name, so wrap a lone string instead.
        ext = (ext,)
    suffixes = tuple(ext) if ext else None

    found = []
    for parent, _, file_names in os.walk(root_dir):
        for name in file_names:
            if name.startswith('.'):  # skip hidden files
                continue
            if suffixes and not name.endswith(suffixes):
                continue
            found.append((os.path.join(parent, name), name))

    found.sort()  # order by path, keeping each name next to its path
    paths_list = [path for path, _ in found]
    names_list = [name for _, name in found]
    return paths_list, names_list
def compress_img(in_path, out_path, size=1024):
    """
    Save a downsized copy of an image.

    The image is reduced with Image.thumbnail((size, size)) and written to
    ``out_path``; a line is printed once the file has been saved.

    :param in_path: path of the source image
    :param out_path: path the resized image is written to
    :param size: bounding box edge handed to thumbnail(), in pixels
    :return: None
    """
    # The context manager closes the underlying file once the image has been
    # saved, so handles do not pile up while many files are processed.
    with Image.open(in_path) as img:
        img.thumbnail((size, size))
        img.save(out_path)
    print('Processed: {}'.format(out_path))
def process_folder(in_folder, out_folder, size=1024, n_prc=20):
    """
    Resize every file found under ``in_folder`` with a pool of worker
    processes and write the results into ``out_folder``.

    :param in_folder: input folder, searched recursively
    :param out_folder: output folder, created when missing
    :param size: bounding box edge passed to compress_img
    :param n_prc: number of worker processes
    :return: None
    """
    mkdir_if_not_exist(out_folder)  # create the output folder if needed
    path_list, name_list = traverse_dir_files(in_folder)
    pool = Pool(processes=n_prc)  # process pool, one image per task
    pending = []
    for in_path, name in zip(path_list, name_list):
        # NOTE: outputs are written flat into out_folder, so files with the
        # same name in different sub-folders overwrite each other.
        out_path = os.path.join(out_folder, name)
        task = pool.apply_async(compress_img, (in_path, out_path, size))
        pending.append((in_path, task))
    pool.close()
    pool.join()

    # apply_async stores worker exceptions in the result object; without
    # calling get() a file that failed to convert would go unnoticed.
    for in_path, task in pending:
        try:
            task.get()
        except Exception as err:
            print('[Exception] %s: %s' % (in_path, err))
    print('全部處理完成')
def parse_args(argv=None):
    """
    Parse the command-line options of the compression script.

    -i  input folder (required)
    -o  output folder (required)
    -s  longest edge of the resized images (optional, 1024 by default)
    -p  number of worker processes (optional, 20 by default)

    :param argv: list of arguments to parse; None means the process
        command line
    :return: (in_folder, out_folder, size, n_prc), size and n_prc as ints
    """
    parser = argparse.ArgumentParser(description='壓縮圖片腳本')
    parser.add_argument('-i', dest='in_folder', required=True, help='輸入文件夾', type=str)
    parser.add_argument('-o', dest='out_folder', required=True, help='輸出文件夾', type=str)
    # type=int lets argparse reject non-numeric values with a usage message
    # instead of a ValueError traceback further down.
    parser.add_argument('-s', dest='size', required=False, default=1024, help='最長邊', type=int)
    parser.add_argument('-p', dest='n_prc', required=False, default=20, help='進程數', type=int)
    args = parser.parse_args(argv)
    in_folder = args.in_folder
    print("文件路徑:%s" % in_folder)
    out_folder = args.out_folder
    print("輸出文件夾:%s" % out_folder)
    size = args.size
    n_prc = args.n_prc
    print('圖片尺寸: {}, 進程數: {}'.format(size, n_prc))
    return in_folder, out_folder, size, n_prc
def main():
    """
    Script entry point: read the command-line options, make sure the output
    folder exists, then resize the images.
    """
    in_folder, out_folder, size, n_prc = parse_args()
    mkdir_if_not_exist(out_folder)  # create the output folder if needed
    process_folder(in_folder, out_folder, size, n_prc)
# Run the compressor only when this file is executed as a script.
if __name__ == '__main__':
    main()