目錄
引言
最近在做自己的數據集,發現採集到的圖像尺寸很大(大部分是手機拍攝的)、圖片格式不統一。
我們在做深度學習和機器學習的時候,自己製作的數據集格式應該是統一的,本文腳本的功能就是對數據集中的圖像進行統一整理。
功能包括:
批量圖像重命名、批量圖像尺寸轉換、批量圖像剪切、批量圖像格式轉換、測試圖像大小、測試標籤情況。
程序介紹
程序1:批量調整圖像尺寸
功能:在編輯器內運行,批量resize圖像,並另保存。
函數輸入:image_path=image_path, save_path=save_path, x=1920, y=1080, quality=95, dpi_x=72, dpi_y=72
對應:輸入圖像路徑、輸出圖像路徑、x,y,質量、dpi的x值,dpi的y值;若沒有輸入某些參數,將按默認值運行
示例:
resize(image_path=image_path, save_path=save_path, x=1920, y=1080, quality=95, dpi_x=72, dpi_y=72)
代碼:
import cv2
import os
from PIL import Image
image_path = './image/' # input image folder
save_path = './resize/' # output image folder
# Keep both paths free of Chinese characters
def resize(image_path, save_path, x=256, y=256, quality=95, dpi_x=72.0, dpi_y=72.0):
    """Resize every image in ``image_path`` and save the copies to ``save_path``.

    Args:
        image_path: Folder containing the source images.
        save_path: Folder the resized copies are written to; created when it
            does not exist. Each copy keeps its original file name.
        x: Target width in pixels (first element of the size given to cv2.resize).
        y: Target height in pixels (second element of that size).
        quality: Forwarded to ``Image.save`` as ``quality``.
        dpi_x: Horizontal value of the ``dpi`` pair forwarded to ``Image.save``.
        dpi_y: Vertical value of the ``dpi`` pair forwarded to ``Image.save``.
    """
    if save_path:
        os.makedirs(save_path, exist_ok=True)
    # Sorted so the progress log and processing order are reproducible.
    im_name = sorted(os.listdir(image_path))
    total = len(im_name)
    for i, name in enumerate(im_name):
        path = os.path.join(image_path, name)
        print("Extract %s image, %d of %d images" % (name, i + 1, total))
        im = cv2.imread(path)  # read the image (BGR channel order)
        if im is None:
            # cv2.imread signals failure by returning None (sub-folder,
            # non-image file, unreadable path); skip instead of crashing.
            print("Skip %s: not a readable image" % name)
            continue
        im_resize = cv2.resize(im, (x, y))
        # PIL expects RGB, so swap the channel order before saving.
        im_dpi = Image.fromarray(cv2.cvtColor(im_resize, cv2.COLOR_BGR2RGB))
        im_dpi.save(os.path.join(save_path, name), quality=quality, dpi=(dpi_x, dpi_y))
# Run the batch resize on the folders configured above: 1920x1080 output, quality 95, 72 dpi.
resize(image_path=image_path, save_path=save_path, x=1920, y=1080, quality=95, dpi_x=72, dpi_y=72)
運行結果,以其中一個圖像爲例:
程序2:批量調整圖像尺寸(命令行版)
功能:使用命令行運行,批量resize圖像,並另保存。
函數輸入:image_path=image_path, save_path=save_path, x=1920, y=1080, quality=95, dpi_x=72, dpi_y=72
對應:輸入圖像路徑、輸出圖像路徑、x,y,質量、dpi的x值,dpi的y值;若沒有輸入某些參數,將按默認值運行。
示例:在命令行輸入
python image_resize_args.py --image_path ./image/ --save_path ./resize/ -x 1024 -y 720 -quality 95 -dpi_x 72 -dpi_y 72
代碼:
import cv2
import os
from PIL import Image
import argparse as ap
# Parse the command-line options. Only the two folders are mandatory; every
# other option falls back to its default when it is omitted.
parser = ap.ArgumentParser()
parser.add_argument("--image_path", help='輸入圖片來源路徑', required=True)
parser.add_argument("--save_path", help='輸入圖片存儲路徑', required=True)
# x and y are handed to cv2.resize as (x, y), i.e. (width, height).
parser.add_argument("-x", type=int, default=256, help='輸出圖像的寬度(像素)')
parser.add_argument("-y", type=int, default=256, help='輸出圖像的高度(像素)')
parser.add_argument("-quality", type=int, default=95, help='輸入圖像存儲質量')
parser.add_argument("-dpi_x", type=int, default=72, help='輸入圖像dpi_x')
parser.add_argument("-dpi_y", type=int, default=72, help='輸入圖像dpi_y')
args = vars(parser.parse_args())
image_path = args["image_path"]
save_path = args["save_path"]
x = args["x"]
y = args["y"]
quality = args["quality"]
dpi_x = args["dpi_x"]
dpi_y = args["dpi_y"]
def resize(image_path, save_path, x=256, y=256, quality=95, dpi_x=72, dpi_y=72):
    """Resize every image in ``image_path`` and save the copies to ``save_path``.

    Args:
        image_path: Folder containing the source images.
        save_path: Folder the resized copies are written to; created when it
            does not exist. Each copy keeps its original file name.
        x: Target width in pixels (first element of the size given to cv2.resize).
        y: Target height in pixels (second element of that size).
        quality: Forwarded to ``Image.save`` as ``quality``.
        dpi_x: Horizontal value of the ``dpi`` pair forwarded to ``Image.save``.
        dpi_y: Vertical value of the ``dpi`` pair forwarded to ``Image.save``.
    """
    if save_path:
        os.makedirs(save_path, exist_ok=True)
    # Sorted so the progress log and processing order are reproducible.
    im_name = sorted(os.listdir(image_path))
    total = len(im_name)
    for i, name in enumerate(im_name):
        path = os.path.join(image_path, name)
        print("Extract %s image, %d of %d images" % (name, i + 1, total))
        im = cv2.imread(path)
        if im is None:
            # cv2.imread signals failure by returning None (sub-folder,
            # non-image file, unreadable path); skip instead of crashing.
            print("Skip %s: not a readable image" % name)
            continue
        im_resize = cv2.resize(im, (x, y))
        # PIL expects RGB, so swap the channel order before saving.
        im_dpi = Image.fromarray(cv2.cvtColor(im_resize, cv2.COLOR_BGR2RGB))
        im_dpi.save(os.path.join(save_path, name), quality=quality, dpi=(dpi_x, dpi_y))
# Echo the parsed options and their Python types before starting.
print(image_path, save_path, x, y, quality, dpi_x, dpi_y)
print(type(image_path), type(save_path), type(x), type(y), type(quality), type(dpi_x), type(dpi_y))
# Run the batch resize with the values taken from the command line.
resize(image_path=image_path, save_path=save_path, x=x, y=y, quality=quality, dpi_x=dpi_x, dpi_y=dpi_y)
運行結果,以其中一個圖像爲例:
程序3:批量圖像重命名
功能:對文件夾內的圖像進行批量重命名。
函數輸入:無,只需調整腳本里的path和Newdir = os.path.join(path, str(count).zfill(4) + filetype)中的4這個位置,換成你想要的位數。
代碼:
# 按順序修改圖片的名字
# 修改一個文件夾下所有圖片的名字,修改成000000.jpg格式
import os
path = './image'
# os.listdir() returns names in arbitrary order; sort so that the numbering
# is reproducible from run to run.
filelist = sorted(os.listdir(path))

# Phase 1: park every file under a temporary name. Renaming straight to the
# final name could overwrite a file that has not been processed yet (for
# example when the folder already contains 0001.jpg from an earlier run).
staged = []
for file in filelist:
    print(file)
    Olddir = os.path.join(path, file)
    if os.path.isdir(Olddir):
        continue
    filetype = os.path.splitext(file)[1]
    Tmpdir = os.path.join(path, '.renaming_%d_%s' % (os.getpid(), file))
    os.rename(Olddir, Tmpdir)
    staged.append((Tmpdir, filetype))

# Phase 2: give the parked files their final zero-padded sequence numbers.
# Change the 4 in zfill(4) to get a different number of digits.
count = 0
for Tmpdir, filetype in staged:
    Newdir = os.path.join(path, str(count).zfill(4) + filetype)
    os.rename(Tmpdir, Newdir)
    count += 1
zfill(5)的運行結果
程序4:圖像裁剪
功能:將原先圖像裁剪成自定義的尺寸
函數輸入:res = cut(image_path, save_path, 832, 832),注意修改路徑和尺寸
對應:圖像路徑,裁剪後的保存路徑,需要得到的x,需要得到的y
代碼:
# -*- coding:utf-8 -*-
from PIL import Image
import os
def _cut_row(im, prefix, x1, x2, w, vy, dy, n):
    """Save the tiles of one horizontal strip (rows x1..x2); return the next tile number."""
    y1 = 0
    y2 = vy
    while y2 <= w:
        im.crop((y1, x1, y2, x2)).save(prefix + '%06d' % (n) + ".jpg")
        y1 = y1 + dy
        y2 = y1 + vy
        n = n + 1
    # The loop always exits with y2 > w, so finish the strip with one tile
    # flush against the right edge.
    im.crop((w - vy, x1, w, x2)).save(prefix + '%06d' % (n) + ".jpg")
    return n + 1


def cut(image_path, save_path, vx, vy, dx=300, dy=300):
    """Cut every image in ``image_path`` into overlapping tiles saved as ``.jpg``.

    Tiles are taken strip by strip from the top-left corner. Each strip ends
    with a tile flush against the right edge, and a final strip flush against
    the bottom edge is always added. Tiles are written as
    ``<save_path>/<image stem>_<6-digit number>.jpg``.

    Args:
        image_path: Folder containing the source images.
        save_path: Folder the tiles are written to; created when missing.
        vx: Tile extent along the image height (compared against ``im.size[1]``).
        vy: Tile extent along the image width (compared against ``im.size[0]``).
        dx: Step between strips along the height. Defaults to 300.
        dy: Step between tiles along the width. Defaults to 300.

    Returns:
        Total number of tiles written for all images.

    Raises:
        ValueError: If ``dx`` or ``dy`` is not positive (the tiling loops
            would never terminate).
    """
    if dx <= 0 or dy <= 0:
        raise ValueError("dx and dy must be positive, got dx=%r, dy=%r" % (dx, dy))
    if save_path:
        os.makedirs(save_path, exist_ok=True)
    count = 0
    paths = [os.path.join(image_path, name) for name in os.listdir(image_path)]
    for i, path in enumerate(paths):
        # Stem of the file name; splitext keeps dots inside the name, so
        # "a.1.png" and "a.2.png" no longer map to the same prefix.
        name = os.path.splitext(os.path.basename(path))[0]
        prefix = os.path.join(save_path, name + '_')
        with Image.open(path) as im:
            w = im.size[0]
            h = im.size[1]
            n = 1  # number of the next tile to write
            x1 = 0
            x2 = vx
            while x2 <= h:
                n = _cut_row(im, prefix, x1, x2, w, vy, dy, n)
                x1 = x1 + dx
                x2 = x1 + vx
            # Last strip, flush against the bottom edge.
            # NOTE(review): for images smaller than the tile the crop box
            # starts at a negative offset - confirm that is acceptable.
            n = _cut_row(im, prefix, h - vx, h, w, vy, dy, n)
        print(path + "切割成功,切割得到的子圖片數爲", n - 1, ':', i + 1, '/', len(paths))
        # n is the *next* tile number, so n - 1 tiles were written.
        count += n - 1
    return count
if __name__ == "__main__":
    image_path = './image/'
    save_path = './image_cut/'
    # Tile size handed to cut() as vx, vy
    # Large tiles
    res = cut(image_path, save_path, 832, 832)
    # Medium tiles
    # res = cut(id,120,120)
    # Small tiles
    # res = cut(id,80,80)
    print('all sub image:', res)
程序5:圖像格式轉換
功能:圖像的格式有png jpg等,把路徑裏的png轉成jpg或者 JPEG轉成jpg等,需要怎麼轉換,可以自己定義。
注意:dirName是圖像存在的路徑,newname[-1] == "png"是需要轉換的格式(轉換之前),newname[-1] = "jpg"是想要轉換的格式(轉換之後)
下面腳本實現的是把.png轉成.jpg格式。注意:該腳本只修改文件的後綴名,並不會對圖像數據重新編碼。
代碼:
# 把某種類型的圖片改爲.jpg格式
import os
import string
dirName = './image/'
li = os.listdir(dirName)
for filename in li:
    stem, ext = os.path.splitext(filename)
    # Suffix of the files to convert; compared case-insensitively so that
    # "a.PNG" is picked up as well.
    if ext.lower() != ".png":
        continue
    # NOTE: this only changes the file extension; the image data is not
    # re-encoded.
    newname = os.path.join(dirName, stem + ".jpg")
    filename = os.path.join(dirName, filename)
    if os.path.exists(newname):
        # os.rename may silently replace an existing target; never
        # overwrite an image that is already there.
        print(newname, "already exists, skipped")
        continue
    os.rename(filename, newname)
    print(newname, "updated successfully")
程序6:測試圖像尺寸是否是指定大小
功能:測試文件夾下的圖像尺寸是否是指定大小,將尺寸不正確的圖像名輸出
代碼:
import os
import cv2
im_path = 'myData/VOC2007/JPEGImages'
x = 1920  # expected image width in pixels
y = 1080  # expected image height in pixels
ims = os.listdir(im_path)
ims.sort()
for name in ims:
    im = cv2.imread(os.path.join(im_path, name))
    if im is None:
        # cv2.imread returns None for entries it cannot decode; report them
        # instead of crashing on im.shape.
        print(name, 'cannot be read as an image')
        continue
    # shape is (rows, columns, ...), i.e. (height, width, ...).
    n, m = im.shape[:2]
    if n != y or m != x:
        print(name)
print('over')
程序7:測試圖像的ground truth
功能:測試使用LabelImg打過標籤後的標註情況,查看標籤是否正確,程序運行後,按空格繼續查看。
代碼:
# -*- coding: utf-8 -*-
import os
import random
import cv2 as cv
import matplotlib.pyplot as plt
# Class names; presumably indexed by the class id stored in the label files - TODO confirm
labels = ["TA"]
# BGR colours for the boxes, looked up by class id in draw_label_rec
color_list = [(0, 0, 255), (255, 0, 0), (0, 255, 0), (0, 255, 255)]
# Folder listed by main() for the .jpg images to check
img_dir = "myData/VOC2007/JPEGImages"
# Folder holding one <image stem>.txt label file per image
yolo_txt_dir = "myData/VOC2007/labels"
# result_dst_dir = "/home/youyheng/DJIdata/robomaster_Final_Tournament/check_label_result"
# Display size as a percentage of the original image size
scale_percent = 80
# rates that represent the imgs of all datasets
# 1 for all imgs, 0.5 for half of the imgs
check_rate = 1
# When True, main() shuffles the image list before checking
random_check = False
def cv_imread(file_path):
    """Load ``file_path`` with matplotlib and return it with the first and third channels swapped."""
    # The swap yields the channel order that the cv drawing/display calls in
    # this script work with.
    return cv.cvtColor(plt.imread(file_path), cv.COLOR_BGR2RGB)
def my_line(img, start, end):
    """Draw a black line of thickness 2 from ``start`` to ``end`` on ``img``."""
    black = (0, 0, 0)
    cv.line(img, start, end, black, 2, 8)
# draw rectangle with the data caught in the data file
# And set the name of the label to it
def draw_label_rec(img, label_index, label_info_list, img_name):
    """Draw one labelled box and the image name onto ``img`` in place.

    Args:
        img: Image array; ``shape[0]`` is used as the height and ``shape[1]``
            as the width.
        label_index: Class id of the box; selects the colour from
            ``color_list`` (wrapping around when there are more classes than
            colours).
        label_info_list: ``[x, y, w, h]`` as numbers or numeric strings. x/y
            are the box centre and w/h the box size, all expressed as
            fractions of the image width/height.
        img_name: Text written near the top-left corner of the image.
    """
    img_height = img.shape[0]
    img_width = img.shape[1]
    x, y, w, h = (float(value) for value in label_info_list[:4])

    # Scale the normalised centre and half-extents to pixels.
    x_center = x * img_width
    y_center = y * img_height
    half_w = w * img_width / 2
    # The vertical extent comes from h, not w.
    half_h = h * img_height / 2
    xmin = int(x_center - half_w)
    xmax = int(x_center + half_w)
    ymin = int(y_center - half_h)
    ymax = int(y_center + half_h)

    # Wrap the class id so that more classes than colours do not raise.
    color = color_list[int(label_index) % len(color_list)]
    cv.rectangle(img,  # img to paint on
                 (xmin, ymin),  # top left
                 (xmax, ymax),  # bottom right
                 color,  # bgr color
                 2)  # line thickness
    cv.putText(img, str(img_name), (5, 50), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
def main():
    """Show every labelled ``.jpg`` in ``img_dir`` with its label boxes drawn.

    For each image that has a matching ``<stem>.txt`` in ``yolo_txt_dir``,
    every label line (``class x y w h``) is drawn with draw_label_rec. The
    image is then scaled to ``scale_percent`` and displayed until a key is
    pressed. Images without a label file are skipped.
    """
    origin_window = "Origin Window"

    # Load all imgs with label info
    img_name_list = sorted(os.listdir(img_dir))
    if random_check is True:
        random.shuffle(img_name_list)

    check_max_times = int(check_rate * len(img_name_list))
    for index, img_name in enumerate(img_name_list):
        if not img_name.endswith('jpg'):
            continue
        # Checked for max_times: stop, but still print the closing message.
        if index >= check_max_times:
            break

        img_file = os.path.join(img_dir, img_name)
        print("**check img : {0} **".format(img_file))

        # Skip images without a label file before spending time decoding them.
        txt_file = os.path.join(yolo_txt_dir, img_name.rpartition(".")[0] + ".txt")
        if not os.path.exists(txt_file):
            continue

        # Open IMG
        src_image = cv_imread(img_file)
        if src_image is None:
            print("Open image Error")
            return

        # Display size as a percentage of the original size.
        src_height = src_image.shape[0]
        src_width = src_image.shape[1]
        width = int(src_width * scale_percent / 100)
        height = int(src_height * scale_percent / 100)
        dim = (width, height)

        # Decode the label file; `with` closes it on every exit path.
        with open(txt_file, "r") as file_reader:
            for line in file_reader:
                label_info_list = line.split()
                if not label_info_list:
                    # Blank line (e.g. a trailing newline): nothing to draw.
                    continue
                if len(label_info_list) < 5:
                    print("Skip malformed label line in {0}: {1!r}".format(txt_file, line))
                    continue
                # Fields: class id, x, y, w, h
                label_index = int(label_info_list[0])
                draw_label_rec(src_image, label_index, label_info_list[1:5], img_name)

        resized_src = cv.resize(src_image, dim, interpolation=cv.INTER_CUBIC)

        # show the result and wait for a key press
        cv.imshow(origin_window, resized_src)
        cv.waitKey(0)
        cv.destroyAllWindows()

    print("**check over**")
# Run the label check only when this file is executed as a script.
if __name__ == "__main__":
    main()
注意事項
首先要安裝需要的庫,其次注意在命令行輸入時不要多輸入、漏輸入空格。
這些腳本可以二次開發,只需按照自己的要求修改,就可以實現對其他文件的類似操作
------------------------------------------------------------------------------------------------------------------------------------
2020-6-11更新:增加圖像剪切、圖像重命名、圖像格式轉換功能、尺寸測試、xml測試,同時修改標題、引言部分。