圖像增廣(image augmentation)技術通過對訓練圖像做一系列隨機改變,來產生相似但又不同的訓練樣本,從而擴大訓練數據集的規模。
圖像增廣的另一種解釋是,隨機改變訓練樣本可以降低模型對某些屬性的依賴,從而提高模型的泛化能力。
工作流程
- 單一圖片增廣處理流程
- 整個文件夾的圖片增廣處理流程
輔助工作
讀取圖片
使用OpenCV讀取
# Load the sample image from disk with OpenCV.
# NOTE(review): cv2.imread is documented to return None instead of raising
# when the file cannot be read - confirm and check the result before use.
filename = "image/test.jpg"
image = cv2.imread(filename)
讀取標註XML文件
根據XML標註文件,得到由 [x_min, y_min, x_max, y_max, cat_name] 組成的標註列表(每個目標框對應一項)
def getBoxes(self, image_name):
    """
    Read the XML annotation that belongs to an image.

    :param image_name: image path without extension; the annotation is read
        from ``image_name + '.xml'``
    :return: list with one ``[x_min, y_min, x_max, y_max, cat_name]`` entry
        per ``<object>`` element
    """
    tree = ET.parse(image_name + '.xml')
    root = tree.getroot()
    boxes = []
    for obj in root.findall('object'):
        name = obj.find('name').text
        bndbox = obj.find('bndbox')
        # Look the coordinates up by tag instead of trusting the child order,
        # and go through float() so values such as "20.0" are accepted too.
        coordinates = [int(float(bndbox.find(tag).text))
                       for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
        boxes.append(coordinates + [name])
    return boxes
保存變換後的圖片
使用OpenCV保存
# Excerpt from the save step: `folder`, `i` and `change_img` are defined by the
# surrounding code and are not part of this fragment.
# The output name is built from the current timestamp (second resolution), the
# loop index i and a random number between 0 and 9999.
current_time = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
current_num = str(random.randint(0, 9999))
save_image_name = folder + '/' + current_time + '_' + str(i) + current_num + '.jpg'
print("save image name: " + save_image_name)
# Write the changed image to disk with OpenCV.
cv2.imwrite(save_image_name, change_img)
保存更新過的標註
def saveXML(self, image_name, xml_name, boxes, shape1, shape0):
    """
    Write an annotation XML file describing an augmented image.

    :param image_name: text stored in ``<filename>`` and appended to ``<path>``
    :param xml_name: path of the XML file to write
    :param boxes: list of ``[x_min, y_min, x_max, y_max, cat_name]``
    :param shape1: image width, stored in ``<width>``
    :param shape0: image height, stored in ``<height>``
    :return: None
    """
    print("xml name ====================================" + xml_name)
    # Bug fix: this line used to print xml_name a second time.
    print("image name ====================================" + image_name)

    root = ET.Element('annotation')
    ET.SubElement(root, 'folder').text = 'image'
    ET.SubElement(root, 'filename').text = image_name
    # NOTE(review): <path> is assembled with Windows separators and a fixed
    # "image" directory; kept as-is so the generated files do not change.
    ET.SubElement(root, 'path').text = os.getcwd() + '\\image\\' + image_name
    source = ET.SubElement(root, 'source')
    ET.SubElement(source, 'database').text = 'Unknown'
    size = ET.SubElement(root, 'size')
    ET.SubElement(size, 'width').text = str(shape1)
    ET.SubElement(size, 'height').text = str(shape0)
    # The channel count is hard-coded to 3.
    ET.SubElement(size, 'depth').text = '3'
    ET.SubElement(root, 'segmented').text = '0'

    for box in boxes:
        obj = ET.SubElement(root, 'object')
        ET.SubElement(obj, 'name').text = box[4]
        ET.SubElement(obj, 'pose').text = 'Unspecified'
        ET.SubElement(obj, 'truncated').text = '0'
        ET.SubElement(obj, 'difficult').text = '0'
        bndbox = ET.SubElement(obj, 'bndbox')
        for tag, value in zip(('xmin', 'ymin', 'xmax', 'ymax'), box[:4]):
            ET.SubElement(bndbox, tag).text = str(value)

    # Indent the tree in memory and write it once; the previous
    # write -> parse -> indent -> write round trip produced the same file.
    self.pretty_xml(root, '\t', '\n')
    ET.ElementTree(root).write(xml_name)
美化更新過的XML
def pretty_xml(self, element, indent, newline, level=0):
    """
    Indent an element tree in place by rewriting ``text`` and ``tail``.

    :param element: the Element to format (its children are handled
        recursively)
    :param indent: string used for one indentation step, e.g. ``'\\t'``
    :param newline: string used as line break, e.g. ``'\\n'``
    :param level: nesting depth of ``element``; 0 for the root
    :return: None
    """
    children = list(element)
    # Testing an Element's truth value is deprecated, so the "has children"
    # check is done on the child list instead.
    if children:
        inner = newline + indent * (level + 1)
        if element.text is None or element.text.isspace():
            element.text = inner
        else:
            element.text = inner + element.text.strip() + inner
    last = len(children) - 1
    # enumerate() replaces the former temp.index(child) lookup, which scanned
    # the list again for every child.
    for position, child in enumerate(children):
        if position < last:
            # A sibling follows: it starts at the same depth as this child.
            child.tail = newline + indent * (level + 1)
        else:
            # Last child: the parent's closing tag is one step less indented.
            child.tail = newline + indent * level
        self.pretty_xml(child, indent, newline, level=level + 1)
圖像及座標變換
1-裁切
def __cropImage(self, img, boxes):
    """
    Crop the image to a random window that still contains every box.

    :param img: image array; ``img.shape[0]`` is the height and
        ``img.shape[1]`` the width
    :param boxes: list of ``[x_min, y_min, x_max, y_max, cat_name]``
    :return: ``(crop_img, crop_bboxes)`` - the cropped image and the boxes
        expressed in the cropped image's coordinates
    """
    h, w = img.shape[:2]
    if not boxes:
        # Without boxes there is no region to preserve; the old code drew a
        # random window that could be empty. Return the image unchanged.
        return img, []

    # Smallest rectangle that encloses all boxes.
    x_min = min(w, min(bbox[0] for bbox in boxes))
    y_min = min(h, min(bbox[1] for bbox in boxes))
    x_max = max(0, max(bbox[2] for bbox in boxes))
    y_max = max(0, max(bbox[3] for bbox in boxes))

    # Distance from that rectangle to each image border.
    d_to_left = x_min
    d_to_right = w - x_max
    d_to_top = y_min
    d_to_bottom = h - y_max

    # Grow the rectangle by a random amount on every side, then clamp it to
    # the image.
    crop_x_min = max(0, int(x_min - random.uniform(0, d_to_left)))
    crop_y_min = max(0, int(y_min - random.uniform(0, d_to_top)))
    crop_x_max = min(w, int(x_max + random.uniform(0, d_to_right)))
    crop_y_max = min(h, int(y_max + random.uniform(0, d_to_bottom)))

    crop_img = img[crop_y_min:crop_y_max, crop_x_min:crop_x_max]

    # Shift the boxes into the cropped image's coordinate system.
    crop_bboxes = [[int(bbox[0] - crop_x_min), int(bbox[1] - crop_y_min),
                    int(bbox[2] - crop_x_min), int(bbox[3] - crop_y_min), bbox[4]]
                   for bbox in boxes]
    return crop_img, crop_bboxes
2-平移
def __translationImage(self, img, boxes):
    """
    Shift the image by a random offset and move the boxes with it.

    :param img: image array; ``img.shape[0]`` is the height and
        ``img.shape[1]`` the width
    :param boxes: list of ``[x_min, y_min, x_max, y_max, cat_name]``
    :return: ``(shift_img, shift_bboxes)`` - the shifted image (same size as
        the input) and the shifted boxes
    """
    h, w = img.shape[:2]
    x_min, y_min, x_max, y_max = w, h, 0, 0
    for bbox in boxes:
        x_min = min(x_min, bbox[0])
        y_min = min(y_min, bbox[1])
        x_max = max(x_max, bbox[2])
        # Bug fix: this used to read max(x_max, bbox[3]), mixing the x extent
        # into y_max and corrupting the allowed downward shift.
        y_max = max(y_max, bbox[3])

    # Distance from the rectangle enclosing all boxes to each border, i.e.
    # the largest shift possible in that direction.
    d_to_left = x_min
    d_to_right = w - x_max
    d_to_top = y_min
    d_to_bottom = h - y_max

    # Only a third of the available distance is used in each direction.
    # Positive x moves right, negative left; positive y moves down, negative up.
    x = random.uniform(-(d_to_left / 3), d_to_right / 3)
    y = random.uniform(-(d_to_top / 3), d_to_bottom / 3)
    M = np.float32([[1, 0, x], [0, 1, y]])

    # Arguments: source image, 2x3 translation matrix, output size (w, h).
    shift_img = cv2.warpAffine(img, M, (w, h))

    shift_bboxes = [[int(bbox[0] + x), int(bbox[1] + y),
                     int(bbox[2] + x), int(bbox[3] + y), bbox[4]]
                    for bbox in boxes]
    return shift_img, shift_bboxes
3-改變亮度
def __changeLightofImage(self, img, boxes):
    """
    Change the brightness of the image with a random gamma correction.

    :param img: image array
    :param boxes: list of boxes; brightness does not move them, so a deep
        copy is returned
    :return: ``(newImage, newBoxes)`` - the gamma-adjusted image and the
        copied boxes
    """
    # Per the original note on adjust_gamma: gamma > 1 darkens the image and
    # gamma < 1 brightens it.
    gamma = random.uniform(0.5, 1.5)
    adjusted = exposure.adjust_gamma(img, gamma)
    return adjusted, copy.deepcopy(boxes)
4-高斯噪聲
def __addNoiseToImage(self, img, boxes):
    """
    Add Gaussian noise to the image.

    :param img: image array
    :param boxes: list of boxes; noise does not move them, so a deep copy is
        returned
    :return: ``(newImage, newBoxes)`` - the noisy image as an 8-bit array and
        the copied boxes
    """
    # random_noise yields pixel values in [0, 1], hence the scaling by 255.
    noisy = random_noise(img, mode='gaussian', clip=True) * 255
    # Return 8-bit pixels instead of a float array so this augmentation hands
    # back the same kind of image as the other ones.
    # NOTE(review): assumes the input is an 8-bit image - confirm.
    new_image = np.clip(np.rint(noisy), 0, 255).astype(np.uint8)
    return new_image, copy.deepcopy(boxes)
5-旋轉
def __rotateImage(self, img, boxes, angle=None, scale=None):
    """
    Rotate and scale the image and recompute the boxes.

    :param img: image array; ``img.shape[0]`` is the height and
        ``img.shape[1]`` the width
    :param boxes: list of ``[x_min, y_min, x_max, y_max, cat_name]`` with
        numeric coordinates
    :param angle: rotation angle in degrees; a random value in [-45, 45] is
        drawn when omitted
    :param scale: scale factor; a random value in [0.5, 1.5] is drawn when
        omitted
    :return: ``(rot_img, rot_bboxes)`` - the rotated image and the rotated
        boxes
    """
    h, w = img.shape[:2]
    # The docstring always described angle/scale; they are now real optional
    # parameters. Existing two-argument calls keep the random behaviour.
    if angle is None:
        angle = random.uniform(-45, 45)
    if scale is None:
        scale = random.uniform(0.5, 1.5)
    rangle = np.deg2rad(angle)

    # Width and height of the canvas that holds the rotated, scaled image.
    nw = (abs(np.sin(rangle) * h) + abs(np.cos(rangle) * w)) * scale
    nh = (abs(np.cos(rangle) * h) + abs(np.sin(rangle) * w)) * scale

    # 2x3 matrix rotating about the centre of the new canvas.
    rot_mat = cv2.getRotationMatrix2D((nw * 0.5, nh * 0.5), angle, scale)
    # Offset between the new and the old centre; it only affects the
    # translation column of the matrix.
    rot_move = np.dot(rot_mat, np.array([(nw - w) * 0.5, (nh - h) * 0.5, 0]))
    rot_mat[0, 2] += rot_move[0]
    rot_mat[1, 2] += rot_move[1]

    rot_img = cv2.warpAffine(img, rot_mat, (int(math.ceil(nw)), int(math.ceil(nh))),
                             flags=cv2.INTER_LANCZOS4)

    # Transform the midpoint of each box edge with the final matrix and take
    # the bounding rectangle of the four transformed points.
    rot_bboxes = []
    for bbox in boxes:
        x_min, y_min, x_max, y_max, name = bbox[0], bbox[1], bbox[2], bbox[3], bbox[4]
        edge_midpoints = (
            [(x_min + x_max) / 2, y_min, 1],
            [x_max, (y_min + y_max) / 2, 1],
            [(x_min + x_max) / 2, y_max, 1],
            [x_min, (y_min + y_max) / 2, 1],
        )
        concat = np.vstack([np.dot(rot_mat, np.array(point)) for point in edge_midpoints])
        concat = concat.astype(np.int32)
        rx, ry, rw, rh = cv2.boundingRect(concat)
        rot_bboxes.append([rx, ry, rx + rw, ry + rh, name])
    return rot_img, rot_bboxes
6-鏡像
def __flipImage(self, img, bboxes):
    """
    Mirror the image, horizontally or vertically with equal probability.

    :param img: image array; ``img.shape[0]`` is the height and
        ``img.shape[1]`` the width
    :param bboxes: list of ``[x_min, y_min, x_max, y_max, cat_name]`` with
        numeric coordinates
    :return: ``(flip_img, flip_bboxes)`` - the mirrored image and the
        mirrored boxes
    """
    # shape[:2] also works for images without a channel axis.
    h, w = img.shape[:2]
    horizon = random.random() < 0.5

    # Bug fix: the horizontal branch used flipCode -1, which flips both axes,
    # while the boxes below are only mirrored in x. flipCode 1 mirrors
    # left-right, flipCode 0 mirrors up-down.
    # cv2.flip is given no destination array here, so the former deepcopy of
    # the input is dropped.
    flip_img = cv2.flip(img, 1 if horizon else 0)

    flip_bboxes = []
    for bbox in bboxes:
        x_min, y_min, x_max, y_max, name = bbox[0], bbox[1], bbox[2], bbox[3], bbox[4]
        if horizon:
            flip_bboxes.append([w - x_max, y_min, w - x_min, y_max, name])
        else:
            flip_bboxes.append([x_min, h - y_max, x_max, h - y_min, name])
    return flip_img, flip_bboxes
完整代碼
import os
import cv2
import math
import argparse
import numpy as np
import copy
from skimage import exposure
from skimage.util import random_noise
import random
import datetime
import xml.etree.ElementTree as ET
"""
YOLO圖像增廣
"""
class ImageAugmentation:
    """
    Augment images that are annotated with XML bounding boxes.

    Every augmentation takes an image array and a list of
    ``[x_min, y_min, x_max, y_max, cat_name]`` boxes and returns the changed
    image together with the matching boxes.
    """

    def __init__(self):
        pass

    def getBoxes(self, image_name):
        """
        Read the XML annotation that belongs to an image.

        :param image_name: image path without extension; the annotation is
            read from ``image_name + '.xml'``
        :return: list with one ``[x_min, y_min, x_max, y_max, cat_name]``
            entry per ``<object>`` element
        """
        tree = ET.parse(image_name + '.xml')
        root = tree.getroot()
        boxes = []
        for obj in root.findall('object'):
            name = obj.find('name').text
            bndbox = obj.find('bndbox')
            # Look the coordinates up by tag instead of trusting the child
            # order, and go through float() so "20.0" is accepted too.
            coordinates = [int(float(bndbox.find(tag).text))
                           for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
            boxes.append(coordinates + [name])
        return boxes

    def saveXML(self, image_name, xml_name, boxes, shape1, shape0):
        """
        Write an annotation XML file describing an augmented image.

        :param image_name: text stored in ``<filename>`` and appended to
            ``<path>``
        :param xml_name: path of the XML file to write
        :param boxes: list of ``[x_min, y_min, x_max, y_max, cat_name]``
        :param shape1: image width, stored in ``<width>``
        :param shape0: image height, stored in ``<height>``
        :return: None
        """
        print("xml name ====================================" + xml_name)
        # Bug fix: this line used to print xml_name a second time.
        print("image name ====================================" + image_name)

        root = ET.Element('annotation')
        ET.SubElement(root, 'folder').text = 'image'
        ET.SubElement(root, 'filename').text = image_name
        # NOTE(review): <path> is assembled with Windows separators and a
        # fixed "image" directory; kept so generated files do not change.
        ET.SubElement(root, 'path').text = os.getcwd() + '\\image\\' + image_name
        source = ET.SubElement(root, 'source')
        ET.SubElement(source, 'database').text = 'Unknown'
        size = ET.SubElement(root, 'size')
        ET.SubElement(size, 'width').text = str(shape1)
        ET.SubElement(size, 'height').text = str(shape0)
        # The channel count is hard-coded to 3.
        ET.SubElement(size, 'depth').text = '3'
        ET.SubElement(root, 'segmented').text = '0'

        for box in boxes:
            obj = ET.SubElement(root, 'object')
            ET.SubElement(obj, 'name').text = box[4]
            ET.SubElement(obj, 'pose').text = 'Unspecified'
            ET.SubElement(obj, 'truncated').text = '0'
            ET.SubElement(obj, 'difficult').text = '0'
            bndbox = ET.SubElement(obj, 'bndbox')
            for tag, value in zip(('xmin', 'ymin', 'xmax', 'ymax'), box[:4]):
                ET.SubElement(bndbox, tag).text = str(value)

        # Indent the tree in memory and write it once; the previous
        # write -> parse -> indent -> write round trip gave the same file.
        self.pretty_xml(root, '\t', '\n')
        ET.ElementTree(root).write(xml_name)

    def pretty_xml(self, element, indent, newline, level=0):
        """
        Indent an element tree in place by rewriting ``text`` and ``tail``.

        :param element: the Element to format (children are handled
            recursively)
        :param indent: string used for one indentation step, e.g. ``'\\t'``
        :param newline: string used as line break, e.g. ``'\\n'``
        :param level: nesting depth of ``element``; 0 for the root
        :return: None
        """
        children = list(element)
        # Testing an Element's truth value is deprecated, so the "has
        # children" check is done on the child list instead.
        if children:
            inner = newline + indent * (level + 1)
            if element.text is None or element.text.isspace():
                element.text = inner
            else:
                element.text = inner + element.text.strip() + inner
        last = len(children) - 1
        # enumerate() replaces the former temp.index(child) lookup, which
        # scanned the list again for every child.
        for position, child in enumerate(children):
            if position < last:
                # A sibling follows: it starts at the same depth.
                child.tail = newline + indent * (level + 1)
            else:
                # Last child: the parent's closing tag is one step shallower.
                child.tail = newline + indent * level
            self.pretty_xml(child, indent, newline, level=level + 1)

    def changeImages(self, folder, function_name, image_name, n):
        """
        Apply one augmentation ``n`` times to an image and save each result.

        :param folder: directory that receives the new ``.jpg``/``.xml`` pairs
        :param function_name: one of ``"crop"``, ``"tran"``, ``"light"``,
            ``"noise"``, ``"rotate"`` or ``"flip"``
        :param image_name: image path without extension; ``.jpg`` and ``.xml``
            are appended to locate the image and its annotation
        :param n: number of augmented copies to create
        :raises ValueError: if ``function_name`` is not a known augmentation
        :raises IOError: if the image cannot be read
        :return: None
        """
        operations = {
            "crop": self.__cropImage,
            "tran": self.__translationImage,
            "light": self.__changeLightofImage,
            "noise": self.__addNoiseToImage,
            "rotate": self.__rotateImage,
            "flip": self.__flipImage,
        }
        function = operations.get(function_name)
        if function is None:
            # Previously an unknown name left `function` unbound and failed
            # later with an unrelated error.
            raise ValueError("unknown augmentation: " + repr(function_name))

        image = cv2.imread(image_name + '.jpg')
        if image is None:
            # cv2.imread reports an unreadable file by returning None.
            raise IOError("cannot read image: " + image_name + '.jpg')
        boxes = self.getBoxes(image_name)

        for i in range(1, n + 1):
            print(function_name + " image #" + str(i))
            change_img, change_boxes = function(copy.deepcopy(image), copy.deepcopy(boxes))
            print("Old boxes: ", boxes)
            print("New boxes: ", change_boxes)
            current_time = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
            # Same naming scheme as before, but the random suffix is redrawn
            # when it would overwrite a file that already exists (timestamps
            # only have second resolution).
            while True:
                current_num = str(random.randint(0, 9999))
                save_stem = folder + '/' + current_time + '_' + str(i) + current_num
                if not (os.path.exists(save_stem + '.jpg') or os.path.exists(save_stem + '.xml')):
                    break
            save_image_name = save_stem + '.jpg'
            save_xml_name = save_stem + '.xml'
            print(image_name)
            print("save image name: " + save_image_name)
            print("save xml name: " + save_xml_name)
            cv2.imwrite(save_image_name, change_img)
            self.saveXML(save_image_name, save_xml_name, change_boxes,
                         change_img.shape[1], change_img.shape[0])
            print("Save new image to current path: " + save_image_name)
            print("Save new xml to current path: " + save_xml_name)
            print("\n")

    # 1 - crop
    def __cropImage(self, img, boxes):
        """
        Crop the image to a random window that still contains every box.

        :param img: image array; ``img.shape[0]`` is the height and
            ``img.shape[1]`` the width
        :param boxes: list of ``[x_min, y_min, x_max, y_max, cat_name]``
        :return: ``(crop_img, crop_bboxes)`` - the cropped image and the boxes
            expressed in the cropped image's coordinates
        """
        h, w = img.shape[:2]
        if not boxes:
            # Without boxes there is no region to preserve; the old code drew
            # a random window that could be empty. Return the image as-is.
            return img, []

        # Smallest rectangle that encloses all boxes.
        x_min = min(w, min(bbox[0] for bbox in boxes))
        y_min = min(h, min(bbox[1] for bbox in boxes))
        x_max = max(0, max(bbox[2] for bbox in boxes))
        y_max = max(0, max(bbox[3] for bbox in boxes))

        # Distance from that rectangle to each image border.
        d_to_left = x_min
        d_to_right = w - x_max
        d_to_top = y_min
        d_to_bottom = h - y_max

        # Grow the rectangle by a random amount on every side, then clamp it
        # to the image.
        crop_x_min = max(0, int(x_min - random.uniform(0, d_to_left)))
        crop_y_min = max(0, int(y_min - random.uniform(0, d_to_top)))
        crop_x_max = min(w, int(x_max + random.uniform(0, d_to_right)))
        crop_y_max = min(h, int(y_max + random.uniform(0, d_to_bottom)))

        crop_img = img[crop_y_min:crop_y_max, crop_x_min:crop_x_max]

        # Shift the boxes into the cropped image's coordinate system.
        crop_bboxes = [[int(bbox[0] - crop_x_min), int(bbox[1] - crop_y_min),
                        int(bbox[2] - crop_x_min), int(bbox[3] - crop_y_min), bbox[4]]
                       for bbox in boxes]
        return crop_img, crop_bboxes

    # 2 - translate
    def __translationImage(self, img, boxes):
        """
        Shift the image by a random offset and move the boxes with it.

        :param img: image array; ``img.shape[0]`` is the height and
            ``img.shape[1]`` the width
        :param boxes: list of ``[x_min, y_min, x_max, y_max, cat_name]``
        :return: ``(shift_img, shift_bboxes)`` - the shifted image (same size
            as the input) and the shifted boxes
        """
        h, w = img.shape[:2]
        x_min, y_min, x_max, y_max = w, h, 0, 0
        for bbox in boxes:
            x_min = min(x_min, bbox[0])
            y_min = min(y_min, bbox[1])
            x_max = max(x_max, bbox[2])
            # Bug fix: this used to read max(x_max, bbox[3]), mixing the x
            # extent into y_max and corrupting the allowed downward shift.
            y_max = max(y_max, bbox[3])

        # Distance from the rectangle enclosing all boxes to each border,
        # i.e. the largest shift possible in that direction.
        d_to_left = x_min
        d_to_right = w - x_max
        d_to_top = y_min
        d_to_bottom = h - y_max

        # Only a third of the available distance is used in each direction.
        # Positive x moves right, negative left; positive y moves down,
        # negative up.
        x = random.uniform(-(d_to_left / 3), d_to_right / 3)
        y = random.uniform(-(d_to_top / 3), d_to_bottom / 3)
        M = np.float32([[1, 0, x], [0, 1, y]])

        # Arguments: source image, 2x3 translation matrix, output size (w, h).
        shift_img = cv2.warpAffine(img, M, (w, h))

        shift_bboxes = [[int(bbox[0] + x), int(bbox[1] + y),
                         int(bbox[2] + x), int(bbox[3] + y), bbox[4]]
                        for bbox in boxes]
        return shift_img, shift_bboxes

    # 3 - change brightness
    def __changeLightofImage(self, img, boxes):
        """
        Change the brightness of the image with a random gamma correction.

        :param img: image array
        :param boxes: list of boxes; brightness does not move them, so a deep
            copy is returned
        :return: ``(newImage, newBoxes)`` - the gamma-adjusted image and the
            copied boxes
        """
        # Per the original note on adjust_gamma: gamma > 1 darkens the image
        # and gamma < 1 brightens it.
        flag = random.uniform(0.5, 1.5)
        newBoxes = copy.deepcopy(boxes)
        newImage = exposure.adjust_gamma(img, flag)
        return newImage, newBoxes

    # 4 - add Gaussian noise
    def __addNoiseToImage(self, img, boxes):
        """
        Add Gaussian noise to the image.

        :param img: image array
        :param boxes: list of boxes; noise does not move them, so a deep copy
            is returned
        :return: ``(newImage, newBoxes)`` - the noisy image as an 8-bit array
            and the copied boxes
        """
        # random_noise yields pixel values in [0, 1], hence the scaling.
        noisy = random_noise(img, mode='gaussian', clip=True) * 255
        # Return 8-bit pixels instead of a float array so this augmentation
        # hands back the same kind of image as the other ones.
        # NOTE(review): assumes the input is an 8-bit image - confirm.
        newImage = np.clip(np.rint(noisy), 0, 255).astype(np.uint8)
        return newImage, copy.deepcopy(boxes)

    # 5 - rotate
    def __rotateImage(self, img, boxes, angle=None, scale=None):
        """
        Rotate and scale the image and recompute the boxes.

        :param img: image array; ``img.shape[0]`` is the height and
            ``img.shape[1]`` the width
        :param boxes: list of ``[x_min, y_min, x_max, y_max, cat_name]`` with
            numeric coordinates
        :param angle: rotation angle in degrees; a random value in [-45, 45]
            is drawn when omitted
        :param scale: scale factor; a random value in [0.5, 1.5] is drawn
            when omitted
        :return: ``(rot_img, rot_bboxes)`` - the rotated image and the
            rotated boxes
        """
        h, w = img.shape[:2]
        # The docstring always described angle/scale; they are now real
        # optional parameters. Two-argument calls keep the random behaviour.
        if angle is None:
            angle = random.uniform(-45, 45)
        if scale is None:
            scale = random.uniform(0.5, 1.5)
        rangle = np.deg2rad(angle)

        # Width and height of the canvas holding the rotated, scaled image.
        nw = (abs(np.sin(rangle) * h) + abs(np.cos(rangle) * w)) * scale
        nh = (abs(np.cos(rangle) * h) + abs(np.sin(rangle) * w)) * scale

        # 2x3 matrix rotating about the centre of the new canvas.
        rot_mat = cv2.getRotationMatrix2D((nw * 0.5, nh * 0.5), angle, scale)
        # Offset between the new and the old centre; it only affects the
        # translation column of the matrix.
        rot_move = np.dot(rot_mat, np.array([(nw - w) * 0.5, (nh - h) * 0.5, 0]))
        rot_mat[0, 2] += rot_move[0]
        rot_mat[1, 2] += rot_move[1]

        rot_img = cv2.warpAffine(img, rot_mat, (int(math.ceil(nw)), int(math.ceil(nh))),
                                 flags=cv2.INTER_LANCZOS4)

        # Transform the midpoint of each box edge with the final matrix and
        # take the bounding rectangle of the four transformed points.
        rot_bboxes = []
        for bbox in boxes:
            x_min, y_min, x_max, y_max, name = bbox[0], bbox[1], bbox[2], bbox[3], bbox[4]
            edge_midpoints = (
                [(x_min + x_max) / 2, y_min, 1],
                [x_max, (y_min + y_max) / 2, 1],
                [(x_min + x_max) / 2, y_max, 1],
                [x_min, (y_min + y_max) / 2, 1],
            )
            concat = np.vstack([np.dot(rot_mat, np.array(point)) for point in edge_midpoints])
            concat = concat.astype(np.int32)
            rx, ry, rw, rh = cv2.boundingRect(concat)
            rot_bboxes.append([rx, ry, rx + rw, ry + rh, name])
        return rot_img, rot_bboxes

    # 6 - mirror
    def __flipImage(self, img, bboxes):
        """
        Mirror the image, horizontally or vertically with equal probability.

        :param img: image array; ``img.shape[0]`` is the height and
            ``img.shape[1]`` the width
        :param bboxes: list of ``[x_min, y_min, x_max, y_max, cat_name]`` with
            numeric coordinates
        :return: ``(flip_img, flip_bboxes)`` - the mirrored image and the
            mirrored boxes
        """
        # shape[:2] also works for images without a channel axis.
        h, w = img.shape[:2]
        horizon = random.random() < 0.5

        # Bug fix: the horizontal branch used flipCode -1, which flips both
        # axes, while the boxes below are only mirrored in x. flipCode 1
        # mirrors left-right, flipCode 0 mirrors up-down.
        # cv2.flip is given no destination array here, so the former
        # deepcopy of the input is dropped.
        flip_img = cv2.flip(img, 1 if horizon else 0)

        flip_bboxes = []
        for bbox in bboxes:
            x_min, y_min, x_max, y_max, name = bbox[0], bbox[1], bbox[2], bbox[3], bbox[4]
            if horizon:
                flip_bboxes.append([w - x_max, y_min, w - x_min, y_max, name])
            else:
                flip_bboxes.append([x_min, h - y_max, x_max, h - y_min, name])
        return flip_img, flip_bboxes
if __name__ == '__main__':
    # Command line entry point: augment every .jpg found in the given folder.
    parser = argparse.ArgumentParser(description='Give old image folder.')
    parser.add_argument('folder', help='old image folder')
    args = parser.parse_args()
    demo = ImageAugmentation()
    source_dir = str(args.folder)
    # Augmentations applied to each image, five copies each, in this order.
    operations = ("crop", "tran", "light", "noise", "rotate")
    # The directory is listed once up front, so files written during the run
    # are not picked up again.
    for entry in os.listdir(args.folder):
        stem, extension = os.path.splitext(entry)
        if extension != '.jpg':  # only .jpg files are processed
            continue
        name = source_dir + '/' + stem
        for operation in operations:
            demo.changeImages(source_dir, operation, name, 5)