Tensorflow 構建自己的目標檢測與識別模型之數據增強(二)
上次的博客中對如何安裝Tensorflow Object Detection API的步驟及所遇到的問題進行說明。見鏈接:https://blog.csdn.net/weixin_41644725/article/details/83007901
接下來,對圖像數據進行圖像增強。雖然在配置.config文件(後面會說到)時,其中會提到數據增強(data augmentation),但是若是想手動實現,可參考本文,若不想則跳過即可。
1.用labelImg工具生成.xml文件。
該工具的界面如圖所示,關於如何安裝labelImg,可參考網上的相關博客,在windows和Linux下都有相應的安裝過程,此處不敘述安裝過程。其中“Open Dir”爲打開存儲所有圖像文件的文件夾。“Change Save Dir”爲將生成的.xml文件存儲在指定文件夾下面。“Save”表示保存當前的.xml文件。
xml文件的格式如下圖所示:
2. xml 轉成csv文件
(1)將xml文件轉成csv文件代碼如下:
import os
import glob
import pandas as pd
import xml.etree.ElementTree as ET
def xml_to_csv(path):
    """Collect every labelled object from the annotation XML files in ``path``.

    Each ``<object>`` element of each ``*.xml`` file becomes one row.

    Args:
        path: Directory containing the ``.xml`` annotation files.

    Returns:
        A pandas DataFrame with the columns ``filename``, ``width``,
        ``height``, ``class``, ``xmin``, ``ymin``, ``xmax``, ``ymax``.
        The frame is empty (but keeps its columns) when no object is found.
    """
    column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    xml_list = []
    # Sorted so the row order does not depend on the file system.
    for xml_file in sorted(glob.glob(os.path.join(path, '*.xml'))):
        root = ET.parse(xml_file).getroot()
        filename = root.find('filename').text
        size = root.find('size')
        width = int(size.find('width').text)
        height = int(size.find('height').text)
        for member in root.findall('object'):
            # Look children up by tag rather than by position, so files whose
            # elements appear in a different order are still read correctly.
            box = member.find('bndbox')
            xml_list.append((
                filename,
                width,
                height,
                member.find('name').text,
                int(box.find('xmin').text),
                int(box.find('ymin').text),
                int(box.find('xmax').text),
                int(box.find('ymax').text),
            ))
    return pd.DataFrame(xml_list, columns=column_name)
def main():
    """Convert the XML annotations in ``./xml`` into ``./csv/class.csv``."""
    xml_path = './xml'  # folder that holds the xml annotation files
    csv_file = './csv/class.csv'  # where the generated csv file is stored
    xml_df = xml_to_csv(xml_path)
    # to_csv does not create missing directories, so make sure it exists.
    os.makedirs(os.path.dirname(csv_file), exist_ok=True)
    xml_df.to_csv(csv_file, index=False)
    print('Successfully converted xml to csv.')


if __name__ == '__main__':
    main()
(2)得到該圖像中對應類的邊界框(bounding box),代碼如下:
import os
import cv2
import pandas as pd
import matplotlib.pyplot as plt
def get_bbox(image_name, csv_path='./csv/class.csv'):
    """Look up the bounding boxes recorded for one image in the label CSV.

    Args:
        image_name: Value to match against the ``filename`` column.
        csv_path: Path of the CSV file holding the labels. Defaults to the
            file written by the xml-to-csv step.

    Returns:
        A tuple ``(images_bbox, img_class)``. ``images_bbox`` is a list with
        one ``[xmin, ymin, xmax, ymax, image_name]`` entry per matching row.
        ``img_class`` is the ``class`` value of the last matching row, or
        ``''`` when no row matches.
    """
    full_labels = pd.read_csv(csv_path)
    selected_value = full_labels[full_labels['filename'] == image_name]
    images_bbox = []
    img_class = ''
    for _, row in selected_value.iterrows():
        images_bbox.append(
            [row['xmin'], row['ymin'], row['xmax'], row['ymax'], image_name]
        )
        img_class = row['class']
    return images_bbox, img_class
img_path = '023.jpg'
csv_path = './csv/class.csv'
img = cv2.imread(img_path)
if img is None:
    # cv2.imread signals an unreadable file by returning None.
    raise FileNotFoundError('Cannot read image: ' + img_path)
# OpenCV loads images as BGR; matplotlib displays RGB.
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
image = cv2.GaussianBlur(img, (3, 3), 0)
# get_bbox returns (boxes, class_name); only the boxes are needed here.
coords, _ = get_bbox(img_path, csv_path)
# Drop the trailing image name so each entry is [xmin, ymin, xmax, ymax].
coords = [coord[:4] for coord in coords]
for x_min, y_min, x_max, y_max in coords:
    cv2.rectangle(image, (int(x_min), int(y_min)), (int(x_max), int(y_max)), (0, 255, 0), 3)
plt.subplot(111)
plt.imshow(image)
plt.title('original', fontsize='medium')
plt.show()
輸出結果如下:
3.圖像數據增強
(1)調整圖像亮度
代碼如下:
import math
import os
import random

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from skimage import exposure
def changeLight(img, bboxes):
    """Adjust the brightness of an image and draw its bounding boxes.

    A gamma value is drawn uniformly from [1.5, 2] and applied with
    ``exposure.adjust_gamma``; a gamma above 1 darkens the image, a gamma
    below 1 would brighten it.

    Args:
        img: Image array to adjust.
        bboxes: Iterable of boxes whose first four items are
            ``x_min, y_min, x_max, y_max``.

    Returns:
        A new uint8 image with the gamma applied and every box drawn in green.
    """
    flag = random.uniform(1.5, 2)  # > 1 darkens, < 1 brightens
    img = exposure.adjust_gamma(img, flag)
    # Convert to a fresh uint8 array in memory. The earlier version wrote the
    # image to './1.jpg', read it back and deleted it, which was lossy and
    # could overwrite or remove an unrelated file of that name.
    img = np.clip(np.rint(img), 0, 255).astype(np.uint8)
    for bbox in bboxes:
        x_min, y_min, x_max, y_max = bbox[:4]
        cv2.rectangle(img, (int(x_min), int(y_min)), (int(x_max), int(y_max)), (0, 255, 0), 3)
    return img
img_path = '023.jpg'
csv_path = './csv/class.csv'
img = cv2.imread(img_path)
if img is None:
    # cv2.imread signals an unreadable file by returning None.
    raise FileNotFoundError('Cannot read image: ' + img_path)
# OpenCV loads images as BGR; matplotlib displays RGB.
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = cv2.GaussianBlur(img, (3, 3), 0)
# Separate array for the "original" panel so drawing on it leaves img intact.
image = cv2.GaussianBlur(img, (3, 3), 0)
# get_bbox returns (boxes, class_name); only the boxes are needed here.
coords, _ = get_bbox(img_path, csv_path)
# Drop the trailing image name so each entry is [xmin, ymin, xmax, ymax].
coords = [coord[:4] for coord in coords]
for x_min, y_min, x_max, y_max in coords:
    cv2.rectangle(image, (int(x_min), int(y_min)), (int(x_max), int(y_max)), (0, 255, 0), 3)
# Adjust brightness
change_light_img = changeLight(img=img, bboxes=coords)
plt.subplot(121)
plt.imshow(image)
plt.title('original', fontsize='medium')
plt.subplot(122)
plt.imshow(change_light_img)
plt.title('change light', fontsize='medium')
plt.show()
輸出結果如下:
(2)cutout
代碼如下:
def cutout(img, bboxes, length=100, n_holes=1, threshold=0.5):
    """Randomly mask out one or more square patches from an image.

    Adapted from
    https://github.com/uoguelph-mlrg/Cutout/blob/master/util/cutout.py

    A candidate patch is re-drawn whenever its IoU with any bounding box
    exceeds ``threshold``, so a patch never covers most of an object.

    Args:
        img: Image as a 3D numpy array ``(h, w, c)``.
        bboxes: Box coordinates; the first four items of each box are
            ``x_min, y_min, x_max, y_max``.
        length (int): Side length, in pixels, of each square patch.
        n_holes (int): Number of patches to cut out of the image.
        threshold (float): Largest IoU allowed between a patch and a box.

    Returns:
        A new uint8 image with the patches zeroed and every box drawn in green.
    """

    def cal_iou(boxA, boxB):
        """Return the IoU of two boxes; ``boxB`` is the bounding box."""
        # (x, y)-coordinates of the intersection rectangle
        xA = max(boxA[0], boxB[0])
        yA = max(boxA[1], boxB[1])
        xB = min(boxA[2], boxB[2])
        yB = min(boxA[3], boxB[3])
        if xB <= xA or yB <= yA:
            return 0.0
        # area of the intersection rectangle
        interArea = (xB - xA + 1) * (yB - yA + 1)
        # areas of the two boxes
        boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
        boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)
        # intersection divided by union (sum of both areas minus the overlap)
        return interArea / float(boxAArea + boxBArea - interArea)

    # Height, width and channel count of the image.
    if img.ndim == 3:
        h, w, c = img.shape
    else:
        _, h, w, c = img.shape

    mask = np.ones((h, w, c), np.float32)
    for _ in range(n_holes):
        overlaps_too_much = True  # does the patch overlap a box too much?
        while overlaps_too_much:
            y = np.random.randint(h)
            x = np.random.randint(w)
            # np.clip keeps the patch corners inside the image.
            y1 = np.clip(y - length // 2, 0, h)
            y2 = np.clip(y + length // 2, 0, h)
            x1 = np.clip(x - length // 2, 0, w)
            x2 = np.clip(x + length // 2, 0, w)
            overlaps_too_much = any(
                cal_iou([x1, y1, x2, y2], box) > threshold for box in bboxes
            )
        mask[y1:y2, x1:x2, :] = 0.

    img = img * mask
    # The product is float32; convert back to uint8 in memory. The earlier
    # version round-tripped through './1.jpg', which was lossy and could
    # overwrite or remove an unrelated file of that name.
    img = np.clip(np.rint(img), 0, 255).astype(np.uint8)
    for bbox in bboxes:
        x_min, y_min, x_max, y_max = bbox[:4]
        cv2.rectangle(img, (int(x_min), int(y_min)), (int(x_max), int(y_max)), (0, 255, 0), 3)
    return img
img_path = '023.jpg'
csv_path = './csv/class.csv'
img = cv2.imread(img_path)
if img is None:
    # cv2.imread signals an unreadable file by returning None.
    raise FileNotFoundError('Cannot read image: ' + img_path)
# OpenCV loads images as BGR; matplotlib displays RGB.
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = cv2.GaussianBlur(img, (3, 3), 0)
# Separate array for the "original" panel so drawing on it leaves img intact.
image = cv2.GaussianBlur(img, (3, 3), 0)
# get_bbox returns (boxes, class_name); only the boxes are needed here.
coords, _ = get_bbox(img_path, csv_path)
# Drop the trailing image name so each entry is [xmin, ymin, xmax, ymax].
coords = [coord[:4] for coord in coords]
for x_min, y_min, x_max, y_max in coords:
    cv2.rectangle(image, (int(x_min), int(y_min)), (int(x_max), int(y_max)), (0, 255, 0), 3)
# Cutout
cut_out_img = cutout(img=img, bboxes=coords)
plt.subplot(121)
plt.imshow(image)
plt.title('original', fontsize='medium')
plt.subplot(122)
plt.imshow(cut_out_img)
plt.title('cutout', fontsize='medium')
plt.show()
輸出結果如下:
(3)旋轉
代碼如下:
def rotate_img_bbox(img, bboxes, angle=5, scale=1.):
    """Rotate an image, rotate its bounding boxes with it, and draw them.

    Reference: https://blog.csdn.net/u014540717/article/details/53301195

    Args:
        img: Image array ``(h, w, c)``.
        bboxes: All bounding boxes of the image, a list whose elements are
            ``[x_min, y_min, x_max, y_max]``; the values must be numeric.
        angle: Rotation angle in degrees.
        scale: Scale factor, 1 by default.

    Returns:
        The rotated image as a uint8 array, with the rotated boxes drawn in
        green. The rotated box coordinates themselves are not returned.
    """
    # ---------------------- rotate the image ----------------------
    w = img.shape[1]
    h = img.shape[0]
    rangle = np.deg2rad(angle)  # degrees to radians
    # size of the canvas that holds the whole rotated image
    nw = (abs(np.sin(rangle) * h) + abs(np.cos(rangle) * w)) * scale
    nh = (abs(np.cos(rangle) * h) + abs(np.sin(rangle) * w)) * scale
    # ask OpenCV for the rotation matrix about the new centre
    rot_mat = cv2.getRotationMatrix2D((nw * 0.5, nh * 0.5), angle, scale)
    # move from the old centre to the new centre, combined with the rotation
    rot_move = np.dot(rot_mat, np.array([(nw - w) * 0.5, (nh - h) * 0.5, 0]))
    # the move only affects the translation part of the transform
    rot_mat[0, 2] += rot_move[0]
    rot_mat[1, 2] += rot_move[1]
    # affine transform
    rot_img = cv2.warpAffine(img, rot_mat, (int(math.ceil(nw)), int(math.ceil(nh))), flags=cv2.INTER_LANCZOS4)

    # ---------------------- correct the bbox coordinates ----------------------
    # rot_mat is the final transform. Map the midpoint of each box edge into
    # the rotated frame and take the bounding rectangle of the four points.
    rot_bboxes = []
    for bbox in bboxes:
        xmin, ymin, xmax, ymax = bbox[:4]
        point1 = np.dot(rot_mat, np.array([(xmin + xmax) / 2, ymin, 1]))
        point2 = np.dot(rot_mat, np.array([xmax, (ymin + ymax) / 2, 1]))
        point3 = np.dot(rot_mat, np.array([(xmin + xmax) / 2, ymax, 1]))
        point4 = np.dot(rot_mat, np.array([xmin, (ymin + ymax) / 2, 1]))
        # stack the points and cast to int32 before calling boundingRect
        concat = np.vstack((point1, point2, point3, point4)).astype(np.int32)
        rx, ry, rw, rh = cv2.boundingRect(concat)
        rot_bboxes.append([rx, ry, rx + rw, ry + rh])

    # Convert to uint8 in memory. The earlier version round-tripped through
    # './1.jpg', which was lossy and could overwrite or remove an unrelated
    # file of that name.
    rot_img = np.clip(np.rint(rot_img), 0, 255).astype(np.uint8)
    for x_min, y_min, x_max, y_max in rot_bboxes:
        cv2.rectangle(rot_img, (int(x_min), int(y_min)), (int(x_max), int(y_max)), (0, 255, 0), 3)
    return rot_img
img_path = '023.jpg'
csv_path = './csv/class.csv'
img = cv2.imread(img_path)
if img is None:
    # cv2.imread signals an unreadable file by returning None.
    raise FileNotFoundError('Cannot read image: ' + img_path)
# OpenCV loads images as BGR; matplotlib displays RGB.
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = cv2.GaussianBlur(img, (3, 3), 0)
# Separate array for the "original" panel so drawing on it leaves img intact.
image = cv2.GaussianBlur(img, (3, 3), 0)
# get_bbox returns (boxes, class_name); only the boxes are needed here.
coords, _ = get_bbox(img_path, csv_path)
# Drop the trailing image name so each entry is [xmin, ymin, xmax, ymax].
coords = [coord[:4] for coord in coords]
for x_min, y_min, x_max, y_max in coords:
    cv2.rectangle(image, (int(x_min), int(y_min)), (int(x_max), int(y_max)), (0, 255, 0), 3)
# Rotate
rotate_img = rotate_img_bbox(img=img, bboxes=coords)
plt.subplot(121)
plt.imshow(image)
plt.title('original', fontsize='medium')
plt.subplot(122)
plt.imshow(rotate_img)
plt.title('rotate', fontsize='medium')
plt.show()
輸出結果如下:
(4)裁剪
代碼如下:
def crop_img_bboxes(img, bboxes):
    """Randomly crop an image so that the crop still contains every box.

    The smallest rectangle enclosing all boxes is grown by a random amount on
    each side, limited by the image border, and the image is cropped to it.

    Args:
        img: Image array.
        bboxes: All bounding boxes of the image, a list whose elements are
            ``[x_min, y_min, x_max, y_max]``; the values must be numeric.

    Returns:
        The cropped image as a new uint8 array, with the boxes drawn in green
        at their shifted positions. The input image is not modified. With no
        boxes, a uint8 copy of the whole image is returned.
    """
    if len(bboxes) == 0:
        # Nothing constrains the crop; an enclosing rectangle is undefined.
        return np.clip(np.rint(img), 0, 255).astype(np.uint8)

    # ---------------------- crop the image ----------------------
    w = img.shape[1]
    h = img.shape[0]
    # smallest rectangle that contains every target box
    x_min = min(w, min(bbox[0] for bbox in bboxes))
    y_min = min(h, min(bbox[1] for bbox in bboxes))
    x_max = max(0, max(bbox[2] for bbox in bboxes))
    y_max = max(0, max(bbox[3] for bbox in bboxes))

    d_to_left = x_min  # distance from that rectangle to the left edge
    d_to_right = w - x_max  # ... to the right edge
    d_to_top = y_min  # ... to the top edge
    d_to_bottom = h - y_max  # ... to the bottom edge

    # randomly grow the rectangle on every side
    crop_x_min = int(x_min - random.uniform(0, d_to_left))
    crop_y_min = int(y_min - random.uniform(0, d_to_top))
    crop_x_max = int(x_max + random.uniform(0, d_to_right))
    crop_y_max = int(y_max + random.uniform(0, d_to_bottom))

    # make sure the crop stays inside the image
    crop_x_min = max(0, crop_x_min)
    crop_y_min = max(0, crop_y_min)
    crop_x_max = min(w, crop_x_max)
    crop_y_max = min(h, crop_y_max)

    # A slice is only a view into img. Converting to uint8 makes a copy, so
    # drawing below does not touch the caller's image. The conversion also
    # replaces the earlier lossy round trip through './1.jpg'.
    crop_img = img[crop_y_min:crop_y_max, crop_x_min:crop_x_max]
    crop_img = np.clip(np.rint(crop_img), 0, 255).astype(np.uint8)

    # ---------------------- shift the bounding boxes ----------------------
    crop_bboxes = [
        [bbox[0] - crop_x_min, bbox[1] - crop_y_min, bbox[2] - crop_x_min, bbox[3] - crop_y_min]
        for bbox in bboxes
    ]
    for bx_min, by_min, bx_max, by_max in crop_bboxes:
        cv2.rectangle(crop_img, (int(bx_min), int(by_min)), (int(bx_max), int(by_max)), (0, 255, 0), 3)
    return crop_img
img_path = '023.jpg'
csv_path = './csv/class.csv'
img = cv2.imread(img_path)
if img is None:
    # cv2.imread signals an unreadable file by returning None.
    raise FileNotFoundError('Cannot read image: ' + img_path)
# OpenCV loads images as BGR; matplotlib displays RGB.
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = cv2.GaussianBlur(img, (3, 3), 0)
# Separate array for the "original" panel so drawing on it leaves img intact.
image = cv2.GaussianBlur(img, (3, 3), 0)
# get_bbox returns (boxes, class_name); only the boxes are needed here.
coords, _ = get_bbox(img_path, csv_path)
# Drop the trailing image name so each entry is [xmin, ymin, xmax, ymax].
coords = [coord[:4] for coord in coords]
for x_min, y_min, x_max, y_max in coords:
    cv2.rectangle(image, (int(x_min), int(y_min)), (int(x_max), int(y_max)), (0, 255, 0), 3)
# Crop
crop_img = crop_img_bboxes(img=img, bboxes=coords)
plt.subplot(121)
plt.imshow(image)
plt.title('original', fontsize='medium')
plt.subplot(122)
plt.imshow(crop_img)
plt.title('crop', fontsize='medium')
plt.show()
輸出結果如下: