目錄
目標檢測訓練中,數據集的圖片尺寸大部分時候都不符合網絡輸入的要求,需要對圖片尺寸進行修改(標註的目標框也要同步變換),下面我介紹兩種常用的尺寸變換方法:
1. 比例縮放
這種方法就是簡單的對圖片尺寸進行比例縮放,一般使用cv2.resize()對圖片進行縮放,然後計算長寬縮放比例,再通過比例來縮放標註的目標框尺寸。具體代碼如下:
def read_and_resize_picture(img_path, img_boxes, target_size=(416, 416)):
    """
    Read an image, stretch it to ``target_size`` and rescale its boxes to match.

    The image is resized without preserving its aspect ratio, so x and y
    coordinates are scaled by independent factors.

    :param img_path: path of a single image file
    :param img_boxes: boxes of that image,
        [[xmin, ymin, xmax, ymax], ..., [xmin, ymin, xmax, ymax]]
    :param target_size: output size as (width, height); defaults to (416, 416)
    :return: (image, boxes) -- the decoded image at ``target_size`` and a NEW
        array of shape (N, 4) with the rescaled boxes. The dtype of
        ``img_boxes`` is kept (integer boxes are rounded to the nearest
        pixel); the caller's ``img_boxes`` is never modified.
    :raises OSError: if OpenCV cannot read the image
    """
    img = cv2.imread(img_path, flags=1)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError(f"could not read image: {img_path!r}")

    target_w, target_h = target_size
    height, width = img.shape[0:2]

    src_boxes = np.asarray(img_boxes)
    # Work on a private float copy: keeps the caller's array intact and avoids
    # truncation when the boxes are stored as integers. reshape(-1, 4) also
    # lets an empty annotation list through as a (0, 4) array.
    boxes = src_boxes.astype(np.float64).reshape(-1, 4)

    if (height, width) != (target_h, target_w):
        img = cv2.resize(img, dsize=(target_w, target_h), interpolation=cv2.INTER_LINEAR)
        boxes[:, 0::2] *= target_w / width    # xmin, xmax
        boxes[:, 1::2] *= target_h / height   # ymin, ymax
        if np.issubdtype(src_boxes.dtype, np.integer):
            boxes = np.rint(boxes)

    return img, boxes.astype(src_boxes.dtype)
這種方法修改圖片尺寸方便,但是當原圖長寬比與目標尺寸不一致時,圖片會被拉伸變形,從而改變圖片特徵。
2. 使用letterbox
先將圖片等比例縮放到目標尺寸以內,再對不足的部分填充黑邊,使圖片達到網絡輸入尺寸,這樣不會改變圖片的長寬比(目標不會被拉伸變形),代碼如下:
import numpy as np
import cv2
import math
def cv2_letterbox_image(image, expected_size, box):
    """
    Resize an image with letterboxing and map its box into the new image.

    The image is scaled by a single factor so that it fits inside
    ``expected_size`` (aspect ratio preserved), then padded with a constant
    border (OpenCV's default border value, i.e. black) so that the resized
    image sits in the centre of the output.

    :param image: source image (array whose first two dimensions are
        height and width)
    :param expected_size: output size as (width, height)
    :param box: box in source-image pixels, [xmin, ymin, xmax, ymax]; an
        array of shape (N, 4) holding several boxes is accepted as well
    :return: (new_image, new_box). ``new_box`` is a NEW object, the caller's
        ``box`` is never modified. For a NumPy array the dtype is kept
        (integer boxes are rounded to the nearest pixel); any other sequence
        is returned as a list of floats.
    """
    ih, iw = image.shape[0:2]
    ew, eh = expected_size
    scale = min(eh / ih, ew / iw)
    # Clamp to one pixel: for extreme aspect ratios int() would give 0 and
    # cv2.resize rejects an empty destination size.
    nh = max(1, int(ih * scale))
    nw = max(1, int(iw * scale))

    # Split the padding so the resized image is centred; the odd pixel, if
    # any, goes to the bottom/right side.
    top = (eh - nh) // 2
    bottom = eh - nh - top
    left = (ew - nw) // 2
    right = ew - nw - left

    resized = cv2.resize(image, (nw, nh), interpolation=cv2.INTER_CUBIC)
    new_img = cv2.copyMakeBorder(resized, top, bottom, left, right, cv2.BORDER_CONSTANT)

    # Transform a private float copy so integer boxes are not truncated
    # half-way through and the caller's data stays untouched.
    coords = np.array(box, dtype=np.float64)
    coords[..., 0::2] = coords[..., 0::2] * scale + left   # xmin, xmax
    coords[..., 1::2] = coords[..., 1::2] * scale + top    # ymin, ymax

    if not isinstance(box, np.ndarray):
        return new_img, coords.tolist()
    if np.issubdtype(box.dtype, np.integer):
        coords = np.rint(coords)
    return new_img, coords.astype(box.dtype)
def cv2_deletterbox_image(image, current_size, box):
    """
    Map a box from letterboxed (network-input) coordinates back to the source image.

    The letterbox geometry (scale factor and top/left padding) is recomputed
    from the source image shape and ``current_size``; the padding offset is
    removed first and the result is divided by the scale factor.

    :param image: source image; only its height and width are read
    :param current_size: size of the letterboxed image as (width, height),
        i.e. the network input size
    :param box: box in letterboxed pixels, [xmin, ymin, xmax, ymax]; an array
        of shape (N, 4) holding several boxes is accepted as well
    :return: the box in source-image pixels. It is a NEW object, the caller's
        ``box`` is never modified. For a NumPy array the dtype is kept
        (integer boxes are rounded to the nearest pixel); any other sequence
        is returned as a list of floats.
    """
    ih, iw = image.shape[0:2]
    ew, eh = current_size
    scale = min(eh / ih, ew / iw)
    # A resized side is never smaller than one pixel.
    nh = max(1, int(ih * scale))
    nw = max(1, int(iw * scale))
    top = (eh - nh) // 2
    left = (ew - nw) // 2

    # Work on a private float copy so integer boxes are not truncated after
    # the division and the caller's data stays untouched.
    coords = np.array(box, dtype=np.float64)
    coords[..., 0::2] = (coords[..., 0::2] - left) / scale   # xmin, xmax
    coords[..., 1::2] = (coords[..., 1::2] - top) / scale    # ymin, ymax

    if not isinstance(box, np.ndarray):
        return coords.tolist()
    if np.issubdtype(box.dtype, np.integer):
        coords = np.rint(coords)
    return coords.astype(box.dtype)
def view_1():
    """
    Show a sample image and its letterboxed version, each with the box drawn.

    Reads a hard-coded sample image, letterboxes it with
    ``cv2_letterbox_image`` and opens two OpenCV windows: the source image
    with the original box and the letterboxed image with the transformed box.
    Blocks until a key is pressed.

    :raises OSError: if the sample image cannot be read
    """
    box = np.array([147, 336, 402, 437])
    # cv2.rectangle needs plain Python ints; capture the source corners
    # before the box is handed to the transform.
    ptr1 = (int(box[0]), int(box[1]))
    ptr2 = (int(box[2]), int(box[3]))
    color = (0, 0, 255)  # red in OpenCV's BGR channel order
    path = "YOLOV3_high/data/val/JPEG/image_1.jpg"

    img = cv2.imread(path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError(f"could not read image: {path!r}")

    # NOTE(review): the target is 415x415 here while view_2 uses 416x416 --
    # confirm whether that is intentional.
    # Pass a copy so the transform cannot alter the local box.
    dst_img, box_new = cv2_letterbox_image(img, (415, 415), box.copy())

    cv2.rectangle(img, ptr1, ptr2, color, 2)
    cv2.imshow('src_img', img)

    dst_ptr1 = (int(round(box_new[0])), int(round(box_new[1])))
    dst_ptr2 = (int(round(box_new[2])), int(round(box_new[3])))
    cv2.rectangle(dst_img, dst_ptr1, dst_ptr2, color, 2)
    cv2.imshow("dst_img", dst_img)
    # cv2.imwrite("pred_img.jpg",dst_img)

    cv2.waitKey(0)
    cv2.destroyAllWindows()
def view_2():
    """
    Show the letterbox round trip: source box -> letterboxed box -> restored box.

    Reads a hard-coded sample image, letterboxes it to 416x416 with
    ``cv2_letterbox_image``, maps the transformed box back with
    ``cv2_deletterbox_image`` and opens three OpenCV windows: the source
    image with the original box, the letterboxed image with the transformed
    box, and an untouched copy of the source with the restored box.
    Blocks until a key is pressed.

    :raises OSError: if the sample image cannot be read
    """
    box = np.array([147, 336, 402, 437])
    # cv2.rectangle needs plain Python ints; capture the source corners
    # before the box is handed to the transform.
    ptr1 = (int(box[0]), int(box[1]))
    ptr2 = (int(box[2]), int(box[3]))
    color = (0, 0, 255)  # red in OpenCV's BGR channel order
    path = "YOLOV3_high/data/val/JPEG/image_1.jpg"

    img = cv2.imread(path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError(f"could not read image: {path!r}")
    img_src = img.copy()  # clean copy for drawing the restored box

    # Pass a copy so the transform cannot alter the local box.
    dst_img, box_new = cv2_letterbox_image(img, (416, 416), box.copy())

    cv2.rectangle(img, ptr1, ptr2, color, 2)
    cv2.imshow('src_img', img)

    dst_ptr1 = (int(round(box_new[0])), int(round(box_new[1])))
    dst_ptr2 = (int(round(box_new[2])), int(round(box_new[3])))
    cv2.rectangle(dst_img, dst_ptr1, dst_ptr2, color, 2)
    cv2.imshow("dst_img", dst_img)

    box_src = cv2_deletterbox_image(img_src, (416, 416), box_new)
    src_ptr1 = (int(round(box_src[0])), int(round(box_src[1])))
    src_ptr2 = (int(round(box_src[2])), int(round(box_src[3])))
    cv2.rectangle(img_src, src_ptr1, src_ptr2, color, 2)
    cv2.imshow("img_src", img_src)

    cv2.waitKey(0)
    cv2.destroyAllWindows()
# Script entry point: only the round-trip demo (view_2) runs; view_1 is left disabled.
if __name__ == "__main__":
    # view_1()
    view_2()