Yolo圖像增廣

圖像及座標變換

完整代碼

圖像增廣（image augmentation）技術通過對訓練圖像做一系列隨機改變，來產生相似但又不同的訓練樣本，從而擴大訓練數據集的規模。
圖像增廣的另一種解釋是，隨機改變訓練樣本可以降低模型對某些屬性的依賴，從而提高模型的泛化能力。

工作流程

單一圖片增廣處理流程

整個文件夾的圖片增廣處理流程

輔助工作

讀取圖片

使用OpenCV讀取

# Path of the image to load.
filename = "image/test.jpg"
# Read the image from disk with OpenCV.
image = cv2.imread(filename)

讀取標註XML文件

根據XML標註文件，得到由[x_min, y_min, x_max, y_max, cat_name]組成的標註列表

    def getBoxes(self, image_name):
        """
        Read the bounding boxes from the annotation XML that belongs to an image.

        :param image_name: image path without extension; the file
            ``image_name + '.xml'`` is parsed.
        :return: list with one ``[x_min, y_min, x_max, y_max, cat_name]`` entry
            per ``<object>`` element of the annotation.
        """
        root = ET.parse(image_name + '.xml').getroot()
        boxes = []
        for obj in root.findall('object'):
            bndbox = obj.find('bndbox')
            # Look the coordinates up by tag so the result does not depend on
            # the order of the children inside <bndbox>, and go through
            # float() so that values written as "12.0" are accepted too.
            box = [int(float(bndbox.find(tag).text))
                   for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
            box.append(obj.find('name').text)
            boxes.append(box)
        return boxes

保存變換後的圖片

使用OpenCV保存

# Timestamp and a random number; both become part of the output file name.
current_time = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
current_num = str(random.randint(0, 9999))
# `folder`, `i` and `change_img` are not defined in this fragment; they come
# from the surrounding code.
save_image_name = folder + '/' + current_time + '_' + str(i) + current_num + '.jpg'
print("save image name: " + save_image_name)
# Write the transformed image to disk with OpenCV.
cv2.imwrite(save_image_name, change_img)

保存更新過的標註

    def saveXML(self, image_name, xml_name, boxes, shape1, shape0):
        print("xml name ====================================" + xml_name)
        print("image name ====================================" + xml_name)
        folder = ET.Element('folder')
        folder.text = 'image'

        filename = ET.Element('filename')
        filename.text = image_name

        path = ET.Element('path')
        curr_path = os.getcwd()

        path.text = curr_path + '\\image\\' + image_name

        source = ET.Element('source')
        database = ET.SubElement(source, 'database')
        database.text = 'Unknown'

        size = ET.Element('size')
        width = ET.SubElement(size, 'width')

        width.text = str(shape1)
        height = ET.SubElement(size, 'height')
        height.text = str(shape0)
        depth = ET.SubElement(size, 'depth')
        depth.text = '3'
        segmented = ET.Element('segmented')
        segmented.text = '0'

        root = ET.Element('annotation')
        root.extend((folder, filename, path))
        root.extend((source, size, segmented))

        for box in boxes:
            object = ET.Element('object')
            name = ET.SubElement(object, 'name')
            name.text = box[4]
            pose = ET.SubElement(object, 'pose')
            pose.text = 'Unspecified'
            truncated = ET.SubElement(object, 'truncated')
            truncated.text = '0'
            difficult = ET.SubElement(object, 'difficult')
            difficult.text = '0'
            bndbox = ET.SubElement(object, 'bndbox')
            xmin = ET.SubElement(bndbox, 'xmin')
            xmin.text = str(box[0])
            ymin = ET.SubElement(bndbox, 'ymin')
            ymin.text = str(box[1])
            xmax = ET.SubElement(bndbox, 'xmax')
            xmax.text = str(box[2])
            ymax = ET.SubElement(bndbox, 'ymax')
            ymax.text = str(box[3])
            root.extend((object,))

        tree = ET.ElementTree(root)
        tree.write(xml_name)

        tree = ET.parse(xml_name)  # 解析movies.xml這個文件
        root = tree.getroot()  # 得到根元素，Element類
        self.pretty_xml(root, '\t', '\n')  # 執行美化方法
        tree.write(xml_name)

美化更新過的XML

    def pretty_xml(self, element, indent, newline, level=0):  # elemnt爲傳進來的Elment類，參數indent用於縮進，newline用於換行
        if element:  # 判斷element是否有子元素
            if (element.text is None) or element.text.isspace():  # 如果element的text沒有內容
                element.text = newline + indent * (level + 1)
            else:
                element.text = newline + indent * (level + 1) + element.text.strip() + newline + indent * (level + 1)
                # else:  # 此處兩行如果把註釋去掉，Element的text也會另起一行
                # element.text = newline + indent * (level + 1) + element.text.strip() + newline + indent * level
        temp = list(element)  # 將element轉成list
        for subelement in temp:
            if temp.index(subelement) < (len(temp) - 1):  # 如果不是list的最後一個元素，說明下一個行是同級別元素的起始，縮進應一致
                subelement.tail = newline + indent * (level + 1)
            else:  # 如果是list的最後一個元素， 說明下一行是母元素的結束，縮進應該少一個
                subelement.tail = newline + indent * level
            self.pretty_xml(subelement, indent, newline, level=level + 1)  # 對子元素進行遞歸操作

圖像及座標變換

1-裁切

    def __cropImage(self, img, boxes):
        """
        Randomly crop the image to a window that still contains every box.

        :param img: image array; shape[0] is used as height, shape[1] as width
        :param boxes: list of [x_min, y_min, x_max, y_max, cat_name]
        :return: (crop_img, crop_bboxes) - the cropped image and the boxes
            expressed in the coordinates of the cropped image
        """
        h, w = img.shape[:2]

        if not boxes:
            # Without boxes the random window below could come out inverted
            # (min > max) and yield an empty image, so return the input as is.
            return img, []

        # Smallest rectangle enclosing all boxes (never larger than the image).
        x_min = min(w, min(box[0] for box in boxes))
        y_min = min(h, min(box[1] for box in boxes))
        x_max = max(0, max(box[2] for box in boxes))
        y_max = max(0, max(box[3] for box in boxes))

        # Grow that rectangle by a random amount towards each image border.
        crop_x_min = int(x_min - random.uniform(0, x_min))
        crop_y_min = int(y_min - random.uniform(0, y_min))
        crop_x_max = int(x_max + random.uniform(0, w - x_max))
        crop_y_max = int(y_max + random.uniform(0, h - y_max))

        # Keep the window inside the image.
        crop_x_min = max(0, crop_x_min)
        crop_y_min = max(0, crop_y_min)
        crop_x_max = min(w, crop_x_max)
        crop_y_max = min(h, crop_y_max)

        crop_img = img[crop_y_min:crop_y_max, crop_x_min:crop_x_max]

        # Shift the boxes into the coordinate system of the cropped image.
        crop_bboxes = [
            [int(box[0] - crop_x_min), int(box[1] - crop_y_min),
             int(box[2] - crop_x_min), int(box[3] - crop_y_min), box[4]]
            for box in boxes
        ]
        return crop_img, crop_bboxes

2-平移

    def __translationImage(self, img, boxes):
        """
        Shift the image by a random offset and move the boxes with it.

        :param img: image array; shape[0] is used as height, shape[1] as width
        :param boxes: list of [x_min, y_min, x_max, y_max, cat_name]
        :return: (shift_img, shift_bboxes) - the shifted image (same size as
            the input) and the shifted boxes
        """
        h, w = img.shape[:2]

        # Smallest rectangle enclosing all boxes.
        x_min, y_min = w, h
        x_max, y_max = 0, 0
        for bbox in boxes:
            x_min = min(x_min, bbox[0])
            y_min = min(y_min, bbox[1])
            x_max = max(x_max, bbox[2])
            # Fixed: this used to be max(x_max, bbox[3]), which mixed the
            # horizontal extent into the vertical one.
            y_max = max(y_max, bbox[3])

        # Distance from that rectangle to each image border, i.e. the largest
        # shift in each direction that keeps all boxes inside the image.
        d_to_left = x_min
        d_to_right = w - x_max
        d_to_top = y_min
        d_to_bottom = h - y_max

        # Translation matrix [[1, 0, x], [0, 1, y]]: positive x moves the image
        # right, positive y moves it down. Only a third of the available
        # distance is used in each direction.
        x = random.uniform(-(d_to_left / 3), d_to_right / 3)
        y = random.uniform(-(d_to_top / 3), d_to_bottom / 3)
        M = np.float32([[1, 0, x], [0, 1, y]])

        # Affine warp onto a canvas of the original size.
        shift_img = cv2.warpAffine(img, M, (w, h))

        shift_bboxes = [
            [int(bbox[0] + x), int(bbox[1] + y),
             int(bbox[2] + x), int(bbox[3] + y), bbox[4]]
            for bbox in boxes
        ]
        return shift_img, shift_bboxes

3-改變亮度

    def __changeLightofImage(self, img, boxes):
        """
        Change the brightness of the image with a random gamma adjustment.

        :param img: image array
        :param boxes: bounding boxes; returned as a deep copy with unchanged
            coordinates
        :return: (adjusted image, copied boxes)
        """
        # Gamma is drawn from [0.5, 1.5]; per the original note, a value above
        # 1 darkens the image and a value below 1 brightens it.
        gamma = random.uniform(0.5, 1.5)
        boxes_copy = copy.deepcopy(boxes)
        return exposure.adjust_gamma(img, gamma), boxes_copy

4-高斯噪聲

    def __addNoiseToImage(self, img, boxes):
        """
        Add Gaussian noise to the image.

        :param img: image array
        :param boxes: bounding boxes; returned as a deep copy with unchanged
            coordinates
        :return: (noisy image, copied boxes). Per the original note the noise
            function outputs pixel values in [0, 1], so the result is scaled
            by 255; it is not cast back to an integer type here.
        """
        boxes_copy = copy.deepcopy(boxes)
        noisy = random_noise(img, mode='gaussian', clip=True)
        return noisy * 255, boxes_copy

5-旋轉

    def __rotateImage(self, img, boxes):
        """
        Rotate and scale the image by random amounts and recompute the boxes.

        The angle is drawn from uniform(-45, 45) degrees and the scale from
        uniform(0.5, 1.5) inside the function; neither is a parameter.

        :param img: image array; shape[0] is used as height, shape[1] as width
        :param boxes: list of [x_min, y_min, x_max, y_max, cat_name]
        :return: (rot_img, rot_bboxes) - the rotated image and, for every input
            box, the bounding rectangle of its four transformed edge midpoints
        """
        '''
        輸入:
            img:array,(h,w,c)
            bboxes:該圖像包含的所有boundingboxs,一個list,每個元素爲[x_min, y_min, x_max, y_max],要確保是數值
            angle:
            scale:默認1
        輸出:
            
        '''
        # Rotate the image
        w = img.shape[1]
        h = img.shape[0]
        angle = random.uniform(-45, 45)
        scale = random.uniform(0.5, 1.5)
        # Degrees to radians
        rangle = np.deg2rad(angle)
        # Width and height of the output canvas: the extents of the rotated
        # w x h rectangle, multiplied by the scale factor
        nw = (abs(np.sin(rangle) * h) + abs(np.cos(rangle) * w)) * scale
        nh = (abs(np.cos(rangle) * h) + abs(np.sin(rangle) * w)) * scale
        # Rotation matrix around a given point
        # getRotationMatrix2D(Point2f center, double angle, double scale)
        # Point2f center: centre of the rotation
        # double angle: rotation angle
        # double scale: image scale factor
        rot_mat = cv2.getRotationMatrix2D((nw * 0.5, nh * 0.5), angle, scale)  # returns a 2x3 matrix
        # Offset between the new canvas centre and the old image centre,
        # mapped through the rotation matrix
        rot_move = np.dot(rot_mat, np.array([(nw - w) * 0.5, (nh - h) * 0.5, 0]))
        # the move only affects the translation, so update the translation
        # part of the transform
        rot_mat[0, 2] += rot_move[0]
        rot_mat[1, 2] += rot_move[1]
        # Affine warp
        rot_img = cv2.warpAffine(img, rot_mat, (int(math.ceil(nw)), int(math.ceil(nh))),
                                 flags=cv2.INTER_LANCZOS4)  # ceil rounds the canvas size up

        # Correct the bounding boxes
        # rot_mat is the final transform matrix
        # Take the midpoints of the four edges of each original box and map
        # them into the coordinate system of the rotated image
        rot_bboxes = list()
        for bbox in boxes:
            x_min = bbox[0]
            y_min = bbox[1]
            x_max = bbox[2]
            y_max = bbox[3]
            name = bbox[4]
            point1 = np.dot(rot_mat, np.array([(x_min + x_max) / 2, y_min, 1]))
            point2 = np.dot(rot_mat, np.array([x_max, (y_min + y_max) / 2, 1]))
            point3 = np.dot(rot_mat, np.array([(x_min + x_max) / 2, y_max, 1]))
            point4 = np.dot(rot_mat, np.array([x_min, (y_min + y_max) / 2, 1]))

            # Stack the four points into one array (one point per row)
            concat = np.vstack((point1, point2, point3, point4))
            # Convert to integer coordinates
            concat = concat.astype(np.int32)
            # Bounding rectangle of the transformed points
            rx, ry, rw, rh = cv2.boundingRect(concat)
            rx_min = rx
            ry_min = ry
            rx_max = rx + rw
            ry_max = ry + rh
            # Append to the result list
            rot_bboxes.append([rx_min, ry_min, rx_max, ry_max, name])
        return rot_img, rot_bboxes

6-鏡像

def __flipImage(self, img, bboxes):
    """
    Mirror the image horizontally or vertically (chosen at random) and mirror
    the boxes the same way.

    :param self: unused
    :param img: image array; shape[0] is used as height, shape[1] as width
    :param bboxes: list of [x_min, y_min, x_max, y_max, cat_name]
    :return: (flip_img, flip_bboxes) - the mirrored image and mirrored boxes
    """
    horizon = random.random() < 0.5
    h, w = img.shape[:2]

    # cv2.flip: a positive flipCode mirrors left-right, 0 mirrors top-bottom.
    # Fixed: the horizontal branch used to pass -1, which flips BOTH axes and
    # therefore did not match the horizontally mirrored boxes below.
    # cv2.flip returns a new array, so no copy of the input is needed.
    flip_img = cv2.flip(img, 1 if horizon else 0)

    # Mirror the boxes; min and max swap roles along the flipped axis.
    flip_bboxes = []
    for bbox in bboxes:
        x_min, y_min, x_max, y_max, name = bbox[:5]
        if horizon:
            flip_bboxes.append([w - x_max, y_min, w - x_min, y_max, name])
        else:
            flip_bboxes.append([x_min, h - y_max, x_max, h - y_min, name])

    return flip_img, flip_bboxes

完整代碼

import os
import cv2
import math
import argparse
import numpy as np
import copy
from skimage import exposure
from skimage.util import random_noise
import random
import datetime
import xml.etree.ElementTree as ET
"""
YOLO圖像增廣
"""


class ImageAugmentation:
    """
    Image augmentation for object-detection training data.

    Each transform takes an image array and a list of
    [x_min, y_min, x_max, y_max, cat_name] boxes and returns the transformed
    image together with the matching boxes. ``changeImages`` applies one
    transform repeatedly to an image/XML pair and saves the results.
    """

    def __init__(self):
        pass

    def getBoxes(self, image_name):
        """
        Read the bounding boxes from the annotation XML that belongs to an image.

        :param image_name: image path without extension; the file
            ``image_name + '.xml'`` is parsed.
        :return: list with one ``[x_min, y_min, x_max, y_max, cat_name]`` entry
            per ``<object>`` element of the annotation.
        """
        root = ET.parse(image_name + '.xml').getroot()
        boxes = []
        for obj in root.findall('object'):
            bndbox = obj.find('bndbox')
            # Look the coordinates up by tag so the result does not depend on
            # the order of the children inside <bndbox>, and go through
            # float() so that values written as "12.0" are accepted too.
            box = [int(float(bndbox.find(tag).text))
                   for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
            box.append(obj.find('name').text)
            boxes.append(box)
        return boxes

    def saveXML(self, image_name, xml_name, boxes, shape1, shape0):
        """
        Write the annotation XML for an augmented image.

        :param image_name: text stored in <filename> and appended to <path>
        :param xml_name: path of the XML file to write
        :param boxes: list of [x_min, y_min, x_max, y_max, cat_name]
        :param shape1: image width, stored in <width>
        :param shape0: image height, stored in <height>
        """
        print("xml name ====================================" + xml_name)
        # Fixed: this line used to print xml_name a second time.
        print("image name ====================================" + image_name)

        root = ET.Element('annotation')
        ET.SubElement(root, 'folder').text = 'image'
        ET.SubElement(root, 'filename').text = image_name
        # NOTE(review): the path is built with a hard-coded Windows-style
        # '\image\' segment; kept unchanged so the written files stay the same.
        ET.SubElement(root, 'path').text = os.getcwd() + '\\image\\' + image_name

        source = ET.SubElement(root, 'source')
        ET.SubElement(source, 'database').text = 'Unknown'

        size = ET.SubElement(root, 'size')
        ET.SubElement(size, 'width').text = str(shape1)
        ET.SubElement(size, 'height').text = str(shape0)
        ET.SubElement(size, 'depth').text = '3'

        ET.SubElement(root, 'segmented').text = '0'

        for box in boxes:
            obj = ET.SubElement(root, 'object')
            ET.SubElement(obj, 'name').text = box[4]
            ET.SubElement(obj, 'pose').text = 'Unspecified'
            ET.SubElement(obj, 'truncated').text = '0'
            ET.SubElement(obj, 'difficult').text = '0'
            bndbox = ET.SubElement(obj, 'bndbox')
            # zip stops after the four coordinates; box[4] is the class name.
            for tag, value in zip(('xmin', 'ymin', 'xmax', 'ymax'), box):
                ET.SubElement(bndbox, tag).text = str(value)

        # Indent the tree in memory and write it once, instead of writing the
        # file, parsing it back, indenting and writing it a second time.
        self.pretty_xml(root, '\t', '\n')
        ET.ElementTree(root).write(xml_name)

    def pretty_xml(self, element, indent, newline, level=0):
        """
        Indent an ElementTree element in place so that it serialises readably.

        :param element: the Element to format; it and its descendants are modified
        :param indent: string used for one level of indentation
        :param newline: string used as the line break
        :param level: nesting depth of ``element``; 0 for the root
        """
        children = list(element)
        if children:
            # Explicit child check: testing the truth value of an Element is
            # deprecated in recent Python versions.
            inner = newline + indent * (level + 1)
            if element.text is None or element.text.isspace():
                element.text = inner
            else:
                element.text = inner + element.text.strip() + inner
        last = len(children) - 1
        for position, child in enumerate(children):
            # A child followed by a sibling keeps the children's depth; the
            # last child is followed by the parent's closing tag, one level up.
            depth = level + 1 if position < last else level
            child.tail = newline + indent * depth
            self.pretty_xml(child, indent, newline, level=level + 1)

    def changeImages(self, folder, function_name, image_name, n):
        """
        Apply one augmentation ``n`` times to an image and save every result.

        :param folder: directory the new .jpg/.xml pairs are written to
        :param function_name: one of "crop", "tran", "light", "noise",
            "rotate", "flip"
        :param image_name: image path without extension; ``image_name + '.jpg'``
            and ``image_name + '.xml'`` are read
        :param n: number of augmented copies to create
        :raises ValueError: if ``function_name`` is not a known augmentation
        :raises FileNotFoundError: if the image cannot be read
        """
        transforms = {
            "crop": self.__cropImage,
            "tran": self.__translationImage,
            "light": self.__changeLightofImage,
            "noise": self.__addNoiseToImage,
            "rotate": self.__rotateImage,
            "flip": self.__flipImage,
        }
        # Reject unknown names up front; previously `function` stayed unbound
        # and the failure only surfaced later as an UnboundLocalError.
        try:
            function = transforms[function_name]
        except KeyError:
            raise ValueError("unknown augmentation: " + repr(function_name)) from None

        image = cv2.imread(image_name + '.jpg')
        if image is None:
            # cv2.imread signals an unreadable file by returning None.
            raise FileNotFoundError("cannot read image: " + image_name + '.jpg')
        boxes = self.getBoxes(image_name)

        for i in range(1, n + 1):
            print(function_name + " image #" + str(i))
            # Work on copies so every round starts from the original data.
            change_img, change_boxes = function(copy.deepcopy(image), copy.deepcopy(boxes))
            print("Old boxes: ", boxes)
            print("New boxes: ", change_boxes)
            current_time = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
            current_num = str(random.randint(0, 9999))
            save_stem = folder + '/' + current_time + '_' + str(i) + current_num
            save_image_name = save_stem + '.jpg'
            save_xml_name = save_stem + '.xml'
            print(image_name)
            print("save image name: " + save_image_name)
            print("save xml name:   " + save_xml_name)
            cv2.imwrite(save_image_name, change_img)
            self.saveXML(save_image_name, save_xml_name, change_boxes, change_img.shape[1], change_img.shape[0])
            print("Save new image to current path: " + save_image_name)
            print("Save new xml to current path:   " + save_xml_name)
            print("\n")

    # 1 - crop
    def __cropImage(self, img, boxes):
        """
        Randomly crop the image to a window that still contains every box.

        :param img: image array; shape[0] is used as height, shape[1] as width
        :param boxes: list of [x_min, y_min, x_max, y_max, cat_name]
        :return: (crop_img, crop_bboxes) - the cropped image and the boxes
            expressed in the coordinates of the cropped image
        """
        h, w = img.shape[:2]

        if not boxes:
            # Without boxes the random window below could come out inverted
            # (min > max) and yield an empty image, so return the input as is.
            return img, []

        # Smallest rectangle enclosing all boxes (never larger than the image).
        x_min = min(w, min(box[0] for box in boxes))
        y_min = min(h, min(box[1] for box in boxes))
        x_max = max(0, max(box[2] for box in boxes))
        y_max = max(0, max(box[3] for box in boxes))

        # Grow that rectangle by a random amount towards each image border.
        crop_x_min = int(x_min - random.uniform(0, x_min))
        crop_y_min = int(y_min - random.uniform(0, y_min))
        crop_x_max = int(x_max + random.uniform(0, w - x_max))
        crop_y_max = int(y_max + random.uniform(0, h - y_max))

        # Keep the window inside the image.
        crop_x_min = max(0, crop_x_min)
        crop_y_min = max(0, crop_y_min)
        crop_x_max = min(w, crop_x_max)
        crop_y_max = min(h, crop_y_max)

        crop_img = img[crop_y_min:crop_y_max, crop_x_min:crop_x_max]

        # Shift the boxes into the coordinate system of the cropped image.
        crop_bboxes = [
            [int(box[0] - crop_x_min), int(box[1] - crop_y_min),
             int(box[2] - crop_x_min), int(box[3] - crop_y_min), box[4]]
            for box in boxes
        ]
        return crop_img, crop_bboxes

    # 2 - translate
    def __translationImage(self, img, boxes):
        """
        Shift the image by a random offset and move the boxes with it.

        :param img: image array; shape[0] is used as height, shape[1] as width
        :param boxes: list of [x_min, y_min, x_max, y_max, cat_name]
        :return: (shift_img, shift_bboxes) - the shifted image (same size as
            the input) and the shifted boxes
        """
        h, w = img.shape[:2]

        # Smallest rectangle enclosing all boxes.
        x_min, y_min = w, h
        x_max, y_max = 0, 0
        for bbox in boxes:
            x_min = min(x_min, bbox[0])
            y_min = min(y_min, bbox[1])
            x_max = max(x_max, bbox[2])
            # Fixed: this used to be max(x_max, bbox[3]), which mixed the
            # horizontal extent into the vertical one.
            y_max = max(y_max, bbox[3])

        # Distance from that rectangle to each image border, i.e. the largest
        # shift in each direction that keeps all boxes inside the image.
        d_to_left = x_min
        d_to_right = w - x_max
        d_to_top = y_min
        d_to_bottom = h - y_max

        # Translation matrix [[1, 0, x], [0, 1, y]]: positive x moves the image
        # right, positive y moves it down. Only a third of the available
        # distance is used in each direction.
        x = random.uniform(-(d_to_left / 3), d_to_right / 3)
        y = random.uniform(-(d_to_top / 3), d_to_bottom / 3)
        M = np.float32([[1, 0, x], [0, 1, y]])

        # Affine warp onto a canvas of the original size.
        shift_img = cv2.warpAffine(img, M, (w, h))

        shift_bboxes = [
            [int(bbox[0] + x), int(bbox[1] + y),
             int(bbox[2] + x), int(bbox[3] + y), bbox[4]]
            for bbox in boxes
        ]
        return shift_img, shift_bboxes

    # 3 - change brightness
    def __changeLightofImage(self, img, boxes):
        """
        Change the brightness of the image with a random gamma adjustment.

        :param img: image array
        :param boxes: bounding boxes; returned as a deep copy with unchanged
            coordinates
        :return: (adjusted image, copied boxes)
        """
        # Gamma is drawn from [0.5, 1.5]; per the original note, a value above
        # 1 darkens the image and a value below 1 brightens it.
        flag = random.uniform(0.5, 1.5)
        newBoxes = copy.deepcopy(boxes)
        newImage = exposure.adjust_gamma(img, flag)
        return newImage, newBoxes

    # 4 - add Gaussian noise
    def __addNoiseToImage(self, img, boxes):
        """
        Add Gaussian noise to the image.

        :param img: image array
        :param boxes: bounding boxes; returned as a deep copy with unchanged
            coordinates
        :return: (noisy image, copied boxes). Per the original note the noise
            function outputs pixel values in [0, 1], so the result is scaled
            by 255; it is not cast back to an integer type here.
        """
        newBoxes = copy.deepcopy(boxes)
        newImage = random_noise(img, mode='gaussian', clip=True) * 255
        return newImage, newBoxes

    # 5 - rotate
    def __rotateImage(self, img, boxes):
        """
        Rotate and scale the image by random amounts and recompute the boxes.

        The angle is drawn from uniform(-45, 45) degrees and the scale from
        uniform(0.5, 1.5) inside the function; neither is a parameter.

        :param img: image array; shape[0] is used as height, shape[1] as width
        :param boxes: list of [x_min, y_min, x_max, y_max, cat_name]
        :return: (rot_img, rot_bboxes) - the rotated image and, for every input
            box, the bounding rectangle of its four transformed edge midpoints
        """
        w = img.shape[1]
        h = img.shape[0]
        angle = random.uniform(-45, 45)
        scale = random.uniform(0.5, 1.5)
        rangle = np.deg2rad(angle)
        # Width and height of the output canvas: the extents of the rotated
        # w x h rectangle, multiplied by the scale factor.
        nw = (abs(np.sin(rangle) * h) + abs(np.cos(rangle) * w)) * scale
        nh = (abs(np.cos(rangle) * h) + abs(np.sin(rangle) * w)) * scale
        # getRotationMatrix2D(center, angle, scale) -> 2x3 affine matrix.
        rot_mat = cv2.getRotationMatrix2D((nw * 0.5, nh * 0.5), angle, scale)
        # Offset between the new canvas centre and the old image centre,
        # mapped through the rotation matrix.
        rot_move = np.dot(rot_mat, np.array([(nw - w) * 0.5, (nh - h) * 0.5, 0]))
        # The move only affects the translation, so update the translation
        # part of the transform.
        rot_mat[0, 2] += rot_move[0]
        rot_mat[1, 2] += rot_move[1]
        # Affine warp; the canvas size is rounded up.
        rot_img = cv2.warpAffine(img, rot_mat, (int(math.ceil(nw)), int(math.ceil(nh))),
                                 flags=cv2.INTER_LANCZOS4)

        # Map the midpoints of the four edges of each box through the final
        # matrix and take the bounding rectangle of the transformed points.
        rot_bboxes = list()
        for bbox in boxes:
            x_min = bbox[0]
            y_min = bbox[1]
            x_max = bbox[2]
            y_max = bbox[3]
            name = bbox[4]
            point1 = np.dot(rot_mat, np.array([(x_min + x_max) / 2, y_min, 1]))
            point2 = np.dot(rot_mat, np.array([x_max, (y_min + y_max) / 2, 1]))
            point3 = np.dot(rot_mat, np.array([(x_min + x_max) / 2, y_max, 1]))
            point4 = np.dot(rot_mat, np.array([x_min, (y_min + y_max) / 2, 1]))

            # One point per row, converted to integer coordinates.
            concat = np.vstack((point1, point2, point3, point4))
            concat = concat.astype(np.int32)
            rx, ry, rw, rh = cv2.boundingRect(concat)
            rx_min = rx
            ry_min = ry
            rx_max = rx + rw
            ry_max = ry + rh
            rot_bboxes.append([rx_min, ry_min, rx_max, ry_max, name])
        return rot_img, rot_bboxes

    # 6 - mirror
    def __flipImage(self, img, bboxes):
        """
        Mirror the image horizontally or vertically (chosen at random) and
        mirror the boxes the same way.

        :param img: image array; shape[0] is used as height, shape[1] as width
        :param bboxes: list of [x_min, y_min, x_max, y_max, cat_name]
        :return: (flip_img, flip_bboxes) - the mirrored image and mirrored boxes
        """
        horizon = random.random() < 0.5
        h, w = img.shape[:2]

        # cv2.flip: a positive flipCode mirrors left-right, 0 mirrors
        # top-bottom. Fixed: the horizontal branch used to pass -1, which flips
        # BOTH axes and therefore did not match the horizontally mirrored
        # boxes below. cv2.flip returns a new array, so no copy is needed.
        flip_img = cv2.flip(img, 1 if horizon else 0)

        # Mirror the boxes; min and max swap roles along the flipped axis.
        flip_bboxes = []
        for bbox in bboxes:
            x_min, y_min, x_max, y_max, name = bbox[:5]
            if horizon:
                flip_bboxes.append([w - x_max, y_min, w - x_min, y_max, name])
            else:
                flip_bboxes.append([x_min, h - y_max, x_max, h - y_min, name])

        return flip_img, flip_bboxes


if __name__ == '__main__':
    # Command line: a single positional argument naming the image folder.
    arg_parser = argparse.ArgumentParser(description='Give old image folder.')
    arg_parser.add_argument('folder', help='old image folder')
    source_dir = str(arg_parser.parse_args().folder)

    augmenter = ImageAugmentation()
    # The listing is taken once, before any augmented file is written.
    for entry in os.listdir(source_dir):
        stem, extension = os.path.splitext(entry)
        if extension != '.jpg':  # only .jpg files are processed
            continue
        base = source_dir + '/' + stem
        # Five augmented copies per transform, in this order.
        for transform in ("crop", "tran", "light", "noise", "rotate"):
            augmenter.changeImages(source_dir, transform, base, 5)

Yolo圖像增廣

Yolo圖像增廣

工作流程

輔助工作

讀取圖片

讀取標註XML文件

保存變換後的圖片

保存更新過的標註

美化更新過的XML

圖像及座標變換

1-裁切

2-平移

3-改變亮度

4-高斯噪聲

5-旋轉

6-鏡像

完整代碼

如何使用 JS 判斷用戶是否處於活躍狀態

通過HPA+CronHPA組合應對業務複雜彈性伸縮場景

❤️‍🔥 Solon Cloud Event 新的事務特性與應用

DeepStream Python Apps

如何打包Python程序

課後練習第1章 Python入門

Ubuntu給python程序添加桌面圖標

吳恩達《神經網絡和深度學習》學習筆記

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結