MTCNN PNet: Image Processing for Training

Annotation Parsing

PNet training requires cropping patches from the images, and normalization is only defined after cropping, so the original annotation file is re-parsed here.

The original annotation format is an image path, the number of faces, and then one line per face beginning with x y w h:

0--Parade/0_Parade_marchingband_1_849.jpg
1
449 330 122 149 0 0 0 0 0 0 
0--Parade/0_Parade_Parade_0_904.jpg
1
361 98 263 339 0 0 0 0 0 0 

We define the annotation information of a single image as follows:

from os.path import basename, dirname, join

import numpy as np


class Label(object):
    def __init__(self, name, prefix='.'):
        self.name = join(prefix, basename(name))
        self.bbox = []

    def addLineBox(self, line: str):
        # parse the leading "x y w h" and convert it to (x1, y1, x2, y2)
        box = np.array([int(x) for x in line.split(' ')[:4]])
        box[2] += box[0]
        box[3] += box[1]
        self.bbox.append(box)

    def boxes(self):
        return np.array(self.bbox)
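
As a quick sanity check (the prefix just mirrors what the loader below would pass), the first annotation line from above parses into corner coordinates:

# (x, y, w, h) -> (x1, y1, x2, y2) conversion on the first sample line
label = Label('0--Parade/0_Parade_marchingband_1_849.jpg', prefix='../wider/train/images')
label.addLineBox('449 330 122 149 0 0 0 0 0 0')
print(label.boxes())  # [[449 330 571 479]]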

The whole file is parsed as follows:

class LabelLoader(object):

    def __init__(self, label_path):
        # the label file sits next to the images directory
        self.prefix = join(dirname(label_path), 'images')
        self.path = label_path
        self.labels = []
        self._load()

    def _load(self):
        with open(self.path, 'r') as f:
            # strip whitespace from both ends
            lines = [line.strip() for line in f.readlines()]
        cursor = 0
        length = len(lines)
        while cursor < length:
            # image name
            label = Label(lines[cursor], prefix=self.prefix)
            # face count; an image with 0 faces still has one row of zeros, so clamp to 1
            count = max(1, int(lines[cursor + 1]))
            # two lines consumed so far
            cursor += 2
            while count > 0:
                # parse one box per line
                label.addLineBox(lines[cursor])
                cursor += 1
                count -= 1
            # collect the finished label
            self.labels.append(label)
        return self

    # convenient iteration
    def __getitem__(self, idx):
        return self.labels[idx]

    def __len__(self):
        return len(self.labels)

Usage:

if __name__ == '__main__':
    for index, bel in enumerate(LabelLoader('../wider/train/labels.txt')):
        print(bel)

Image Requirements

Before going further, one characteristic of PNet itself deserves emphasis.

Unlike networks such as YOLO, it does not regress everything directly in a single pass, nor does it regress coordinates at every pixel of a feature map.

It is closer to regression on a single patch than on the whole image at once; this structural trait comes from convolution, and is one of the payoffs of a fully convolutional design.

That is why, during training, it is aimed only at the annotated face boxes rather than at the full image.

A thorough summary of how this all works will follow once everything is finished; for now the point to stress is simply that regression is done directly against the face box.
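
To make the fully convolutional point concrete, here is a minimal sketch (assuming PyTorch and the commonly cited PNet layout of three 3x3 convolutions, one pooling step, and a 1x1 head; the network actually used in this series is covered later): a 12x12 crop collapses to a single output position, while a larger image yields one prediction per receptive field.

import torch
from torch import nn

# sketch only: a typical PNet body with a single combined 1x1 head for size demonstration
pnet = nn.Sequential(
    nn.Conv2d(3, 10, 3), nn.PReLU(),   # 12 -> 10
    nn.MaxPool2d(2, 2),                # 10 -> 5
    nn.Conv2d(10, 16, 3), nn.PReLU(),  # 5 -> 3
    nn.Conv2d(16, 32, 3), nn.PReLU(),  # 3 -> 1
    nn.Conv2d(32, 2 + 4, 1),           # per-position face score + box offsets
)
print(pnet(torch.zeros(1, 3, 12, 12)).shape)    # torch.Size([1, 6, 1, 1])
print(pnet(torch.zeros(1, 3, 120, 120)).shape)  # torch.Size([1, 6, 55, 55])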

Four Kinds of Samples

To make training more effective, the idea behind Focal Loss is borrowed and four kinds of crops are generated in total:

| Sample type | Difficulty | Criterion (IoU) | Count (per image) |
| --- | --- | --- | --- |
| Positive | Easy | $iou \ge 0.65$ | 20 in total (easy and hard positives combined) |
| Positive | Hard | $0.4 \le iou \lt 0.65$ | 20 in total (easy and hard positives combined) |
| Negative | Hard | $0 \lt iou \le 0.3$ | 5 |
| Negative | Easy | $iou = 0$ (no overlap with any face) | 50 |
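
The thresholds that realize this table appear in the config section later (floor = 0.3, threshold = 0.4, ceiling = 0.65). Purely for illustration, a hypothetical helper (not part of the project code) would sort a crop's best IoU like this:

def categorize(max_iou, floor=0.3, threshold=0.4, ceiling=0.65):
    # hypothetical helper, only to illustrate the split; Crop.crop() below makes the real decisions
    if max_iou > ceiling:
        return 'positive (easy)'
    if max_iou > threshold:
        return 'positive (hard) / difficult'
    if max_iou < floor:
        return 'negative (hard)' if max_iou > 0 else 'negative (easy)'
    return 'unused'  # crops between floor and threshold are simply re-drawn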

Other Constraints

To generate better samples, negatives are further split into hard and easy according to whether the cropped region intersects the original annotation at all.

In particular, for positive samples, bboxes that are too small also need to be ignored in order to obtain better training material.

Image Cropping

IoU

On top of the basic IoU computation, an extra check is needed to decide whether the two boxes intersect at all.

Otherwise, for non-overlapping boxes, the computed IoU can come out below 0 or above 1.

import numpy as np


def area(x1, y1, x2, y2):
    return (x2 - x1) * (y2 - y1)


def IOU(sample, boxes):
    # intersection rectangle (broadcast: one sample box against N annotated boxes)
    inner_x1 = np.maximum(sample[:, 0], boxes[:, 0])
    inner_y1 = np.maximum(sample[:, 1], boxes[:, 1])
    inner_x2 = np.minimum(sample[:, 2], boxes[:, 2])
    inner_y2 = np.minimum(sample[:, 3], boxes[:, 3])
    # the IoU is only valid when the boxes actually overlap
    valid = np.logical_and(inner_x2 > inner_x1, inner_y2 > inner_y1)
    area_inner = area(inner_x1, inner_y1, inner_x2, inner_y2)
    area_boxes = area(boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3])
    area_sample = area(sample[:, 0], sample[:, 1], sample[:, 2], sample[:, 3])
    ious = area_inner / (area_boxes + area_sample - area_inner + 0.0000001)
    # non-overlapping pairs count as 0
    ious[~valid] = 0
    return ious
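
A quick check with made-up boxes: the first candidate covers half of the sample, the second does not touch it at all (without the valid mask, its two negative side lengths would multiply into a bogus positive area):

sample = np.array([[0, 0, 10, 10]])
boxes = np.array([[5, 0, 15, 10],     # overlaps half of the sample
                  [20, 20, 30, 30]])  # disjoint
print(IOU(sample, boxes))  # roughly [0.3333 0.]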

Cropping

class Crop(object):

    def __init__(self, label: Label, prefix='12'):
        self.prefix = prefix
        self.label = label
        self.image = loadImage(label.name)
        self.basename = basename(label.name)
        self.height, self.width, _ = self.image.shape
        self.standardLength = min(self.height, self.width)
        self.boxes = label.boxes()
        # rule out images without any face
        self.onlyNegative = not np.any(self.boxes)
        self.negativeCounter = 0
        self.positiveCounter = 0
        self.difficultCounter = 0

    def crop(self, floor=config.floor, threshold=config.threshold, ceiling=config.ceiling):
        while any([
            self.negativeCounter < negativeCount,
            self.positiveCounter < positiveCount,
            self.difficultCounter < difficultCount
        ]):
            cropSize = random.randint(12, self.standardLength / 2)
            x1 = random.randint(0, self.width - cropSize)
            y1 = random.randint(0, self.height - cropSize)
            x2 = x1 + cropSize
            y2 = y1 + cropSize
            sampleBox = np.array([[x1, y1, x2, y2]])
            iou = IOU(sampleBox, self.boxes)
            cropImage = self.image[y1:y2, x1:x2, :]
            resizeCorp = cv2.resize(cropImage, (12, 12), interpolation=cv2.INTER_LINEAR)
            max_iou_index = np.argmax(iou)
            max_iou = iou[max_iou_index]
            if (max_iou < floor) and (self.negativeCounter < negativeCount):
                self.classify = 0
                # the last few negatives must be hard ones (some overlap with a face)
                if (max_iou == 0) and (self.negativeCounter > negativeCount - 5):
                    continue
                self.savePath = join(cropImageDir, self.prefix, 'negative', f'{self.negativeCounter}-{self.basename}')
                saveImage(resizeCorp, self.savePath)
                self.negativeCounter += 1
            elif self.onlyNegative:
                # no usable face: mark the positive/difficult quotas as filled
                self.difficultCounter = difficultCount
                self.positiveCounter = positiveCount
                continue
            elif self.distinctBox(self.boxes[max_iou_index]) and (max_iou > threshold):
                if (max_iou > ceiling) and (self.positiveCounter < positiveCount):
                    self.classify = 1
                    self.savePath = join(cropImageDir, self.prefix, 'positive',
                                         f'{self.positiveCounter}-{self.basename}')
                    saveImage(resizeCorp, self.savePath)
                    self.positiveCounter += 1
                elif self.difficultCounter < difficultCount:
                    self.classify = 2
                    self.savePath = join(cropImageDir, self.prefix, 'difficult',
                                         f'{self.difficultCounter}-{self.basename}')
                    saveImage(resizeCorp, self.savePath)
                    self.difficultCounter += 1
                else:
                    continue
                # for visual checking
                # self.show(sampleBox[0], self.boxes[max_iou_index])
            # saveBox and self.file are covered in the File Recording section below
            self.saveBox(sampleBox[0], self.boxes[max_iou_index])
        self.file.close()

    def distinctBox(self, box):
        x1, y1, x2, y2 = box
        if max(x1, y1) < 0:
            return False
        w = x2 - x1
        h = y2 - y1
        valid = max(w, h) > 20
        # a single face that is too small means only negatives can be produced
        if (len(self.boxes) == 1) and (not valid):
            self.onlyNegative = True
        return valid

    def show(self, sample, box):
        _, ax = plt.subplots()
        ax.imshow(self.image)
        self.rect(ax, sample, 'green')
        self.rect(ax, box, 'red')
        plt.show()

    @staticmethod
    def rect(ax, box, color):
        x1, y1, x2, y2 = box
        w = x2 - x1
        h = y2 - y1
        shape = plt.Rectangle((x1, y1), w, h, linewidth=3, color=color)
        ax.add_patch(shape)

The crop method handles a single image object, keeping the logic compact and avoiding confusion; it is invoked like this:

if __name__ == '__main__':
    for index, bel in enumerate(LabelLoader('../wider/train/labels.txt')):
        Crop(bel).crop()

Labeling the Data

Coordinate Regression

For bounding-box regression, the original coordinates also need to be converted into a normalized format.

The target format is no longer $(\triangle x, \triangle y, \triangle w, \triangle h)$; instead, the offsets of the corner positions are predicted directly: $(\triangle x_1, \triangle y_1, \triangle x_2, \triangle y_2)$.
The normalization scale is still that of the original box; the exact formulas are:
$$
\left\{
\begin{aligned}
\triangle x_1 &= \frac{x_1 - x_1'}{x_2 - x_1} \\
\triangle y_1 &= \frac{y_1 - y_1'}{y_2 - y_1} \\
\triangle x_2 &= \frac{x_2 - x_2'}{x_2 - x_1} \\
\triangle y_2 &= \frac{y_2 - y_2'}{y_2 - y_1}
\end{aligned}
\right.
$$

Type Labels

To control the data better, the type of each sample also needs to be marked:

| Sample type | Difficulty | Label |
| --- | --- | --- |
| Positive | Easy | 1 |
| Positive | Hard | 2 |
| Negative | Hard | 0 |
| Negative | Easy | 0 |

With these labels, training can be controlled much more precisely.

File Recording

from entity.label import Label, LabelLoader
from utils.image import loadImage, saveImage
import numpy as np
from numpy import random
from compute.iou import IOU
from os.path import basename, join
from config.config import negativeCount, positiveCount, difficultCount, cropImageDir
import cv2
from matplotlib import pyplot as plt


class Crop(object):

    def __init__(self, label: Label, prefix='12'):
        self.prefix = prefix
        self.label = label
        self.image = loadImage(label.name)
        self.basename = basename(label.name)
        self.height, self.width, _ = self.image.shape
        self.standardLength = min(self.height, self.width)
        self.boxes = label.boxes()
        self.negativeCounter = 0
        self.positiveCounter = 0
        self.difficultCounter = 0
        self.classify = -1
        self.savePath = None
        self.file = open(join(cropImageDir, 'labels.txt'), 'w')

    def crop(self, floor=0.3, threshold=0.4, ceiling=0.65):
        while any([
            self.negativeCounter < negativeCount,
            self.positiveCounter < positiveCount,
            self.difficultCounter < difficultCount
        ]):
            cropSize = random.randint(12, self.standardLength / 2)
            x1 = random.randint(0, self.width - cropSize)
            y1 = random.randint(0, self.height - cropSize)
            x2 = x1 + cropSize
            y2 = y1 + cropSize
            sampleBox = np.array([[x1, y1, x2, y2]])
            iou = IOU(sampleBox, self.boxes)
            cropImage = self.image[y1:y2, x1:x2, :]
            resizeCorp = cv2.resize(cropImage, (12, 12), interpolation=cv2.INTER_LINEAR)
            max_iou_index = np.argmax(iou)
            max_iou = iou[max_iou_index]
            if (max_iou < floor) and (self.negativeCounter < negativeCount):
                self.classify = 0
                if (max_iou == 0) and (self.negativeCounter > negativeCount - 5):
                    continue
                self.savePath = join(cropImageDir, self.prefix, 'negative', f'{self.negativeCounter}-{self.basename}')
                saveImage(resizeCorp, self.savePath)
                self.negativeCounter += 1
            elif self.distinctBox(self.boxes[max_iou_index]) and (max_iou > threshold):
                if (max_iou > ceiling) and (self.positiveCounter < positiveCount):
                    self.classify = 1
                    self.savePath = join(cropImageDir, self.prefix, 'positive',
                                         f'{self.positiveCounter}-{self.basename}')
                    saveImage(resizeCorp, self.savePath)
                    self.positiveCounter += 1
                elif self.difficultCounter < difficultCount:
                    self.classify = 2
                    self.savePath = join(cropImageDir, self.prefix, 'difficult',
                                         f'{self.difficultCounter}-{self.basename}')
                    saveImage(resizeCorp, self.savePath)
                    self.difficultCounter += 1
                else:
                    continue
                # for visual checking
                self.show(sampleBox[0], self.boxes[max_iou_index])
            # the normalized box is written out directly by this call
            self.saveBox(sampleBox[0], self.boxes[max_iou_index])
        self.file.close()

    @staticmethod
    def distinctBox(box):
        x1, y1, x2, y2 = box
        if max(x1, y1) < 0:
            return False
        w = x2 - x1
        h = y2 - y1
        return max(w, h) > 20

    def show(self, sample, box):
        _, ax = plt.subplots()
        ax.imshow(self.image)
        self.rect(ax, sample, 'green')
        self.rect(ax, box, 'red')
        plt.show()

    def saveBox(self, sampleBox, standard):
        # do not record invalid samples
        if self.classify == -1:
            return
        # normalize against the ground-truth box size
        width = standard[2] - standard[0]
        height = standard[3] - standard[1]
        sampleBox = np.array(sampleBox)
        standard = np.array(standard)
        offset = sampleBox - standard
        delta_x1 = offset[0] / width
        delta_y1 = offset[1] / height
        delta_x2 = offset[2] / width
        delta_y2 = offset[3] / height
        self.file.write(f'{self.savePath} {self.classify} {delta_x1} {delta_y1} {delta_x2} {delta_y2}\n')
        self.classify = -1

    @staticmethod
    def rect(ax, box, color):
        x1, y1, x2, y2 = box
        w = x2 - x1
        h = y2 - y1
        shape = plt.Rectangle((x1, y1), w, h, linewidth=3, color=color)
        ax.add_patch(shape)


if __name__ == '__main__':
    # generate data for every annotated image
    for index, bel in enumerate(LabelLoader('../wider/train/labels.txt')):
        Crop(bel).crop()

Configuration

def crop(self, floor=config.floor, threshold=config.threshold, ceiling=config.ceiling):
    ...

# config/config.py
# crop thresholds
floor = 0.3
threshold = 0.4
ceiling = 0.65
# sample counts per image
negativeCount = 1
positiveCount = 1
difficultCount = 1
cropImageDir = '../crop'

Everything is set directly in the config file, giving full control over the thresholds and the sample ratios.


Fixes

The following points are improved:

  • Automatic termination when no positive samples can be generated
    If an image contains no faces at all, or only faces that are too small, the loop would otherwise never end
  • Append-mode label recording
    The label file used to be overwritten on every run; now records are always appended

from entity.label import Label, LabelLoader
from utils.image import loadImage, saveImage
import numpy as np
from numpy import random
from compute.iou import IOU
from os.path import basename, join
from config import config
from config.config import negativeCount, positiveCount, difficultCount, cropImageDir
import cv2
from matplotlib import pyplot as plt


class Crop(object):

    def __init__(self, label: Label, prefix='12'):
        self.prefix = prefix
        self.label = label
        self.image = loadImage(label.name)
        self.basename = basename(label.name)
        self.height, self.width, _ = self.image.shape
        self.standardLength = min(self.height, self.width)
        self.boxes = label.boxes()
        self.negativeCounter = 0
        self.positiveCounter = 0
        self.difficultCounter = 0
        self.classify = -1
        self.savePath = None
        # rule out images without usable faces (refined by positiveCheck below)
        self.onlyNegative = True
        # check whether the image qualifies for positive crops at all
        self.positiveCheck()
        # record everything in append mode
        self.file = open(join(cropImageDir, 'labels.txt'), 'a+')

    def crop(self, floor=config.floor, threshold=config.threshold, ceiling=config.ceiling):
        while any([
            self.negativeCounter < negativeCount,
            self.positiveCounter < positiveCount,
            self.difficultCounter < difficultCount
        ]):
            cropSize = random.randint(12, self.standardLength / 2)
            x1 = random.randint(0, self.width - cropSize)
            y1 = random.randint(0, self.height - cropSize)
            x2 = x1 + cropSize
            y2 = y1 + cropSize
            sampleBox = np.array([[x1, y1, x2, y2]])
            iou = IOU(sampleBox, self.boxes)
            cropImage = self.image[y1:y2, x1:x2, :]
            resizeCorp = cv2.resize(cropImage, (12, 12), interpolation=cv2.INTER_LINEAR)
            max_iou_index = np.argmax(iou)
            max_iou = iou[max_iou_index]
            if (max_iou < floor) and (self.negativeCounter < negativeCount):
                self.classify = 0
                if (max_iou == 0) and (self.negativeCounter > negativeCount - 5):
                    continue
                self.savePath = join(cropImageDir, self.prefix, 'negative', f'{self.negativeCounter}-{self.basename}')
                self.negativeCounter += 1
                saveImage(resizeCorp, self.savePath)
            elif self.onlyNegative:
                self.difficultCounter = difficultCount
                self.positiveCounter = positiveCount
            elif self.distinctBox(self.boxes[max_iou_index]) and (max_iou > threshold):
                if (max_iou > ceiling) and (self.positiveCounter < positiveCount):
                    self.classify = 1
                    self.savePath = join(cropImageDir, self.prefix, 'positive',
                                         f'{self.positiveCounter}-{self.basename}')
                    self.positiveCounter += 1
                    saveImage(resizeCorp, self.savePath)

                elif self.difficultCounter < difficultCount:
                    self.classify = 2
                    self.savePath = join(cropImageDir, self.prefix, 'difficult',
                                         f'{self.difficultCounter}-{self.basename}')
                    self.difficultCounter += 1
                    saveImage(resizeCorp, self.savePath)

                else:
                    continue
                # for visual checking
                # self.show(sampleBox[0], self.boxes[max_iou_index])
            else:
                continue
            self.saveBox(sampleBox[0], self.boxes[max_iou_index])
        self.file.close()

    def positiveCheck(self):
        w = self.boxes[:, 2] - self.boxes[:, 0]
        h = self.boxes[:, 3] - self.boxes[:, 1]
        length = np.maximum(w, h)
        # if no box is large enough, only negatives will be generated
        self.onlyNegative = not np.any(length > 20)

    # check whether a box is large enough to use
    @staticmethod
    def distinctBox(box):
        x1, y1, x2, y2 = box
        if max(x1, y1) < 0:
            return False
        w = x2 - x1
        h = y2 - y1
        return max(w, h) > 20

    def show(self, sample, box):
        _, ax = plt.subplots()
        ax.imshow(self.image)
        self.rect(ax, sample, 'green')
        self.rect(ax, box, 'red')
        plt.show()

    # compute the normalized offsets and write them to the label file
    def saveBox(self, sampleBox, standard):
        if self.classify == -1:
            return
        width = standard[2] - standard[0]
        height = standard[3] - standard[1]
        sampleBox = np.array(sampleBox)
        standard = np.array(standard)
        offset = sampleBox - standard
        delta_x1 = offset[0] / width
        delta_y1 = offset[1] / height
        delta_x2 = offset[2] / width
        delta_y2 = offset[3] / height
        self.file.write(f'{self.savePath} {self.classify} {delta_x1} {delta_y1} {delta_x2} {delta_y2}\n')
        self.classify = -1
        self.file.flush()

    @staticmethod
    def rect(ax, box, color):
        x1, y1, x2, y2 = box
        w = x2 - x1
        h = y2 - y1
        shape = plt.Rectangle((x1, y1), w, h, linewidth=3, color=color)
        ax.add_patch(shape)


if __name__ == '__main__':
    for idx, item in enumerate(LabelLoader('../wider/train/labels.txt')):
        Crop(item).crop()
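
Every line that saveBox writes has the form `path classify dx1 dy1 dx2 dy2`. As a minimal sketch (this reader is not part of the project code shown above, and it assumes the crop paths contain no spaces), the records can be read back like this:

def readCropLabels(path='../crop/labels.txt'):
    # sketch only: parse the lines written by saveBox
    samples = []
    with open(path, 'r') as f:
        for line in f:
            parts = line.split()
            if len(parts) != 6:
                continue
            imagePath, classify = parts[0], int(parts[1])
            offsets = [float(v) for v in parts[2:]]
            samples.append((imagePath, classify, offsets))
    return samples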


