mtcnn-Pnet图像训练处理

标注解析

PNET训练需要对图片进行截取,截取以后才进行归一化定义,这里重新对原来的标注文件进行解析。

原始的标注格式为

0--Parade/0_Parade_marchingband_1_849.jpg
1
449 330 122 149 0 0 0 0 0 0 
0--Parade/0_Parade_Parade_0_904.jpg
1
361 98 263 339 0 0 0 0 0 0 

我们把单个的图片标记信息定义如下

import numpy as np


class Label(object):
    """Annotation of one image: its path and its face boxes.

    Boxes are kept as corner coordinates ``[x1, y1, x2, y2]``.
    """

    def __init__(self, name, prefix='.'):
        # Only the file name of `name` is kept; it is joined onto `prefix`.
        self.name = join(prefix, basename(name))
        self.bbox = []

    def addLineBox(self, line: str):
        """Parse one annotation line and store its box.

        The first four whitespace-separated fields are read as
        ``x y w h`` and converted to ``[x1, y1, x2, y2]``; any further
        fields on the line are ignored.
        """
        # split() without an argument tolerates tabs and repeated spaces,
        # which split(' ') would turn into empty fields that int() rejects.
        box = np.array([int(x) for x in line.split()[:4]])
        box[2] += box[0]
        box[3] += box[1]
        self.bbox.append(box)

    def boxes(self):
        """Return all boxes as an ``(N, 4)`` integer array.

        The reshape keeps the array two-dimensional even when no box has
        been added, so column indexing such as ``boxes[:, 0]`` stays valid.
        """
        return np.array(self.bbox, dtype=int).reshape(-1, 4)

整体文件解析如下

class LabelLoader(object):
    """Parse an annotation file into a sequence of Label objects.

    The file is read as repeated records: one line with the image name,
    one line with the number of boxes, then one line per box. The loader
    supports ``len()`` and indexing, and can therefore be iterated.
    """

    def __init__(self, label_path):
        # The images directory sits next to the label file.
        self.prefix = join(dirname(label_path), 'images')
        self.path = label_path
        self.labels = []
        self._load()

    def _load(self):
        """Read the label file and fill ``self.labels``; returns ``self``."""
        with open(self.path, 'r') as f:
            # Strip surrounding whitespace and drop blank lines, so that a
            # trailing empty line is not mistaken for an image name.
            stripped = (line.strip() for line in f)
            lines = [line for line in stripped if line]
        cursor = 0
        length = len(lines)
        while cursor < length:
            # Image name.
            label = Label(lines[cursor], prefix=self.prefix)
            # Number of boxes. A count of 0 is still followed by one line
            # of zeros, so at least one line is always consumed.
            count = max(1, int(lines[cursor + 1]))
            # Skip the name line and the count line.
            cursor += 2
            for _ in range(count):
                label.addLineBox(lines[cursor])
                cursor += 1
            self.labels.append(label)
        return self

    # Sequence protocol, for convenient iteration.
    def __getitem__(self, idx):
        return self.labels[idx]

    def __len__(self):
        return len(self.labels)

使用方式

if __name__ == '__main__':
    # Print every parsed label as a quick check of the loader.
    loader = LabelLoader('../wider/train/labels.txt')
    for label in loader:
        print(label)

图片要求

在此之前,先要强调一下PNET自身的一个特色。

与YOLO等网络不同,它并不一次性地直接进行回归,也不是对每一个featureMap的像素点进行座标回归。

它更像是针对单个像素,而非整张图的像素回归,这个结构特性来源于卷积,更是全卷积的功用之一。

因此,你会看见它针对的只是训练的脸框,而非全图。

详细的功用,在完工之后会进行一次全面的总结和梳理,在此只是对“针对脸框直接回归”这一点进行强调。

四种素材

为了更好的进行训练,采用FocalLoss的思想,总共会生成四种裁剪图

| 样本类型 | 难易程度 | 判断标准(iou) | 数量(每张图) |
| --- | --- | --- | --- |
| 正 | 易 | $iou \ge 0.65$ | 加上正易,共20 |
| 正 | 难 | $0.4 \le iou \lt 0.65$ | 加上正难,共20 |
| 负 | 难 | $0 \lt iou \le 0.3$ | 5 |
| 负 | 易 | $iou \le 0.3$ | 50 |

其他限制

为了生成更好的样本,还要区分负样本中的难和易,区别在于截取的区域和原来的标记是否相交。

尤其,在正样本中,为了取得更好判决素材,还需要去忽略太小的bbox

图片剪裁

iou

在基础的iou计算之上,我们还需要做额外的条件判断,以区分是否相交这个问题。

否则计算出来的iou会存在小于0和大于1的情况。

import numpy as np


def area(x1, y1, x2, y2):
    """Return the area spanned by the corners (x1, y1) and (x2, y2)."""
    width = x2 - x1
    height = y2 - y1
    return width * height


def IOU(sample, boxes):
    """Return the IoU between `sample` and every row of `boxes`.

    Both arguments are indexed as two-dimensional arrays whose columns are
    ``x1, y1, x2, y2``. Pairs that do not intersect get an IoU of 0.
    """
    s_x1, s_y1, s_x2, s_y2 = (sample[:, col] for col in range(4))
    b_x1, b_y1, b_x2, b_y2 = (boxes[:, col] for col in range(4))

    # Corners of the intersection rectangle.
    left = np.maximum(s_x1, b_x1)
    top = np.maximum(s_y1, b_y1)
    right = np.minimum(s_x2, b_x2)
    bottom = np.minimum(s_y2, b_y2)

    # Without this mask a disjoint pair could produce an "intersection"
    # with negative sides, and therefore an IoU outside [0, 1].
    intersects = np.logical_and(right > left, bottom > top)

    overlap = area(left, top, right, bottom)
    union = area(b_x1, b_y1, b_x2, b_y2) + area(s_x1, s_y1, s_x2, s_y2) - overlap
    ious = overlap / (union + 0.0000001)
    ious[np.logical_not(intersects)] = 0
    return ious

裁剪

class Crop(object):
    """Cut random 12x12 training crops out of one labelled image.

    Random square windows are compared with the annotated boxes through
    ``IOU`` and saved as negative, positive or difficult samples until the
    quotas ``negativeCount``, ``positiveCount`` and ``difficultCount`` are
    met. This variant only writes the crop images; it keeps no label file.

    NOTE(review): the loop has no attempt limit, and a crop is at most half
    of the shorter image side, so a face much larger than that may never
    reach the positive IoU - confirm this cannot happen with the data used.
    """

    def __init__(self, label: Label, prefix='12'):
        self.prefix = prefix
        self.label = label
        self.image = loadImage(label.name)
        self.basename = basename(label.name)
        self.height, self.width, _ = self.image.shape
        self.standardLength = min(self.height, self.width)
        self.boxes = label.boxes()
        # An image whose boxes are all zero has no face: negatives only.
        self.onlyNegative = not np.any(self.boxes)
        self.negativeCounter = 0
        self.positiveCounter = 0
        self.difficultCounter = 0
        self.classify = -1
        self.savePath = None

    def crop(self, floor=config.floor, threshold=config.threshold, ceiling=config.ceiling):
        """Sample random windows until every quota is filled.

        A window whose best IoU is below `floor` is a negative, above
        `ceiling` a positive, and between `threshold` and `ceiling` a
        difficult sample. Windows are resized to 12x12 and handed to
        ``saveImage``.
        """
        # Without any box that passes distinctBox no positive or difficult
        # crop can ever be accepted, so only negatives are generated.
        if not any(self.distinctBox(box) for box in self.boxes):
            self.onlyNegative = True
        # Whether a window can intersect a box at all. It cannot when every
        # box is degenerate, which is how face-less images are annotated.
        canOverlap = bool(np.any(
            (self.boxes[:, 2] > self.boxes[:, 0]) & (self.boxes[:, 3] > self.boxes[:, 1])
        ))
        while any([
            self.negativeCounter < negativeCount,
            self.positiveCounter < positiveCount,
            self.difficultCounter < difficultCount
        ]):
            # Integer bound: windows are between 12 px and half the shorter side.
            cropSize = random.randint(12, self.standardLength // 2)
            x1 = random.randint(0, self.width - cropSize)
            y1 = random.randint(0, self.height - cropSize)
            x2 = x1 + cropSize
            y2 = y1 + cropSize
            sampleBox = np.array([[x1, y1, x2, y2]])
            iou = IOU(sampleBox, self.boxes)
            cropImage = self.image[y1:y2, x1:x2, :]
            resizeCorp = cv2.resize(cropImage, (12, 12), interpolation=cv2.INTER_LINEAR)
            max_iou_index = np.argmax(iou)
            max_iou = iou[max_iou_index]
            if (max_iou < floor) and (self.negativeCounter < negativeCount):
                # The last negatives are reserved for windows that touch a
                # face. Skip that requirement when no overlap is possible,
                # otherwise the loop would never end.
                reserved = self.negativeCounter > negativeCount - 5
                if (max_iou == 0) and reserved and canOverlap:
                    continue
                self.classify = 0
                self.savePath = join(cropImageDir, self.prefix, 'negative', f'{self.negativeCounter}-{self.basename}')
                saveImage(resizeCorp, self.savePath)
                self.negativeCounter += 1
            elif self.onlyNegative:
                # Nothing but negatives can come from this image: mark the
                # other quotas as filled so the loop ends with the negatives.
                self.difficultCounter = difficultCount
                self.positiveCounter = positiveCount
            elif self.distinctBox(self.boxes[max_iou_index]) and (max_iou > threshold):
                if (max_iou > ceiling) and (self.positiveCounter < positiveCount):
                    self.classify = 1
                    self.savePath = join(cropImageDir, self.prefix, 'positive',
                                         f'{self.positiveCounter}-{self.basename}')
                    saveImage(resizeCorp, self.savePath)
                    self.positiveCounter += 1
                elif self.difficultCounter < difficultCount:
                    self.classify = 2
                    self.savePath = join(cropImageDir, self.prefix, 'difficult',
                                         f'{self.difficultCounter}-{self.basename}')
                    saveImage(resizeCorp, self.savePath)
                    self.difficultCounter += 1
                # For a visual check, call
                # self.show(sampleBox[0], self.boxes[max_iou_index]) here.

    def distinctBox(self, box):
        """Return True when `box` is large enough to produce positives.

        A box is rejected when both x1 and y1 are negative or when its
        longer side is at most 20 px. If the image has a single box and it
        is too small, the image is switched to negatives only.
        """
        x1, y1, x2, y2 = box
        if max(x1, y1) < 0:
            return False
        w = x2 - x1
        h = y2 - y1
        valid = max(w, h) > 20
        if (len(self.boxes) == 1) and (not valid):
            self.onlyNegative = True
        return valid

    def show(self, sample, box):
        """Display the image with `sample` in green and `box` in red.

        Both arguments are single boxes ``(x1, y1, x2, y2)``.
        """
        _, ax = plt.subplots()
        ax.imshow(self.image)
        self.rect(ax, sample, 'green')
        self.rect(ax, box, 'red')
        plt.show()

    @staticmethod
    def rect(ax, box, color):
        """Add a rectangle patch for `box` to the axes `ax`."""
        x1, y1, x2, y2 = box
        w = x2 - x1
        h = y2 - y1
        shape = plt.Rectangle((x1, y1), w, h, linewidth=3, color=color)
        ax.add_patch(shape)

裁剪方法针对单个对象,逻辑浓缩,避免混乱,调用方式如下

if __name__ == '__main__':
    # One Crop per annotated image.
    labels = LabelLoader('../wider/train/labels.txt')
    for label in labels:
        Crop(label).crop()

标记数据

座标回归

为了进行框图回归,还需要将原来的座标进行归一化的格式转换。
在这里插入图片描述

数据格式不再是 $(\triangle x, \triangle y, \triangle w, \triangle h)$,而是直接针对位置进行偏移预测

$$(\triangle x_1, \triangle y_1, \triangle x_2, \triangle y_2)$$

归一化的尺度,还是以原来的尺度为准,具体的计算式为

$$
\left\{
\begin{aligned}
\triangle x_1 &= \frac{x_1 - x_1'}{x_2 - x_1} \\
\triangle y_1 &= \frac{y_1 - y_1'}{y_2 - y_1} \\
\triangle x_2 &= \frac{x_2 - x_2'}{x_2 - x_1} \\
\triangle y_2 &= \frac{y_2 - y_2'}{y_2 - y_1}
\end{aligned}
\right.
$$

类型标记

为了更好的控制数据,对于数据样本的类型也需要进行区分

| 样本类型 | 难易程度 | 标记类型 |
| --- | --- | --- |
| 正 | 易 | 1 |
| 正 | 难 | 2 |
| 负 | 难 | 0 |
| 负 | 易 | 0 |

这样,在训练的时候,就能够更好的进行控制了。

文件记录

from entity.label import Label, LabelLoader
from utils.image import loadImage, saveImage
import numpy as np
from numpy import random
from compute.iou import IOU
from os.path import basename, join
from config.config import negativeCount, positiveCount, difficultCount, cropImageDir
import cv2
from matplotlib import pyplot as plt


class Crop(object):
    """Cut random 12x12 training crops out of one labelled image.

    Random square windows are compared with the annotated boxes through
    ``IOU`` and saved as negative (0), positive (1) or difficult (2)
    samples until the quotas ``negativeCount``, ``positiveCount`` and
    ``difficultCount`` are met. Every saved crop is also recorded, with its
    normalised box offsets, in ``labels.txt`` under ``cropImageDir``.

    NOTE(review): the loop has no attempt limit, and a crop is at most half
    of the shorter image side, so a face much larger than that may never
    reach the positive IoU - confirm this cannot happen with the data used.
    """

    def __init__(self, label: 'Label', prefix='12'):
        self.prefix = prefix
        self.label = label
        self.image = loadImage(label.name)
        self.basename = basename(label.name)
        self.height, self.width, _ = self.image.shape
        self.standardLength = min(self.height, self.width)
        self.boxes = label.boxes()
        self.negativeCounter = 0
        self.positiveCounter = 0
        self.difficultCounter = 0
        # -1 means "nothing pending to record"; see saveBox.
        self.classify = -1
        self.savePath = None
        # Append: one Crop is created per image, and mode 'w' would wipe
        # the records written for the images processed before this one.
        self.file = open(join(cropImageDir, 'labels.txt'), 'a')

    def crop(self, floor=0.3, threshold=0.4, ceiling=0.65):
        """Sample random windows until every quota is filled.

        A window whose best IoU is below `floor` is a negative, above
        `ceiling` a positive, and between `threshold` and `ceiling` a
        difficult sample. The label file is closed when the method ends,
        including when an error is raised.
        """
        try:
            self._fillQuotas(floor, threshold, ceiling)
        finally:
            self.file.close()

    def _fillQuotas(self, floor, threshold, ceiling):
        """Run the sampling loop of crop()."""
        # Without any box that passes distinctBox no positive or difficult
        # crop can ever be accepted; mark those quotas as filled up front
        # so the loop ends once the negatives are done.
        if not any(self.distinctBox(box) for box in self.boxes):
            self.positiveCounter = positiveCount
            self.difficultCounter = difficultCount
        # Whether a window can intersect a box at all. It cannot when every
        # box is degenerate, which is how face-less images are annotated.
        canOverlap = bool(np.any(
            (self.boxes[:, 2] > self.boxes[:, 0]) & (self.boxes[:, 3] > self.boxes[:, 1])
        ))
        while any([
            self.negativeCounter < negativeCount,
            self.positiveCounter < positiveCount,
            self.difficultCounter < difficultCount
        ]):
            # Integer bound: windows are between 12 px and half the shorter side.
            cropSize = random.randint(12, self.standardLength // 2)
            x1 = random.randint(0, self.width - cropSize)
            y1 = random.randint(0, self.height - cropSize)
            x2 = x1 + cropSize
            y2 = y1 + cropSize
            sampleBox = np.array([[x1, y1, x2, y2]])
            iou = IOU(sampleBox, self.boxes)
            cropImage = self.image[y1:y2, x1:x2, :]
            resizeCorp = cv2.resize(cropImage, (12, 12), interpolation=cv2.INTER_LINEAR)
            max_iou_index = np.argmax(iou)
            max_iou = iou[max_iou_index]
            if (max_iou < floor) and (self.negativeCounter < negativeCount):
                # The last negatives are reserved for windows that touch a
                # face. Skip that requirement when no overlap is possible,
                # otherwise the loop would never end.
                reserved = self.negativeCounter > negativeCount - 5
                if (max_iou == 0) and reserved and canOverlap:
                    continue
                # Set only once the crop is accepted, so a skipped window
                # cannot leave a stale class behind for saveBox.
                self.classify = 0
                self.savePath = join(cropImageDir, self.prefix, 'negative', f'{self.negativeCounter}-{self.basename}')
                saveImage(resizeCorp, self.savePath)
                self.negativeCounter += 1
            elif self.distinctBox(self.boxes[max_iou_index]) and (max_iou > threshold):
                if (max_iou > ceiling) and (self.positiveCounter < positiveCount):
                    self.classify = 1
                    self.savePath = join(cropImageDir, self.prefix, 'positive',
                                         f'{self.positiveCounter}-{self.basename}')
                    saveImage(resizeCorp, self.savePath)
                    self.positiveCounter += 1
                elif self.difficultCounter < difficultCount:
                    self.classify = 2
                    self.savePath = join(cropImageDir, self.prefix, 'difficult',
                                         f'{self.difficultCounter}-{self.basename}')
                    saveImage(resizeCorp, self.savePath)
                    self.difficultCounter += 1
                else:
                    continue
                # show() is not called here: plt.show() would block the
                # batch on every accepted crop. Call it by hand to inspect.
            # Record the crop; saveBox ignores the call when nothing was saved.
            self.saveBox(sampleBox[0], self.boxes[max_iou_index])

    @staticmethod
    def distinctBox(box):
        """Return True when `box` is large enough to produce positives.

        A box is rejected when both x1 and y1 are negative or when its
        longer side is at most 20 px.
        """
        x1, y1, x2, y2 = box
        if max(x1, y1) < 0:
            return False
        w = x2 - x1
        h = y2 - y1
        return max(w, h) > 20

    def show(self, sample, box):
        """Display the image with `sample` in green and `box` in red."""
        _, ax = plt.subplots()
        ax.imshow(self.image)
        self.rect(ax, sample, 'green')
        self.rect(ax, box, 'red')
        plt.show()

    def saveBox(self, sampleBox, standard):
        """Write one record for the crop that was just saved.

        The record holds ``savePath``, ``classify`` and the four corner
        offsets ``sampleBox - standard`` divided by the width (x) or the
        height (y) of `standard`. Nothing is written while ``classify`` is
        -1; it is reset to -1 after each record.
        """
        # Nothing was saved for this window.
        if self.classify == -1:
            return
        sampleBox = np.asarray(sampleBox, dtype=float)
        standard = np.asarray(standard, dtype=float)
        width = standard[2] - standard[0]
        height = standard[3] - standard[1]
        if width > 0 and height > 0:
            scale = np.array([width, height, width, height])
            deltas = (sampleBox - standard) / scale
        else:
            # Degenerate reference box (e.g. the all-zero box of an image
            # without faces): record zero offsets instead of inf/nan.
            deltas = np.zeros(4)
        delta_x1, delta_y1, delta_x2, delta_y2 = (float(v) for v in deltas)
        self.file.write(f'{self.savePath} {self.classify} {delta_x1} {delta_y1} {delta_x2} {delta_y2}\n')
        self.classify = -1

    @staticmethod
    def rect(ax, box, color):
        """Add a rectangle patch for `box` to the axes `ax`."""
        x1, y1, x2, y2 = box
        w = x2 - x1
        h = y2 - y1
        shape = plt.Rectangle((x1, y1), w, h, linewidth=3, color=color)
        ax.add_patch(shape)


if __name__ == '__main__':
    # Generate the crops of the whole data set.
    dataset = LabelLoader('../wider/train/labels.txt')
    for entry in dataset:
        cropper = Crop(entry)
        cropper.crop()

配置

# Signature excerpt: the three IoU thresholds of crop() default to the
# values defined in the config module; the body is elided here.
def crop(self, floor=config.floor, threshold=config.threshold, ceiling=config.ceiling):
  ...
# Crop settings
# IoU thresholds, used as the default arguments of crop()
floor = 0.3  # a crop whose best IoU is below this is a negative sample
threshold = 0.4  # a crop must exceed this to be a difficult or positive sample
ceiling = 0.65  # a crop above this is a positive sample
# Quotas that the crop loop fills for each image
negativeCount = 1
positiveCount = 1
difficultCount = 1
# Directory under which the crops and labels.txt are written
cropImageDir = '../crop'

config文件中直接配置,就可以完全的掌握阈值、配比了。


修复

完善如下要点

  • 无正样本生成条件自动结束
    如果图片中完全没脸,或者都是小脸,会无限循环
  • 文本追加记录
    原来文件会进行重写,现在统一进行直接追加
from entity.label import Label, LabelLoader
from utils.image import loadImage, saveImage
import numpy as np
from numpy import random
from compute.iou import IOU
from os.path import basename, join
from config import config
from config.config import negativeCount, positiveCount, difficultCount, cropImageDir
import cv2
from matplotlib import pyplot as plt


class Crop(object):
    """Cut random 12x12 training crops out of one labelled image.

    Random square windows are compared with the annotated boxes through
    ``IOU`` and saved as negative (0), positive (1) or difficult (2)
    samples until the quotas ``negativeCount``, ``positiveCount`` and
    ``difficultCount`` are met. Every saved crop is also appended, with its
    normalised box offsets, to ``labels.txt`` under ``cropImageDir``.

    NOTE(review): the loop has no attempt limit, and a crop is at most half
    of the shorter image side, so a face much larger than that may never
    reach the positive IoU - confirm this cannot happen with the data used.
    """

    def __init__(self, label: Label, prefix='12'):
        self.prefix = prefix
        self.label = label
        self.image = loadImage(label.name)
        self.basename = basename(label.name)
        self.height, self.width, _ = self.image.shape
        self.standardLength = min(self.height, self.width)
        self.boxes = label.boxes()
        self.negativeCounter = 0
        self.positiveCounter = 0
        self.difficultCounter = 0
        # -1 means "nothing pending to record"; see saveBox.
        self.classify = -1
        self.savePath = None
        # Assume negatives only until positiveCheck finds a usable face.
        self.onlyNegative = True
        # Check whether the image qualifies for positive crops.
        self.positiveCheck()
        # Append mode, so the records of all images end up in one file.
        self.file = open(join(cropImageDir, 'labels.txt'), 'a+')

    def crop(self, floor=config.floor, threshold=config.threshold, ceiling=config.ceiling):
        """Sample random windows until every quota is filled.

        A window whose best IoU is below `floor` is a negative, above
        `ceiling` a positive, and between `threshold` and `ceiling` a
        difficult sample. The label file is closed when the method ends,
        including when an error is raised.
        """
        try:
            self._fillQuotas(floor, threshold, ceiling)
        finally:
            self.file.close()

    def _fillQuotas(self, floor, threshold, ceiling):
        """Run the sampling loop of crop()."""
        # Whether a window can intersect a box at all. It cannot when every
        # box is degenerate, which is how face-less images are annotated.
        canOverlap = bool(np.any(
            (self.boxes[:, 2] > self.boxes[:, 0]) & (self.boxes[:, 3] > self.boxes[:, 1])
        ))
        while any([
            self.negativeCounter < negativeCount,
            self.positiveCounter < positiveCount,
            self.difficultCounter < difficultCount
        ]):
            # Integer bound: windows are between 12 px and half the shorter side.
            cropSize = random.randint(12, self.standardLength // 2)
            x1 = random.randint(0, self.width - cropSize)
            y1 = random.randint(0, self.height - cropSize)
            x2 = x1 + cropSize
            y2 = y1 + cropSize
            sampleBox = np.array([[x1, y1, x2, y2]])
            iou = IOU(sampleBox, self.boxes)
            cropImage = self.image[y1:y2, x1:x2, :]
            resizeCorp = cv2.resize(cropImage, (12, 12), interpolation=cv2.INTER_LINEAR)
            max_iou_index = np.argmax(iou)
            max_iou = iou[max_iou_index]
            if (max_iou < floor) and (self.negativeCounter < negativeCount):
                # The last negatives are reserved for windows that touch a
                # face. Skip that requirement when no overlap is possible,
                # otherwise the loop would never end.
                reserved = self.negativeCounter > negativeCount - 5
                if (max_iou == 0) and reserved and canOverlap:
                    continue
                # Set only once the crop is accepted, so a skipped window
                # cannot leave a stale class behind for saveBox.
                self.classify = 0
                self.savePath = join(cropImageDir, self.prefix, 'negative', f'{self.negativeCounter}-{self.basename}')
                self.negativeCounter += 1
                saveImage(resizeCorp, self.savePath)
            elif self.onlyNegative:
                # No usable face: mark the other quotas as filled so the
                # loop ends together with the negatives.
                self.difficultCounter = difficultCount
                self.positiveCounter = positiveCount
                continue
            elif self.distinctBox(self.boxes[max_iou_index]) and (max_iou > threshold):
                if (max_iou > ceiling) and (self.positiveCounter < positiveCount):
                    self.classify = 1
                    self.savePath = join(cropImageDir, self.prefix, 'positive',
                                         f'{self.positiveCounter}-{self.basename}')
                    self.positiveCounter += 1
                    saveImage(resizeCorp, self.savePath)
                elif self.difficultCounter < difficultCount:
                    self.classify = 2
                    self.savePath = join(cropImageDir, self.prefix, 'difficult',
                                         f'{self.difficultCounter}-{self.basename}')
                    self.difficultCounter += 1
                    saveImage(resizeCorp, self.savePath)
                else:
                    continue
                # For a visual check, call
                # self.show(sampleBox[0], self.boxes[max_iou_index]) here.
            else:
                continue
            self.saveBox(sampleBox[0], self.boxes[max_iou_index])

    def positiveCheck(self):
        """Set ``onlyNegative`` when no box has a side longer than 20 px."""
        w = self.boxes[:, 2] - self.boxes[:, 0]
        h = self.boxes[:, 3] - self.boxes[:, 1]
        length = np.maximum(w, h)
        # Without a large enough box only negative samples are generated.
        self.onlyNegative = not np.any(length > 20)

    @staticmethod
    def distinctBox(box):
        """Return True when `box` is large enough to produce positives.

        A box is rejected when both x1 and y1 are negative or when its
        longer side is at most 20 px.
        """
        x1, y1, x2, y2 = box
        if max(x1, y1) < 0:
            return False
        w = x2 - x1
        h = y2 - y1
        return max(w, h) > 20

    def show(self, sample, box):
        """Display the image with `sample` in green and `box` in red."""
        _, ax = plt.subplots()
        ax.imshow(self.image)
        self.rect(ax, sample, 'green')
        self.rect(ax, box, 'red')
        plt.show()

    def saveBox(self, sampleBox, standard):
        """Compute the normalised offsets and append one record.

        The record holds ``savePath``, ``classify`` and the four corner
        offsets ``sampleBox - standard`` divided by the width (x) or the
        height (y) of `standard`. Nothing is written while ``classify`` is
        -1; it is reset to -1 after each record.
        """
        if self.classify == -1:
            return
        sampleBox = np.asarray(sampleBox, dtype=float)
        standard = np.asarray(standard, dtype=float)
        width = standard[2] - standard[0]
        height = standard[3] - standard[1]
        if width > 0 and height > 0:
            scale = np.array([width, height, width, height])
            deltas = (sampleBox - standard) / scale
        else:
            # Degenerate reference box (e.g. the all-zero box of an image
            # without faces): record zero offsets instead of inf/nan.
            deltas = np.zeros(4)
        delta_x1, delta_y1, delta_x2, delta_y2 = (float(v) for v in deltas)
        self.file.write(f'{self.savePath} {self.classify} {delta_x1} {delta_y1} {delta_x2} {delta_y2}\n')
        self.classify = -1
        self.file.flush()

    @staticmethod
    def rect(ax, box, color):
        """Add a rectangle patch for `box` to the axes `ax`."""
        x1, y1, x2, y2 = box
        w = x2 - x1
        h = y2 - y1
        shape = plt.Rectangle((x1, y1), w, h, linewidth=3, color=color)
        ax.add_patch(shape)


if __name__ == '__main__':
    # Crop every image listed in the training labels.
    for item in LabelLoader('../wider/train/labels.txt'):
        Crop(item).crop()



發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章