标注解析
PNET
训练需要对图片进行截取,截取以后才进行归一化定义,这里重新对原来的标注文件进行解析。
原始的标注格式为
0--Parade/0_Parade_marchingband_1_849.jpg
1
449 330 122 149 0 0 0 0 0 0
0--Parade/0_Parade_Parade_0_904.jpg
1
361 98 263 339 0 0 0 0 0 0
我们把单个的图片标记信息定义如下
import numpy as np
class Label(object):
    """Annotation record for one image: its file path plus its boxes.

    Boxes are kept in corner format ``[x1, y1, x2, y2]``.
    """

    def __init__(self, name, prefix='.'):
        # Only the file name of ``name`` is kept; it is re-rooted under ``prefix``.
        self.name = join(prefix, basename(name))
        self.bbox = []

    def addLineBox(self, line: str):
        """Parse one annotation line and store it as a corner-format box.

        The first four whitespace-separated fields are read as ``x y w h``;
        any remaining fields on the line are ignored.
        """
        # split() without an argument tolerates tabs and repeated spaces,
        # which split(' ') turns into empty fields that int() rejects.
        box = np.array([int(field) for field in line.split()[:4]])
        # Convert width/height into the bottom-right corner.
        box[2] += box[0]
        box[3] += box[1]
        self.bbox.append(box)

    def boxes(self):
        """Return every stored box as an ``(N, 4)`` integer array.

        An image without boxes yields shape ``(0, 4)`` so that column
        indexing such as ``boxes[:, 0]`` keeps working.
        """
        return np.array(self.bbox, dtype=int).reshape(-1, 4)
整体文件解析如下
class LabelLoader(object):
    """Parse an annotation file into a sequence of ``Label`` objects.

    The file is a repetition of: one image-path line, one box-count line,
    then one line per box. Supports ``len()``, indexing and iteration.
    """

    def __init__(self, label_path):
        # The ``images`` directory sits next to the label file.
        self.prefix = join(dirname(label_path), 'images')
        self.path = label_path
        self.labels = []
        self._load()

    def _load(self):
        """Read ``self.path`` and fill ``self.labels``; returns ``self``."""
        with open(self.path, 'r') as f:
            # Strip surrounding whitespace and drop blank lines: a trailing
            # empty line would otherwise be parsed as an image name and the
            # following count lookup would run past the end of the file.
            lines = [text for text in (raw.strip() for raw in f) if text]
        cursor = 0
        length = len(lines)
        while cursor < length:
            # Image path line.
            label = Label(lines[cursor], prefix=self.prefix)
            # Box count. A count of 0 is still followed by one all-zero
            # box line, so at least one line is always consumed.
            count = max(1, int(lines[cursor + 1]))
            # Skip the path line and the count line.
            cursor += 2
            for _ in range(count):
                label.addLineBox(lines[cursor])
                cursor += 1
            self.labels.append(label)
        return self

    # Sequence protocol, so the loader can be indexed and iterated directly.
    def __getitem__(self, idx):
        return self.labels[idx]

    def __len__(self):
        return len(self.labels)
使用方式
if __name__ == '__main__':
    # Print every parsed label of the training annotation file.
    loader = LabelLoader('../wider/train/labels.txt')
    for bel in loader:
        print(bel)
图片要求
在此之前,先要强调一下
PNET
自身的一个特色。和
YOLO
等网络不同,它并不一次性的直接进行回归,且不是对每一个featureMap
的像素点进行座标回归。它更像是针对单个像素,而非整张图的像素回归,这个结构特性来源于卷积,更是全卷积的功用之一。
因此,你会看见它针对的只是训练的脸框,而非全图。
详细的功用,在完工之后会进行一次全面的总结和梳理,在此只强调“针对脸框直接回归”这一点。
四种素材
为了更好的进行训练,采用FocalLoss
的思想,总共会生成四种裁剪图
样本类型 | 难易程度 | 判断标准(iou) | 数量(每张图) |
---|---|---|---|
正 | 难 | 0.4 < iou ≤ 0.65 | 加上正易,共20 |
正 | 易 | iou > 0.65 | 加上正难,共20 |
负 | 难 | 0 < iou < 0.3 | 5 |
负 | 易 | iou = 0 | 50 |
其他限制
为了生成更好的样本,负样本也区分难和易,区别在于截取的区域和原来的标记是否相交:相交但 iou 低于下限的为难,完全不相交(iou 为 0)的为易。
尤其,在正样本中,为了取得更好判决素材,还需要去忽略太小的bbox
。
图片剪裁
iou
在基础的
iou
计算之上,我们还需要做额外的条件判断,以区分是否相交这个问题。否则计算出来的
iou
会存在小于0和大于1的情况。
import numpy as np
def area(x1, y1, x2, y2):
    """Return the area of the box spanned by corners (x1, y1) and (x2, y2)."""
    width = x2 - x1
    height = y2 - y1
    return width * height


def IOU(sample, boxes):
    """Intersection-over-union of ``sample`` against each row of ``boxes``.

    Both arguments are indexed as 2-D arrays whose first four columns are
    ``x1, y1, x2, y2``. Pairs that do not overlap get an IoU of exactly 0.
    """
    # Corners of the overlap rectangle.
    left = np.maximum(sample[:, 0], boxes[:, 0])
    top = np.maximum(sample[:, 1], boxes[:, 1])
    right = np.minimum(sample[:, 2], boxes[:, 2])
    bottom = np.minimum(sample[:, 3], boxes[:, 3])

    area_inner = area(left, top, right, bottom)
    area_boxes = area(boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3])
    area_sample = area(sample[:, 0], sample[:, 1], sample[:, 2], sample[:, 3])
    # The small constant keeps the denominator away from zero.
    ious = area_inner / (area_boxes + area_sample - area_inner + 0.0000001)

    # Without this mask a disjoint pair could produce a value outside [0, 1].
    disjoint = np.logical_or(right <= left, bottom <= top)
    ious[disjoint] = 0
    return ious
裁剪
class Crop(object):
    """Cut random square patches out of one labelled image as 12x12 samples.

    Each patch is classified by its best IoU against the labelled boxes and
    written to ``cropImageDir/<prefix>/negative|positive|difficult/``.
    This revision only writes image patches; it keeps no label file.
    """

    def __init__(self, label: Label, prefix='12'):
        self.prefix = prefix
        self.label = label
        self.image = loadImage(label.name)
        self.basename = basename(label.name)
        self.height, self.width, _ = self.image.shape
        self.standardLength = min(self.height, self.width)
        self.boxes = label.boxes()
        # Images whose boxes are all too small (longest side <= 20 px, which
        # also covers an all-zero placeholder box) can only give negatives.
        longest = np.maximum(self.boxes[:, 2] - self.boxes[:, 0],
                             self.boxes[:, 3] - self.boxes[:, 1])
        self.onlyNegative = not np.any(longest > 20)
        self.negativeCounter = 0
        self.positiveCounter = 0
        self.difficultCounter = 0
        # Type of the most recently saved patch (0 negative, 1 positive,
        # 2 difficult) and the path it was written to.
        self.classify = -1
        self.savePath = None

    def crop(self, floor=config.floor, threshold=config.threshold, ceiling=config.ceiling):
        """Sample patches until every per-image quota is met.

        :param floor: patches whose best IoU is below this are negatives.
        :param threshold: minimum best IoU for a positive/difficult patch.
        :param ceiling: best IoU above this makes a patch a positive.

        NOTE(review): there is no cap on the number of attempts; an image
        whose faces cannot reach the IoU targets may still loop for long.
        """
        # Exclusive upper bound of the patch size, as an explicit integer.
        upperSize = self.standardLength // 2
        if upperSize <= 12:
            # Image too small to draw a patch of at least 12 px.
            return
        while (self.negativeCounter < negativeCount
               or self.positiveCounter < positiveCount
               or self.difficultCounter < difficultCount):
            cropSize = random.randint(12, upperSize)
            x1 = random.randint(0, self.width - cropSize)
            y1 = random.randint(0, self.height - cropSize)
            x2 = x1 + cropSize
            y2 = y1 + cropSize
            sampleBox = np.array([[x1, y1, x2, y2]])
            iou = IOU(sampleBox, self.boxes)
            max_iou_index = np.argmax(iou)
            max_iou = iou[max_iou_index]
            if (max_iou < floor) and (self.negativeCounter < negativeCount):
                # The last slots are reserved for negatives that touch a box.
                # Skip that rule when the image has no usable face, otherwise
                # every patch has IoU 0 and the loop never ends.
                if ((max_iou == 0) and (not self.onlyNegative)
                        and (self.negativeCounter > negativeCount - 5)):
                    continue
                self.classify = 0
                self._saveCrop(sampleBox[0], 'negative', self.negativeCounter)
                self.negativeCounter += 1
            elif self.onlyNegative:
                # No positive can ever be produced: mark those quotas as met.
                self.difficultCounter = difficultCount
                self.positiveCounter = positiveCount
            elif self.distinctBox(self.boxes[max_iou_index]) and (max_iou > threshold):
                if (max_iou > ceiling) and (self.positiveCounter < positiveCount):
                    self.classify = 1
                    self._saveCrop(sampleBox[0], 'positive', self.positiveCounter)
                    self.positiveCounter += 1
                elif self.difficultCounter < difficultCount:
                    self.classify = 2
                    self._saveCrop(sampleBox[0], 'difficult', self.difficultCounter)
                    self.difficultCounter += 1
                # For visual checking:
                # self.show(sampleBox[0], self.boxes[max_iou_index])

    def _saveCrop(self, sampleBox, folder, index):
        """Resize the patch to 12x12 and write it into ``folder``."""
        x1, y1, x2, y2 = sampleBox
        patch = cv2.resize(self.image[y1:y2, x1:x2, :], (12, 12),
                           interpolation=cv2.INTER_LINEAR)
        self.savePath = join(cropImageDir, self.prefix, folder, f'{index}-{self.basename}')
        saveImage(patch, self.savePath)

    def distinctBox(self, box):
        """Return whether ``box`` is large enough to serve as a positive target."""
        x1, y1, x2, y2 = box
        if max(x1, y1) < 0:
            return False
        w = x2 - x1
        h = y2 - y1
        valid = max(w, h) > 20
        # A single undersized box means the image can only give negatives.
        if (len(self.boxes) == 1) and (not valid):
            self.onlyNegative = True
        return valid

    def show(self, sample, box):
        """Display the image with the patch (green) and the labelled box (red)."""
        _, ax = plt.subplots()
        ax.imshow(self.image)
        # ``sample`` is already a single [x1, y1, x2, y2] box.
        self.rect(ax, sample, 'green')
        self.rect(ax, box, 'red')
        plt.show()

    @staticmethod
    def rect(ax, box, color):
        """Draw ``box`` on ``ax`` as a rectangle of the given colour."""
        x1, y1, x2, y2 = box
        w = x2 - x1
        h = y2 - y1
        shape = plt.Rectangle((x1, y1), w, h, linewidth=3, color=color)
        ax.add_patch(shape)
裁剪方法针对单个对象,逻辑浓缩,避免混乱,调用方式如下
if __name__ == '__main__':
    # Generate the patches for every labelled training image.
    loader = LabelLoader('../wider/train/labels.txt')
    for bel in loader:
        cropper = Crop(bel)
        cropper.crop()
标记数据
座标回归
为了进行框图回归,还需要将原来的座标进行归一化的格式转换。
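数据格式不再是绝对座标 $(x_1, y_1, x_2, y_2)$,而是直接针对位置进行偏移预测。
归一化的尺度,还是以原来标注框的尺度为准,具体的计算式为
$$\Delta x_1 = \frac{x_1^{s} - x_1^{g}}{w^{g}},\quad \Delta y_1 = \frac{y_1^{s} - y_1^{g}}{h^{g}},\quad \Delta x_2 = \frac{x_2^{s} - x_2^{g}}{w^{g}},\quad \Delta y_2 = \frac{y_2^{s} - y_2^{g}}{h^{g}}$$
其中上标 $s$ 表示截取框,上标 $g$ 表示原标注框,$w^{g} = x_2^{g} - x_1^{g}$,$h^{g} = y_2^{g} - y_1^{g}$。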
类型标记
为了更好的控制数据,对于数据样本的类型也需要进行区分
样本类型 | 难易程度 | 标记类型 |
---|---|---|
正 | 难 | 2 |
正 | 易 | 1 |
负 | 难 | 0 |
负 | 易 | 0 |
这样,在训练的时候,就能够更好的进行控制了。
文件记录
from entity.label import Label, LabelLoader
from utils.image import loadImage, saveImage
import numpy as np
from numpy import random
from compute.iou import IOU
from os.path import basename, join
from config.config import negativeCount, positiveCount, difficultCount, cropImageDir
import cv2
from matplotlib import pyplot as plt
class Crop(object):
    """Cut random square patches out of one labelled image as 12x12 samples.

    Each patch is classified by its best IoU against the labelled boxes,
    written to ``cropImageDir/<prefix>/negative|positive|difficult/`` and
    recorded, with its normalised box offsets, in ``cropImageDir/labels.txt``.
    """

    def __init__(self, label: 'Label', prefix='12'):
        self.prefix = prefix
        self.label = label
        self.image = loadImage(label.name)
        self.basename = basename(label.name)
        self.height, self.width, _ = self.image.shape
        self.standardLength = min(self.height, self.width)
        self.boxes = label.boxes()
        # Images whose boxes are all too small (longest side <= 20 px, which
        # also covers an all-zero placeholder box) can only give negatives.
        longest = np.maximum(self.boxes[:, 2] - self.boxes[:, 0],
                             self.boxes[:, 3] - self.boxes[:, 1])
        self.onlyNegative = not np.any(longest > 20)
        self.negativeCounter = 0
        self.positiveCounter = 0
        self.difficultCounter = 0
        # Type of the patch waiting to be recorded; -1 means "nothing pending".
        self.classify = -1
        self.savePath = None
        # Append: one Crop is created per image, so 'w' would wipe the
        # records of every previously processed image.
        self.file = open(join(cropImageDir, 'labels.txt'), 'a')

    def crop(self, floor=0.3, threshold=0.4, ceiling=0.65):
        """Sample patches until every per-image quota is met, then close the label file.

        :param floor: patches whose best IoU is below this are negatives.
        :param threshold: minimum best IoU for a positive/difficult patch.
        :param ceiling: best IoU above this makes a patch a positive.

        NOTE(review): there is no cap on the number of attempts; an image
        whose faces cannot reach the IoU targets may still loop for long.
        """
        try:
            # Exclusive upper bound of the patch size, as an explicit integer.
            upperSize = self.standardLength // 2
            if upperSize <= 12:
                # Image too small to draw a patch of at least 12 px.
                return
            while (self.negativeCounter < negativeCount
                   or self.positiveCounter < positiveCount
                   or self.difficultCounter < difficultCount):
                cropSize = random.randint(12, upperSize)
                x1 = random.randint(0, self.width - cropSize)
                y1 = random.randint(0, self.height - cropSize)
                x2 = x1 + cropSize
                y2 = y1 + cropSize
                sampleBox = np.array([[x1, y1, x2, y2]])
                iou = IOU(sampleBox, self.boxes)
                max_iou_index = np.argmax(iou)
                max_iou = iou[max_iou_index]
                if (max_iou < floor) and (self.negativeCounter < negativeCount):
                    # The last slots are reserved for negatives that touch a
                    # box. Skip that rule when the image has no usable face,
                    # otherwise every patch has IoU 0 and the loop never ends.
                    if ((max_iou == 0) and (not self.onlyNegative)
                            and (self.negativeCounter > negativeCount - 5)):
                        continue
                    # Set only once the patch is really kept, so a skipped
                    # patch cannot leave a stale type behind.
                    self.classify = 0
                    self._saveCrop(sampleBox[0], 'negative', self.negativeCounter)
                    self.negativeCounter += 1
                elif self.onlyNegative:
                    # No positive can ever be produced: mark those quotas as met.
                    self.difficultCounter = difficultCount
                    self.positiveCounter = positiveCount
                    continue
                elif self.distinctBox(self.boxes[max_iou_index]) and (max_iou > threshold):
                    if (max_iou > ceiling) and (self.positiveCounter < positiveCount):
                        self.classify = 1
                        self._saveCrop(sampleBox[0], 'positive', self.positiveCounter)
                        self.positiveCounter += 1
                    elif self.difficultCounter < difficultCount:
                        self.classify = 2
                        self._saveCrop(sampleBox[0], 'difficult', self.difficultCounter)
                        self.difficultCounter += 1
                    else:
                        continue
                    # For visual checking; blocks until the window is closed.
                    self.show(sampleBox[0], self.boxes[max_iou_index])
                else:
                    continue
                # Record the patch that was just saved.
                self.saveBox(sampleBox[0], self.boxes[max_iou_index])
        finally:
            # Close the label file even when sampling fails part-way.
            self.file.close()

    def _saveCrop(self, sampleBox, folder, index):
        """Resize the patch to 12x12 and write it into ``folder``."""
        x1, y1, x2, y2 = sampleBox
        patch = cv2.resize(self.image[y1:y2, x1:x2, :], (12, 12),
                           interpolation=cv2.INTER_LINEAR)
        self.savePath = join(cropImageDir, self.prefix, folder, f'{index}-{self.basename}')
        saveImage(patch, self.savePath)

    @staticmethod
    def distinctBox(box):
        """Return whether ``box`` is large enough to serve as a positive target."""
        x1, y1, x2, y2 = box
        if max(x1, y1) < 0:
            return False
        w = x2 - x1
        h = y2 - y1
        return max(w, h) > 20

    def show(self, sample, box):
        """Display the image with the patch (green) and the labelled box (red)."""
        _, ax = plt.subplots()
        ax.imshow(self.image)
        self.rect(ax, sample, 'green')
        self.rect(ax, box, 'red')
        plt.show()

    def saveBox(self, sampleBox, standard):
        """Append one record ``path type dx1 dy1 dx2 dy2`` to the label file.

        The offsets are ``sampleBox - standard``, divided by the width (x) or
        height (y) of ``standard``. Nothing is written when no patch is pending.
        """
        if self.classify == -1:
            return
        sampleBox = np.asarray(sampleBox)
        standard = np.asarray(standard)
        width = standard[2] - standard[0]
        height = standard[3] - standard[1]
        if width > 0 and height > 0:
            offset = sampleBox - standard
            delta_x1 = offset[0] / width
            delta_y1 = offset[1] / height
            delta_x2 = offset[2] / width
            delta_y2 = offset[3] / height
        else:
            # Degenerate reference box (e.g. the all-zero placeholder of an
            # image without faces): write zeros instead of inf/nan.
            delta_x1 = delta_y1 = delta_x2 = delta_y2 = 0.0
        self.file.write(f'{self.savePath} {self.classify} {delta_x1} {delta_y1} {delta_x2} {delta_y2}\n')
        self.classify = -1

    @staticmethod
    def rect(ax, box, color):
        """Draw ``box`` on ``ax`` as a rectangle of the given colour."""
        x1, y1, x2, y2 = box
        w = x2 - x1
        h = y2 - y1
        shape = plt.Rectangle((x1, y1), w, h, linewidth=3, color=color)
        ax.add_patch(shape)
if __name__ == '__main__':
    # Generate the full data set: one Crop run per labelled image.
    loader = LabelLoader('../wider/train/labels.txt')
    for bel in loader:
        cropper = Crop(bel)
        cropper.crop()
配置
def crop(
        self,
        floor=config.floor,
        threshold=config.threshold,
        ceiling=config.ceiling,
):
    # Signature excerpt only: the three IoU thresholds take their defaults
    # from the config module.
    ...
# Settings read by the crop step.
# IoU thresholds: a patch whose best IoU is below ``floor`` is a negative,
# one above ``threshold`` may be kept as positive/difficult, and one above
# ``ceiling`` counts as a positive.
floor = 0.3
threshold = 0.4
ceiling = 0.65
# Number of patches of each type to generate per image.
negativeCount = 1
positiveCount = 1
difficultCount = 1
# Root directory the patches and labels.txt are written under.
cropImageDir = '../crop'
在config
文件中直接配置,就可以完全的掌握阈值、配比了。
修复
完善如下要点
- 无正样本生成条件时自动结束:如果图片中完全没脸,或者都是小脸,原来会无限循环
- 文本追加记录:原来文件会进行重写,现在统一进行直接追加
from entity.label import Label, LabelLoader
from utils.image import loadImage, saveImage
import numpy as np
from numpy import random
from compute.iou import IOU
from os.path import basename, join
from config import config
from config.config import negativeCount, positiveCount, difficultCount, cropImageDir
import cv2
from matplotlib import pyplot as plt
class Crop(object):
    """Cut random square patches out of one labelled image as 12x12 samples.

    Each patch is classified by its best IoU against the labelled boxes,
    written to ``cropImageDir/<prefix>/negative|positive|difficult/`` and
    appended, with its normalised box offsets, to ``cropImageDir/labels.txt``.
    """

    def __init__(self, label: Label, prefix='12'):
        self.prefix = prefix
        self.label = label
        self.image = loadImage(label.name)
        self.basename = basename(label.name)
        self.height, self.width, _ = self.image.shape
        self.standardLength = min(self.height, self.width)
        self.boxes = label.boxes()
        self.negativeCounter = 0
        self.positiveCounter = 0
        self.difficultCounter = 0
        # Type of the patch waiting to be recorded; -1 means "nothing pending".
        self.classify = -1
        self.savePath = None
        # Assume no usable face until the boxes have been checked.
        self.onlyNegative = True
        # Check whether any box qualifies for positive patches.
        self.positiveCheck()
        # Append mode: records of all images accumulate in one file.
        self.file = open(join(cropImageDir, 'labels.txt'), 'a+')

    def crop(self, floor=config.floor, threshold=config.threshold, ceiling=config.ceiling):
        """Sample patches until every per-image quota is met, then close the label file.

        :param floor: patches whose best IoU is below this are negatives.
        :param threshold: minimum best IoU for a positive/difficult patch.
        :param ceiling: best IoU above this makes a patch a positive.

        NOTE(review): there is no cap on the number of attempts; an image
        whose faces cannot reach the IoU targets may still loop for long.
        """
        try:
            # Exclusive upper bound of the patch size, as an explicit integer.
            upperSize = self.standardLength // 2
            if upperSize <= 12:
                # Image too small to draw a patch of at least 12 px.
                return
            while (self.negativeCounter < negativeCount
                   or self.positiveCounter < positiveCount
                   or self.difficultCounter < difficultCount):
                cropSize = random.randint(12, upperSize)
                x1 = random.randint(0, self.width - cropSize)
                y1 = random.randint(0, self.height - cropSize)
                x2 = x1 + cropSize
                y2 = y1 + cropSize
                sampleBox = np.array([[x1, y1, x2, y2]])
                iou = IOU(sampleBox, self.boxes)
                max_iou_index = np.argmax(iou)
                max_iou = iou[max_iou_index]
                if (max_iou < floor) and (self.negativeCounter < negativeCount):
                    # The last slots are reserved for negatives that touch a
                    # box. Skip that rule when the image has no usable face,
                    # otherwise every patch has IoU 0 and the loop never ends.
                    if ((max_iou == 0) and (not self.onlyNegative)
                            and (self.negativeCounter > negativeCount - 5)):
                        continue
                    # Set only once the patch is really kept, so a skipped
                    # patch cannot leave a stale type behind.
                    self.classify = 0
                    self._saveCrop(sampleBox[0], 'negative', self.negativeCounter)
                    self.negativeCounter += 1
                elif self.onlyNegative:
                    # No positive can ever be produced: mark those quotas as met.
                    self.difficultCounter = difficultCount
                    self.positiveCounter = positiveCount
                    continue
                elif self.distinctBox(self.boxes[max_iou_index]) and (max_iou > threshold):
                    if (max_iou > ceiling) and (self.positiveCounter < positiveCount):
                        self.classify = 1
                        self._saveCrop(sampleBox[0], 'positive', self.positiveCounter)
                        self.positiveCounter += 1
                    elif self.difficultCounter < difficultCount:
                        self.classify = 2
                        self._saveCrop(sampleBox[0], 'difficult', self.difficultCounter)
                        self.difficultCounter += 1
                    else:
                        continue
                    # For visual checking:
                    # self.show(sampleBox[0], self.boxes[max_iou_index])
                else:
                    continue
                # Record the patch that was just saved.
                self.saveBox(sampleBox[0], self.boxes[max_iou_index])
        finally:
            # Close the label file even when sampling fails part-way.
            self.file.close()

    def _saveCrop(self, sampleBox, folder, index):
        """Resize the patch to 12x12 and write it into ``folder``."""
        x1, y1, x2, y2 = sampleBox
        patch = cv2.resize(self.image[y1:y2, x1:x2, :], (12, 12),
                           interpolation=cv2.INTER_LINEAR)
        self.savePath = join(cropImageDir, self.prefix, folder, f'{index}-{self.basename}')
        saveImage(patch, self.savePath)

    def positiveCheck(self):
        """Set ``onlyNegative`` when no box has a side longer than 20 px."""
        w = self.boxes[:, 2] - self.boxes[:, 0]
        h = self.boxes[:, 3] - self.boxes[:, 1]
        length = np.maximum(w, h)
        # Without a large enough box only negative patches are generated.
        self.onlyNegative = not np.any(length > 20)

    # Whether a single box meets the positive-target standard.
    @staticmethod
    def distinctBox(box):
        """Return whether ``box`` is large enough to serve as a positive target."""
        x1, y1, x2, y2 = box
        if max(x1, y1) < 0:
            return False
        w = x2 - x1
        h = y2 - y1
        return max(w, h) > 20

    def show(self, sample, box):
        """Display the image with the patch (green) and the labelled box (red)."""
        _, ax = plt.subplots()
        ax.imshow(self.image)
        self.rect(ax, sample, 'green')
        self.rect(ax, box, 'red')
        plt.show()

    # Normalised offset computation and text storage.
    def saveBox(self, sampleBox, standard):
        """Append one record ``path type dx1 dy1 dx2 dy2`` to the label file.

        The offsets are ``sampleBox - standard``, divided by the width (x) or
        height (y) of ``standard``. Nothing is written when no patch is pending.
        """
        if self.classify == -1:
            return
        sampleBox = np.asarray(sampleBox)
        standard = np.asarray(standard)
        width = standard[2] - standard[0]
        height = standard[3] - standard[1]
        if width > 0 and height > 0:
            offset = sampleBox - standard
            delta_x1 = offset[0] / width
            delta_y1 = offset[1] / height
            delta_x2 = offset[2] / width
            delta_y2 = offset[3] / height
        else:
            # Degenerate reference box (e.g. the all-zero placeholder of an
            # image without faces): write zeros instead of inf/nan.
            delta_x1 = delta_y1 = delta_x2 = delta_y2 = 0.0
        self.file.write(f'{self.savePath} {self.classify} {delta_x1} {delta_y1} {delta_x2} {delta_y2}\n')
        self.classify = -1
        self.file.flush()

    @staticmethod
    def rect(ax, box, color):
        """Draw ``box`` on ``ax`` as a rectangle of the given colour."""
        x1, y1, x2, y2 = box
        w = x2 - x1
        h = y2 - y1
        shape = plt.Rectangle((x1, y1), w, h, linewidth=3, color=color)
        ax.add_patch(shape)
if __name__ == '__main__':
    # Run the crop step over every labelled training image.
    loader = LabelLoader('../wider/train/labels.txt')
    for item in loader:
        cropper = Crop(item)
        cropper.crop()