該函數是yolov3數據預處理函數,將(x1,y1,x2,y2,class_id)的值轉換成label格式。
x1,y1是框左上角的座標,x2,y2是框右下角的座標。class_id 是類別對應的數字。
label格式爲 [l][batch, grid, grid, k, xywh, 置信度, 20個類別的c值]。其中20個c值只有一個爲1,其餘均爲0;xy爲中心點座標,wh爲寬高,xywh均爲小於1的歸一化值。l代表3個不同尺度的網格:13*13、26*26、52*52。這個函數的作用就是把每張圖片上的每個框對應到3個尺度、共9種anchor大小中的某一個網格上,也就是由該網格負責預測該框。k是anchor在該尺度內的序號,它與anchor編號的對應關係爲 l=0: [0:6, 1:7, 2:8],l=1: [0:3, 1:4, 2:5],l=2: [0:0, 1:1, 2:2],即在某一確定尺度下(已經由l決定),三種anchor大小中的某一種。
import numpy as np
# Sample batch: 2 images x 4 boxes; every row is (x1, y1, x2, y2, class_id)
# with (x1, y1) the top-left and (x2, y2) the bottom-right corner in pixels.
true_boxes = np.array([
    [
        [263, 211, 324, 339, 8],
        [165, 264, 253, 372, 8],
        [241, 194, 295, 299, 8],
        [150, 141, 229, 284, 14],
    ],
    [
        [69, 172, 270, 330, 12],
        [150, 141, 229, 284, 14],
        [241, 194, 295, 299, 8],
        [285, 201, 327, 331, 14],
    ],
])
print(true_boxes.shape)

# Network input size, given as (height, width).
input_shape = (416, 416)

# Anchor sizes as [width, height] pairs.
anchors = [
    [10, 13], [16, 30], [33, 23],
    [30, 61], [62, 45], [59, 119],
    [116, 90], [156, 198], [373, 326],
]

num_classes = 20
def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes, verbose=True):
    '''Preprocess true boxes to training input format.

    Every ground-truth box is matched to the anchor whose width/height has
    the highest IoU with the box (both centred on the origin). The box is
    then written into the grid cell that contains its centre, on the output
    layer whose anchor mask contains that anchor.

    Parameters
    ----------
    true_boxes: array-like, shape=(m, T, 5)
        m is the batch size (images per batch), T the number of box slots per
        image. Each row is (x_min, y_min, x_max, y_max, class_id) in absolute
        pixels relative to input_shape. Rows whose width is not positive are
        treated as padding and skipped, wherever they appear in the row list.
        The input is copied; the caller's data is not modified.
    input_shape: array-like, hw, multiples of 32
    anchors: array-like, shape=(N, 2), wh
    num_classes: integer
    verbose: bool, default True
        When True, print the intermediate arrays to stdout (the tracing this
        walkthrough script relies on). Pass False to run silently.

    Returns
    -------
    y_true: list of float32 arrays, one per output layer, each of shape
        (m, grid_h, grid_w, anchors_per_layer, 5 + num_classes).
        [..., 0:4] holds x, y, w, h divided by the input width/height,
        [..., 4] is set to 1 where a box was assigned, and [..., 5 + class_id]
        is set to 1 for the box's class. Everything else is 0.

    Raises
    ------
    AssertionError
        If any class id is not smaller than num_classes.
    '''
    # All tracing goes through ``log`` so it can be switched off in one place.
    log = print if verbose else (lambda *args, **kwargs: None)

    # Convert first so nested lists are accepted; np.array makes a copy, which
    # keeps the in-place normalisation below away from the caller's array.
    true_boxes = np.array(true_boxes, dtype='float32')
    assert (true_boxes[..., 4] < num_classes).all(), \
        'class id must be less than num_classes'

    num_layers = len(anchors) // 3  # 3 output layers for 9 anchors, otherwise the 2-layer (tiny) layout
    # NOTE(review): in the 2-layer mask, [1, 2, 3] shares anchor 3 with
    # [3, 4, 5] and anchor 0 is never used. Kept unchanged on purpose, since
    # the loss/model code may rely on the same mask -- confirm before changing.
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]
    input_shape = np.array(input_shape, dtype='int32')

    # Corner format -> centre (floored to whole pixels) and width/height.
    boxes_xy = (true_boxes[..., 0:2] + true_boxes[..., 2:4]) // 2
    log('boxes_xy:\n%s' % boxes_xy)
    boxes_wh = true_boxes[..., 2:4] - true_boxes[..., 0:2]
    log('boxes_wh:\n%s' % boxes_wh)
    log(boxes_wh.shape)

    # input_shape is (h, w) while boxes are (x, y), hence the [::-1] reversal
    # to divide x by the width and y by the height.
    true_boxes[..., 0:2] = boxes_xy / input_shape[::-1]
    true_boxes[..., 2:4] = boxes_wh / input_shape[::-1]
    log('true_boxes:\n%s' % true_boxes)

    m = true_boxes.shape[0]  # number of images in the batch
    # Grid size per layer: input size divided by the strides 32, 16, 8.
    grid_shapes = [input_shape // {0: 32, 1: 16, 2: 8}[l] for l in range(num_layers)]
    log('grid_shapes:\n%s' % grid_shapes)
    y_true = [np.zeros((m, grid_shapes[l][0], grid_shapes[l][1], len(anchor_mask[l]), 5 + num_classes),
                       dtype='float32') for l in range(num_layers)]

    # Expand dim to apply broadcasting: anchors become (1, N, 2).
    anchors = np.expand_dims(anchors, 0)
    log(anchors.shape)
    log('anchors:\n%s' % anchors)
    anchor_maxes = anchors / 2.
    log('anchor_maxes:\n%s' % anchor_maxes)
    anchor_mins = -anchor_maxes
    log('anchor_mins:\n%s' % anchor_mins)

    valid_mask = boxes_wh[..., 0] > 0  # shape (m, T): True for real boxes
    log('valid_mask:\n%s' % valid_mask)

    # Process every image in the batch.
    for b in range(m):
        # Remember where the valid rows sit in the unfiltered box list so the
        # filtered position can be mapped back when reading true_boxes.
        valid_idx = np.flatnonzero(valid_mask[b])
        # Discard zero rows.
        wh = boxes_wh[b, valid_mask[b]]
        log('----------')
        log(valid_mask[b])
        log(wh)
        if len(wh) == 0:
            continue
        # Expand dim to apply broadcasting: boxes become (n_valid, 1, 2).
        wh = np.expand_dims(wh, -2)
        log(wh)
        log(wh.shape)
        log('wh:\n%s' % wh)
        box_maxes = wh / 2.
        box_mins = -box_maxes
        log('box_mins:\n%s' % box_mins)

        # IoU of every valid box against every anchor, both centred on the
        # origin: (n_valid, 1, 2) against (1, N, 2) broadcasts to (n_valid, N, 2).
        intersect_mins = np.maximum(box_mins, anchor_mins)
        log(intersect_mins)
        intersect_maxes = np.minimum(box_maxes, anchor_maxes)
        intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.)
        intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
        box_area = wh[..., 0] * wh[..., 1]
        anchor_area = anchors[..., 0] * anchors[..., 1]
        iou = intersect_area / (box_area + anchor_area - intersect_area)
        log('iou:\n%s' % iou)

        # Find best anchor for each true box.
        best_anchor = np.argmax(iou, axis=-1)
        log('best_anchor:\n%s' % best_anchor)
        log(
            '--------------------------------------------------------------------------------------------------------')

        for pos, n in enumerate(best_anchor):
            t = valid_idx[pos]  # row of this box in the unfiltered true_boxes[b]
            for l in range(num_layers):
                if n in anchor_mask[l]:
                    # Column (i) and row (j) of the cell containing the centre.
                    i = np.floor(true_boxes[b, t, 0] * grid_shapes[l][1]).astype('int32')
                    j = np.floor(true_boxes[b, t, 1] * grid_shapes[l][0]).astype('int32')
                    k = anchor_mask[l].index(n)  # anchor slot within this layer
                    c = true_boxes[b, t, 4].astype('int32')
                    y_true[l][b, j, i, k, 0:4] = true_boxes[b, t, 0:4]
                    y_true[l][b, j, i, k, 4] = 1
                    y_true[l][b, j, i, k, 5 + c] = 1
                    log(i, j, k, c)
    return y_true
# In short: every box of every image is assigned to one grid cell on the layer
# of the matching scale; the stored x, y, w, h values are normalised (< 1).
# The function takes a batch of m images with T boxes each and writes those
# boxes into y_true, which is laid out as
# [layer 0/1/2][batch, grid_row, grid_col, k, x, y, w, h, confidence, 20 class flags].
preprocess_true_boxes(
    true_boxes=true_boxes,
    input_shape=input_shape,
    anchors=anchors,
    num_classes=num_classes,
)
# Open question from the author: what is k?
# k is 0, 1 or 2: the slot of the chosen anchor inside the layer's anchor_mask
# entry (k = anchor_mask[l].index(n)); the scale itself is selected by l.
運行結果(逐張圖片的迴圈輸出部分僅包含第一張圖片 b=0):
(2, 4, 5)
boxes_xy:
[[[293. 275.]
[209. 318.]
[268. 246.]
[189. 212.]]
[[169. 251.]
[189. 212.]
[268. 246.]
[306. 266.]]]
boxes_wh:
[[[ 61. 128.]
[ 88. 108.]
[ 54. 105.]
[ 79. 143.]]
[[201. 158.]
[ 79. 143.]
[ 54. 105.]
[ 42. 130.]]]
(2, 4, 2)
true_boxes:
[[[ 0.7043269 0.6610577 0.14663461 0.30769232 8. ]
[ 0.50240386 0.7644231 0.21153846 0.2596154 8. ]
[ 0.6442308 0.59134614 0.1298077 0.25240386 8. ]
[ 0.45432693 0.50961536 0.18990384 0.34375 14. ]]
[[ 0.40625 0.60336536 0.48317307 0.37980768 12. ]
[ 0.45432693 0.50961536 0.18990384 0.34375 14. ]
[ 0.6442308 0.59134614 0.1298077 0.25240386 8. ]
[ 0.7355769 0.6394231 0.10096154 0.3125 14. ]]]
grid_shapes:
[array([13, 13], dtype=int32), array([26, 26], dtype=int32), array([52, 52], dtype=int32)]
(1, 9, 2)
anchors:
[[[ 10 13]
[ 16 30]
[ 33 23]
[ 30 61]
[ 62 45]
[ 59 119]
[116 90]
[156 198]
[373 326]]]
anchor_maxes:
[[[ 5. 6.5]
[ 8. 15. ]
[ 16.5 11.5]
[ 15. 30.5]
[ 31. 22.5]
[ 29.5 59.5]
[ 58. 45. ]
[ 78. 99. ]
[186.5 163. ]]]
anchor_mins:
[[[ -5. -6.5]
[ -8. -15. ]
[ -16.5 -11.5]
[ -15. -30.5]
[ -31. -22.5]
[ -29.5 -59.5]
[ -58. -45. ]
[ -78. -99. ]
[-186.5 -163. ]]]
valid_mask:
[[ True True True True]
[ True True True True]]
----------
[ True True True True]
[[ 61. 128.]
[ 88. 108.]
[ 54. 105.]
[ 79. 143.]]
[[[ 61. 128.]]
[[ 88. 108.]]
[[ 54. 105.]]
[[ 79. 143.]]]
(4, 1, 2)
wh:
[[[ 61. 128.]]
[[ 88. 108.]]
[[ 54. 105.]]
[[ 79. 143.]]]
box_mins:
[[[-30.5 -64. ]]
[[-44. -54. ]]
[[-27. -52.5]]
[[-39.5 -71.5]]]
[[[ -5. -6.5]
[ -8. -15. ]
[-16.5 -11.5]
[-15. -30.5]
[-30.5 -22.5]
[-29.5 -59.5]
[-30.5 -45. ]
[-30.5 -64. ]
[-30.5 -64. ]]
[[ -5. -6.5]
[ -8. -15. ]
[-16.5 -11.5]
[-15. -30.5]
[-31. -22.5]
[-29.5 -54. ]
[-44. -45. ]
[-44. -54. ]
[-44. -54. ]]
[[ -5. -6.5]
[ -8. -15. ]
[-16.5 -11.5]
[-15. -30.5]
[-27. -22.5]
[-27. -52.5]
[-27. -45. ]
[-27. -52.5]
[-27. -52.5]]
[[ -5. -6.5]
[ -8. -15. ]
[-16.5 -11.5]
[-15. -30.5]
[-31. -22.5]
[-29.5 -59.5]
[-39.5 -45. ]
[-39.5 -71.5]
[-39.5 -71.5]]]
iou:
[[0.01664959 0.06147541 0.09720799 0.234375 0.34954794 0.89920594
0.43031823 0.25278425 0.06421158]
[0.01367845 0.05050505 0.07986111 0.19255051 0.29356061 0.62759775
0.65868263 0.30769231 0.07815918]
[0.02292769 0.08465608 0.13386243 0.32275132 0.40298507 0.80757727
0.432 0.18356643 0.04662906]
[0.01150748 0.04248916 0.06718598 0.16198991 0.24696822 0.62149243
0.48608737 0.36574074 0.09290449]]
best_anchor:
[5 6 5 5]
--------------------------------------------------------------------------------------------------------
18 17 2 8
6 9 0 8
16 15 2 8
11 13 2 14
其中一張圖片(第二張,b=1)歸一化後的 true_boxes:
[[ 0.40625 0.60336536 0.48317307 0.37980768 12. ]
[ 0.45432693 0.50961536 0.18990384 0.34375 14. ]
[ 0.6442308 0.59134614 0.1298077 0.25240386 8. ]
[ 0.7355769 0.6394231 0.10096154 0.3125 14. ]]