yolov3之preprocess_true_boxes

該函數是yolov3數據預處理函數,將(x1,y1,x2,y2,class_id)的值轉換成label格式。

x1,y1是框左上角的座標,x2,y2是框右下角點的座標。class_id 類別對應的數字。

label格式爲 [l][batch, grid, grid, k, xywh, 置信度, 20個類別的c值]。其中 20 個 c 值是 one-hot 編碼:只有一個 c 值爲 1,其餘均爲 0;xy 爲中心點座標,wh 爲寬高,xywh 均爲歸一化後 <1 的值。l 代表 3 個不同尺度的網格:13*13、26*26、52*52。這個函數的作用就是把每張圖片上的每個框,對應到 3 個尺度、共 9 種 anchor 大小中的某一個網格上,也就是由該網格負責預測該框。k 代表在某一確定尺度下(已經由 l 決定)三個 anchor 大小中的哪一個;k 與 anchor 編號的對應關係爲 l=0: [0:6, 1:7, 2:8],l=1: [0:3, 1:4, 2:5],l=2: [0:0, 1:1, 2:2]。

import numpy as np

# Demo inputs for preprocess_true_boxes.
num_classes = 20            # number of object classes; every class id below is < 20
input_shape = (416, 416)    # network input size as (height, width)

# Nine (width, height) anchor sizes, listed from smallest to largest.
anchors = [
    [10, 13], [16, 30], [33, 23],
    [30, 61], [62, 45], [59, 119],
    [116, 90], [156, 198], [373, 326],
]

# A batch of two images with four boxes each; every row is (x1, y1, x2, y2, class_id).
true_boxes = np.array([
    [
        [263, 211, 324, 339, 8],
        [165, 264, 253, 372, 8],
        [241, 194, 295, 299, 8],
        [150, 141, 229, 284, 14],
    ],
    [
        [69, 172, 270, 330, 12],
        [150, 141, 229, 284, 14],
        [241, 194, 295, 299, 8],
        [285, 201, 327, 331, 14],
    ],
])
print(true_boxes.shape)


def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes, *, verbose=True):
    '''Convert corner-format ground-truth boxes into per-scale training targets.

    Every box with positive width is matched to the anchor whose (w, h) gives
    the highest IoU when box and anchor are centred on the same point. The box
    is then written into the grid cell that contains its centre, on each scale
    whose anchor mask lists that anchor.

    Parameters
    ----------
    true_boxes: array-like, shape=(m, T, 5)
        m is the batch size and T the number of box slots per image. Each row
        is (x_min, y_min, x_max, y_max, class_id) in absolute pixels relative
        to input_shape. Rows whose width is not positive are treated as
        padding and skipped. The argument itself is not modified.
    input_shape: array-like, hw, multiples of 32
    anchors: array-like, shape=(N, 2), wh
    num_classes: integer
    verbose: bool, keyword-only
        When True (the default) the intermediate values are printed.

    Returns
    -------
    y_true: list of float32 arrays, one per scale, each of shape
        (m, grid_h, grid_w, 3, 5 + num_classes). The last axis holds
        x, y, w, h (divided by the input width/height), objectness, and a
        one-hot class vector.

    Raises
    ------
    AssertionError
        If any class id is not smaller than num_classes.
    '''

    def log(*values):
        # Bare debug print, silenced when verbose is False.
        if verbose:
            print(*values)

    def show(label, value):
        # Labelled debug print; formatting is skipped entirely when silenced.
        if verbose:
            print('%s:\n%s' % (label, value))

    # Work on a float32 copy: the caller's data stays untouched and list input
    # gains the [..., a:b] indexing used below.
    true_boxes = np.array(true_boxes, dtype='float32')
    assert (true_boxes[..., 4] < num_classes).all(), 'class id must be less than num_classes'

    num_layers = len(anchors) // 3  # 9 anchors -> 3 scales, 6 anchors -> 2 scales
    # NOTE(review): the two-scale mask lists anchor 3 twice and never anchor 0.
    # It is kept as-is because it presumably has to agree with the masks used by
    # the model/loss code, which is not visible here -- confirm before changing.
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]

    input_shape = np.array(input_shape, dtype='int32')
    # Corners -> centre (floored to whole pixels) and size.
    boxes_xy = (true_boxes[..., 0:2] + true_boxes[..., 2:4]) // 2
    show('boxes_xy', boxes_xy)
    boxes_wh = true_boxes[..., 2:4] - true_boxes[..., 0:2]
    show('boxes_wh', boxes_wh)
    log(boxes_wh.shape)
    # input_shape is (h, w) while boxes are (x, y) / (w, h), hence the reversal.
    true_boxes[..., 0:2] = boxes_xy / input_shape[::-1]
    true_boxes[..., 2:4] = boxes_wh / input_shape[::-1]
    show('true_boxes', true_boxes)

    m = true_boxes.shape[0]  # batch size
    # Grid size of each scale: the input is downsampled by 32, 16 and 8.
    grid_shapes = [input_shape // {0: 32, 1: 16, 2: 8}[l] for l in range(num_layers)]
    show('grid_shapes', grid_shapes)
    y_true = [np.zeros((m, grid_shapes[l][0], grid_shapes[l][1], len(anchor_mask[l]), 5 + num_classes),
                       dtype='float32') for l in range(num_layers)]

    # Expand dim to apply broadcasting: (N, 2) -> (1, N, 2).
    anchors = np.expand_dims(anchors, 0)
    log(anchors.shape)
    show('anchors', anchors)
    # Anchors and boxes are both centred on the origin for the IoU computation,
    # so each rectangle spans [-size / 2, +size / 2].
    anchor_maxes = anchors / 2.
    show('anchor_maxes', anchor_maxes)
    anchor_mins = -anchor_maxes
    show('anchor_mins', anchor_mins)
    valid_mask = boxes_wh[..., 0] > 0  # shape (m, T): True where the box has width
    show('valid_mask', valid_mask)

    # Every image of the batch is processed, not only the first one.
    for b in range(m):
        # Discard zero rows, remembering where the surviving boxes sit in the
        # unfiltered array so the right rows are read back further down.
        valid_idx = np.flatnonzero(valid_mask[b])
        wh = boxes_wh[b, valid_idx]
        log('----------')
        log(valid_mask[b])
        log(wh)
        if len(wh) == 0:
            continue
        # Expand dim to apply broadcasting: (V, 2) -> (V, 1, 2) against (1, N, 2).
        wh = np.expand_dims(wh, -2)
        log(wh)
        log(wh.shape)
        show('wh', wh)
        box_maxes = wh / 2.
        box_mins = -box_maxes
        show('box_mins', box_mins)

        # IoU of every valid box with every anchor -> shape (V, N).
        intersect_mins = np.maximum(box_mins, anchor_mins)
        log(intersect_mins)
        intersect_maxes = np.minimum(box_maxes, anchor_maxes)
        intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.)
        intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
        box_area = wh[..., 0] * wh[..., 1]
        anchor_area = anchors[..., 0] * anchors[..., 1]
        iou = intersect_area / (box_area + anchor_area - intersect_area)
        show('iou', iou)

        # Find best anchor for each true box.
        best_anchor = np.argmax(iou, axis=-1)
        show('best_anchor', best_anchor)
        log('--------------------------------------------------------------------------------------------------------')

        # t is the box's index in the unfiltered true_boxes[b].
        for t, n in zip(valid_idx, best_anchor):
            for l in range(num_layers):
                if n in anchor_mask[l]:
                    # Cell column (i) and row (j) that contain the box centre.
                    i = np.floor(true_boxes[b, t, 0] * grid_shapes[l][1]).astype('int32')
                    j = np.floor(true_boxes[b, t, 1] * grid_shapes[l][0]).astype('int32')
                    k = anchor_mask[l].index(n)  # anchor slot within this scale
                    c = true_boxes[b, t, 4].astype('int32')
                    y_true[l][b, j, i, k, 0:4] = true_boxes[b, t, 0:4]
                    y_true[l][b, j, i, k, 4] = 1
                    y_true[l][b, j, i, k, 5 + c] = 1
                    log(i, j, k, c)

    return y_true


# Run the demo on the sample inputs defined above. The function assigns a
# ground-truth box to one grid cell on the scale that owns its best-matching
# anchor; the stored x, y, w, h values are normalised by the input size.
# The result is a list indexed by scale l, and each entry is laid out as
# [batch, grid_row, grid_col, k, (x, y, w, h, objectness, 20 class scores)].
# k (0, 1 or 2) is the slot of the matched anchor within that scale's group of
# three anchor sizes; the scale itself is chosen by l, not by k.
preprocess_true_boxes(
    true_boxes=true_boxes,
    input_shape=input_shape,
    anchors=anchors,
    num_classes=num_classes,
)

運行結果(以下「----------」之後的逐框輸出僅列出 batch 中第一張圖片的部分):

(2, 4, 5)
boxes_xy:
[[[293. 275.]
  [209. 318.]
  [268. 246.]
  [189. 212.]]

 [[169. 251.]
  [189. 212.]
  [268. 246.]
  [306. 266.]]]
boxes_wh:
[[[ 61. 128.]
  [ 88. 108.]
  [ 54. 105.]
  [ 79. 143.]]

 [[201. 158.]
  [ 79. 143.]
  [ 54. 105.]
  [ 42. 130.]]]
(2, 4, 2)
true_boxes:
[[[ 0.7043269   0.6610577   0.14663461  0.30769232  8.        ]
  [ 0.50240386  0.7644231   0.21153846  0.2596154   8.        ]
  [ 0.6442308   0.59134614  0.1298077   0.25240386  8.        ]
  [ 0.45432693  0.50961536  0.18990384  0.34375    14.        ]]

 [[ 0.40625     0.60336536  0.48317307  0.37980768 12.        ]
  [ 0.45432693  0.50961536  0.18990384  0.34375    14.        ]
  [ 0.6442308   0.59134614  0.1298077   0.25240386  8.        ]
  [ 0.7355769   0.6394231   0.10096154  0.3125     14.        ]]]
grid_shapes:
[array([13, 13], dtype=int32), array([26, 26], dtype=int32), array([52, 52], dtype=int32)]
(1, 9, 2)
anchors:
[[[ 10  13]
  [ 16  30]
  [ 33  23]
  [ 30  61]
  [ 62  45]
  [ 59 119]
  [116  90]
  [156 198]
  [373 326]]]
anchor_maxes:
[[[  5.    6.5]
  [  8.   15. ]
  [ 16.5  11.5]
  [ 15.   30.5]
  [ 31.   22.5]
  [ 29.5  59.5]
  [ 58.   45. ]
  [ 78.   99. ]
  [186.5 163. ]]]
anchor_mins:
[[[  -5.    -6.5]
  [  -8.   -15. ]
  [ -16.5  -11.5]
  [ -15.   -30.5]
  [ -31.   -22.5]
  [ -29.5  -59.5]
  [ -58.   -45. ]
  [ -78.   -99. ]
  [-186.5 -163. ]]]
valid_mask:
[[ True  True  True  True]
 [ True  True  True  True]]
----------
[ True  True  True  True]
[[ 61. 128.]
 [ 88. 108.]
 [ 54. 105.]
 [ 79. 143.]]
[[[ 61. 128.]]

 [[ 88. 108.]]

 [[ 54. 105.]]

 [[ 79. 143.]]]
(4, 1, 2)
wh:
[[[ 61. 128.]]

 [[ 88. 108.]]

 [[ 54. 105.]]

 [[ 79. 143.]]]
box_mins:
[[[-30.5 -64. ]]

 [[-44.  -54. ]]

 [[-27.  -52.5]]

 [[-39.5 -71.5]]]
[[[ -5.   -6.5]
  [ -8.  -15. ]
  [-16.5 -11.5]
  [-15.  -30.5]
  [-30.5 -22.5]
  [-29.5 -59.5]
  [-30.5 -45. ]
  [-30.5 -64. ]
  [-30.5 -64. ]]

 [[ -5.   -6.5]
  [ -8.  -15. ]
  [-16.5 -11.5]
  [-15.  -30.5]
  [-31.  -22.5]
  [-29.5 -54. ]
  [-44.  -45. ]
  [-44.  -54. ]
  [-44.  -54. ]]

 [[ -5.   -6.5]
  [ -8.  -15. ]
  [-16.5 -11.5]
  [-15.  -30.5]
  [-27.  -22.5]
  [-27.  -52.5]
  [-27.  -45. ]
  [-27.  -52.5]
  [-27.  -52.5]]

 [[ -5.   -6.5]
  [ -8.  -15. ]
  [-16.5 -11.5]
  [-15.  -30.5]
  [-31.  -22.5]
  [-29.5 -59.5]
  [-39.5 -45. ]
  [-39.5 -71.5]
  [-39.5 -71.5]]]
iou:
[[0.01664959 0.06147541 0.09720799 0.234375   0.34954794 0.89920594
  0.43031823 0.25278425 0.06421158]
 [0.01367845 0.05050505 0.07986111 0.19255051 0.29356061 0.62759775
  0.65868263 0.30769231 0.07815918]
 [0.02292769 0.08465608 0.13386243 0.32275132 0.40298507 0.80757727
  0.432      0.18356643 0.04662906]
 [0.01150748 0.04248916 0.06718598 0.16198991 0.24696822 0.62149243
  0.48608737 0.36574074 0.09290449]]
best_anchor:
[5 6 5 5]
--------------------------------------------------------------------------------------------------------
18 17 2 8
6 9 0 8
16 15 2 8
11 13 2 14

其中一張圖片(batch 中的第二張)歸一化後的 true_boxes,每列爲 (x, y, w, h, class_id):

 [[ 0.40625     0.60336536  0.48317307  0.37980768 12.        ]
  [ 0.45432693  0.50961536  0.18990384  0.34375    14.        ]
  [ 0.6442308   0.59134614  0.1298077   0.25240386  8.        ]
  [ 0.7355769   0.6394231   0.10096154  0.3125     14.           ]]

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章