在此之前，我們主要實現了相關數據的解析，預處理等準備工作，以及對應Faster RCNN的相關網絡模塊搭建。接下來我們接着實現其他部分。

一、從RPN網絡到ROIPooling層

在上一篇中，我們實現了一個自定義的ROIPooling層，這次我們看下如何建立RPN與ROIPooling層之間的聯繫。下面，我們看下如何用代碼實現：通過RPN網絡的輸出，來指定對應ROIPooling層的輸入。

def rpn_to_roi(rpn_cls_layer, rpn_regr_layer, C, use_regr=True, max_boxes=300, overlap_thresh=0.9):
    '''
    Turn the RPN outputs into the ROIs that are fed to the ROI pooling layer.

    For every anchor (scale x ratio) a box is centred on each feature-map
    cell, optionally refined with the RPN regression output, clipped to the
    feature map, filtered for degenerate boxes and finally reduced with
    non-maximum suppression.

    :param rpn_cls_layer: RPN classification output, indexed as
        (1, rows, cols, num_anchors); the batch dimension must be 1.
    :param rpn_regr_layer: RPN regression output, indexed as
        (1, rows, cols, 4 * num_anchors).
    :param C: configuration object; the attributes read here are
        std_scaling, anchor_box_scales, anchor_box_ratios and rpn_stride.
    :param use_regr: when True the anchors are adjusted with the regression
        output through apply_regr_np.
    :param max_boxes: maximum number of ROIs kept by non-maximum suppression.
    :param overlap_thresh: IoU threshold used by non-maximum suppression.
    :return: integer array of shape (n, 4) holding (x1, y1, x2, y2) in
        feature-map coordinates. n is 0 when no valid box survives.
    '''
    # Undo the scaling that was applied to the regression targets.
    regr_layer = rpn_regr_layer / C.std_scaling

    assert rpn_cls_layer.shape[0] == 1
    (rows, cols) = rpn_cls_layer.shape[1:3]

    # A[k, row, col, anchor]: k indexes the four box values. During the loop
    # slots 2/3 temporarily hold width/height before being turned into the
    # bottom-right corner.
    A = np.zeros((4, rows, cols, rpn_cls_layer.shape[3]))

    # The cell grid is the same for every anchor, so build it only once.
    X, Y = np.meshgrid(np.arange(cols), np.arange(rows))

    curr_layer = 0
    for anchor_size in C.anchor_box_scales:
        for anchor_ratio in C.anchor_box_ratios:
            # Anchor width / height expressed in feature-map cells.
            anchor_x = (anchor_size * anchor_ratio[0]) / C.rpn_stride
            anchor_y = (anchor_size * anchor_ratio[1]) / C.rpn_stride

            A[0, :, :, curr_layer] = X - anchor_x / 2  # top-left x
            A[1, :, :, curr_layer] = Y - anchor_y / 2  # top-left y
            A[2, :, :, curr_layer] = anchor_x          # width (temporary)
            A[3, :, :, curr_layer] = anchor_y          # height (temporary)

            if use_regr:
                # Regression values predicted for the current anchor.
                regr = regr_layer[0, :, :, 4 * curr_layer:4 * curr_layer + 4]
                A[:, :, :, curr_layer] = apply_regr_np(A[:, :, :, curr_layer], regr)

            # Width / height of at least one cell, then convert to the
            # bottom-right corner.
            A[2, :, :, curr_layer] = np.maximum(1, A[2, :, :, curr_layer])
            A[3, :, :, curr_layer] = np.maximum(1, A[3, :, :, curr_layer])
            A[2, :, :, curr_layer] += A[0, :, :, curr_layer]
            A[3, :, :, curr_layer] += A[1, :, :, curr_layer]

            # Clip the box to the feature map.
            A[0, :, :, curr_layer] = np.maximum(0, A[0, :, :, curr_layer])
            A[1, :, :, curr_layer] = np.maximum(0, A[1, :, :, curr_layer])
            A[2, :, :, curr_layer] = np.minimum(cols - 1, A[2, :, :, curr_layer])
            A[3, :, :, curr_layer] = np.minimum(rows - 1, A[3, :, :, curr_layer])

            curr_layer += 1

    # Flatten to (total number of boxes, 4) and a matching 1-D score vector.
    all_boxes = np.reshape(A.transpose((0, 3, 1, 2)), (4, -1)).transpose((1, 0))
    all_probs = rpn_cls_layer.transpose((0, 3, 1, 2)).reshape((-1))

    x1 = all_boxes[:, 0]
    y1 = all_boxes[:, 1]
    x2 = all_boxes[:, 2]
    y2 = all_boxes[:, 3]

    # Drop boxes whose clipped width or height is not strictly positive.
    degenerate = (x1 - x2 >= 0) | (y1 - y2 >= 0)
    all_boxes = all_boxes[~degenerate]
    all_probs = all_probs[~degenerate]

    # Nothing left to suppress: return an empty ROI array instead of indexing
    # into the result of non_max_suppression_fast.
    if all_boxes.shape[0] == 0:
        return np.zeros((0, 4), dtype="int")

    # Non-maximum suppression selects the boxes used as ROI pooling input;
    # only the boxes (element 0) are returned, the scores are discarded.
    result = non_max_suppression_fast(all_boxes, all_probs, overlap_thresh=overlap_thresh, max_boxes=max_boxes)[0]

    return result

上述代碼中有調用兩個方法，一個是apply_regr_np方法，用來通過rpn網絡的迴歸層的預測值，來調整anchor位置，另外一個方法是 non_max_suppression_fast，用來對所有anchor進行非極大值抑制，選取出實際需要的anchor,具體代碼實現如下：

def apply_regr_np(X, T):
    '''
    Shift and rescale anchors with the deltas predicted by the RPN
    regression layer.

    :param X: anchors indexed as X[k, row, col], where k = 0..3 selects
        (x, y, w, h) and (x, y) is the top-left corner.
    :param T: deltas indexed as T[row, col, k], where k = 0..3 selects
        (tx, ty, tw, th).
    :return: the adjusted anchors stacked as (x, y, w, h), every value
        rounded. If anything raises, the error is printed and X is returned
        unchanged.
    '''
    try:
        left, top, width, height = (X[k, :, :] for k in range(4))
        d_x, d_y, d_w, d_h = (T[:, :, k] for k in range(4))

        # Centre of the original anchor.
        centre_x = left + width / 2.
        centre_y = top + height / 2.

        # Centre after applying the predicted offsets, which are expressed
        # relative to the anchor size.
        moved_x = d_x * width + centre_x
        moved_y = d_y * height + centre_y

        # Size after applying the predicted log-scale factors.
        new_width = np.exp(d_w.astype(np.float64)) * width
        new_height = np.exp(d_h.astype(np.float64)) * height

        # Back from centre to top-left corner.
        new_left = moved_x - new_width / 2.
        new_top = moved_y - new_height / 2.

        adjusted = [np.round(v) for v in (new_left, new_top, new_width, new_height)]
        return np.stack(adjusted)
    except Exception as err:
        print(err)
        return X

def non_max_suppression_fast(boxes, probs, overlap_thresh=0.9, max_boxes=300):
    '''
    Greedy non-maximum suppression over candidate boxes.

    The boxes are visited from the highest score to the lowest. Each visited
    box is kept, and every remaining box whose IoU with it is greater than
    overlap_thresh is discarded. The loop stops when no candidate is left or
    when max_boxes boxes have been kept.

    :param boxes: candidate boxes, shape (n, 4), columns (x1, y1, x2, y2).
        Every box must satisfy x1 < x2 and y1 < y2.
    :param probs: score of each box, shape (n,).
    :param overlap_thresh: IoU threshold above which a box is suppressed.
    :param max_boxes: maximum number of boxes to keep.
    :return: tuple (boxes, probs) of the kept boxes (cast to int) and their
        scores, ordered by decreasing score. For empty input a tuple of
        empty arrays is returned so callers can always unpack or index it.
    :raises AssertionError: if any box has x1 >= x2 or y1 >= y2.
    '''
    boxes = np.asarray(boxes)
    probs = np.asarray(probs)

    if len(boxes) == 0:
        # Same (boxes, probs) structure as the normal return path.
        return np.zeros((0, 4), dtype="int"), np.zeros((0,), dtype=probs.dtype)

    # Convert before slicing so all the geometry below is done in floats.
    boxes = boxes.astype("float")

    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]

    np.testing.assert_array_less(x1, x2)
    np.testing.assert_array_less(y1, y2)

    pick = []

    # Area of every box, shape (n,).
    area = (x2 - x1) * (y2 - y1)

    # Ascending by score, so the best remaining candidate is always last.
    idxs = np.argsort(probs)

    while len(idxs) > 0:
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)

        rest = idxs[:last]

        # Intersection of the kept box with every remaining candidate.
        xx1_int = np.maximum(x1[i], x1[rest])
        yy1_int = np.maximum(y1[i], y1[rest])
        xx2_int = np.minimum(x2[i], x2[rest])
        yy2_int = np.minimum(y2[i], y2[rest])
        ww_int = np.maximum(0, xx2_int - xx1_int)
        hh_int = np.maximum(0, yy2_int - yy1_int)
        area_int = ww_int * hh_int

        area_union = area[i] + area[rest] - area_int

        # IoU; the epsilon guards against a zero union.
        overlap = area_int / (area_union + 1e-6)

        # Drop the kept box itself and everything overlapping it too much.
        idxs = rest[~(overlap > overlap_thresh)]

        if len(pick) >= max_boxes:
            break

    boxes = boxes[pick].astype("int")
    probs = probs[pick]
    return boxes, probs

二、構造最終精分類和精迴歸的訓練數據

def calc_roi(R, img_data, C, class_mapping):
    '''
    Build the training batch for the final classification / regression heads
    from the ROIs proposed by the RPN.

    Every ROI is matched against the ground-truth boxes (scaled to
    feature-map coordinates). ROIs whose best IoU is below
    C.classifier_min_overlap are dropped, ROIs below C.classifier_max_overlap
    become background ('bg') samples, and the rest become positive samples of
    the matched class with regression targets (tx, ty, tw, th). Finally
    C.num_rois samples are drawn at random: at most C.num_rois // 2 positives,
    the remainder from the background samples.

    The code treats the last column of the one-hot label as background and
    sizes the regression arrays with 4 * (len(class_mapping) - 1) slots, so
    class_mapping['bg'] has to be the largest class index.

    :param R: ROIs selected from the RPN output, shape (number of ROIs, 4),
        columns (x1, y1, x2, y2).
    :param img_data: annotation dict of the image, for example
        {'width': 500,
         'height': 500,
         'bboxes': [{'y2': 500, 'y1': 27, 'x2': 183, 'x1': 20, 'class': 'person', 'difficult': False},
                    {'y2': 468, 'y1': 319, 'x2': 356, 'x1': 231, 'class': 'chair', 'difficult': False}],
         'imageset': 'test',
         'filepath': './datasets/VOC2007/JPEGImages/000910.jpg'}
        Only 'width', 'height' and 'bboxes' are read here.
    :param C: configuration object; the attributes read here are im_size,
        rpn_stride, classifier_min_overlap, classifier_max_overlap,
        classifier_regr_std and num_rois.
    :param class_mapping: dict mapping class name to class index.
    :return: (X, Y1, Y2, IoUs), or (None, None, None, None) when no ROI
        reaches C.classifier_min_overlap.
        X: (1, n, 4) ROIs as (x, y, w, h), input of the ROI pooling layer.
        Y1: (1, n, len(class_mapping)) one-hot class labels.
        Y2: (1, n, 8 * (len(class_mapping) - 1)) regression label mask
            followed by the regression targets.
        IoUs: best IoU of every ROI that was kept BEFORE sampling, so it is
            not aligned with the sampled rows of X / Y1 / Y2.
    '''

    bboxes = img_data['bboxes']
    (width, height) = (img_data['width'], img_data['height'])

    (resized_width, resized_height) = data_generators.get_new_img_size(width, height, C.im_size)

    # Ground-truth boxes in feature-map coordinates, columns (x1, x2, y1, y2).
    gta = np.zeros((len(bboxes), 4))
    for bbox_num, bbox in enumerate(bboxes):
        gta[bbox_num, 0] = int(round(bbox['x1'] * (resized_width / float(width))/C.rpn_stride))
        gta[bbox_num, 1] = int(round(bbox['x2'] * (resized_width / float(width))/C.rpn_stride))
        gta[bbox_num, 2] = int(round(bbox['y1'] * (resized_height / float(height))/C.rpn_stride))
        gta[bbox_num, 3] = int(round(bbox['y2'] * (resized_height / float(height))/C.rpn_stride))

    x_roi = []
    y_class_num = []
    y_class_regr_coords = []
    y_class_regr_label = []
    IoUs = []

    for ix in range(R.shape[0]):
        (x1, y1, x2, y2) = R[ix, :]
        x1 = int(round(x1))
        y1 = int(round(y1))
        x2 = int(round(x2))
        y2 = int(round(y2))

        # Best-matching ground-truth box for this ROI.
        best_iou = 0.0
        best_bbox = -1
        for bbox_num in range(len(bboxes)):
            curr_iou = data_generators.iou([gta[bbox_num, 0], gta[bbox_num, 2], gta[bbox_num, 1], gta[bbox_num, 3]], [x1, y1, x2, y2])
            if curr_iou > best_iou:
                best_iou = curr_iou
                best_bbox = bbox_num

        # Too little overlap with any ground truth: not used for training.
        if best_iou < C.classifier_min_overlap:
            continue

        w = x2 - x1
        h = y2 - y1
        x_roi.append([x1, y1, w, h])
        IoUs.append(best_iou)

        if C.classifier_min_overlap <= best_iou < C.classifier_max_overlap:
            cls_name = 'bg'
        elif C.classifier_max_overlap <= best_iou:
            cls_name = bboxes[best_bbox]['class']
            cxg = (gta[best_bbox, 0] + gta[best_bbox, 1]) / 2.0
            cyg = (gta[best_bbox, 2] + gta[best_bbox, 3]) / 2.0

            cx = x1 + w / 2.0
            cy = y1 + h / 2.0

            # (tx, ty, tw, th): translation / scale that maps this ROI onto
            # its ground-truth box.
            tx = (cxg - cx) / float(w)
            ty = (cyg - cy) / float(h)
            tw = np.log((gta[best_bbox, 1] - gta[best_bbox, 0]) / float(w))
            th = np.log((gta[best_bbox, 3] - gta[best_bbox, 2]) / float(h))
        else:
            print('roi = {}'.format(best_iou))
            raise RuntimeError

        # One-hot class label. The lists below are created fresh for every
        # ROI, so they can be appended without copying.
        class_num = class_mapping[cls_name]
        class_label = len(class_mapping) * [0]
        class_label[class_num] = 1
        y_class_num.append(class_label)

        coords = [0] * 4 * (len(class_mapping) - 1)  # 4 targets per non-bg class
        labels = [0] * 4 * (len(class_mapping) - 1)  # 1 where a target is valid
        if cls_name != 'bg':
            label_pos = 4 * class_num
            sx, sy, sw, sh = C.classifier_regr_std
            coords[label_pos:4+label_pos] = [sx*tx, sy*ty, sw*tw, sh*th]
            labels[label_pos:4+label_pos] = [1, 1, 1, 1]
        y_class_regr_coords.append(coords)
        y_class_regr_label.append(labels)

    if len(x_roi) == 0:
        return None, None, None, None

    X = np.array(x_roi)
    Y1 = np.array(y_class_num)
    Y2 = np.concatenate([np.array(y_class_regr_label), np.array(y_class_regr_coords)], axis=1)

    # Add the mini-batch dimension.
    X = np.expand_dims(X, axis=0)
    Y1 = np.expand_dims(Y1, axis=0)
    Y2 = np.expand_dims(Y2, axis=0)

    # Row indices of the background (last one-hot column set) and of the
    # positive samples.
    neg_samples = np.where(Y1[0, :, -1] == 1)[0]
    pos_samples = np.where(Y1[0, :, -1] == 0)[0]

    # Use every positive when there are fewer than half a batch of them,
    # otherwise draw half a batch without replacement.
    if len(pos_samples) < C.num_rois // 2:
        selected_pos_samples = pos_samples.tolist()
    else:
        selected_pos_samples = np.random.choice(pos_samples, C.num_rois // 2, replace=False).tolist()

    # Fill the rest of the batch with background samples, drawing with
    # replacement only when there are not enough of them.
    num_neg = C.num_rois - len(selected_pos_samples)
    if len(neg_samples) > 0:
        selected_neg_samples = np.random.choice(neg_samples, num_neg,
                                                replace=len(neg_samples) < num_neg).tolist()
    else:
        # No background ROI at all: np.random.choice cannot draw from an
        # empty population, so pad the batch with repeated positives instead
        # of crashing. At least one positive exists here because x_roi is
        # not empty.
        selected_neg_samples = np.random.choice(pos_samples, num_neg, replace=True).tolist()

    # Row indices of the ROIs that take part in training.
    sel_samples = selected_pos_samples + selected_neg_samples

    return X[:, sel_samples, :], Y1[:, sel_samples, :], Y2[:, sel_samples, :], IoUs

到此，所有模塊都已開發完畢，後面將進行完整的模型訓練和預測過程

未完待續

本章相關的完整代碼以及VOC2012數據集的百度網盤下載，請關注我自己的公衆號 AI計算機視覺工坊，回覆【代碼】和【數據集】獲取。本公衆號不定期推送機器學習、深度學習、計算機視覺等相關文章，歡迎大家和我一起學習、交流。

【目標檢測算法實現系列】Keras實現Faster R-CNN算法（三）

一、從RPN網絡到ROIPooling層

二、構造最終精分類和精迴歸的訓練數據

GoogLeNet Inception v1,v2,v3,v4及Inception Resnet介紹

【目標檢測算法實現系列】Keras實現Faster R-CNN算法（四）

藍橋杯練習vip試題高精度加法

二叉樹的前序，中序，後序的遞歸與非遞歸遍歷以及按層遍歷

藍橋杯vip練習 Huffman樹

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結