在此之前，我们主要实现了相关数据的解析，预处理等准备工作，以及对应Faster RCNN的相关网络模块搭建。接下来我们接着实现其他部分。

一、从RPN网络到ROIPooling层

在上一篇中，我们实现了一个自定义的ROIPooling层，这次我们看下如何建立RPN与ROIPooling层之间的联系。下面，我们看下如何通过代码实现：根据RPN网络的输出，来确定对应ROIPooling层的输入。

def rpn_to_roi(rpn_cls_layer, rpn_regr_layer, C, use_regr=True, max_boxes=300,overlap_thresh=0.9):
    '''
    Connect the RPN to the ROI pooling layer by turning raw RPN outputs
    into a list of ROIs.

    Every anchor at every feature-map cell is materialised as a box,
    optionally adjusted by the RPN regression output, forced to be at least
    one cell wide/high, clipped to the feature map, and finally thinned out
    with non-maximum suppression.

    :param rpn_cls_layer:  RPN classification output; indexed here as
                           (1, rows, cols, anchors_per_cell)
    :param rpn_regr_layer:  RPN regression output; indexed here as
                            (1, rows, cols, 4 * anchors_per_cell) and divided
                            by C.std_scaling before use
    :param C:  config object; this function reads std_scaling,
               anchor_box_scales, anchor_box_ratios and rpn_stride
    :param use_regr:  when True, adjust each anchor with apply_regr_np
    :param max_boxes:  maximum number of ROIs kept by non-maximum suppression
    :param overlap_thresh:  IoU threshold passed to non-maximum suppression
    :return:  integer array of shape (n, 4) whose rows are (x1, y1, x2, y2) in
              feature-map coordinates; shape (0, 4) when no valid box remains
    '''
    regr_layer = rpn_regr_layer / C.std_scaling

    anchor_sizes = C.anchor_box_scales
    anchor_ratios = C.anchor_box_ratios

    # Only a batch of exactly one image is supported.
    assert rpn_cls_layer.shape[0] == 1
    (rows, cols) = rpn_cls_layer.shape[1:3]

    # The grid of cell coordinates is the same for every anchor, so build it once.
    X, Y = np.meshgrid(np.arange(cols), np.arange(rows))

    # A[k, row, col, anchor]: k = 0..3 holds x1, y1, then width/height
    # (temporarily) and finally x2, y2.
    A = np.zeros((4, rows, cols, rpn_cls_layer.shape[3]))

    curr_layer = 0
    for anchor_size in anchor_sizes:
        for anchor_ratio in anchor_ratios:
            # Anchor width / height expressed in feature-map cells.
            anchor_x = (anchor_size * anchor_ratio[0])/C.rpn_stride
            anchor_y = (anchor_size * anchor_ratio[1])/C.rpn_stride

            # Regression values belonging to the current anchor: (rows, cols, 4).
            regr = regr_layer[0, :, :, 4 * curr_layer:4 * curr_layer + 4]

            A[0, :, :, curr_layer] = X - anchor_x/2   # top-left x
            A[1, :, :, curr_layer] = Y - anchor_y/2   # top-left y
            A[2, :, :, curr_layer] = anchor_x   # width, for now
            A[3, :, :, curr_layer] = anchor_y   # height, for now

            if use_regr:
                # Shift / rescale the anchor with the RPN regression prediction.
                A[:, :, :, curr_layer] = apply_regr_np(A[:, :, :, curr_layer], regr)

            # Keep every box at least one cell wide and high, then convert
            # width/height into the bottom-right corner.
            A[2, :, :, curr_layer] = np.maximum(1, A[2, :, :, curr_layer])
            A[3, :, :, curr_layer] = np.maximum(1, A[3, :, :, curr_layer])
            A[2, :, :, curr_layer] += A[0, :, :, curr_layer]  # bottom-right x
            A[3, :, :, curr_layer] += A[1, :, :, curr_layer]  # bottom-right y

            # Clip the box so it does not leave the feature map.
            A[0, :, :, curr_layer] = np.maximum(0, A[0, :, :, curr_layer])
            A[1, :, :, curr_layer] = np.maximum(0, A[1, :, :, curr_layer])
            A[2, :, :, curr_layer] = np.minimum(cols-1, A[2, :, :, curr_layer])
            A[3, :, :, curr_layer] = np.minimum(rows-1, A[3, :, :, curr_layer])

            curr_layer += 1

    # Flatten to (total number of anchors, 4) and a matching 1-D score vector.
    all_boxes = np.reshape(A.transpose((0, 3, 1,2)), (4, -1)).transpose((1, 0))
    all_probs = rpn_cls_layer.transpose((0, 3, 1, 2)).reshape((-1))

    x1 = all_boxes[:, 0]
    y1 = all_boxes[:, 1]
    x2 = all_boxes[:, 2]
    y2 = all_boxes[:, 3]

    # Drop degenerate boxes (zero or negative extent after clipping).
    degenerate = (x1 - x2 >= 0) | (y1 - y2 >= 0)
    all_boxes = all_boxes[~degenerate]
    all_probs = all_probs[~degenerate]

    # Nothing left to suppress: return an empty ROI list instead of indexing
    # into the empty result that non-maximum suppression gives for no input.
    if all_boxes.shape[0] == 0:
        return np.zeros((0, 4), dtype="int")

    # Non-maximum suppression picks the anchors that become ROI pooling inputs.
    result = non_max_suppression_fast(all_boxes, all_probs, overlap_thresh=overlap_thresh, max_boxes=max_boxes)[0]

    return result

上述代码中调用了两个方法：一个是 apply_regr_np 方法，用来通过rpn网络回归层的预测值调整anchor位置；另一个是 non_max_suppression_fast 方法，用来对所有anchor进行非极大值抑制，选取出实际需要的anchor。具体代码实现如下：

def apply_regr_np(X, T):
    '''
    Adjust anchor positions with the predictions of the RPN regression layer.

    :param X: anchors, indexed as X[k, :, :] where k = 0..3 selects the
              top-left x, top-left y, width and height planes
    :param T: regression values, indexed as T[:, :, k] where k = 0..3 selects
              tx, ty, tw and th
    :return: the stacked, rounded planes [x1, y1, w1, h1] of the adjusted
             anchors; if anything raises, the exception is printed and X is
             returned unchanged
    '''
    try:
        anchor_x, anchor_y, anchor_w, anchor_h = (X[k, :, :] for k in range(4))
        delta_x, delta_y, delta_w, delta_h = (T[:, :, k] for k in range(4))

        # Move the anchor centre by a fraction of the anchor size.
        centre_x = delta_x * anchor_w + (anchor_x + anchor_w / 2.)
        centre_y = delta_y * anchor_h + (anchor_y + anchor_h / 2.)

        # Width and height are scaled by the exponential of the prediction.
        new_w = np.exp(delta_w.astype(np.float64)) * anchor_w
        new_h = np.exp(delta_h.astype(np.float64)) * anchor_h

        # Back from centre form to top-left form, rounding every plane.
        adjusted = [centre_x - new_w / 2., centre_y - new_h / 2., new_w, new_h]
        return np.stack([np.round(plane) for plane in adjusted])
    except Exception as e:
        print(e)
        return X

def non_max_suppression_fast(boxes, probs, overlap_thresh=0.9, max_boxes=300):
    '''
    Greedy non-maximum suppression: keep at most max_boxes boxes to be used
    as ROIs for the ROI pooling layer.

    Sketch of the algorithm, for ten boxes sorted by ascending score
    [A,B,C,D,E,F,G,H,I,J]:
    1. Take the highest scoring box J, keep it, and compute the IoU between J
       and every remaining box.
    2. Discard every remaining box whose IoU with J is above overlap_thresh
       (say D and F), and remove J itself from the candidate list.
    3. Repeat on the candidates that are left, again starting from the
       highest score.
    4. Stop when the candidate list is empty or max_boxes boxes were kept.

    :param boxes: candidate boxes, shape (n, 4), rows are (x1, y1, x2, y2)
    :param probs: score of each box, shape (n,)
    :param overlap_thresh:  IoU above which a box is suppressed
    :param max_boxes:  maximum number of boxes to keep
    :return: tuple (boxes, probs) - the kept boxes cast to int, ordered by
             descending score, and their scores. For empty input both arrays
             are empty (shapes (0, 4) and (0,)), so the result can always be
             unpacked or indexed.
    :raises AssertionError: if some box has x1 >= x2 or y1 >= y2
    '''
    if len(boxes) == 0:
        # Same (boxes, probs) shape of result as the non-empty case.
        return np.zeros((0, 4), dtype="int"), np.zeros((0,))

    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]

    np.testing.assert_array_less(x1, x2)
    np.testing.assert_array_less(y1, y2)

    boxes = boxes.astype("float")

    pick = []

    area = (x2 - x1) * (y2 - y1)  # area of every box, shape (n,)

    # argsort is ascending, so the best remaining box is always the last entry.
    idxs = np.argsort(probs)

    while len(idxs) > 0:
        last = len(idxs) - 1
        i = idxs[last]  # index of the highest scoring remaining box
        pick.append(i)

        # Intersection of the picked box with every other remaining box.
        xx1_int = np.maximum(x1[i], x1[idxs[:last]])
        yy1_int = np.maximum(y1[i], y1[idxs[:last]])
        xx2_int = np.minimum(x2[i], x2[idxs[:last]])
        yy2_int = np.minimum(y2[i], y2[idxs[:last]])
        ww_int = np.maximum(0, xx2_int - xx1_int)
        hh_int = np.maximum(0, yy2_int - yy1_int)

        area_int = ww_int * hh_int

        # Union of the picked box with every other remaining box.
        area_union = area[i] + area[idxs[:last]] - area_int

        # IoU; the small constant guards against division by zero.
        overlap = area_int/(area_union + 1e-6)

        # Remove the picked box and everything that overlaps it too much.
        idxs = np.delete(idxs, np.concatenate(([last],
            np.where(overlap > overlap_thresh)[0])))

        if len(pick) >= max_boxes:  # enough boxes kept, stop early
            break

    boxes = boxes[pick].astype("int")
    probs = probs[pick]
    return boxes, probs

二、构造最终精分类和精回归的训练数据

def calc_roi(R, img_data, C, class_mapping):
    '''
    Build the ROI pooling input together with the training targets of the
    final classification and regression layers.

    Each ROI is matched with the ground-truth box it overlaps most. ROIs whose
    best IoU is below C.classifier_min_overlap are dropped, those below
    C.classifier_max_overlap become background ('bg'), the rest take the
    class of the matched box and get regression targets. Finally C.num_rois
    samples are drawn: up to C.num_rois // 2 positives, the remainder negatives.

    :param R:  ROIs selected from the RPN output, shape (number of ROIs, 4),
               rows are (x1, y1, x2, y2)
    :param img_data:  pre-processed annotation of one image, for example
    {'width': 500,
      'height': 500,
      'bboxes': [{'y2': 500, 'y1': 27, 'x2': 183, 'x1': 20, 'class': 'person', 'difficult': False},
                 {'y2': 450, 'y1': 314, 'x2': 58, 'x1': 1, 'class': 'chair', 'difficult': True}],
      'imageset': 'test',
      'filepath': './datasets/VOC2007/JPEGImages/000910.jpg'
    }
    :param C: config object; this function reads im_size, rpn_stride,
              classifier_min_overlap, classifier_max_overlap,
              classifier_regr_std and num_rois
    :param class_mapping: dict from class name to class index. The code treats
              the last one-hot column as background and reserves regression
              slots only for indices below len(class_mapping) - 1, so 'bg' has
              to map to the last index.
    :return: (X, Y1, Y2, IoUs), or four None values when no ROI qualifies.
             X  - sampled ROIs as (x, y, w, h), shape (1, C.num_rois, 4)
             Y1 - one-hot class targets of the sampled ROIs
             Y2 - per sampled ROI, the regression label mask followed by the
                  regression targets
             IoUs - best IoU of every qualifying ROI (not restricted to the
                  sampled ones)
    :raises ValueError: raised by np.random.choice when negatives are needed
             to fill the batch but no ROI was labelled background
    '''

    bboxes = img_data['bboxes']
    (width, height) = (img_data['width'], img_data['height'])

    (resized_width, resized_height) = data_generators.get_new_img_size(width, height, C.im_size)

    # Ground-truth boxes in feature-map coordinates.
    # NOTE: columns are stored as (x1, x2, y1, y2).
    gta = np.zeros((len(bboxes), 4))
    for bbox_num, bbox in enumerate(bboxes):
        gta[bbox_num, 0] = int(round(bbox['x1'] * (resized_width / float(width))/C.rpn_stride))
        gta[bbox_num, 1] = int(round(bbox['x2'] * (resized_width / float(width))/C.rpn_stride))
        gta[bbox_num, 2] = int(round(bbox['y1'] * (resized_height / float(height))/C.rpn_stride))
        gta[bbox_num, 3] = int(round(bbox['y2'] * (resized_height / float(height))/C.rpn_stride))

    x_roi = []
    y_class_num = []
    y_class_regr_coords = []
    y_class_regr_label = []
    IoUs = []

    for ix in range(R.shape[0]):  # every ROI
        (x1, y1, x2, y2) = R[ix, :]
        x1 = int(round(x1))
        y1 = int(round(y1))
        x2 = int(round(x2))
        y2 = int(round(y2))

        best_iou = 0.0  # best IoU between this ROI and any ground-truth box
        best_bbox = -1  # index of the ground-truth box giving that IoU
        for bbox_num in range(len(bboxes)):
            curr_iou = data_generators.iou([gta[bbox_num, 0], gta[bbox_num, 2], gta[bbox_num, 1], gta[bbox_num, 3]], [x1, y1, x2, y2])
            if curr_iou > best_iou:
                best_iou = curr_iou
                best_bbox = bbox_num

        # Too little overlap with anything: the ROI is not used at all.
        if best_iou < C.classifier_min_overlap:
            continue

        w = x2 - x1
        h = y2 - y1
        x_roi.append([x1, y1, w, h])
        IoUs.append(best_iou)

        if C.classifier_min_overlap <= best_iou < C.classifier_max_overlap:
            cls_name = 'bg'
        elif C.classifier_max_overlap <= best_iou:
            cls_name = bboxes[best_bbox]['class']
            cxg = (gta[best_bbox, 0] + gta[best_bbox, 1]) / 2.0
            cyg = (gta[best_bbox, 2] + gta[best_bbox, 3]) / 2.0

            cx = x1 + w / 2.0
            cy = y1 + h / 2.0

            # (tx, ty, tw, th): translation / scale taking this ROI to the
            # matched ground-truth box.
            tx = (cxg - cx) / float(w)
            ty = (cyg - cy) / float(h)
            tw = np.log((gta[best_bbox, 1] - gta[best_bbox, 0]) / float(w))
            th = np.log((gta[best_bbox, 3] - gta[best_bbox, 2]) / float(h))
        else:
            print('roi = {}'.format(best_iou))
            raise RuntimeError

        # One-hot class target for the final classification layer.
        class_num = class_mapping[cls_name]
        class_label = len(class_mapping) * [0]
        class_label[class_num] = 1
        y_class_num.append(class_label)

        # Four regression slots per non-background class, plus a mask telling
        # which slots are active. The lists are created fresh for every ROI,
        # so they can be stored without copying.
        coords = [0] * 4 * (len(class_mapping) - 1)
        labels = [0] * 4 * (len(class_mapping) - 1)
        if cls_name != 'bg':
            label_pos = 4 * class_num
            sx, sy, sw, sh = C.classifier_regr_std
            coords[label_pos:4+label_pos] = [sx*tx, sy*ty, sw*tw, sh*th]
            labels[label_pos:4+label_pos] = [1, 1, 1, 1]
        y_class_regr_coords.append(coords)
        y_class_regr_label.append(labels)

    if len(x_roi) == 0:
        return None, None, None, None

    X = np.array(x_roi)  # ROI pooling input
    Y1 = np.array(y_class_num)  # targets of the final classification layer
    Y2 = np.concatenate([np.array(y_class_regr_label),np.array(y_class_regr_coords)],axis=1)   # targets of the final regression layer

    # Add the leading mini-batch dimension.
    X = np.expand_dims(X, axis=0)
    Y1 = np.expand_dims(Y1, axis=0)
    Y2 = np.expand_dims(Y2, axis=0)

    # Indices (along the second axis) of negative and positive samples; a 1 in
    # the last one-hot column marks a negative (background) sample.
    # np.where on a 1-D condition always returns a one-element tuple.
    neg_samples = np.where(Y1[0, :, -1] == 1)[0]
    pos_samples = np.where(Y1[0, :, -1] == 0)[0]

    # Use every positive when there are fewer than half a batch of them,
    # otherwise draw half a batch without replacement.
    if len(pos_samples) < C.num_rois // 2:
        selected_pos_samples = pos_samples.tolist()
    else:
        selected_pos_samples = np.random.choice(pos_samples, C.num_rois // 2, replace=False).tolist()

    # Fill the rest of the batch with negatives.
    num_neg_wanted = C.num_rois - len(selected_pos_samples)
    try:
        # Prefer sampling without replacement ...
        selected_neg_samples = np.random.choice(neg_samples, num_neg_wanted,
                                                replace=False).tolist()
    except ValueError:
        # ... and fall back to sampling with replacement when there are not
        # enough negatives. With no negatives at all this raises again.
        selected_neg_samples = np.random.choice(neg_samples, num_neg_wanted,
                                                replace=True).tolist()

    # Indices of the ROIs that take part in training.
    sel_samples = selected_pos_samples + selected_neg_samples

    return X[:, sel_samples, :], Y1[:, sel_samples, :], Y2[:, sel_samples, :], IoUs

到此，所有模块都已开发完毕，后面将进行完整的模型训练和预测过程

未完待续

本章相关的完整代码以及VOC2012数据集的百度网盘下载方式，请关注我自己的公众号 AI计算机视觉工坊，回复【代码】和【数据集】获取。本公众号不定期推送机器学习、深度学习、计算机视觉等相关文章，欢迎大家和我一起学习、交流。

【目标检测算法实现系列】Keras实现Faster R-CNN算法（三）

一、从RPN网络到ROIPooling层

二、构造最终精分类和精回归的训练数据

SQL优化-20231016

GoogLeNet Inception v1,v2,v3,v4及Inception Resnet介紹

【目標檢測算法實現系列】Keras實現Faster R-CNN算法（四）

藍橋杯練習vip試題高精度加法

二叉樹的前序，中序，後序的遞歸與非遞歸遍歷以及按層遍歷

藍橋杯vip練習 Huffman樹

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結