Faster RCNN Source Code Study (Part 4)

bbox_transform.py

# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

import numpy as np
# Purpose: return the four regression targets (dx, dy, dw, dh) of each anchor relative to its matched GT box, shape (len(anchors), 4)
def bbox_transform(ex_rois, gt_rois):
    # Compute the width and height of every anchor
    ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
    ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
    # Compute the x, y coordinates of every anchor's center
    ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
    ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights
    # Note: the GT boxes passed in here are not all of the original GT boxes, but the GT box best matched to each anchor, so rows may repeat
    # Compute the width and height of every GT box
    gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
    gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
    # Compute the x, y coordinates of every GT box's center
    gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
    gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights
    # Bounding-box regression needs four quantities: dx, dy (translation of the center) and dw, dh (scaling of width and height)
    # Unlike the regression in Fast R-CNN, which is predicted from the RoI feature vector after the CNN convolutions,
    # here the regression is applied to anchors laid out on the original image, which is more intuitive
    # Definition: T_x = P_w * dx(P) + P_x,  T_y = P_h * dy(P) + P_y,  T_w = P_w * exp(dw(P)),  T_h = P_h * exp(dh(P))
    # P is the anchor and T the transformed box; the goal is T ≈ G, where G is the ground truth
    # The regression targets are dx(P), dy(P), dw(P), dh(P), i.e. the dx, dy, dw, dh computed below
    targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths
    targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights
    targets_dw = np.log(gt_widths / ex_widths)
    targets_dh = np.log(gt_heights / ex_heights)
    # targets_dx, targets_dy, targets_dw, targets_dh each have shape (anchors.shape[0],)
    # so targets has shape (anchors.shape[0], 4)
    targets = np.vstack(
        (targets_dx, targets_dy, targets_dw, targets_dh)).transpose()
    return targets
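
# --- Illustrative example, not part of the original bbox_transform.py: a quick
# --- sanity check of bbox_transform on a single made-up anchor/GT pair.
# anchor: w=11, h=21, center=(15.5, 20.5); GT: w=15, h=25, center=(19.5, 26.5)
_example_anchor = np.array([[10., 10., 20., 30.]])
_example_gt = np.array([[12., 14., 26., 38.]])
# expected: dx = 4/11, dy = 6/21, dw = log(15/11), dh = log(25/21)
print(bbox_transform(_example_anchor, _example_gt))  # ~ [[0.3636 0.2857 0.3102 0.1744]]
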
# boxes holds the anchors, deltas holds the output of the 'rpn_bbox_pred' layer
# Purpose: obtain the refined anchor boxes (x1, y1, x2, y2)
def bbox_transform_inv(boxes, deltas):
    # boxes.shape[0] = K*A = Height*Width*A
    if boxes.shape[0] == 0:
        return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype)

    boxes = boxes.astype(deltas.dtype, copy=False)
    # Compute the width, height, and center x, y coordinates of the Height*Width*A anchors
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights
    # In the RPN case deltas has exactly 4 columns, storing (dx, dy, dw, dh) in order; each row corresponds to one anchor
    # 0::4 takes the first element and then every 4th one, i.e. indices (0, 4, 8, 12, ...); with only 4 columns, each slice selects a single column
    dx = deltas[:, 0::4]
    dy = deltas[:, 1::4]
    dw = deltas[:, 2::4]
    dh = deltas[:, 3::4]
    # Predicted center point, width and height
    pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
    pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
    pred_w = np.exp(dw) * widths[:, np.newaxis]
    pred_h = np.exp(dh) * heights[:, np.newaxis]
    # Store the predicted (x1, y1, x2, y2) into pred_boxes
    pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
    # x1
    pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w
    # y1
    pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h
    # x2
    pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w
    # y2
    pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h

    return pred_boxes
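
# --- Illustrative round trip, not part of the original file: feeding the targets
# --- from bbox_transform back into bbox_transform_inv. Note that x2 and y2 come out
# --- one pixel larger than the GT box, because bbox_transform measures widths with
# --- the "+1" convention while bbox_transform_inv adds back 0.5*w without the -1.
_example_deltas = bbox_transform(_example_anchor, _example_gt)
print(bbox_transform_inv(_example_anchor, _example_deltas))  # ~ [[12. 14. 27. 39.]]
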
# Purpose: clip boxes so they lie inside the image
def clip_boxes(boxes, im_shape):
    """
    Clip boxes to image boundaries.
    """
    # im_shape[0] is the image height, im_shape[1] is the image width
    # Clamp the boxes to the image boundaries
    # x1 >= 0
    boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)
    # y1 >= 0
    boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)
    # x2 < im_shape[1]
    boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)
    # y2 < im_shape[0]
    boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)
    return boxes
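
# --- Illustrative example, not part of the original file: clip_boxes clamps a box
# --- that sticks out of a hypothetical 600 (height) x 800 (width) image into the
# --- valid pixel range [0, 799] x [0, 599].
_example_boxes = np.array([[-5., 10., 820., 630.]])
print(clip_boxes(_example_boxes, (600, 800)))  # -> [[  0.  10. 799. 599.]]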
 

Reposted from https://blog.csdn.net/l297969586/article/details/78026221
