# File: proposal_layer.py
# --------------------------------------------------------
# Faster R-CNN
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick and Xinlei Chen
# --------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
import numpy as np
from model.config import cfg
from model.bbox_transform import bbox_transform_inv, clip_boxes, bbox_transform_inv_tf, clip_boxes_tf
from model.nms_wrapper import nms
'''
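The function that is actually used is proposal_layer_tf, which is called from the network code. Every function whose name ends in "layer" is called from inside the network.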
'''
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
    """Convert RPN outputs into a ranked, NMS-filtered set of proposals (NumPy).

    A simplified version compared to fast/er RCNN.
    For details please see the technical report.

    Args:
        rpn_cls_prob: 4-D array of RPN class probabilities. Only the channels
            from ``num_anchors`` onward are used as scores (presumably the
            foreground probabilities -- confirm against the network layout).
        rpn_bbox_pred: RPN box regression output; flattened to rows of 4.
        im_info: image information; its first two entries are handed to
            ``clip_boxes``.
        cfg_key: key selecting the RPN settings in ``cfg``; ``bytes`` keys
            are decoded as UTF-8 first.
        _feat_stride: not used by this function.
        anchors: anchor boxes handed to ``bbox_transform_inv``.
        num_anchors: channel offset at which the used scores start.

    Returns:
        tuple ``(blob, scores)``: ``blob`` is a float32 array whose first
        column is an all-zero batch index followed by the proposal
        coordinates; ``scores`` holds the matching score of every kept
        proposal as a column vector.
    """
    # The key may arrive as bytes; decode it so it can index cfg.
    # isinstance (rather than an exact type comparison) also accepts
    # bytes subclasses.
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')

    # RPN settings predefined in the config.
    # Number of top scoring boxes to keep before applying NMS.
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    # Number of top scoring boxes to keep after applying NMS.
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    # NMS threshold used on RPN proposals.
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    # Get the scores and bounding boxes.
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    scores = scores.reshape((-1, 1))
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    proposals = clip_boxes(proposals, im_info[:2])

    # Pick the top region proposals: indices sorted by descending score.
    order = scores.ravel().argsort()[::-1]
    # Keep at most pre_nms_topN candidates (a non-positive value disables
    # the limit).
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # Non-maximal suppression on rows of (box, score).
    keep = nms(np.hstack((proposals, scores)), nms_thresh)

    # Pick the top region proposals after NMS (a non-positive value
    # disables the limit).
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only support single image as input, so the batch index is always 0.
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob, scores
def proposal_layer_tf(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
    """Convert RPN outputs into NMS-filtered proposals using TensorFlow ops.

    This is the variant that is actually called. It is more compact than
    ``proposal_layer``: there is no explicit sort and no pre-NMS top-N
    truncation here; the post-NMS limit is applied through
    ``max_output_size`` of ``tf.image.non_max_suppression``.

    Args:
        rpn_cls_prob: 4-D tensor of RPN class probabilities. Only the
            channels from ``num_anchors`` onward are used as scores
            (presumably the foreground probabilities -- confirm against the
            network layout).
        rpn_bbox_pred: RPN box regression output; reshaped to rows of 4.
        im_info: image information; its first two entries are handed to
            ``clip_boxes_tf``.
        cfg_key: key selecting the RPN settings in ``cfg``; ``bytes`` keys
            are decoded as UTF-8 first.
        _feat_stride: not used by this function.
        anchors: anchor boxes handed to ``bbox_transform_inv_tf``.
        num_anchors: channel offset at which the used scores start.

    Returns:
        tuple ``(blob, scores)``: ``blob`` has an all-zero float32 batch
        index column concatenated in front of the selected boxes;
        ``scores`` is the matching score column of shape (-1, 1).
    """
    # isinstance (rather than an exact type comparison) also accepts
    # bytes subclasses.
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    # Get the scores and bounding boxes.
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    scores = tf.reshape(scores, shape=(-1,))
    rpn_bbox_pred = tf.reshape(rpn_bbox_pred, shape=(-1, 4))
    proposals = bbox_transform_inv_tf(anchors, rpn_bbox_pred)
    proposals = clip_boxes_tf(proposals, im_info[:2])

    # Non-maximal suppression.
    indices = tf.image.non_max_suppression(
        proposals, scores, max_output_size=post_nms_topN, iou_threshold=nms_thresh)
    boxes = tf.gather(proposals, indices)
    # tf.cast replaces the deprecated tf.to_float.
    boxes = tf.cast(boxes, tf.float32)
    scores = tf.gather(scores, indices)
    scores = tf.reshape(scores, shape=(-1, 1))

    # Only support single image as input, so the batch index is always 0.
    batch_inds = tf.zeros((tf.shape(indices)[0], 1), dtype=tf.float32)
    blob = tf.concat([batch_inds, boxes], 1)
    return blob, scores
# File: proposal_target_layer.py
# --------------------------------------------------------
# Faster R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick, Sean Bell and Xinlei Chen
# --------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import numpy.random as npr
from model.config import cfg
from model.bbox_transform import bbox_transform
from utils.cython_bbox import bbox_overlaps
'''
'''
def proposal_target_layer(rpn_rois, rpn_scores, gt_boxes, _num_classes):
    """Assign object detection proposals to ground-truth targets.

    Produces proposal classification labels and bounding-box regression
    targets: each proposal is labelled according to the ground truth and
    its regression offsets are computed.

    Args:
        rpn_rois: proposal RoIs (0, x1, y1, x2, y2) coming from the RPN
            (i.e., rpn.proposal_layer.ProposalLayer), or any other source.
        rpn_scores: score of every proposal in ``rpn_rois``.
        gt_boxes: ground-truth boxes; the last column is dropped when the
            boxes are appended to the RoIs, and ``_sample_rois`` reads
            column 4 as the class label.
        _num_classes: number of classes; determines the width
            (``_num_classes * 4``) of the regression target arrays.

    Returns:
        tuple ``(rois, roi_scores, labels, bbox_targets,
        bbox_inside_weights, bbox_outside_weights)`` reshaped to
        (-1, 5), (-1,), (-1, 1), (-1, 4K), (-1, 4K) and (-1, 4K)
        respectively, with K = ``_num_classes``.
    """
    all_rois = rpn_rois
    all_scores = rpn_scores

    # Include ground-truth boxes in the set of candidate rois.
    # (The original author notes this flag is False in their config, so this
    # branch is normally skipped.)
    if cfg.TRAIN.USE_GT:
        # One zero per ground-truth box: used both as the batch-index column
        # and as the score of the appended boxes.
        zeros = np.zeros((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype)
        # Prefix the gt coordinates (label column dropped) with the zero
        # column, then append those rows below the existing RoIs.
        all_rois = np.vstack(
            (all_rois, np.hstack((zeros, gt_boxes[:, :-1])))
        )
        # not sure if it a wise appending, but anyway i am not using it
        all_scores = np.vstack((all_scores, zeros))

    # cfg.TRAIN.BATCH_SIZE is the number of RoIs sampled per batch; with a
    # single image this is also the per-image RoI budget.
    num_images = 1
    rois_per_image = cfg.TRAIN.BATCH_SIZE / num_images
    # Upper bound on foreground RoIs, e.g. 100 * 0.25 = 25.
    fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)

    # Sample rois with classification labels and bounding box regression
    # targets.
    labels, rois, roi_scores, bbox_targets, bbox_inside_weights = _sample_rois(
        all_rois, all_scores, gt_boxes, fg_rois_per_image,
        rois_per_image, _num_classes)

    # Normalise the shapes expected downstream.
    rois = rois.reshape(-1, 5)
    roi_scores = roi_scores.reshape(-1)
    labels = labels.reshape(-1, 1)
    bbox_targets = bbox_targets.reshape(-1, _num_classes * 4)
    bbox_inside_weights = bbox_inside_weights.reshape(-1, _num_classes * 4)
    # Uniform outside weight: 1 wherever an inside weight is set, else 0.
    bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)
    return rois, roi_scores, labels, bbox_targets, bbox_inside_weights, bbox_outside_weights
def _get_bbox_regression_labels(bbox_target_data, num_classes):
    """Expand compact regression targets into the 4-of-4*K layout.

    Bounding-box regression targets (bbox_target_data) are stored in a
    compact form N x (class, tx, ty, tw, th). This function expands those
    targets into the 4-of-4*K representation used by the network (i.e. only
    one class has non-zero targets): the four offsets of a row with class
    ``c > 0`` are written to columns ``4*c .. 4*c + 3``. Rows whose class is
    not greater than 0 (background) stay all-zero.

    Args:
        bbox_target_data: N x (class, tx, ty, tw, th) array.
        num_classes: number of classes K.

    Returns:
        bbox_target (ndarray): N x 4K blob of regression targets
        bbox_inside_weights (ndarray): N x 4K blob of loss weights
    """
    clss = bbox_target_data[:, 0]
    bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
    bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)

    # Rows with a class label greater than 0, i.e. the foreground boxes.
    fg_rows = np.where(clss > 0)[0]
    if fg_rows.size == 0:
        return bbox_targets, bbox_inside_weights

    # First target column of every foreground row (class c -> column 4*c),
    # truncated to an integer like int(4 * cls).
    starts = (4 * clss[fg_rows]).astype(np.intp)
    # (n_fg, 4) column indices paired with (n_fg, 1) row indices, so all
    # foreground rows are filled with one indexed assignment instead of a
    # Python-level loop.
    cols = starts[:, np.newaxis] + np.arange(4)
    rows = fg_rows[:, np.newaxis]
    bbox_targets[rows, cols] = bbox_target_data[fg_rows, 1:]
    bbox_inside_weights[rows, cols] = cfg.TRAIN.BBOX_INSIDE_WEIGHTS
    return bbox_targets, bbox_inside_weights
def _compute_targets(ex_rois, gt_rois, labels):
    """Compute bounding-box regression targets for an image.

    Args:
        ex_rois: N x 4 array of RoI coordinates.
        gt_rois: N x 4 array with the ground-truth box paired with each RoI.
        labels: length-N array of class labels, one per RoI.

    Returns:
        float32 array with the label in the first column followed by the
        (optionally normalised) regression targets.
    """
    # The two box sets must pair up row-for-row and carry four coordinates.
    assert ex_rois.shape[0] == gt_rois.shape[0]
    assert ex_rois.shape[1] == 4
    assert gt_rois.shape[1] == 4

    # bbox_transform turns each (RoI, ground-truth) pair into offsets.
    deltas = bbox_transform(ex_rois, gt_rois)

    # Optionally normalize targets by a precomputed mean and stdev; both are
    # fixed values read from the config rather than computed from the data.
    if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        centered = deltas - np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
        deltas = centered / np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS)

    label_column = labels[:, np.newaxis]
    packed = np.hstack((label_column, deltas))
    return packed.astype(np.float32, copy=False)
def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background.

    Args:
        all_rois: candidate RoIs; columns 1..4 hold the box coordinates.
        all_scores: score of every candidate RoI.
        gt_boxes: ground-truth boxes; columns 0..3 hold the coordinates and
            column 4 the class label.
        fg_rois_per_image: maximum number of foreground RoIs to sample.
        rois_per_image: total number of RoIs to sample.
        num_classes: number of classes, forwarded to
            ``_get_bbox_regression_labels``.

    Returns:
        tuple ``(labels, rois, roi_scores, bbox_targets,
        bbox_inside_weights)`` for the sampled RoIs, foreground first.

    Raises:
        RuntimeError: if no RoI qualifies as foreground or background, so
            nothing can be sampled.
    """
    # overlaps: (rois x gt_boxes)
    # np.float64 is what the removed ``np.float`` alias used to resolve to.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    # For every RoI: index of the best-overlapping gt box and that overlap.
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    # Each RoI takes the label of its assigned gt box (column 4; columns 0-3
    # are the coordinates).
    labels = gt_boxes[gt_assignment, 4]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Guard against the case when an image has fewer than fg_rois_per_image
    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
                       (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]

    # Small modification to the original version where we ensure a fixed
    # number of regions are sampled
    if fg_inds.size > 0 and bg_inds.size > 0:
        # Cap the foreground count, sample it without replacement, and fill
        # the rest of the batch with background (with replacement only if
        # there are too few background RoIs).
        fg_rois_per_image = min(fg_rois_per_image, fg_inds.size)
        fg_inds = npr.choice(fg_inds, size=int(fg_rois_per_image), replace=False)
        bg_rois_per_image = rois_per_image - fg_rois_per_image
        to_replace = bg_inds.size < bg_rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(bg_rois_per_image), replace=to_replace)
    elif fg_inds.size > 0:
        # Foreground only: the whole batch is foreground.
        to_replace = fg_inds.size < rois_per_image
        fg_inds = npr.choice(fg_inds, size=int(rois_per_image), replace=to_replace)
        fg_rois_per_image = rois_per_image
    elif bg_inds.size > 0:
        # Background only: the whole batch is background.
        to_replace = bg_inds.size < rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(rois_per_image), replace=to_replace)
        fg_rois_per_image = 0
    else:
        # Fail loudly instead of dropping into an interactive debugger,
        # which would stall a non-interactive training run.
        raise RuntimeError(
            'No foreground or background RoIs available for sampling: '
            'no RoI overlaps a ground-truth box by >= TRAIN.FG_THRESH or '
            'falls within [TRAIN.BG_THRESH_LO, TRAIN.BG_THRESH_HI).')

    # The indices that we're selecting (both fg and bg), foreground first.
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0: everything after the
    # sampled foreground entries is background.
    labels[int(fg_rois_per_image):] = 0
    rois = all_rois[keep_inds]
    roi_scores = all_scores[keep_inds]

    # Step 1: offsets between each sampled RoI and its assigned gt box.
    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

    # Step 2: expand into per-class targets; the inside weights are produced
    # here.
    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, roi_scores, bbox_targets, bbox_inside_weights
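# Returns the foreground/background labels of the sampled boxes, the RoIs and their scores, the bbox regression targets, and the inside weights.
# This overlaps to some degree with anchor_target_layer, but that layer works on anchors (the input stage of the RPN), whereas this one post-processes the results produced by the RPN.
# The inputs (rpn_rois, rpn_scores, gt_boxes, _num_classes) make this visible: foreground and background are still distinguished here, but because _num_classes is involved the labels carry the concrete foreground class, so they can be used for the training stage that follows the RPN.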