faster rcnn代碼解讀參考:
https://github.com/adityaarun1/pytorch_fast-er_rcnn
https://github.com/jwyang/faster-rcnn.pytorch
之前的rpn_head主要是爲了獲取feature map分類的fg/bg的anchor。
而proposal_layer是爲了將所有的anchor進行nms。也就是進一步刪除不必要的anchor。
class proposal_layer(nn.Module):
    """Turn RPN outputs into a fixed-size set of scored region proposals.

    Outputs object detection proposals by applying estimated bounding-box
    transformations to a set of regular boxes (called "anchors"), then
    keeping the best-scoring boxes before and after non-maximum
    suppression (NMS).
    """

    def __init__(self, feat_stride, scales, ratios):
        """Store the anchor configuration.

        Args:
            feat_stride: Anchor stride, forwarded to ``generate_anchors_pre``.
            scales: Sequence of anchor scales.
            ratios: Sequence of anchor aspect ratios.
        """
        super(proposal_layer, self).__init__()
        self.feat_stride = feat_stride
        self.anchor_scales = scales
        self.anchor_ratios = ratios
        # Number of anchors generated at every feature-map location.
        self.num_anchors = len(scales) * len(ratios)

    def forward(self, rpn_cls_logit, rpn_bbox_pred, im_info, model='train'):
        """Generate proposals for a batch of feature maps.

        Algorithm, per image:
          1. generate the anchors for every feature-map location
          2. apply the predicted bbox deltas to the anchors
          3. clip the predicted boxes to the image
          4. sort all (proposal, score) pairs by score, highest first
          5. keep the top ``pre_nms_top_n`` proposals
          6. apply NMS with the configured threshold
          7. keep the top ``post_nms_top_n`` survivors

        NOTE: the "remove boxes smaller than min_size" step of the reference
        algorithm is not performed by this implementation.

        Args:
            rpn_cls_logit: Softmaxed RPN class scores, expected as
                [batch, feat_h, feat_w, num_anchors * 2]. The first
                ``num_anchors`` channels are background probabilities, the
                remaining ones foreground probabilities.
            rpn_bbox_pred: Predicted box deltas, expected as
                [batch, feat_h, feat_w, num_anchors * 4].
            im_info: Image information. Only ``im_info[0, :2]`` is used (for
                clipping), so every image in the batch is clipped against the
                first image's entry -- presumably (height, width); confirm
                against ``clip_boxes_batch``.
            model: ``'train'`` or ``'test'``; selects which ``cfg`` entries
                provide the top-N counts and the NMS threshold.

        Returns:
            Tuple ``(blob_batch, scores_batch)``:
              * ``blob_batch`` -- [batch, post_nms_top_n, 5]; column 0 is the
                batch index, columns 1-4 the proposal box.
              * ``scores_batch`` -- [batch, post_nms_top_n, 1]; the matching
                foreground scores.
            NMS may keep fewer than ``post_nms_top_n`` boxes; the unused
            trailing rows are left as zeros.

        Raises:
            ValueError: If ``model`` is neither ``'train'`` nor ``'test'``.

        Side effects:
            Sets ``self.anchors`` (batched anchors on the device of
            ``rpn_bbox_pred``) and ``self.anchor_length``.
        """
        # Resolve the mode first so that a typo fails loudly instead of
        # silently running with all-zero thresholds.
        if model == 'train':
            pre_nms_top_n = cfg['train_rpn_pre_nms_top_n']
            post_nms_top_n = cfg['train_rpn_post_nms_top_n']
            nms_thresh = cfg['train_rpn_nms_thresh']
        elif model == 'test':
            pre_nms_top_n = cfg['test_rpn_pre_nms_top_n']
            post_nms_top_n = cfg['test_rpn_post_nms_top_n']
            nms_thresh = cfg['test_rpn_nms_thresh']
        else:
            raise ValueError(
                "model must be 'train' or 'test', got {!r}".format(model))

        batch_size, feat_height, feat_width = rpn_cls_logit.shape[0:3]

        # Anchors depend only on the feature-map size and anchor settings.
        anchors, anchor_length = generate_anchors_pre(
            feat_height, feat_width, self.feat_stride,
            self.anchor_scales, self.anchor_ratios)
        # Replicate the single-image anchors for every image in the batch.
        anchors = np.stack((anchors,) * batch_size, axis=0)
        self.anchors = torch.from_numpy(anchors).to(rpn_bbox_pred.device)
        self.anchor_length = anchor_length

        # Foreground scores are the second block of num_anchors channels:
        # [batch, feat_h, feat_w, num_anchors] -> [batch, N, 1].
        scores = rpn_cls_logit[:, :, :, self.num_anchors:]
        scores = scores.contiguous().view(batch_size, -1, 1)
        # [batch, feat_h, feat_w, num_anchors * 4] -> [batch, N, 4].
        rpn_bbox_pred = rpn_bbox_pred.view((batch_size, -1, 4))

        # Apply the predicted deltas to the anchors, then clip the resulting
        # boxes to the image.
        proposals = bbox_transform_inv(self.anchors, rpn_bbox_pred)
        proposals = clip_boxes_batch(proposals, im_info[0, :2], batch_size)

        # Fixed-size outputs so several images can be stacked in one tensor;
        # rows beyond the number of NMS survivors stay zero.
        blob_batch = proposals.new_zeros((batch_size, post_nms_top_n, 5))
        scores_batch = proposals.new_zeros((batch_size, post_nms_top_n, 1))

        for i in range(batch_size):
            # Rank this image's proposals by foreground score, best first.
            scores_i, order_i = scores[i].view(-1).sort(descending=True)
            if pre_nms_top_n > 0:
                # Slicing past the end is a no-op, so no upper bound check
                # against the per-image count is required.
                order_i = order_i[:pre_nms_top_n]
                scores_i = scores_i[:pre_nms_top_n]
            # Always reorder the boxes so they stay aligned with the sorted
            # scores, whether or not truncation happened.
            proposals_i = proposals[i][order_i, :]

            # Non-maximum suppression returns the indices of the boxes to keep.
            keep = nms(proposals_i, scores_i, nms_thresh).long().view(-1)
            if post_nms_top_n > 0:
                keep = keep[:post_nms_top_n]
            proposals_i = proposals_i[keep, :]
            scores_i = scores_i[keep].view(-1, 1)

            num_kept = scores_i.shape[0]
            scores_batch[i, :num_kept, :] = scores_i
            # Prefix every box with the index of the image it belongs to.
            batch_inds = proposals_i.new_full((num_kept, 1), i)
            blob_batch[i, :num_kept, :] = torch.cat((batch_inds, proposals_i), 1)

        return blob_batch, scores_batch
這裏可能不是特別對。
特別是下面這兩行,因爲實際上經過nms最後的個數是很可能小於_post_nms_topN的,但是當時爲了批量處理改成了這種形式。實際上最後也是用了batch=1,因此可以認爲沒必要。
blob_batch = proposals.new(batch_size, _post_nms_topN, 5).zero_() #有可能到不了這個_post_nms_topN
scores_batch = proposals.new(batch_size, _post_nms_topN, 1).zero_()#有可能到不了這個_post_nms_topN
二、處理步驟
- 獲取nms前後保留的proposal個數(_pre_nms_topN、_post_nms_topN)以及nms的閾值。
- rpn_cls_logit也就是對應rpn_head的softmax後的得分,因爲NMS第一步就是根據得分排序的。
- 使用generate_anchors_pre生成anchor_length個anchor,這裏纔是直接根據generate_anchors_pre生成anchor,rpn_head裏面是從網絡卷積來的,可以預測,但是在訓練的過程中還是需要首先在feature map上生成anchors
- 然後就是根據傳入的批量將單張圖的anchor以棧的形式複製到批量中,這裏可以發現,anchor的生成只是與feature map的shape有關。其他的數據信息未影響到anchor生成
- 從rpn_cls_logit中提取後num_anchors個通道,也就是fg得分(前num_anchors個通道是bg得分)。[batch, feat_h, feat_w ,(num_anchors * 2)]->[batch, feat_h, feat_w ,num_anchors],爲什麼後num_anchors個就是前景得分,猜測是訓練時約定的。
- 將rpn_bbox_pred從[batch,feat_h, feat_w, (num_anchors*4)]->[batch,feat_h*feat_w*num_anchors, 4],這裏只是使用view。
- 然後進一步的將score轉換成相同類型[batch, feat_h, feat_w, num_anchors]->#(batch , h * w * num_anchors,1 ),這裏的得分是前景得分。
- 利用bbox_transform_inv將rpn_head預測的rpn_bbox_pred作用在剛纔生成的anchors上,因爲rpn_head預測的是偏移量。proposal的形式依舊是[batch,feat_h*feat_w*num_anchors, 4]也就是與rpn_bbox_pred相同,這裏也是兩個點座標的形式(anchor)
- 將偏移出去的proposals進行裁剪,因爲這裏的proposals已經對應到了input上。
- 先將得分最高的_pre_nms_topN個proposal獲取到,然後利用nms去掉與更高分proposal的iou大於threshold的proposal,其餘的保留下來。
- 繼續從這些篩選的結果裏面取出_post_nms_topN個proposal,這裏我是弄錯了的,因爲當前的proposal個數不一定大於_post_nms_topN,所以裏面有很多全零填充,之所以這樣,只是爲了考慮可以批量的組合數據。
- 輸出保留的proposal和得分score
三、總結
proposal layer這一層主要是爲了將rpn_head中預測的平移量作用到對應的anchor上,並利用非極大值抑制進行篩選。換種看法就是rpn_head生成score和四個座標平移和w、h放縮,然後proposal layer就利用這些迴歸量和NMS做一些bounding box的刪除合併。
'''
in :
rpn_cls_logit = softmax(rpn_cls_score) : [batch, feat_h, feat_w ,(num_anchors * 2)]經過softmax的cls得分
rpn_bbox_pred : [batch,feat_h, feat_w, (num_anchors*4)]#bbox的平移放縮變換量
out :
rpn_proposal : [batch , post_nms_topN ,5] # 映射到輸入圖像的proposal,最後一維的第0列是使用batch_inds進行cat得到的batch index(batch=1時全爲0),其餘4列是proposal座標。
rpn_proposal_scores : [batch , post_nms_topN ,1 ] # 每個rpn_proposal的得分,也就是通過NMS操作保留的那部分得分
'''