ProposalLayer源碼解析
標籤(空格分隔): faster-rcnn 物體檢測 源碼
# --------------------------------------------------------
# Faster R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick and Sean Bell
# --------------------------------------------------------
import caffe
import numpy as np
import yaml
from fast_rcnn.config import cfg
from generate_anchors import generate_anchors
from fast_rcnn.bbox_transform import bbox_transform_inv, clip_boxes
from fast_rcnn.nms_wrapper import nms
DEBUG = False
'''
ProposalLayer是由python寫的一個層
ProposalLayer有兩個輸入
bottom[0]:是每一個anchor是否是前景的概率,大小爲:(batch_size,2*k,w,h)
bottom[1]: 每個anchor的座標偏移量,大小爲:(batch_size,4*k,w,h)
ProposalLayer的輸出是一個二維矩陣
top[0]:(num_rois,5)
每一行代表(batch_ind,start_w,start_h,end_w,end_h)
'''
class ProposalLayer(caffe.Layer):
"""
Outputs object detection proposals by applying estimated bounding-box
transformations to a set of regular boxes (called "anchors").
"""
'setup這個函數從配置文件中讀取配置信息,包括共享卷積層最後一層的feature map 相對於原始圖像的縮放比例'
'以及anchor的寬高比和基準尺寸之類的'
def setup(self, bottom, top):
# parse the layer parameter string, which must be valid YAML
layer_params = yaml.load(self.param_str_)
self._feat_stride = layer_params['feat_stride']
anchor_scales = layer_params.get('scales', (8, 16, 32))
self._anchors = generate_anchors(scales=np.array(anchor_scales))
self._num_anchors = self._anchors.shape[0]。#feature map上的每一點產生的anchor數
if DEBUG:
print 'feat_stride: {}'.format(self._feat_stride)
print 'anchors:'
print self._anchors
# rois blob: holds R regions of interest, each is a 5-tuple
# (n, x1, y1, x2, y2) specifying an image batch index n and a
# rectangle (x1, y1, x2, y2)
top[0].reshape(1, 5)
# scores blob: holds scores for R regions of interest
if len(top) > 1:
top[1].reshape(1, 1, 1, 1)
def forward(self, bottom, top):
# Algorithm:
#
# for each (H, W) location i
# generate A anchor boxes centered on cell i
# apply predicted bbox deltas at cell i to each of the A anchors
# clip predicted boxes to image
# remove predicted boxes with either height or width < threshold
# sort all (proposal, score) pairs by score from highest to lowest
# take top pre_nms_topN proposals before NMS
# apply NMS with threshold 0.7 to remaining proposals
# take after_nms_topN proposals after NMS
# return the top proposals (-> RoIs top, scores top)
assert bottom[0].data.shape[0] == 1, \
'Only single item batches are supported'
'''
從配置文件中讀取相關的配置參數
cfg_key:當前是測試階段還是訓練階段;
pre_nms_topN: 在NMS處理之前,分數在前面的rois
post_nms_topN: 在NMS處理之後,分數在前面的rois
nms_thresh: NMS的閾值
min_size: rois最小的尺寸
'''
cfg_key = str(self.phase) # either 'TRAIN' or 'TEST'
pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
min_size = cfg[cfg_key].RPN_MIN_SIZE
# the first set of _num_anchors channels are bg probs
# the second set are the fg probs, which we want
scores = bottom[0].data[:, self._num_anchors:, :, :] #選取前num_anchors個feature map
bbox_deltas = bottom[1].data #bottom[1]相對於默認的座標的偏移量
im_info = bottom[2].data[0, :] #原圖像的相關信息,長寬之類的
if DEBUG:
print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
print 'scale: {}'.format(im_info[2])
# 1. Generate proposals from bbox deltas and shifted anchors
height, width = scores.shape[-2:]
if DEBUG:
print 'score map size: {}'.format(scores.shape)
# Enumerate all shifts
'將feature map上的點對應到原圖像上'
shift_x = np.arange(0, width) * self._feat_stride
shift_y = np.arange(0, height) * self._feat_stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel())).transpose()#shifts的大小爲是一個二維舉證(w*h,4)
# Enumerate all shifted anchors:
#
# add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4)
# reshape to (K*A, 4) shifted anchors
A = self._num_anchors
K = shifts.shape[0]
anchors = self._anchors.reshape((1, A, 4)) + \
shifts.reshape((1, K, 4)).transpose((1, 0, 2))
anchors = anchors.reshape((K * A, 4))#生成默認的anchor的座標
# Transpose and reshape predicted bbox transformations to get them
# into the same order as the anchors:
#
# bbox deltas will be (1, 4 * A, H, W) format
# transpose to (1, H, W, 4 * A)
# reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
# in slowest to fastest order
bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))#生成anchor座標的偏移量,anchor的個數爲A(w*h)*K
# Same story for the scores:
#
# scores are (1, A, H, W) format
# transpose to (1, H, W, A)
# reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))#獲取每一個anchor的分數
# Convert anchors into proposals via bbox transformations
proposals = bbox_transform_inv(anchors, bbox_deltas)#進行座標偏移,經過這一步之後anchor邊成了proposal
# 2. clip predicted boxes to image
proposals = clip_boxes(proposals, im_info[:2])#然後 將proposal限制在原圖像範圍之內
# 3. remove predicted boxes with either height or width < threshold
# (NOTE: convert min_size to input image scale stored in im_info[2])
keep = _filter_boxes(proposals, min_size * im_info[2])#最小尺寸的限制
proposals = proposals[keep, :]
scores = scores[keep]
# 4. sort all (proposal, score) pairs by score from highest to lowest
# 5. take top pre_nms_topN (e.g. 6000)
order = scores.ravel().argsort()[::-1]#對分數進行排序,
if pre_nms_topN > 0:
order = order[:pre_nms_topN]
proposals = proposals[order, :]#在nms之前取pre_nms_topN個proposal
scores = scores[order]
# 6. apply nms (e.g. threshold = 0.7)
# 7. take after_nms_topN (e.g. 300)
# 8. return the top proposals (-> RoIs top)
keep = nms(np.hstack((proposals, scores)), nms_thresh)#進行NMS
if post_nms_topN > 0:
keep = keep[:post_nms_topN]
proposals = proposals[keep, :]#取NMS之後的post_nms_topN
scores = scores[keep]#獲取對應分數
# Output rois blob
# Our RPN implementation only supports a single input image, so all
# batch inds are 0
batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)#獲取batch_size的索引,將proposal對應到batch_size的那一張圖片
blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
top[0].reshape(*(blob.shape))
top[0].data[...] = blob
# [Optional] output scores blob
if len(top) > 1:
top[1].reshape(*(scores.shape))
top[1].data[...] = scores
def backward(self, top, propagate_down, bottom):
"""This layer does not propagate gradients."""
pass
def reshape(self, bottom, top):
"""Reshaping happens during the call to forward."""
pass
def _filter_boxes(boxes, min_size):
"""Remove all boxes with any side smaller than min_size."""
ws = boxes[:, 2] - boxes[:, 0] + 1
hs = boxes[:, 3] - boxes[:, 1] + 1
keep = np.where((ws >= min_size) & (hs >= min_size))[0]
return keep