if img_data['bboxes'][bbox_num]['class'] != 'bg': # all GT boxes should be mapped to an anchor box, so we keep track of which anchor box was best if curr_iou > best_iou_for_bbox[bbox_num]: best_anchor_for_bbox[bbox_num] = [jy, ix, anchor_ratio_idx, anchor_size_idx] best_iou_for_bbox[bbox_num] = curr_iou best_x_for_bbox[bbox_num,:] = [x1_anc, x2_anc, y1_anc, y2_anc] best_dx_for_bbox[bbox_num,:] = [tx, ty, tw, th] # we set the anchor to positive if the IOU is >0.7 (it does not matter if there was another better box, it just indicates overlap) if curr_iou > C.rpn_max_overlap: bbox_type = 'pos' num_anchors_for_bbox[bbox_num] += 1 # we update the regression layer target if this IOU is the best for the current (x,y) and anchor position if curr_iou > best_iou_for_loc: best_iou_for_loc = curr_iou best_regr = (tx, ty, tw, th) # if the IOU is >0.3 and <0.7, it is ambiguous and no included in the objective if C.rpn_min_overlap < curr_iou < C.rpn_max_overlap: # gray zone between neg and pos if bbox_type != 'pos': bbox_type = 'neutral' # turn on or off outputs depending on IOUs if bbox_type == 'neg': y_is_box_valid[jy, ix, anchor_ratio_idx + n_anchratios * anchor_size_idx] = 1 y_rpn_overlap[jy, ix, anchor_ratio_idx + n_anchratios * anchor_size_idx] = 0 elif bbox_type == 'neutral': y_is_box_valid[jy, ix, anchor_ratio_idx + n_anchratios * anchor_size_idx] = 0 y_rpn_overlap[jy, ix, anchor_ratio_idx + n_anchratios * anchor_size_idx] = 0 elif bbox_type == 'pos': y_is_box_valid[jy, ix, anchor_ratio_idx + n_anchratios * anchor_size_idx] = 1 y_rpn_overlap[jy, ix, anchor_ratio_idx + n_anchratios * anchor_size_idx] = 1 start = 4 * (anchor_ratio_idx + n_anchratios * anchor_size_idx) y_rpn_regr[jy, ix, start:start+4] = best_regr 如果標註樣本不是背景,那麼更新一系列變量,其中bbox_num對應於標註樣本。如果iou大於best_iou_for_bbox[bbox_num],就記錄下該標註框目前最好的anchor及其座標和迴歸目標。如果iou大於C.rpn_max_overlap(初始設定爲0.7),則將bbox_type設定爲pos,這個變量對應於每一個anchor。這裏每一點的每一個anchor(在該代碼中是4*3個)都會跟該標註樣本里面所有的bounding box做對比;如果iou還大於best_iou_for_loc,則更新best_iou_for_loc,並將best_regr設爲該anchor的(tx, ty, tw, th)。
如果iou值在C.rpn_min_overlap和C.rpn_max_overlap之間,並且bbox_type還不是pos,那麼將bbox_type設爲neutral(中性)。
y_is_box_valid和y_rpn_overlap在該點(output寬高上的位置(jy, ix))用索引anchor_ratio_idx + n_anchratios * anchor_size_idx來標記對應的是哪個anchor。
一個點的anchor循環結束以後,根據bbox_type的值,在y_is_box_valid中標記該anchor是否有效(neg即背景和pos的有效,neutral的無效),並在y_rpn_overlap中標記該anchor是否與標註框重疊(只有pos的爲1)。
下邊做循環,保證每一個樣本的bbox都至少有一個best anchor
# we ensure that every bbox has at least one positive RPN region for idx in range(num_anchors_for_bbox.shape[0]): if num_anchors_for_bbox[idx] == 0: # no box with an IOU greater than zero ... if best_anchor_for_bbox[idx, 0] == -1: continue
# one issue is that the RPN has many more negative than positive regions, so we turn off some of the negative # regions. We also limit it to 256 regions. num_regions = 256 看註釋:因爲RPN的負樣本區域遠多於正樣本區域,所以關掉一部分負樣本區域,並且把regions的總數限制爲不能超過256。
if len(pos_locs[0]) > num_regions/2: val_locs = random.sample(range(len(pos_locs[0])), len(pos_locs[0]) - num_regions/2) y_is_box_valid[0, pos_locs[0][val_locs], pos_locs[1][val_locs], pos_locs[2][val_locs]] = 0 num_pos = num_regions/2 if len(neg_locs[0]) + num_pos > num_regions: val_locs = random.sample(range(len(neg_locs[0])), len(neg_locs[0]) - num_pos) y_is_box_valid[0, neg_locs[0][val_locs], neg_locs[1][val_locs], neg_locs[2][val_locs]] = 0
y_rpn_cls = np.concatenate([y_is_box_valid, y_rpn_overlap], axis=1) y_rpn_regr = np.concatenate([np.repeat(y_rpn_overlap, 4, axis=1), y_rpn_regr], axis=1) return np.copy(y_rpn_cls), np.copy(y_rpn_regr) 把y_is_box_valid和y_rpn_overlap沿axis=1拼接成y_rpn_cls,把重複4次的y_rpn_overlap和y_rpn_regr沿axis=1拼接成新的y_rpn_regr,然後把兩者的拷貝一起返回。訓練的時候是一張圖一張圖地訓練的。
參考文章鏈接:
https://zhuanlan.zhihu.com/p/28585873
https://zhuanlan.zhihu.com/p/24916624