訓練、測試都是調用了網絡。那麼網絡是如何搭建、恢復、調用,數據是如何在網絡裏面流動的呢?
network類基本包含了整個網絡的架構設計,直接給出代碼解析:
# --------------------------------------------------------
# Tensorflow Faster R-CNN
# Licensed under The MIT License [see LICENSE for details]
# Written by Xinlei Chen
# 南石北岸生2019.4.7
# https://mp.csdn.net/postedit
# --------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
import tensorflow.contrib.slim as slim
from tensorflow.contrib.slim import losses
from tensorflow.contrib.slim import arg_scope
import numpy as np
from layer_utils.snippets import generate_anchors_pre, generate_anchors_pre_tf
from layer_utils.proposal_layer import proposal_layer, proposal_layer_tf
from layer_utils.proposal_top_layer import proposal_top_layer, proposal_top_layer_tf
from layer_utils.anchor_target_layer import anchor_target_layer
from layer_utils.proposal_target_layer import proposal_target_layer
from utils.visualization import draw_bounding_boxes
from model.config import cfg
class Network(object):
    """Base class for the Faster R-CNN graph: anchors, RPN, RoI pooling,
    RCNN head, losses, and the train/test session helpers."""

    def __init__(self):
        # Containers that are filled in while the graph is assembled.
        # Dict-valued attributes first, then the list/scalar ones.
        for attr in ('_predictions', '_losses', '_anchor_targets',
                     '_proposal_targets', '_layers', '_score_summaries',
                     '_event_summaries', '_variables_to_fix'):
            setattr(self, attr, {})
        self._act_summaries = []
        self._train_summaries = []
        # Lazily built displayable ground-truth image (see _add_gt_image).
        self._gt_image = None
# Rebuild a displayable version of the input for tensorboard.
def _add_gt_image(self):
    """Add back the pixel means, undo the training-time rescale and flip
    BGR -> RGB, storing the result in self._gt_image."""
    # add back mean (inputs are stored mean-centred)
    denormed = self._image + cfg.PIXEL_MEANS
    # original (pre-resize) size = scaled size / scale factor
    orig_size = tf.to_int32(self._im_info[:2] / self._im_info[2])
    # OpenCV loads BGR; reverse the channel axis to get RGB for display
    self._gt_image = tf.reverse(tf.image.resize_bilinear(denormed, orig_size), axis=[-1])
# The following helpers all feed tensorboard summaries.
def _add_gt_image_summary(self):
    """Image summary with the ground-truth boxes drawn onto the input."""
    if self._gt_image is None:
        self._add_gt_image()  # build the displayable image lazily
    # use a customized visualization function to draw the boxes
    drawn = tf.py_func(
        draw_bounding_boxes,
        [self._gt_image, self._gt_boxes, self._im_info],
        tf.float32,
        name="gt_boxes")
    return tf.summary.image('GROUND_TRUTH', drawn)
def _add_act_summary(self, tensor):
    """Histogram + zero-fraction summaries for one activation tensor."""
    prefix = 'ACT/' + tensor.op.name
    tf.summary.histogram(prefix + '/activations', tensor)
    tf.summary.scalar(prefix + '/zero_fraction', tf.nn.zero_fraction(tensor))
def _add_score_summary(self, key, tensor):
    """Histogram summary for a score tensor, tagged with its dict key."""
    tag = 'SCORE/' + tensor.op.name + '/' + key + '/scores'
    tf.summary.histogram(tag, tensor)
def _add_train_summary(self, var):
    """Histogram summary for one trainable variable."""
    tf.summary.histogram('TRAIN/' + var.op.name, var)
# RPN puts 2*num_anchors channels on each feature-map position (fg/bg score
# per anchor).  This helper reshapes between that layout and a 2-channel one
# so a per-anchor two-class softmax can be applied.
def _reshape_layer(self, bottom, num_dim, name):
    """Force `bottom` to have `num_dim` channels (2 for scores, 2*A back).

    Overall idea: NHWC -> NCHW, collapse/expand the channel axis via a
    dynamically built shape, then transpose back to NHWC.
    """
    input_shape = tf.shape(bottom)  # dynamic input shape
    with tf.variable_scope(name) as scope:
        # change the channel to the caffe format (NHWC -> NCHW)
        to_caffe = tf.transpose(bottom, [0, 3, 1, 2])
        # then force it to have channel `num_dim`: concat builds the target
        # shape [1, num_dim, -1, W]; -1 folds the remainder into the 3rd axis
        reshaped = tf.reshape(to_caffe,
                              tf.concat(axis=0, values=[[1, num_dim, -1], [input_shape[2]]]))
        # then swap the channel back (NCHW -> NHWC)
        to_tf = tf.transpose(reshaped, [0, 2, 3, 1])
        return to_tf
# Example: with scales [4, 8, 16, 32] and ratios [0.5, 1, 2] there are 12
# anchors per location, each with fg/bg scores, so the score map has 24
# channels.  Softmax turns those raw scores into probabilities.
def _softmax_layer(self, bottom, name):
    """Softmax over the last axis; the RPN score map is flattened first."""
    if name.startswith('rpn_cls_prob_reshape'):
        input_shape = tf.shape(bottom)
        # flatten to (rows, channels) to satisfy tf.nn.softmax
        bottom_reshaped = tf.reshape(bottom, [-1, input_shape[-1]])
        reshaped_score = tf.nn.softmax(bottom_reshaped, name=name)
        # restore the original layout
        return tf.reshape(reshaped_score, input_shape)
    # anything other than the reshaped rpn scores is already 2-D
    return tf.nn.softmax(bottom, name=name)
# At test time there are two proposal-selection modes: 'nms' (default, fast)
# and 'top' (this one: just keep the highest-scoring boxes, no NMS).
def _proposal_top_layer(self, rpn_cls_prob, rpn_bbox_pred, name):
    """Pick the cfg.TEST.RPN_TOP_N best-scoring proposals (TOP mode)."""
    with tf.variable_scope(name) as scope:
        if cfg.USE_E2E_TF:
            # end-to-end TF path (default True): pure-TF implementation,
            # runs on GPU; only tested for the test-time forward pass
            rois, rpn_scores = proposal_top_layer_tf(
                rpn_cls_prob,
                rpn_bbox_pred,
                self._im_info,
                self._feat_stride,
                self._anchors,
                self._num_anchors
            )
            # NOTE(review): per the original author, when fewer than
            # RPN_TOP_N boxes exist the layer pads by random sampling and
            # may duplicate/miss boxes -- rare in practice; confirm in
            # proposal_top_layer_tf.
        else:
            # non-E2E path: tf.py_func bridges tensors to the numpy
            # implementation and converts the results back
            rois, rpn_scores = tf.py_func(proposal_top_layer,
                                          [rpn_cls_prob, rpn_bbox_pred, self._im_info,
                                           self._feat_stride, self._anchors, self._num_anchors],
                                          [tf.float32, tf.float32], name="proposal_top")
        # py_func loses static shapes; pin them here
        rois.set_shape([cfg.TEST.RPN_TOP_N, 5])
        rpn_scores.set_shape([cfg.TEST.RPN_TOP_N, 1])
    return rois, rpn_scores
# NMS counterpart of _proposal_top_layer (same name minus "top").
# Filters all anchors down to cfg.TRAIN.RPN_POST_NMS_TOP_N (e.g. 2000)
# proposals handed to the Fast R-CNN part:
#   rois, roi_scores = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
def _proposal_layer(self, rpn_cls_prob, rpn_bbox_pred, name):
    """Decode RPN outputs into scored ROIs via NMS."""
    with tf.variable_scope(name) as scope:
        if cfg.USE_E2E_TF:
            # pure-TF implementation (default)
            rois, rpn_scores = proposal_layer_tf(
                rpn_cls_prob,
                rpn_bbox_pred,
                self._im_info,
                self._mode,
                self._feat_stride,
                self._anchors,
                self._num_anchors
            )
        else:
            # numpy implementation bridged through tf.py_func
            rois, rpn_scores = tf.py_func(proposal_layer,
                                          [rpn_cls_prob, rpn_bbox_pred, self._im_info, self._mode,
                                           self._feat_stride, self._anchors, self._num_anchors],
                                          [tf.float32, tf.float32], name="proposal")
        # restore static shapes lost through py_func
        rois.set_shape([None, 5])
        rpn_scores.set_shape([None, 1])
    return rois, rpn_scores
# Only use it if you have roi_pooling op written in tf.image
# Never actually called: RoI pooling is done by _crop_pool_layer below
# (subclasses such as resnet may override that method).
# NOTE(review): stock TensorFlow ships no tf.image.roi_pooling -- calling
# this without a custom op compiled into tf.image would fail.
def _roi_pool_layer(self, bootom, rois, name):
    """Placeholder RoI-pooling wrapper around a (custom) tf.image op."""
    with tf.variable_scope(name) as scope:
        return tf.image.roi_pooling(bootom, rois,
                                    pooled_height=cfg.POOLING_SIZE,
                                    pooled_width=cfg.POOLING_SIZE,
                                    spatial_scale=1. / 16.)[0]
# RoI pooling implemented as crop_and_resize + max-pool.
def _crop_pool_layer(self, bottom, rois, name):
    """Crop each ROI from `bottom` and pool to POOLING_SIZE x POOLING_SIZE.

    `rois` rows are [batch_id, x1, y1, x2, y2]; `bottom` is the backbone
    head feature map.  Coordinates are in input-image scale (the feature
    stride maps them back -- roughly a 16x relation for resnet101).
    """
    with tf.variable_scope(name) as scope:
        # column 0 of rois is the batch index; squeeze to a 1-D vector
        batch_ids = tf.squeeze(tf.slice(rois, [0, 0], [-1, 1], name="batch_id"), [1])
        # Get the normalized coordinates of bounding boxes
        bottom_shape = tf.shape(bottom)
        # reconstruct the input height/width from the feature stride
        height = (tf.to_float(bottom_shape[1]) - 1.) * np.float32(self._feat_stride[0])
        width = (tf.to_float(bottom_shape[2]) - 1.) * np.float32(self._feat_stride[0])
        # normalize coordinates into [0, 1] as crop_and_resize expects
        x1 = tf.slice(rois, [0, 1], [-1, 1], name="x1") / width
        y1 = tf.slice(rois, [0, 2], [-1, 1], name="y1") / height
        x2 = tf.slice(rois, [0, 3], [-1, 1], name="x2") / width
        y2 = tf.slice(rois, [0, 4], [-1, 1], name="y2") / height
        # Won't be back-propagated to rois anyway, but to save time
        bboxes = tf.stop_gradient(tf.concat([y1, x1, y2, x2], axis=1))
        # crop at 2x the target size (default 14x14), then the 2x2
        # max-pool below brings it down to POOLING_SIZE (7x7)
        pre_pool_size = cfg.POOLING_SIZE * 2
        crops = tf.image.crop_and_resize(bottom, bboxes, tf.to_int32(batch_ids), [pre_pool_size, pre_pool_size], name="crops")
    return slim.max_pool2d(crops, [2, 2], padding='SAME')
# Thin wrapper over tf.nn.dropout.
def _dropout_layer(self, bottom, name, ratio=0.5):
    # NOTE(review): in TF1 the second positional argument of tf.nn.dropout
    # is keep_prob, so `ratio` here is the probability of *keeping* a unit,
    # not of dropping it -- confirm against the TF version in use.
    return tf.nn.dropout(bottom, ratio, name=name)
# _anchor_target_layer samples a minibatch of anchors to train the RPN
# itself:  rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor")
def _anchor_target_layer(self, rpn_cls_score, name):
    """Label anchors (-1/0/1) and build their bbox regression targets.

    Runs the numpy routine in anchor_target_layer.py through tf.py_func;
    nothing in here is back-propagated.
    """
    with tf.variable_scope(name) as scope:
        # full details live in anchor_target_layer.py
        rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = tf.py_func(
            anchor_target_layer,
            [rpn_cls_score, self._gt_boxes, self._im_info, self._feat_stride, self._anchors, self._num_anchors],
            [tf.float32, tf.float32, tf.float32, tf.float32],
            name="anchor_target")
        # py_func outputs have no static shape; pin them here
        rpn_labels.set_shape([1, 1, None, None])
        rpn_bbox_targets.set_shape([1, None, None, self._num_anchors * 4])
        rpn_bbox_inside_weights.set_shape([1, None, None, self._num_anchors * 4])
        rpn_bbox_outside_weights.set_shape([1, None, None, self._num_anchors * 4])
        # labels are integers
        rpn_labels = tf.to_int32(rpn_labels, name="to_int32")
        self._anchor_targets['rpn_labels'] = rpn_labels
        self._anchor_targets['rpn_bbox_targets'] = rpn_bbox_targets
        self._anchor_targets['rpn_bbox_inside_weights'] = rpn_bbox_inside_weights
        self._anchor_targets['rpn_bbox_outside_weights'] = rpn_bbox_outside_weights
        # register for tensorboard score summaries
        self._score_summaries.update(self._anchor_targets)
    # labels: 1 = foreground, 0 = background, -1 = ignored (presumably
    # assigned by overlap thresholds inside anchor_target_layer -- see there)
    return rpn_labels
# _proposal_target_layer samples cfg.TRAIN.BATCH_SIZE (e.g. 128) of the
# NMS-surviving rois to train the Fast R-CNN head:
#   rois, _ = self._proposal_target_layer(rois, roi_scores, "rpn_rois")
def _proposal_target_layer(self, rois, roi_scores, name):
    """Pick training rois plus their class / bbox-regression targets."""
    with tf.variable_scope(name) as scope:
        # numpy implementation bridged through tf.py_func
        rois, roi_scores, labels, bbox_targets, bbox_inside_weights, bbox_outside_weights = tf.py_func(
            proposal_target_layer,
            [rois, roi_scores, self._gt_boxes, self._num_classes],
            [tf.float32, tf.float32, tf.float32, tf.float32, tf.float32, tf.float32],
            name="proposal_target")
        # restore the static shapes lost through py_func
        rois.set_shape([cfg.TRAIN.BATCH_SIZE, 5])
        roi_scores.set_shape([cfg.TRAIN.BATCH_SIZE])
        labels.set_shape([cfg.TRAIN.BATCH_SIZE, 1])
        bbox_targets.set_shape([cfg.TRAIN.BATCH_SIZE, self._num_classes * 4])
        bbox_inside_weights.set_shape([cfg.TRAIN.BATCH_SIZE, self._num_classes * 4])
        bbox_outside_weights.set_shape([cfg.TRAIN.BATCH_SIZE, self._num_classes * 4])
        self._proposal_targets['rois'] = rois
        self._proposal_targets['labels'] = tf.to_int32(labels, name="to_int32")
        self._proposal_targets['bbox_targets'] = bbox_targets
        self._proposal_targets['bbox_inside_weights'] = bbox_inside_weights
        self._proposal_targets['bbox_outside_weights'] = bbox_outside_weights
        # register for tensorboard score summaries
        self._score_summaries.update(self._proposal_targets)
        return rois, roi_scores
# _proposal_layer, _anchor_target_layer and _proposal_target_layer together
# form the full proposal pipeline.  This helper generates the anchors; like
# the others it has a pure-TF and a numpy (py_func) variant --
# generate_anchors_pre_tf / generate_anchors_pre do the real work.
def _anchor_component(self):
    """Generate all anchors for the current input size into self._anchors."""
    with tf.variable_scope('ANCHOR_' + self._tag) as scope:
        # just to get the shape right: feature-map size from the stride
        height = tf.to_int32(tf.ceil(self._im_info[0] / np.float32(self._feat_stride[0])))
        width = tf.to_int32(tf.ceil(self._im_info[1] / np.float32(self._feat_stride[0])))
        if cfg.USE_E2E_TF:
            # pure-TF anchor generation (default)
            anchors, anchor_length = generate_anchors_pre_tf(
                height,
                width,
                self._feat_stride,
                self._anchor_scales,
                self._anchor_ratios
            )
        else:
            # numpy fallback through tf.py_func
            anchors, anchor_length = tf.py_func(generate_anchors_pre,
                                                [height, width,
                                                 self._feat_stride, self._anchor_scales, self._anchor_ratios],
                                                [tf.float32, tf.int32], name="generate_anchors")
        anchors.set_shape([None, 4])
        anchor_length.set_shape([])
        # keep for later use by the proposal / target layers
        self._anchors = anchors
        self._anchor_length = anchor_length
# Assemble the whole network; defaults to training mode.
def _build_network(self, is_training=True):
    """Backbone -> anchors -> RPN -> RoI pooling -> RCNN head.

    Returns (rois, cls_prob, bbox_pred).
    """
    # select initializers: truncated normal if cfg.TRAIN.TRUNCATED
    # (default False), otherwise plain random normal
    if cfg.TRAIN.TRUNCATED:
        initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
        initializer_bbox = tf.truncated_normal_initializer(mean=0.0, stddev=0.001)
    else:
        initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
        initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001)
    # backbone forward pass (subclass-defined), yields the head feature map
    net_conv = self._image_to_head(is_training)
    with tf.variable_scope(self._scope, self._scope):
        # build the anchors for the image -- depends only on the input
        # size, not on the feature values
        self._anchor_component()
        # region proposal network
        rois = self._region_proposal(net_conv, is_training, initializer)
        # region of interest pooling: note pool5 is a per-proposal
        # crop-then-pool, not a plain pooling of the whole feature map
        if cfg.POOLING_MODE == 'crop':
            pool5 = self._crop_pool_layer(net_conv, rois, "pool5")
        else:
            raise NotImplementedError
    # fully-connected tail, subclass-defined (e.g. resnetv1 conv5 + mean)
    fc7 = self._head_to_tail(pool5, is_training)
    with tf.variable_scope(self._scope, self._scope):
        # region classification: per-roi class scores and box refinements
        # (is_training toggles trainability of the fc layers inside)
        cls_prob, bbox_pred = self._region_classification(fc7, is_training,
                                                          initializer, initializer_bbox)
    self._score_summaries.update(self._predictions)
    # rois plus their class probabilities and box predictions
    return rois, cls_prob, bbox_pred
# Regression loss.
def _smooth_l1_loss(self, bbox_pred, bbox_targets, bbox_inside_weights,
                    bbox_outside_weights, sigma=1.0, dim=(1,)):
    """Smooth-L1 (Huber-style) bounding-box regression loss.

    bbox_inside_weights masks which coordinates contribute at all;
    bbox_outside_weights rebalances positive/negative samples.
    `dim` lists the axes summed before taking the batch mean.

    FIX: the default for `dim` was the mutable list [1]; changed to the
    value-equivalent immutable tuple (1,) to avoid the shared-mutable-
    default pitfall.  tf.reduce_sum accepts either.
    """
    sigma_2 = sigma ** 2
    box_diff = bbox_pred - bbox_targets              # raw offset error vs targets
    in_box_diff = bbox_inside_weights * box_diff     # mask out ignored coordinates
    abs_in_box_diff = tf.abs(in_box_diff)
    # indicator: 1 in the quadratic zone (|x| < 1/sigma^2), 0 in the linear
    # zone; stop_gradient because the indicator itself is not differentiated
    smoothL1_sign = tf.stop_gradient(tf.to_float(tf.less(abs_in_box_diff, 1. / sigma_2)))
    # piecewise smooth-L1: quadratic near zero, linear in the tails
    in_loss_box = tf.pow(in_box_diff, 2) * (sigma_2 / 2.) * smoothL1_sign \
                  + (abs_in_box_diff - (0.5 / sigma_2)) * (1. - smoothL1_sign)
    out_loss_box = bbox_outside_weights * in_loss_box
    loss_box = tf.reduce_mean(tf.reduce_sum(
        out_loss_box,
        axis=dim
    ))
    return loss_box
def _add_losses(self, sigma_rpn=3.0):
    """Build RPN + RCNN classification/box losses and the regularized total."""
    with tf.variable_scope('LOSS_' + self._tag) as scope:
        # RPN, class loss: cross-entropy averaged over the sampled anchors
        rpn_cls_score = tf.reshape(self._predictions['rpn_cls_score_reshape'], [-1, 2])
        rpn_label = tf.reshape(self._anchor_targets['rpn_labels'], [-1])
        # anchors labelled -1 are ignored; keep only labelled ones
        rpn_select = tf.where(tf.not_equal(rpn_label, -1))
        rpn_cls_score = tf.reshape(tf.gather(rpn_cls_score, rpn_select), [-1, 2])
        rpn_label = tf.reshape(tf.gather(rpn_label, rpn_select), [-1])
        rpn_cross_entropy = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=rpn_cls_score, labels=rpn_label))
        # RPN, bbox loss: smooth-L1 over the anchor regression targets
        rpn_bbox_pred = self._predictions['rpn_bbox_pred']
        rpn_bbox_targets = self._anchor_targets['rpn_bbox_targets']
        rpn_bbox_inside_weights = self._anchor_targets['rpn_bbox_inside_weights']
        rpn_bbox_outside_weights = self._anchor_targets['rpn_bbox_outside_weights']
        rpn_loss_box = self._smooth_l1_loss(rpn_bbox_pred, rpn_bbox_targets, rpn_bbox_inside_weights,
                                            rpn_bbox_outside_weights, sigma=sigma_rpn, dim=[1, 2, 3])
        # RPN losses use anchors; RCNN losses use the sampled proposals
        # RCNN, class loss
        cls_score = self._predictions["cls_score"]
        label = tf.reshape(self._proposal_targets["labels"], [-1])
        cross_entropy = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=cls_score, labels=label))
        # RCNN, bbox loss
        bbox_pred = self._predictions['bbox_pred']
        bbox_targets = self._proposal_targets['bbox_targets']
        bbox_inside_weights = self._proposal_targets['bbox_inside_weights']
        bbox_outside_weights = self._proposal_targets['bbox_outside_weights']
        loss_box = self._smooth_l1_loss(bbox_pred, bbox_targets, bbox_inside_weights, bbox_outside_weights)
        # stash the partial losses; they go into layers_to_output later
        self._losses['cross_entropy'] = cross_entropy
        self._losses['loss_box'] = loss_box
        self._losses['rpn_cross_entropy'] = rpn_cross_entropy
        self._losses['rpn_loss_box'] = rpn_loss_box
        # total = sum of the four parts plus L2 regularization
        loss = cross_entropy + loss_box + rpn_cross_entropy + rpn_loss_box
        regularization_loss = tf.add_n(tf.losses.get_regularization_losses(), 'regu')
        self._losses['total_loss'] = loss + regularization_loss
        self._event_summaries.update(self._losses)
    return loss
# The RPN: input is the backbone head feature map; everything about the
# original image is already held in member variables.
def _region_proposal(self, net_conv, is_training, initializer):
    """Shared 3x3 conv + two 1x1 heads -> rois handed to the RCNN part."""
    # shared 3x3 conv, cfg.RPN_CHANNELS (default 512) output channels
    rpn = slim.conv2d(net_conv, cfg.RPN_CHANNELS, [3, 3], trainable=is_training, weights_initializer=initializer,
                      scope="rpn_conv/3x3")
    self._act_summaries.append(rpn)
    # 1x1 conv: 2 scores (fg/bg) per anchor
    rpn_cls_score = slim.conv2d(rpn, self._num_anchors * 2, [1, 1], trainable=is_training,
                                weights_initializer=initializer,
                                padding='VALID', activation_fn=None, scope='rpn_cls_score')
    # change it so that the score has 2 as its channel size
    rpn_cls_score_reshape = self._reshape_layer(rpn_cls_score, 2, 'rpn_cls_score_reshape')
    # per-anchor two-class softmax over the reshaped scores
    rpn_cls_prob_reshape = self._softmax_layer(rpn_cls_score_reshape, "rpn_cls_prob_reshape")
    # hard fg/bg decision per anchor
    rpn_cls_pred = tf.argmax(tf.reshape(rpn_cls_score_reshape, [-1, 2]), axis=1, name="rpn_cls_pred")
    # reshape the probabilities back to the anchor channel layout
    rpn_cls_prob = self._reshape_layer(rpn_cls_prob_reshape, self._num_anchors * 2, "rpn_cls_prob")
    # 1x1 conv: 4 box-regression deltas per anchor
    rpn_bbox_pred = slim.conv2d(rpn, self._num_anchors * 4, [1, 1], trainable=is_training,
                                weights_initializer=initializer,
                                padding='VALID', activation_fn=None, scope='rpn_bbox_pred')
    if is_training:
        # training: NMS-filter the proposals, label anchors for the RPN
        # loss, then sample proposals for the RCNN loss
        rois, roi_scores = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
        rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor")
        # Try to have a deterministic order for the computing graph, for reproducibility
        with tf.control_dependencies([rpn_labels]):
            # runs only after the rpn_labels op has executed
            rois, _ = self._proposal_target_layer(rois, roi_scores, "rpn_rois")
    else:
        # testing: no sampling needed; filter by nms or top scoring and
        # hand the proposals straight to the RCNN part
        if cfg.TEST.MODE == 'nms':
            rois, _ = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
        elif cfg.TEST.MODE == 'top':
            rois, _ = self._proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
        else:
            raise NotImplementedError
    # cache the intermediates; used by the loss computation and at test time
    self._predictions["rpn_cls_score"] = rpn_cls_score
    self._predictions["rpn_cls_score_reshape"] = rpn_cls_score_reshape
    self._predictions["rpn_cls_prob"] = rpn_cls_prob
    self._predictions["rpn_cls_pred"] = rpn_cls_pred
    self._predictions["rpn_bbox_pred"] = rpn_bbox_pred
    self._predictions["rois"] = rois
    return rois
# RCNN head: classification and box regression of the rois, in one place.
def _region_classification(self, fc7, is_training, initializer, initializer_bbox):
    """fc7 -> (cls_prob, bbox_pred); also caches them in self._predictions."""
    # fully-connected layer with one output per class (e.g. 21)
    cls_score = slim.fully_connected(fc7, self._num_classes,
                                     weights_initializer=initializer,
                                     trainable=is_training,
                                     activation_fn=None, scope='cls_score')
    # scores -> probabilities
    cls_prob = self._softmax_layer(cls_score, "cls_prob")
    # hard class decision
    cls_pred = tf.argmax(cls_score, axis=1, name="cls_pred")
    # parallel fc head: 4 location deltas per class
    bbox_pred = slim.fully_connected(fc7, self._num_classes * 4,
                                     weights_initializer=initializer_bbox,
                                     trainable=is_training,
                                     activation_fn=None, scope='bbox_pred')
    # cache alongside the rpn prob/pred/score entries
    self._predictions["cls_score"] = cls_score
    self._predictions["cls_pred"] = cls_pred
    self._predictions["cls_prob"] = cls_prob
    self._predictions["bbox_pred"] = bbox_pred
    return cls_prob, bbox_pred
# Implemented by backbone subclasses (e.g. vgg16 / resnetv1).
def _image_to_head(self, is_training, reuse=None):
    """Backbone forward pass: image -> head feature map.  Abstract."""
    raise NotImplementedError
def _head_to_tail(self, pool5, is_training, reuse=None):
    """Tail of the backbone: pooled rois -> fc7 features.  Abstract."""
    raise NotImplementedError
# create_architecture is the top-level graph builder: called from demo.py
# as well as from the training and test scripts.
def create_architecture(self, mode, num_classes, tag=None,
                        anchor_scales=(8, 16, 32), anchor_ratios=(0.5, 1, 2)):
    """Build the complete Faster R-CNN graph.

    mode: 'TRAIN' or 'TEST'; num_classes: number of classes;
    tag: required scope tag used for the summary/loss variable scopes;
    anchor_scales / anchor_ratios: anchor generation parameters.
    Returns a dict with 'rois', the losses (training only) and the
    prediction tensors.
    """
    self._image = tf.placeholder(tf.float32, shape=[1, None, None, 3])  # input image, batch of 1
    self._im_info = tf.placeholder(tf.float32, shape=[3])               # height, width, scale
    self._gt_boxes = tf.placeholder(tf.float32, shape=[None, 5])        # ground-truth boxes + class
    self._tag = tag
    self._num_classes = num_classes
    self._mode = mode
    # anchor configuration
    self._anchor_scales = anchor_scales
    self._num_scales = len(anchor_scales)
    self._anchor_ratios = anchor_ratios
    self._num_ratios = len(anchor_ratios)
    self._num_anchors = self._num_scales * self._num_ratios
    # which path we are building
    training = mode == 'TRAIN'
    testing = mode == 'TEST'
    # FIX: was `assert tag != None` (PEP 8 E711) and the accompanying
    # comment wrongly said the tag "must be empty"; the tag is required --
    # it names the ANCHOR_/LOSS_ variable scopes above.
    assert tag is not None
    # handle most of the regularizers here
    # cfg.TRAIN.WEIGHT_DECAY (default 0.0001) drives the L2 weight penalty
    weights_regularizer = tf.contrib.layers.l2_regularizer(cfg.TRAIN.WEIGHT_DECAY)
    if cfg.TRAIN.BIAS_DECAY:  # optionally regularize biases as well
        biases_regularizer = weights_regularizer
    else:
        biases_regularizer = tf.no_regularizer
    # list as many types of layers as possible, even if they are not used now
    with arg_scope([slim.conv2d, slim.conv2d_in_plane,
                    slim.conv2d_transpose, slim.separable_conv2d, slim.fully_connected],
                   weights_regularizer=weights_regularizer,
                   biases_regularizer=biases_regularizer,
                   biases_initializer=tf.constant_initializer(0.0)):
        rois, cls_prob, bbox_pred = self._build_network(training)
    # output dict; starts with the detected rois
    layers_to_output = {'rois': rois}
    for var in tf.trainable_variables():
        self._train_summaries.append(var)
    if testing:
        # undo the training-time bbox target normalization with fixed stats
        stds = np.tile(np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), (self._num_classes))
        means = np.tile(np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS), (self._num_classes))
        self._predictions["bbox_pred"] *= stds
        self._predictions["bbox_pred"] += means
    else:
        # training: build the RPN and RCNN cls/box losses plus the total
        self._add_losses()
        layers_to_output.update(self._losses)
    val_summaries = []
    with tf.device("/cpu:0"):
        # summaries recorded during training
        val_summaries.append(self._add_gt_image_summary())
        for key, var in self._event_summaries.items():
            val_summaries.append(tf.summary.scalar(key, var))
        for key, var in self._score_summaries.items():
            self._add_score_summary(key, var)
        for var in self._act_summaries:
            self._add_act_summary(var)
        for var in self._train_summaries:
            self._add_train_summary(var)
    self._summary_op = tf.summary.merge_all()
    self._summary_op_val = tf.summary.merge(val_summaries)
    # add the prediction tensors to the output dict as well
    layers_to_output.update(self._predictions)
    return layers_to_output
def get_variables_to_restore(self, variables, var_keep_dic):
    """Pick which checkpoint variables to restore; subclass-implemented."""
    raise NotImplementedError
def fix_variables(self, sess, pretrained_model):
    """Manually patch pretrained head conv weights; implemented in concrete
    networks such as resnet."""
    raise NotImplementedError
# Extract the head feature maps, for example for vgg16 it is conv5_3
# only useful during testing mode
def extract_head(self, sess, image):
    """Run only the backbone on `image` and return its head feature map.

    NOTE(review): relies on self._layers["head"], which this version of
    the code never registers -- presumably meant to hold the backbone
    output op; calling this as-is would raise KeyError.
    """
    return sess.run(self._layers["head"], feed_dict={self._image: image})
# only useful during testing mode
#test.py裏面調用到,主要是輸入圖像,並且run出計算結果。
def test_image(self, sess, image, im_info):
feed_dict = {self._image: image,
self._im_info: im_info}
#run的op是在test_net.py裏面用下面語句生成的
#saver.restore(sess, args.model)
#sess.run(tf.global_variables_initializer())
else:
print(('Loading initial weights from {:s}').format(args.weight))
sess.run(tf.global_variables_initializer())#首先初始化,然後run出op得到結果
cls_score, cls_prob, bbox_pred, rois = sess.run([self._predictions["cls_score"],
self._predictions['cls_prob'],
self._predictions['bbox_pred'],
self._predictions['rois']],
feed_dict=feed_dict)
return cls_score, cls_prob, bbox_pred, rois
def get_summary(self, sess, blobs):
    """Evaluate the validation summary op for one input blob."""
    feed = {self._image: blobs['data'],
            self._im_info: blobs['im_info'],
            self._gt_boxes: blobs['gt_boxes']}
    return sess.run(self._summary_op_val, feed_dict=feed)
# Called from train_val.py; fetches the individual losses for periodic logging.
def train_step(self, sess, blobs, train_op):
    """Run one optimization step; return the four partial losses + total."""
    feed = {self._image: blobs['data'],
            self._im_info: blobs['im_info'],
            self._gt_boxes: blobs['gt_boxes']}
    fetches = [self._losses["rpn_cross_entropy"],
               self._losses['rpn_loss_box'],
               self._losses['cross_entropy'],
               self._losses['loss_box'],
               self._losses['total_loss'],
               train_op]
    rpn_loss_cls, rpn_loss_box, loss_cls, loss_box, loss, _ = sess.run(
        fetches, feed_dict=feed)
    return rpn_loss_cls, rpn_loss_box, loss_cls, loss_box, loss
# Same as train_step but additionally evaluates the merged summary op.
# Training input arrives as a `blobs` dict (data / im_info / gt_boxes).
def train_step_with_summary(self, sess, blobs, train_op):
    """One optimization step; returns the partial losses, total and summary."""
    feed = {self._image: blobs['data'],
            self._im_info: blobs['im_info'],
            self._gt_boxes: blobs['gt_boxes']}
    fetches = [self._losses["rpn_cross_entropy"],
               self._losses['rpn_loss_box'],
               self._losses['cross_entropy'],
               self._losses['loss_box'],
               self._losses['total_loss'],
               self._summary_op,
               train_op]
    rpn_loss_cls, rpn_loss_box, loss_cls, loss_box, loss, summary, _ = sess.run(
        fetches, feed_dict=feed)
    return rpn_loss_cls, rpn_loss_box, loss_cls, loss_box, loss, summary
# Like train_step but fetches nothing back.  (Unused in this version.)
def train_step_no_return(self, sess, blobs, train_op):
    """Run one training step without returning any losses."""
    sess.run([train_op],
             feed_dict={self._image: blobs['data'],
                        self._im_info: blobs['im_info'],
                        self._gt_boxes: blobs['gt_boxes']})