SSD在vgg-16的基礎上,去掉了vgg-16的三個全連接層,保留了第1-5的卷積層,又新增加了第6-11的卷積層。下一卷積層的輸入,都是上一卷積層提取完特徵,池化後的結果。最後選取4,7,8,9,10,11這六個特徵層,對這六個特徵層分別進行兩次卷積,一次卷積通過線性迴歸預測位置,另一次卷積的輸出經softmax歸一化來做分類。
SSD的網絡架構:
def ssd_net(inputs,
            num_classes=SSDNet.default_params.num_classes,
            feat_layers=SSDNet.default_params.feat_layers,
            anchor_sizes=SSDNet.default_params.anchor_sizes,
            anchor_ratios=SSDNet.default_params.anchor_ratios,
            normalizations=SSDNet.default_params.normalizations,
            is_training=True,
            dropout_keep_prob=0.5,
            prediction_fn=slim.softmax,
            reuse=None,
            scope='ssd_300_vgg'):
    """Build the SSD-300 network on top of the VGG-16 convolutional blocks.

    The five VGG-16 convolutional blocks are kept, and blocks 6-11 are
    appended in place of the fully connected layers. The output of every
    block is recorded in ``end_points``; each layer named in ``feat_layers``
    is then fed to ``ssd_multibox_layer`` to produce class and box
    predictions.

    Args:
        inputs: Input tensor fed to the first convolution
            (presumably an NHWC image batch -- TODO confirm with caller).
        num_classes: Number of classes predicted per anchor.
        feat_layers: Names of the ``end_points`` entries used for
            prediction.
        anchor_sizes: Per-feature-layer anchor sizes, indexed in parallel
            with ``feat_layers``.
        anchor_ratios: Per-feature-layer anchor aspect ratios, indexed in
            parallel with ``feat_layers``.
        normalizations: Per-feature-layer normalization flags, indexed in
            parallel with ``feat_layers``; a value > 0 enables L2
            normalization in ``ssd_multibox_layer``.
        is_training: Forwarded to the dropout layers as ``training``.
        dropout_keep_prob: Probability of KEEPING a unit in the dropout
            layers that follow blocks 6 and 7.
        prediction_fn: Function applied to the class logits of each
            feature layer to obtain the predictions.
        reuse: Forwarded to ``tf.variable_scope``.
        scope: Name of the enclosing variable scope.

    Returns:
        A tuple ``(predictions, localisations, logits, end_points)``.
        The first three are lists with one entry per feature layer:
        ``prediction_fn`` applied to the class logits, the box predictions,
        and the raw class logits. ``end_points`` maps block names to their
        output tensors.
    """
    # tf.layers.dropout expects the fraction of units to DROP, whereas this
    # function's parameter is a KEEP probability; convert once here.
    # (The two coincide only for the default value 0.5.)
    dropout_rate = 1.0 - dropout_keep_prob

    # Collects the output of every block for external use.
    end_points = {}
    with tf.variable_scope(scope, 'ssd_300_vgg', [inputs], reuse=reuse):
        # Original VGG-16 blocks.
        # Block 1: two 3x3 convolutions, 64 channels.
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        end_points['block1'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        # Block 2: two 3x3 convolutions, 128 channels.
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        end_points['block2'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        # Block 3: three 3x3 convolutions, 256 channels.
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        end_points['block3'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool3')
        # Block 4: three 3x3 convolutions, 512 channels. The end point is
        # recorded BEFORE pooling.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        end_points['block4'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        # Block 5: three 3x3 convolutions, 512 channels, followed by a
        # 3x3 max-pool with stride 1.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        end_points['block5'] = net
        net = slim.max_pool2d(net, [3, 3], stride=1, scope='pool5')

        # Additional SSD blocks (replace the VGG fully connected layers).
        # Block 6: 3x3 dilated (atrous) convolution with rate 6.
        net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')
        end_points['block6'] = net
        net = tf.layers.dropout(net, rate=dropout_rate, training=is_training)
        # Block 7: 1x1 convolution.
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
        end_points['block7'] = net
        net = tf.layers.dropout(net, rate=dropout_rate, training=is_training)

        # Blocks 8-11: a 1x1 convolution followed by a 3x3 convolution.
        # Blocks 8 and 9 pad explicitly and use stride 2; blocks 10 and 11
        # use an unpadded ('VALID') 3x3 convolution without a stride argument.
        end_point = 'block8'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 512, [3, 3], stride=2, scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net
        end_point = 'block9'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net
        end_point = 'block10'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net, 256, [3, 3], scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net
        end_point = 'block11'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net, 256, [3, 3], scope='conv3x3',
                              padding='VALID')
        end_points[end_point] = net

        # Prediction and localisation layers: one multibox head per
        # selected feature layer.
        predictions = []
        logits = []
        localisations = []
        for i, layer in enumerate(feat_layers):
            with tf.variable_scope(layer + '_box'):
                cls_logits, loc = ssd_multibox_layer(end_points[layer],
                                                     num_classes,
                                                     anchor_sizes[i],
                                                     anchor_ratios[i],
                                                     normalizations[i])
            predictions.append(prediction_fn(cls_logits))
            logits.append(cls_logits)
            localisations.append(loc)

        return predictions, localisations, logits, end_points


ssd_net.default_image_size = 300
預測位置和分類:
def ssd_multibox_layer(inputs,
                       num_classes,
                       sizes,
                       ratios=(1,),
                       normalization=-1,
                       bn_normalization=False):
    """Construct a multibox layer; return class and localization predictions.

    Two independent 3x3 convolutions without activation are applied to the
    (optionally L2-normalized) feature map: one producing 4 box values per
    anchor, the other producing ``num_classes`` class scores per anchor.

    Args:
        inputs: Feature-map tensor to predict from.
        num_classes: Number of classes predicted per anchor.
        sizes: Anchor sizes for this feature layer; only its length is
            used here.
        ratios: Anchor aspect ratios for this feature layer; only its
            length is used here.
        normalization: L2 normalization (with learned scaling) is applied
            to ``inputs`` when this value is > 0.
        bn_normalization: Not used by this function.

    Returns:
        A tuple ``(cls_pred, loc_pred)``. Each keeps the leading dimensions
        of its convolution output, with the channel dimension split into
        ``[num_anchors, num_classes]`` and ``[num_anchors, 4]`` respectively
        (presumably ``[batch, h, w, num_anchors, ...]`` -- TODO confirm
        against ``custom_layers.channel_to_last``).
    """
    net = inputs
    if normalization > 0:
        net = custom_layers.l2_normalization(net, scaling=True)

    # Number of anchors per feature-map cell.
    num_anchors = len(sizes) + len(ratios)

    # Location: 4 values per anchor.
    num_loc_pred = num_anchors * 4
    loc_pred = slim.conv2d(net, num_loc_pred, [3, 3], activation_fn=None,
                           scope='conv_loc')
    loc_pred = custom_layers.channel_to_last(loc_pred)
    # Split the last (channel) dimension into [num_anchors, 4].
    loc_pred = tf.reshape(loc_pred,
                          tensor_shape(loc_pred, 4)[:-1] + [num_anchors, 4])

    # Class prediction: num_classes scores per anchor.
    num_cls_pred = num_anchors * num_classes
    cls_pred = slim.conv2d(net, num_cls_pred, [3, 3], activation_fn=None,
                           scope='conv_cls')
    cls_pred = custom_layers.channel_to_last(cls_pred)
    # Split the last (channel) dimension into [num_anchors, num_classes].
    cls_pred = tf.reshape(cls_pred,
                          tensor_shape(cls_pred, 4)[:-1] + [num_anchors,
                                                            num_classes])
    return cls_pred, loc_pred
參考博客:https://blog.csdn.net/c20081052/article/details/80391627