TensorFlow-SSD

SSD is built on top of VGG-16: the three fully connected layers of VGG-16 are removed, convolutional blocks 1-5 are kept, and new convolutional blocks 6-11 are appended. Each block takes as input the pooled feature maps produced by the previous block. Six feature layers are then selected (blocks 4, 7, 8, 9, 10, 11), and two convolutions are applied to each of them: one predicts box locations via linear regression, the other predicts classes via a softmax.
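For concreteness, the `SSDNet.default_params` referenced below look roughly like the following sketch. The values are assumptions based on the SSD-Tensorflow repository's 300x300 model; verify the exact numbers against your checkout.

# Sketch of the default parameters for the 300x300 model (values assumed
# from the SSD-Tensorflow repository, not shown in this excerpt).
default_params = SSDParams(
    img_shape=(300, 300),
    num_classes=21,  # 20 VOC classes + background
    feat_layers=['block4', 'block7', 'block8', 'block9', 'block10', 'block11'],
    feat_shapes=[(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)],
    anchor_sizes=[(21., 45.), (45., 99.), (99., 153.),
                  (153., 207.), (207., 261.), (261., 315.)],
    anchor_ratios=[[2, .5], [2, .5, 3, 1. / 3], [2, .5, 3, 1. / 3],
                   [2, .5, 3, 1. / 3], [2, .5], [2, .5]],
    normalizations=[20, -1, -1, -1, -1, -1],  # only block4 is L2-normalized
)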

The SSD network architecture:

def ssd_net(inputs,  # define the SSD network structure
            num_classes=SSDNet.default_params.num_classes,  # number of classes
            feat_layers=SSDNet.default_params.feat_layers,  # feature layers used for prediction
            anchor_sizes=SSDNet.default_params.anchor_sizes,
            anchor_ratios=SSDNet.default_params.anchor_ratios,
            normalizations=SSDNet.default_params.normalizations,  # per-layer L2-normalization flags
            is_training=True,
            dropout_keep_prob=0.5,
            prediction_fn=slim.softmax,
            reuse=None,
            scope='ssd_300_vgg'):
    """SSD net definition.
    """
    # if data_format == 'NCHW':
    #     inputs = tf.transpose(inputs, perm=(0, 3, 1, 2))

    # End_points collect relevant activations for external use.
    end_points = {}  # collect each block's output for external use
    with tf.variable_scope(scope, 'ssd_300_vgg', [inputs], reuse=reuse):
        # Original VGG-16 blocks.
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')  # VGG-16 block 1: two 3x3 convs, 64 filters
        end_points['block1'] = net  # store the conv1_2 output as 'block1'
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        # Block 2.
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')  # two 3x3 convs, 128 filters
        end_points['block2'] = net  # store the conv2_2 output as 'block2'
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        # Block 3.
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')  # three 3x3 convs, 256 filters
        end_points['block3'] = net  # store the conv3_3 output as 'block3'
        net = slim.max_pool2d(net, [2, 2], scope='pool3')
        # Block 4.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')  # three 3x3 convs, 512 filters
        end_points['block4'] = net  # store the conv4_3 output (before pooling) as 'block4'
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        # Block 5.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')  # three 3x3 convs, 512 filters
        end_points['block5'] = net  # store the conv5_3 output as 'block5'
        net = slim.max_pool2d(net, [3, 3], stride=1, scope='pool5')

        # Additional SSD blocks (replacing VGG's fully connected layers).
        # Block 6: let's dilate the hell out of it!
        net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')  # dilated (atrous) conv on the last pooled VGG feature map
        end_points['block6'] = net  # store the conv6 output as 'block6'
        net = tf.layers.dropout(net, rate=1.0 - dropout_keep_prob, training=is_training)  # dropout; tf.layers.dropout takes the drop rate, hence 1 - keep_prob
        # Block 7: 1x1 conv.
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')  # 1x1 conv on the dropout output, 1024 filters, name='block7'
        end_points['block7'] = net
        net = tf.layers.dropout(net, rate=1.0 - dropout_keep_prob, training=is_training)  # another dropout after the conv

        # Block 8/9/10/11: 1x1 and 3x3 convolutions, stride 2 (except the last two blocks).
        end_point = 'block8'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')  # 1x1 conv, then a stride-2 3x3 conv producing 512 feature maps, name='block8'
            net = custom_layers.pad2d(net, pad=(1, 1))  # explicit 1-pixel zero padding so the VALID conv halves the spatial size
            net = slim.conv2d(net, 512, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block9'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')  # 1x1 conv, then a stride-2 3x3 conv producing 256 feature maps, name='block9'
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block10'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')  # 1x1 conv, then a 3x3 VALID conv producing 256 feature maps, name='block10'
            net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block11'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')  # 1x1 conv, then a 3x3 VALID conv producing 256 feature maps, name='block11'
            net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
        end_points[end_point] = net

        # Prediction and localisation layers.
        predictions = []
        logits = []
        localisations = []
        for i, layer in enumerate(feat_layers):  # iterate over the selected feature layers
            with tf.variable_scope(layer + '_box'):  # one variable scope per prediction head
                p, l = ssd_multibox_layer(end_points[layer],  # per-cell class scores p and box offsets l for every anchor of this feature layer
                                          num_classes,  # number of classes
                                          anchor_sizes[i],  # anchor scales (shared by all anchors of one feature map)
                                          anchor_ratios[i],  # anchor aspect ratios
                                          normalizations[i])  # normalization flag; only the first feature map is L2-normalized
            # Collect every layer's predictions. The appends must sit inside the
            # loop; otherwise only the last feature layer would be collected.
            predictions.append(prediction_fn(p))  # prediction_fn is softmax: per-anchor class probabilities
            logits.append(p)  # raw class scores for every anchor of every cell
            localisations.append(l)  # predicted box locations

    return predictions, localisations, logits, end_points  # class probabilities, box locations, raw scores, and the feature layers


ssd_net.default_image_size = 300
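
A minimal usage sketch (everything other than ssd_net itself is an assumption): build the graph on a placeholder batch and inspect the per-layer prediction shapes.

import tensorflow as tf

images = tf.placeholder(tf.float32, shape=(None, 300, 300, 3))  # NHWC input batch
predictions, localisations, logits, end_points = ssd_net(images, is_training=False)

# Each list has one entry per feature layer; e.g. the block4 head on a 38x38
# map with 4 anchors per cell should yield localisations[0] of shape
# [N, 38, 38, 4, 4] and predictions[0] of shape [N, 38, 38, 4, 21].
for loc, cls in zip(localisations, predictions):
    print(loc.shape, cls.shape)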

Predicting locations and classes:

def ssd_multibox_layer(inputs,  # input feature layer
                       num_classes,  # number of classes
                       sizes,  # reference anchor scales
                       ratios=[1],  # default anchor aspect ratio is 1
                       normalization=-1,  # no normalization by default
                       bn_normalization=False):
    """Construct a multibox layer, return a class and localization predictions.
    """
    net = inputs
    if normalization > 0:  # a positive flag means: apply L2 normalization
        net = custom_layers.l2_normalization(net, scaling=True)  # L2-normalize along the channel axis, then scale by a learned gamma
    # Number of anchors.
    num_anchors = len(sizes) + len(ratios)  # anchors per cell on each feature map: [4, 6, 6, 6, 4, 4]

    # Location: each anchor needs 4 coordinates.
    num_loc_pred = num_anchors * 4  # per-cell localization output size = number of anchors * 4
    loc_pred = slim.conv2d(net, num_loc_pred, [3, 3], activation_fn=None,  # a 3x3 conv over the feature map produces the box offsets
                           scope='conv_loc')  # output shape [N, H, W, num_anchors * 4]
    loc_pred = custom_layers.channel_to_last(loc_pred)  # move the channel axis last (no-op for NHWC)
    loc_pred = tf.reshape(loc_pred,  # reshape to [N, H, W, num_anchors, 4]
                          tensor_shape(loc_pred, 4)[:-1] + [num_anchors, 4])
    # Class prediction.
    num_cls_pred = num_anchors * num_classes  # per-cell class output size = number of anchors * number of classes
    cls_pred = slim.conv2d(net, num_cls_pred, [3, 3], activation_fn=None,  # a 3x3 conv produces the class scores, shape [N, H, W, num_anchors * num_classes]
                           scope='conv_cls')
    cls_pred = custom_layers.channel_to_last(cls_pred)
    cls_pred = tf.reshape(cls_pred,
                          tensor_shape(cls_pred, 4)[:-1] + [num_anchors,
                                                            num_classes])  # reshape to [N, H, W, num_anchors, num_classes]
    return cls_pred, loc_pred  # return the predicted class scores and box locations
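
The tensor_shape helper used in the reshapes above is not shown in this excerpt. In the reference implementation it returns a mixed static/dynamic shape list, so the reshape also works when the batch size is unknown at graph-construction time. A sketch along those lines:

import tensorflow as tf

def tensor_shape(x, rank=3):
    """Return the shape of x as a list of length `rank`, using static
    dimensions where known and dynamic tensors otherwise (a sketch of the
    helper from the reference implementation)."""
    if x.get_shape().is_fully_defined():
        return x.get_shape().as_list()
    static_shape = x.get_shape().with_rank(rank).as_list()
    dynamic_shape = tf.unstack(tf.shape(x), rank)
    return [s if s is not None else d
            for s, d in zip(static_shape, dynamic_shape)]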

Reference blog: https://blog.csdn.net/c20081052/article/details/80391627
