Training ResNet-50/101/152 on Your Own Dataset

Preface:

The previous two posts covered the structure and implementation of AlexNet and VGG-19, which, as noted there, are structurally similar to each other. ResNet (the residual network) discussed next is different: it not only goes far deeper, but its architecture also departs from those earlier networks. The residual network was invented by Dr. Kaiming He, who first proposed it in the CVPR paper "Deep Residual Learning for Image Recognition". Notably, he was also the top scorer in Guangdong province's college entrance exam and has won best paper awards at both CVPR and ICCV. Some people really are there to be admired. Back to the topic: let's look at the residual network's architecture and then its implementation.

 

Model 1: ResNet (Residual Network)

 

I could not find an architecture diagram for the 152-layer network, so the 50-layer ResNet structure shown here will have to do. As readers of the previous two posts will know, this figure is included so it can be compared against the code at the end.

 

As before, we start with the main training script:

 

# -*- coding: utf-8 -*-
# @Time    : 2019/7/2 18:56
# @Author  : YYLin
# @Email   : [email protected]
# @File    : Resnet_50_101_152_Train.py
import Resnet_50_101_152
import tensorflow as tf
import os
import cv2
import numpy as np
from keras.utils import to_categorical

# Loading Resnet_152 can run out of GPU memory; in that case uncomment the line below to train on the CPU
# With inception_V4 even a batch_size of 8 runs out of memory, which shows how heavy that network is
# os.environ['CUDA_VISIBLE_DEVICES'] = "-1"
# Parameters used by the model
batch_size = 32
img_high = 100
img_width = 100
Channel = 3
label = 9

resnet_type = 'resnet_v2_50'

# Placeholders for the input images and labels
inputs = tf.placeholder(tf.float32, [batch_size, img_high, img_width, Channel], name='inputs')
y = tf.placeholder(dtype=tf.float32, shape=[batch_size, label], name='label')
keep_prob = tf.placeholder("float")
is_train = tf.placeholder(tf.bool)

# Resnet_50_101_152 ends with batch normalization, so an is_train flag has to be passed in
net = Resnet_50_101_152.resnet(inputs, resnet_type, is_train, label)
score = tf.squeeze(net, axis=(1, 2))
softmax_result = tf.nn.softmax(score)

# Loss function and optimizer; clip the softmax output to avoid log(0)
cross_entropy = -tf.reduce_sum(y * tf.log(tf.clip_by_value(softmax_result, 1e-10, 1.0)))
# batch_normalization keeps its moving-average updates in UPDATE_OPS, so run them together with the train step
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

# Accuracy of the final predictions
correct_prediction = tf.equal(tf.argmax(softmax_result, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))


# Only the images and their labels are loaded here; the text annotations are not needed
def load_satetile_image(batch_size=128, dataset='train'):
    img_list = []
    label_list = []
    dir_counter = 0

    if dataset == 'train':
        path = '../Dataset/baidu/train_image/train'

        # Read every jpg in every sub-folder under this path and append it to a list
        for child_dir in os.listdir(path):
            child_path = os.path.join(path, child_dir)
            for dir_image in os.listdir(child_path):
                img = cv2.imread(os.path.join(child_path, dir_image))
                img = img/255.0
                img_list.append(img)
                label_list.append(dir_counter)

            dir_counter += 1
    else:
        path = '../Dataset/baidu/valid_image/valid'

        # Read every jpg in every sub-folder under this path and append it to a list
        for child_dir in os.listdir(path):
            child_path = os.path.join(path, child_dir)
            for dir_image in os.listdir(child_path):
                img = cv2.imread(os.path.join(child_path, dir_image))
                img = img / 255.0
                img_list.append(img)
                label_list.append(dir_counter)

            dir_counter += 1

    # Convert img_list to an np.array and one-hot encode the labels
    X_train = np.array(img_list)
    Y_train = to_categorical(label_list, 9)

    # Shuffle the data on every load and draw one random batch
    # print('X_train.shape, Y_train.shape:', X_train.shape, Y_train.shape)
    data_index = np.arange(X_train.shape[0])
    np.random.shuffle(data_index)
    data_index = data_index[:batch_size]
    x_batch = X_train[data_index, :, :, :]
    y_batch = Y_train[data_index, :]

    return x_batch, y_batch


# Feed the data and start training
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(500000//batch_size):
        # Load one training batch and one validation batch
        img, img_label = load_satetile_image(batch_size, dataset='train')
        img_valid, img_valid_label = load_satetile_image(batch_size, dataset='valid')
        # print('Shape of the loaded batch (cf. mnist.train.next_batch):', img.shape, type(img))

        # The reference code adds an L2 regularization term; it is left out of the loss here for now
        if i % 20 == 0:
            train_accuracy = accuracy.eval(feed_dict={inputs: img, y: img_label, is_train: True})
            print("step %d, training accuracy %g" % (i, train_accuracy))
        train_step.run(feed_dict={inputs: img, y: img_label, is_train: True})

        if i % 50 == 0:
            valid_score = accuracy.eval(feed_dict={inputs: img_valid, y: img_valid_label, is_train: False})
            print("step %d, valid accuracy %g" % (i, valid_score))

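Before launching the full training loop, it can be worth a quick smoke test of the graph on random data. The sketch below is a minimal example under the assumption that it is run as its own script sitting next to Resnet_50_101_152.py (so the placeholders do not clash with the ones above); it only checks that one forward pass runs and that the logits come out with the expected shape.

# Minimal smoke test (assumption: run as a separate script next to Resnet_50_101_152.py)
import numpy as np
import tensorflow as tf
import Resnet_50_101_152

batch_size, label = 4, 9
inputs = tf.placeholder(tf.float32, [batch_size, 100, 100, 3], name='inputs')
is_train = tf.placeholder(tf.bool)

# Build the 50-layer variant and squeeze away the 1 x 1 spatial dimensions of the logits
net = Resnet_50_101_152.resnet(inputs, 'resnet_v2_50', is_train, label)
score = tf.squeeze(net, axis=(1, 2))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    dummy = np.random.rand(batch_size, 100, 100, 3).astype(np.float32)
    print(sess.run(score, feed_dict={inputs: dummy, is_train: False}).shape)  # expected: (4, 9)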
 

 

Next is the core code of this post: ResNet (the residual network).

 

The code implements residual networks with 50, 101, and 152 layers. To keep the analysis manageable, only the 50-layer architecture is walked through below.

First: the architecture diagram shows that the numbers of repeated units in conv2_x through conv5_x are [3, 4, 6, 3], and in the code UNITS['resnet_v2_50'] is likewise [3, 4, 6, 3] (the layer-count arithmetic is spelled out in the sketch after point five).

Second: the first layer of the network is a 7 x 7 convolution with stride 2 x 2 and 64 output channels. Verified in the code.

Third: the first convolution is followed by a 3 x 3 max-pooling layer with stride 2 x 2. Verified in the code.

Fourth: then come the repeated bottleneck stages, which correspond to the loop over blocks in the code shown here. This one is an educated guess and I am not certain it is correct, but science sometimes needs a guess:

        for i in range(4):
            net = block(net, 'block'+str(i+1), UNITS[resnet_v2][i],
                        CHANNELS[i], is_train)

Fifth: a global average-pooling layer followed by the softmax output; the code additionally applies batch normalization and a ReLU activation before the pooling. Verified in the code.

All in all, the reimplementation below stays faithful to the original paper, so the overall check passes. Job done!
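
As a quick sanity check on points one through three, the snippet below tallies the layer count implied by the UNITS table (counting, as the paper does, the stem convolution, the three convolutions in every bottleneck unit, and the final classifier; pooling and batch-norm layers are not counted) and traces how a 100 x 100 input shrinks through the strided layers. This is only back-of-the-envelope arithmetic, not part of the training code.

# Layer count: 1 stem conv + 3 convs per bottleneck unit + 1 classifier
UNITS = {'resnet_v2_50': [3, 4, 6, 3], 'resnet_v2_101': [3, 4, 23, 3],
         'resnet_v2_152': [3, 8, 36, 3]}
for name, units in UNITS.items():
    print(name, 1 + 3 * sum(units) + 1)   # -> 50, 101, 152

# Spatial size of a 100 x 100 input through the strided layers (SAME padding: out = ceil(in / stride))
import math
size = 100
for stride in [2, 2, 1, 2, 2, 2]:         # conv1, max pool, block1 ... block4
    size = math.ceil(size / stride)
print(size)   # 4 -> the final global average pool covers a 4 x 4 feature map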

 

# -*- coding: utf-8 -*-
# @Time    : 2019/7/2 8:36
# @Author  : YYLin
# @Email   : [email protected]
# @File    : Resnet_50_101_152.py
# This file implements ResNet-50/101/152; the reference code it is based on also uses train/validation/test splits
# First of all, the overall architecture of this code is believable
import tensorflow as tf
UNITS = {'resnet_v2_50': [3, 4, 6, 3], 'resnet_v2_101': [3, 4, 23, 3],
         'resnet_v2_152': [3, 8, 36, 3]}
CHANNELS = [64, 128, 256, 512]


def bottleneck(net, channel, is_train, holes=1, c_name='pretrain', stride=1,
               shortcut_conv=False, key=tf.GraphKeys.GLOBAL_VARIABLES):
    with tf.variable_scope('bottleneck_v2', reuse=tf.AUTO_REUSE):
        # define initializer for weights and biases
        w_initializer = tf.contrib.layers.xavier_initializer()
        b_initializer = tf.zeros_initializer()
        regularizer = tf.contrib.layers.l2_regularizer(scale=0.0001)
        # batch normalization
        net = tf.layers.batch_normalization(inputs=net, axis=-1,
                                            training=is_train, name='preact')
        net = tf.nn.relu(net)

        # shortcut
        if shortcut_conv:
            with tf.variable_scope('shortcut', reuse=tf.AUTO_REUSE):
                kernel = tf.get_variable(initializer=w_initializer,
                                         shape=[1, 1, net.shape[-1],
                                                channel*4],
                                         name='weights',
                                         regularizer=regularizer,
                                         collections=['pretrain', key])
                # 1 x 1 convolution on the shortcut so that its output shape matches the block output
                shortcut = tf.nn.conv2d(input=net, filter=kernel,
                                        strides=[1, stride, stride, 1],
                                        padding='SAME')
                biases = tf.get_variable(initializer=b_initializer,
                                         shape=channel*4, name='biases',
                                         regularizer=regularizer,
                                         collections=['pretrain', key])
                shortcut = tf.nn.bias_add(shortcut, biases)
        else:
            # shortcut
            shortcut = net

        # convolution 1
        with tf.variable_scope('conv1', reuse=tf.AUTO_REUSE):
            kernel = tf.get_variable(initializer=w_initializer,
                                     shape=[1, 1, net.shape[-1], channel],
                                     name='weights', regularizer=regularizer,
                                     collections=['pretrain', key])
            net = tf.nn.atrous_conv2d(value=net, filters=kernel, rate=holes,
                                      padding='SAME')
            biases = tf.get_variable(initializer=b_initializer,
                                     shape=channel, name='biases',
                                     regularizer=regularizer,
                                     collections=['non_pretrain', key])
            net = tf.nn.bias_add(net, biases)
            # batch normalization
            net = tf.layers.batch_normalization(inputs=net, axis=-1,
                                                training=is_train,
                                                name='preact')
            net = tf.nn.relu(net)

        # convolution 2
        with tf.variable_scope('conv2', reuse=tf.AUTO_REUSE):
            kernel = tf.get_variable(initializer=w_initializer,
                                     shape=[3, 3, channel, channel],
                                     name='weights', regularizer=regularizer,
                                     collections=['pretrain', key])
            net = tf.nn.conv2d(input=net, filter=kernel,
                               strides=[1, stride, stride, 1], padding='SAME')
            biases = tf.get_variable(initializer=b_initializer,
                                     shape=channel, name='biases',
                                     regularizer=regularizer,
                                     collections=['non_pretrain', key])
            net = tf.nn.bias_add(net, biases)
            # batch normalization
            net = tf.layers.batch_normalization(inputs=net, axis=-1,
                                                training=is_train,
                                                name='preact')
            net = tf.nn.relu(net)

        # convolution 3
        with tf.variable_scope('conv3', reuse=tf.AUTO_REUSE):
            kernel = tf.get_variable(initializer=w_initializer,
                                     shape=[1, 1, channel, channel*4],
                                     name='weights', regularizer=regularizer,
                                     collections=['pretrain', key])
            net = tf.nn.atrous_conv2d(value=net, filters=kernel, rate=holes,
                                      padding='SAME')
            biases = tf.get_variable(initializer=b_initializer,
                                     shape=channel*4, name='biases',
                                     regularizer=regularizer,
                                     collections=['pretrain', key])
            net = tf.nn.bias_add(net, biases)

    return net, shortcut


def block(net, name, unit, channel, is_train):
    with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
        for i in range(unit):
            with tf.variable_scope('unit_'+str(i+1), reuse=tf.AUTO_REUSE):
                # the first unit of each block uses a convolutional shortcut;
                # block1 keeps stride 1 (the max pool already downsampled), later blocks downsample with stride 2
                if i == 0:
                    if name != 'block1':
                        net, shortcut = bottleneck(net, channel, is_train,
                                                   stride=2,
                                                   shortcut_conv=True)
                    else:
                        net, shortcut = bottleneck(net, channel, is_train,
                                                   stride=1,
                                                   shortcut_conv=True)
                else:
                    net, shortcut = bottleneck(net, channel, is_train)
            net = tf.add(net, shortcut)

    return net


def resnet(input_, resnet_v2, is_train, classes):
    key = tf.GraphKeys.GLOBAL_VARIABLES
    with tf.variable_scope(resnet_v2, reuse=tf.AUTO_REUSE):
        # define initializer for weights and biases
        w_initializer = tf.contrib.layers.xavier_initializer()
        b_initializer = tf.zeros_initializer()
        regularizer = tf.contrib.layers.l2_regularizer(scale=0.0001)
        # convolution 1
        with tf.variable_scope('conv1', reuse=tf.AUTO_REUSE):
            kernel = tf.get_variable(initializer=w_initializer,
                                     shape=[7, 7, 3, 64],
                                     name='weights', regularizer=regularizer,
                                     collections=['pretrain', key])
            net = tf.nn.conv2d(input=input_, filter=kernel,
                               strides=[1, 2, 2, 1], padding='SAME')
            biases = tf.get_variable(initializer=b_initializer, shape=64,
                                     name='biases', regularizer=regularizer,
                                     collections=['pretrain', key])
            net = tf.nn.bias_add(net, biases)
            net = tf.nn.max_pool(value=net, ksize=[1, 3, 3, 1],
                                 strides=[1, 2, 2, 1], padding='SAME')

        for i in range(4):
            net = block(net, 'block'+str(i+1), UNITS[resnet_v2][i],
                        CHANNELS[i], is_train)

        net = tf.layers.batch_normalization(inputs=net, axis=-1,
                                            training=is_train, name='postnorm')
        net = tf.nn.relu(net)

        h, w = net.shape[1:3]
        net = tf.nn.avg_pool(value=net, ksize=[1, h, w, 1],
                             strides=[1, 1, 1, 1], padding='VALID')

    # the logits live outside the 'resnet_v2' scope so the backbone can be fine-tuned separately
    with tf.variable_scope('logits', reuse=tf.AUTO_REUSE):
        kernel = tf.get_variable(initializer=w_initializer,
                                 shape=[1, 1, 2048, classes], name='weights',
                                 regularizer=regularizer,
                                 collections=['non_pretrain', key])
        net = tf.nn.conv2d(input=net, filter=kernel,
                           strides=[1, 1, 1, 1], padding='VALID')
        biases = tf.get_variable(initializer=b_initializer, shape=classes,
                                 name='biases', regularizer=regularizer,
                                 collections=['non_pretrain', key])
        net = tf.nn.bias_add(net, biases)
    return net
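
One more note on this module: the convolution weights and biases are registered in the custom 'pretrain' / 'non_pretrain' collections in addition to GLOBAL_VARIABLES, and the logits sit outside the backbone scope, presumably so that the backbone can be restored from a previously saved checkpoint while the classifier is trained from scratch. Below is a minimal sketch of such a restore; it assumes the graph has already been built with resnet(...), that the checkpoint was saved earlier from this same graph definition, and the checkpoint path is only a placeholder.

# Hypothetical fine-tuning setup: restore only the backbone variables registered in 'pretrain'
# (note: batch-norm variables created by tf.layers.batch_normalization are not in that collection)
pretrain_vars = tf.get_collection('pretrain')
pretrain_saver = tf.train.Saver(var_list=pretrain_vars)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())                      # initialize everything first
    pretrain_saver.restore(sess, './checkpoints/resnet_v2_50.ckpt')  # placeholder checkpoint path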


 

 

Finally, a word on the experimental results:

Training simply runs too slowly and I really do not want to wait for the model to finish converging. In the next column, devoted to the Baidu Dianshi (百度點石) competition, I will follow up with the completed training results.

 

 
