前言:
前面兩節介紹了AlexNet和VGG-19模型的結構,以及具體的實現。正如前面所講,兩者在結構上是相似的。但是接下來要講的ResNet(殘差網絡)不僅在深度上取得了巨大的進步,而且在架構上也與之前的網絡不同。殘差網絡由何愷明博士等人提出,首次發表於CVPR的文章《Deep Residual Learning for Image Recognition》。值得注意的是他還是廣東省的高考狀元,兩次獲得ICCV 最佳論文獎。果然大佬都是用來膜拜的。言歸正傳,咱們就看一下殘差網絡的架構以及最後的實現吧。
模型一: Resnet(殘差網絡)
沒有找到152的網絡的架構圖,湊合看一下50層的殘差網絡的結構吧。 看過上兩篇的就知道了,這個圖是爲了和最後的代碼進行對比的。
同樣首先是程序的主程序:
# -*- coding: utf-8 -*-
# @Time : 2019/7/2 18:56
# @Author : YYLin
# @Email : [email protected]
# @File : Resnet_50_101_152_Train.py
import Resnet_50_101_152
import tensorflow as tf
import os
import cv2
import numpy as np
from keras.utils import to_categorical
# Loading Resnet_152 overflows GPU memory, so that model is trained on the CPU.
# With inception_V4 a batch size of 8 already runs out of memory, which shows
# how heavy that network is.
# os.environ['CUDA_VISIBLE_DEVICES'] = "-1"

# Hyper-parameters used by the graph definition below.
batch_size = 32
img_high = 100
img_width = 100
Channel = 3
label = 9
resnet_type = 'resnet_v2_50'

# Placeholders for one batch of images and its one-hot labels.
inputs = tf.placeholder(tf.float32, [batch_size, img_high, img_width, Channel], name='inputs')
y = tf.placeholder(dtype=tf.float32, shape=[batch_size, label], name='label')
keep_prob = tf.placeholder("float")
is_train = tf.placeholder(tf.bool)

# The ResNet graph contains batch normalisation, which needs the is_train flag.
net = Resnet_50_101_152.resnet(inputs, resnet_type, is_train, label)
# Drop the two size-1 spatial axes left by the global pooling / 1x1 logits conv.
score = tf.squeeze(net, axis=(1, 2))
softmax_result = tf.nn.softmax(score)

# Loss and optimiser.
# The fused op computes the cross entropy from the logits directly, which is
# numerically stable; the hand-written -sum(y * log(softmax)) turns into
# NaN/inf as soon as one predicted probability underflows to 0.  reduce_sum
# keeps the same scale as the previous formulation (sum over the batch).
cross_entropy = tf.reduce_sum(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=score))

# tf.layers.batch_normalization registers its moving-average updates in
# UPDATE_OPS; they only run if the train op depends on them.  Without this the
# statistics used when is_train is False are never updated.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

# Fraction of samples in the batch whose arg-max prediction matches the label.
correct_prediction = tf.equal(tf.argmax(softmax_result, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
# 現在的我只需要加載圖像和對應的label即可 不需要加載text中的內容
def load_satetile_image(batch_size=128, dataset='train', num_classes=9):
    """Return one random batch of images and one-hot labels.

    Every sub-directory of the selected split is treated as one class.  Only
    the images drawn for the batch are decoded, instead of reading the whole
    split from disk on every call.

    Args:
        batch_size: Maximum number of samples to return; fewer are returned
            when the split contains fewer images.
        dataset: ``'train'`` selects the training directory; any other value
            selects the validation directory.
        num_classes: Width of the one-hot label vectors.

    Returns:
        Tuple ``(x_batch, y_batch)``: the stacked images scaled by 1/255 and
        the matching one-hot labels produced by ``to_categorical``.

    Raises:
        ValueError: If the split contains no image files.
        IOError: If a sampled file cannot be decoded by ``cv2.imread``.
    """
    if dataset == 'train':
        path = '../Dataset/baidu/train_image/train'
    else:
        path = '../Dataset/baidu/valid_image/valid'

    # Collect (file path, class index) pairs without decoding anything yet.
    # Directories are sorted so a class gets the same index in both splits;
    # os.listdir alone returns entries in arbitrary order.
    class_dirs = [d for d in sorted(os.listdir(path))
                  if os.path.isdir(os.path.join(path, d))]
    samples = []
    for class_index, child_dir in enumerate(class_dirs):
        child_path = os.path.join(path, child_dir)
        for file_name in sorted(os.listdir(child_path)):
            samples.append((os.path.join(child_path, file_name), class_index))

    if not samples:
        raise ValueError('no images found under %s' % path)

    # Draw the batch by shuffling the sample indices and keeping a prefix.
    data_index = np.arange(len(samples))
    np.random.shuffle(data_index)
    data_index = data_index[:batch_size]

    img_list = []
    label_list = []
    for sample_index in data_index:
        image_path, class_index = samples[sample_index]
        img = cv2.imread(image_path)
        # cv2.imread signals failure by returning None rather than raising.
        if img is None:
            raise IOError('cannot read image: %s' % image_path)
        img_list.append(img / 255.0)
        label_list.append(class_index)

    x_batch = np.array(img_list)
    y_batch = to_categorical(label_list, num_classes)
    return x_batch, y_batch
# 開始feed 數據並且訓練數據
# Feed batches and train.  Training accuracy is reported every 20 steps and
# validation accuracy every 50 steps.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(500000//batch_size):
        img, img_label = load_satetile_image(batch_size, dataset='train')

        # The reference implementation adds a regularisation term to the loss;
        # it is not included here for now.
        if i % 20 == 0:
            train_accuracy = accuracy.eval(feed_dict={inputs: img, y: img_label, is_train: True})
            print("step %d, training accuracy %g" % (i, train_accuracy))
        train_step.run(feed_dict={inputs: img, y: img_label, is_train: True})

        if i % 50 == 0:
            # Load the validation batch only on the steps that use it; each
            # load walks the dataset directory, so doing it every step wasted
            # 49 out of 50 loads.
            img_valid, img_valid_label = load_satetile_image(batch_size, dataset='valid')
            valid_socre = accuracy.eval(feed_dict={inputs: img_valid, y: img_valid_label, is_train: False})
            print("step %d, valid accuracy %g" % (i, valid_socre))
然後是本節的核心代碼: Resnet (殘差網絡)
本代碼中殘差網絡中的層數分別是50,101,152層。爲了方便分析,在這裏只是簡單的分析一下層數爲50層的殘差網絡的架構。
第一: 從殘差網絡的結構圖中,我們可以看到conv2到conv5中重複單元數分別是[3、 4、 6、3 ],代碼中resnet_v2_50對應的單元數(UNITS)也是[3、 4、 6、3 ]
第二: 殘差網絡中第一層的卷積核大小爲7 * 7, 步長爲 2 * 2,通道數爲64 代碼中驗證通過
第三: 第一層卷積之後是最大池化層,池化窗口大小爲3 * 3, 步長是 2 * 2。 代碼中驗證通過
第四: 開始驗證重複部分,代碼中也即是對應重複部分的卷積操作。 這個是猜的不確定是否正確。科學有時候是需要猜的
# Excerpt from resnet(): build the four residual blocks in sequence; block i
# takes its unit count from UNITS[resnet_v2][i] and its width from CHANNELS[i].
for i in range(4):
net = block(net, 'block'+str(i+1), UNITS[resnet_v2][i],
CHANNELS[i], is_train)
第五: 全局池化層然後softmax輸出, 代碼中增加了batch normalize 以及relu激活函數 驗證通過
所以說下面的代碼復現還是很忠於原論文的。所以總體上驗證通過。打完收工!!!!!!!!!!!!!!!
# -*- coding: utf-8 -*-
# @Time : 2019/7/2 8:36
# @Author : YYLin
# @Email : [email protected]
# @File : Resnet_50_101_152.py
# 本代碼實現的是殘差網絡 50 101 152 參考代碼中是有訓練集 驗證集 測試集
# 首先這個代碼的整體架構是讓人相信的
import tensorflow as tf
# Number of bottleneck units in each of the four blocks, keyed by model name.
UNITS = {
    'resnet_v2_50': [3, 4, 6, 3],
    'resnet_v2_101': [3, 4, 23, 3],
    'resnet_v2_152': [3, 8, 36, 3],
}
# Inner width of the bottlenecks in each of the four blocks.
CHANNELS = [64, 128, 256, 512]
def bottleneck(net, channel, is_train, holes=1, c_name='pretrain', stride=1,
               shortcut_conv=False, key=tf.GraphKeys.GLOBAL_VARIABLES):
    """Build one bottleneck unit and return its residual branch and shortcut.

    The input is batch-normalised and ReLU-activated first; that tensor feeds
    both the shortcut and a 1x1 -> 3x3 -> 1x1 convolution stack.  The two
    results are returned separately; adding them is left to the caller.

    Args:
        net: Input feature map; ``net.shape[-1]`` is used as its channel count.
        channel: Width of the first two convolutions; the last convolution and
            the projected shortcut produce ``channel * 4`` channels.
        is_train: Passed as ``training`` to every batch-normalisation layer.
        holes: Dilation rate given to the two 1x1 atrous convolutions.
        c_name: Not used in the body.
        stride: Stride of the 3x3 convolution and of the projected shortcut.
        shortcut_conv: When true the shortcut is a strided 1x1 convolution of
            the pre-activated input; otherwise it is that tensor unchanged.
        key: Additional graph collection every weight and bias is added to.

    Returns:
        Tuple ``(residual, shortcut)``.
    """
    out_channels = channel * 4
    spatial_strides = [1, stride, stride, 1]

    with tf.variable_scope('bottleneck_v2', reuse=tf.AUTO_REUSE):
        # Initialisers and regulariser shared by every variable of the unit.
        weight_init = tf.contrib.layers.xavier_initializer()
        bias_init = tf.zeros_initializer()
        l2_reg = tf.contrib.layers.l2_regularizer(scale=0.0001)

        # Pre-activation: BN + ReLU applied before any convolution.
        preact = tf.layers.batch_normalization(inputs=net, axis=-1,
                                               training=is_train,
                                               name='preact')
        preact = tf.nn.relu(preact)

        if not shortcut_conv:
            # Identity shortcut; note it is the pre-activated tensor, not the
            # raw input of the unit.
            shortcut = preact
        else:
            # Projection shortcut: 1x1 convolution so channel count and
            # spatial size match the residual branch.
            with tf.variable_scope('shortcut', reuse=tf.AUTO_REUSE):
                proj_w = tf.get_variable(
                    name='weights',
                    shape=[1, 1, preact.shape[-1], out_channels],
                    initializer=weight_init, regularizer=l2_reg,
                    collections=['pretrain', key])
                shortcut = tf.nn.conv2d(input=preact, filter=proj_w,
                                        strides=spatial_strides,
                                        padding='SAME')
                proj_b = tf.get_variable(
                    name='biases', shape=out_channels,
                    initializer=bias_init, regularizer=l2_reg,
                    collections=['pretrain', key])
                shortcut = tf.nn.bias_add(shortcut, proj_b)

        # NOTE(review): the batch-norm layers after conv1 and conv2 are placed
        # inside their conv scopes so each 'preact' layer owns its variables
        # (their channel counts differ from the input's) - confirm against the
        # indented upstream file.

        # 1x1 convolution reducing to `channel` feature maps.
        with tf.variable_scope('conv1', reuse=tf.AUTO_REUSE):
            w1 = tf.get_variable(
                name='weights', shape=[1, 1, preact.shape[-1], channel],
                initializer=weight_init, regularizer=l2_reg,
                collections=['pretrain', key])
            residual = tf.nn.atrous_conv2d(value=preact, filters=w1,
                                           rate=holes, padding='SAME')
            b1 = tf.get_variable(
                name='biases', shape=channel,
                initializer=bias_init, regularizer=l2_reg,
                collections=['non_pretrain', key])
            residual = tf.nn.bias_add(residual, b1)
            residual = tf.layers.batch_normalization(inputs=residual, axis=-1,
                                                     training=is_train,
                                                     name='preact')
            residual = tf.nn.relu(residual)

        # 3x3 convolution; this is where the spatial stride is applied.
        with tf.variable_scope('conv2', reuse=tf.AUTO_REUSE):
            w2 = tf.get_variable(
                name='weights', shape=[3, 3, channel, channel],
                initializer=weight_init, regularizer=l2_reg,
                collections=['pretrain', key])
            residual = tf.nn.conv2d(input=residual, filter=w2,
                                    strides=spatial_strides, padding='SAME')
            b2 = tf.get_variable(
                name='biases', shape=channel,
                initializer=bias_init, regularizer=l2_reg,
                collections=['non_pretrain', key])
            residual = tf.nn.bias_add(residual, b2)
            residual = tf.layers.batch_normalization(inputs=residual, axis=-1,
                                                     training=is_train,
                                                     name='preact')
            residual = tf.nn.relu(residual)

        # 1x1 convolution expanding to `channel * 4` feature maps.
        with tf.variable_scope('conv3', reuse=tf.AUTO_REUSE):
            w3 = tf.get_variable(
                name='weights', shape=[1, 1, channel, out_channels],
                initializer=weight_init, regularizer=l2_reg,
                collections=['pretrain', key])
            residual = tf.nn.atrous_conv2d(value=residual, filters=w3,
                                           rate=holes, padding='SAME')
            b3 = tf.get_variable(
                name='biases', shape=out_channels,
                initializer=bias_init, regularizer=l2_reg,
                collections=['pretrain', key])
            residual = tf.nn.bias_add(residual, b3)

    return residual, shortcut
def block(net, name, unit, channel, is_train):
    """Stack ``unit`` bottleneck units under the variable scope ``name``.

    The first unit always uses a projection shortcut; it also uses stride 2
    unless ``name`` is ``'block1'``.  All later units use the bottleneck
    defaults (stride 1, identity shortcut).  After each unit the residual
    branch and the shortcut are added.

    Args:
        net: Input feature map.
        name: Variable-scope name of the block, e.g. ``'block1'``.
        unit: Number of bottleneck units to build.
        channel: Inner width forwarded to every bottleneck.
        is_train: Training flag forwarded to every bottleneck.

    Returns:
        The output tensor of the last unit.
    """
    with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
        for index in range(unit):
            with tf.variable_scope('unit_'+str(index+1), reuse=tf.AUTO_REUSE):
                is_first = index == 0
                # Only the first unit of blocks other than block1 downsamples.
                unit_stride = 2 if (is_first and name != 'block1') else 1
                residual, shortcut = bottleneck(net, channel, is_train,
                                                stride=unit_stride,
                                                shortcut_conv=is_first)
                net = tf.add(residual, shortcut)
    return net
def resnet(input_, resnet_v2, is_train, classes):
    """Build the ResNet graph selected by ``resnet_v2`` and return its logits.

    Layout: 7x7 stride-2 convolution, 3x3 stride-2 max pooling, four residual
    blocks, batch normalisation + ReLU, average pooling over the full spatial
    extent, and a 1x1 convolution producing ``classes`` outputs.

    Args:
        input_: Input image batch; the first convolution expects 3 channels.
        resnet_v2: Key into ``UNITS`` and name of the backbone variable scope.
        is_train: Training flag forwarded to the batch-normalisation layers.
        classes: Number of output channels of the logits convolution.

    Returns:
        The logits tensor; the pooled spatial axes are kept.
    """
    key = tf.GraphKeys.GLOBAL_VARIABLES
    with tf.variable_scope(resnet_v2, reuse=tf.AUTO_REUSE):
        # Initialisers and regulariser shared by the stem and the logits.
        weight_init = tf.contrib.layers.xavier_initializer()
        bias_init = tf.zeros_initializer()
        l2_reg = tf.contrib.layers.l2_regularizer(scale=0.0001)

        # Stem: 7x7 convolution with stride 2 and 64 output channels.
        with tf.variable_scope('conv1', reuse=tf.AUTO_REUSE):
            stem_w = tf.get_variable(name='weights', shape=[7, 7, 3, 64],
                                     initializer=weight_init,
                                     regularizer=l2_reg,
                                     collections=['pretrain', key])
            net = tf.nn.conv2d(input=input_, filter=stem_w,
                               strides=[1, 2, 2, 1], padding='SAME')
            stem_b = tf.get_variable(name='biases', shape=64,
                                     initializer=bias_init,
                                     regularizer=l2_reg,
                                     collections=['pretrain', key])
            net = tf.nn.bias_add(net, stem_b)

        net = tf.nn.max_pool(value=net, ksize=[1, 3, 3, 1],
                             strides=[1, 2, 2, 1], padding='SAME')

        # Four residual blocks named block1..block4.
        unit_counts = UNITS[resnet_v2]
        for stage in range(1, 5):
            net = block(net, 'block'+str(stage), unit_counts[stage-1],
                        CHANNELS[stage-1], is_train)

        net = tf.layers.batch_normalization(inputs=net, axis=-1,
                                            training=is_train,
                                            name='postnorm')
        net = tf.nn.relu(net)

        # Average over the whole remaining spatial extent.
        pool_h, pool_w = net.shape[1:3]
        net = tf.nn.avg_pool(value=net, ksize=[1, pool_h, pool_w, 1],
                             strides=[1, 1, 1, 1], padding='VALID')

    # The logits layer is kept outside the backbone scope so it can be
    # handled separately when fine-tuning.
    with tf.variable_scope('logits', reuse=tf.AUTO_REUSE):
        logits_w = tf.get_variable(name='weights',
                                   shape=[1, 1, 2048, classes],
                                   initializer=weight_init,
                                   regularizer=l2_reg,
                                   collections=['non_pretrain', key])
        net = tf.nn.conv2d(input=net, filter=logits_w,
                           strides=[1, 1, 1, 1], padding='VALID')
        logits_b = tf.get_variable(name='biases', shape=classes,
                                   initializer=bias_init,
                                   regularizer=l2_reg,
                                   collections=['non_pretrain', key])
        net = tf.nn.bias_add(net, logits_b)
    return net
最後實驗結果分析:
運行得實在太慢了,真心不想等了,模型跑得實在太慢了,等下一個專欄我專門介紹百度點石這個比賽的時候,再奉上完成的訓練結果。