前言:
上一節介紹了圖像識別中的一個經典模型AlexNet,今天介紹的是圖像識別領域另一個經典的模型VGG-19。VGG-19是由牛津大學的Visual Geometry Group(VGG)實驗室提出的。因爲它不像AlexNet那樣由Alex一個人完成,所以這個模型就按照實驗室名稱的縮寫命名。VGG-19和AlexNet的整體架構是相似的,只是在AlexNet的基礎上進行了一些改進,具體如下:
第一: VGGNet相比AlexNet的一個改進是採用連續的幾個3x3的卷積核代替AlexNet中的較大卷積核(11x11,5x5)
第二: VGGNet的結構非常簡潔,整個網絡都使用了同樣大小的卷積核尺寸(3x3)和最大池化尺寸(2x2)
VGG-19的架構圖:
首先讓我們看一下VGG的發展歷程,第三行表示VGG不同版本的卷積層數,從11層到13再到16最後達到19層。
首先同樣是本程序的主程序:
和上一節的AlexNet幾乎一模一樣,所以只把代碼公佈一下,就不做解釋了。
# -*- coding: utf-8 -*-
# @Time : 2019/7/2 16:07
# @Author : YYLin
# @Email : [email protected]
# @File : VGG_19_Train.py
# 定義一些模型中所需要的參數
from VGG_19 import VGG19
import tensorflow as tf
import os
import cv2
import numpy as np
from keras.utils import to_categorical
# Hyper-parameters and input geometry shared by the graph and the data loader.
batch_size = 64
img_high = 100
img_width = 100
Channel = 3
label = 9  # number of classes (width of the one-hot label vector)

# Placeholders for the input images and their one-hot labels.
inputs = tf.placeholder(tf.float32, [batch_size, img_high, img_width, Channel], name='inputs')
y = tf.placeholder(dtype=tf.float32, shape=[batch_size, label], name='label')
# Keep probability forwarded to the dropout layers of the model.
keep_prob = tf.placeholder("float")
# Not consumed by the VGG19 constructor call below; kept so that code which
# references this placeholder keeps working.
is_train = tf.placeholder(tf.bool)

model = VGG19(inputs, keep_prob, label)
score = model.fc8
softmax_result = tf.nn.softmax(score)

# Loss and optimizer.
# The loss is computed from the raw scores with the fused op instead of
# -sum(y * log(softmax(score))): the hand-written form yields NaN/inf as soon
# as one softmax output underflows to 0. The per-example losses are summed,
# matching the reduction of the previous formula.
cross_entropy = tf.reduce_sum(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=score))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

# Accuracy: fraction of samples whose arg-max prediction matches the label.
correct_prediction = tf.equal(tf.argmax(softmax_result, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
# 現在的我只需要加載圖像和對應的label即可 不需要加載text中的內容
# Decoded datasets keyed by (directory, num_classes), so the image files are
# read from disk once instead of on every batch request.
_DATASET_CACHE = {}


def _read_image_folder(path, num_classes):
    """Read every image below ``path`` and return ``(images, one_hot_labels)``.

    Each sub-directory of ``path`` is one class; its index in the sorted
    directory listing is the class label.
    """
    img_list = []
    label_list = []
    # Sorted so the directory -> label mapping is deterministic and identical
    # for the train and validation trees (os.listdir order is arbitrary).
    class_dirs = sorted(
        d for d in os.listdir(path) if os.path.isdir(os.path.join(path, d)))
    for class_index, child_dir in enumerate(class_dirs):
        child_path = os.path.join(path, child_dir)
        for dir_image in os.listdir(child_path):
            img = cv2.imread(os.path.join(child_path, dir_image))
            if img is None:
                # cv2.imread returns None for files it cannot decode.
                continue
            # Scale pixel values to [0, 1].
            img_list.append(img / 255.0)
            label_list.append(class_index)
    if not img_list:
        raise ValueError('no readable images found under %r' % path)
    return np.array(img_list), to_categorical(label_list, num_classes)


def load_satetile_image(batch_size=128, dataset='train', num_classes=9):
    """Return one random batch of images and one-hot labels.

    Args:
        batch_size: maximum number of samples in the returned batch; fewer are
            returned when the dataset holds fewer images.
        dataset: ``'train'`` selects the training directory; any other value
            selects the validation directory.
        num_classes: width of the one-hot label vectors.

    Returns:
        ``(x_batch, y_batch)``: the sampled images (pixel values divided by
        255) and their one-hot labels, drawn without replacement.

    Raises:
        ValueError: if no readable image is found in the selected directory.
    """
    if dataset == 'train':
        path = '../Dataset/baidu/train_image/train'
    else:
        path = '../Dataset/baidu/valid_image/valid'

    cache_key = (path, num_classes)
    if cache_key not in _DATASET_CACHE:
        _DATASET_CACHE[cache_key] = _read_image_folder(path, num_classes)
    X_train, Y_train = _DATASET_CACHE[cache_key]

    # Random sample without replacement; fancy indexing returns copies, so the
    # cached arrays are never exposed to mutation by the caller.
    data_index = np.random.permutation(X_train.shape[0])[:batch_size]
    x_batch = X_train[data_index]
    y_batch = Y_train[data_index]
    return x_batch, y_batch
# 開始feed 數據並且訓練數據
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Dropout is only active for the optimisation step; accuracy is always
    # measured with every unit kept, otherwise the reported numbers are noisy
    # and biased low.
    train_keep_prob = 0.5
    eval_keep_prob = 1.0

    for i in range(500000 // batch_size):
        img, img_label = load_satetile_image(batch_size, dataset='train')

        # Report accuracy on the current training batch every 20 steps.
        if i % 20 == 0:
            train_accuracy = accuracy.eval(
                feed_dict={inputs: img, y: img_label, keep_prob: eval_keep_prob})
            print("step %d, training accuracy %g" % (i, train_accuracy))

        train_step.run(feed_dict={inputs: img, y: img_label, keep_prob: train_keep_prob})

        # Report accuracy on a validation batch every 50 steps. The batch is
        # loaded only when it is actually evaluated.
        if i % 50 == 0:
            img_valid, img_valid_label = load_satetile_image(batch_size, dataset='valid')
            valid_score = accuracy.eval(
                feed_dict={inputs: img_valid, y: img_valid_label, keep_prob: eval_keep_prob})
            print("step %d, valid accuracy %g" % (i, valid_score))
本節的核心代碼 VGG-19:
從圖中我們可以看到VGG-19有16個卷積層,卷積層的通道數分別是64、128、256、512。最後有三個全連接層通道數分別是4096,4096,1000。
第一: VGG-19所有的卷積核大小都是 3 * 3, 步長爲1 * 1。 代碼中滿足要求
第二: VGG-19所有最大池化層的池化窗口大小爲2 * 2, 步長爲2 * 2。 代碼中滿足要求
第三: 根據上圖查看一下每層卷積操作的通道數是否與代碼對應 顯然代碼滿足要求。
第四: 在第一節的時候我們向模型中增加了一些優化技巧,發現使用batch normalization能夠極大地提高模型的準確率,但是VGG-19中並沒有使用,所以可以嘗試增加batch normalization。此外模型除了ReLU之外也沒有嘗試其他激活函數,所以說這個模型可以嘗試的優化方案還是很多的。
# -*- coding: utf-8 -*-
# @Time : 2019/7/2 8:18
# @Author : YYLin
# @Email : [email protected]
# @File : VGG_19.py
# 本模型爲VGG-19參考代碼鏈接
import tensorflow as tf
def maxPoolLayer(x, kHeight, kWidth, strideX, strideY, name, padding="SAME"):
    """Max-pool ``x`` with a ``kHeight`` x ``kWidth`` window.

    Args:
        x: input tensor passed to ``tf.nn.max_pool``.
        kHeight: pooling window height.
        kWidth: pooling window width.
        strideX: stride along the width axis.
        strideY: stride along the height axis.
        name: name of the pooling op.
        padding: padding mode forwarded to ``tf.nn.max_pool``.

    Returns:
        The pooled tensor.
    """
    # The stride vector follows the same [1, height, width, 1] layout as
    # ``ksize`` and as the strides used by convLayer, so strideY goes first.
    return tf.nn.max_pool(x, ksize=[1, kHeight, kWidth, 1],
                          strides=[1, strideY, strideX, 1], padding=padding, name=name)
def dropout(x, keepPro, name=None):
    """Apply dropout to ``x``.

    Args:
        x: input tensor.
        keepPro: probability that each element is kept.
        name: optional name for the op.

    Returns:
        The tensor produced by ``tf.nn.dropout``.
    """
    # Keyword arguments are required here: the third positional parameter of
    # tf.nn.dropout is ``noise_shape``, so a positional ``name`` would be
    # interpreted as a noise shape instead of an op name.
    return tf.nn.dropout(x, keep_prob=keepPro, name=name)
def fcLayer(x, inputD, outputD, reluFlag, name):
    """Fully connected layer ``x @ w + b`` with an optional ReLU.

    Args:
        x: 2-D input tensor whose second dimension is ``inputD``.
        inputD: number of input features.
        outputD: number of output features.
        reluFlag: when truthy, ReLU is applied to the result.
        name: variable scope that holds the ``w`` and ``b`` variables.

    Returns:
        The layer output tensor.
    """
    with tf.variable_scope(name) as scope:
        weights = tf.get_variable("w", shape=[inputD, outputD], dtype="float")
        biases = tf.get_variable("b", [outputD], dtype="float")
        affine = tf.nn.xw_plus_b(x, weights, biases, name=scope.name)
        return tf.nn.relu(affine) if reluFlag else affine
def convLayer(x, kHeight, kWidth, strideX, strideY, featureNum, name, padding="SAME"):
    """2-D convolution followed by a bias add and ReLU.

    Args:
        x: input tensor; its last dimension is taken as the channel count.
        kHeight: kernel height.
        kWidth: kernel width.
        strideX: stride along the width axis.
        strideY: stride along the height axis.
        featureNum: number of output feature maps.
        name: variable scope that holds the ``w`` and ``b`` variables.
        padding: padding mode forwarded to ``tf.nn.conv2d``.

    Returns:
        The activated feature map.
    """
    channel = int(x.get_shape()[-1])
    with tf.variable_scope(name) as scope:
        w = tf.get_variable("w", shape=[kHeight, kWidth, channel, featureNum])
        b = tf.get_variable("b", shape=[featureNum])
        featureMap = tf.nn.conv2d(x, w, strides=[1, strideY, strideX, 1], padding=padding)
        out = tf.nn.bias_add(featureMap, b)
        # bias_add keeps the shape of featureMap, so no reshape is needed.
        # Reshaping to the static shape would also fail whenever the batch
        # dimension is unknown (None).
        return tf.nn.relu(out, name=scope.name)
class VGG19(object):
    """VGG-19 graph: 16 convolutions in 5 pooled stages plus 3 dense layers.

    The graph is built in the constructor. After construction ``fc8`` holds
    the un-activated class scores (logits) of shape ``[batch, classNum]``.
    """

    # (stage number, convolutions in the stage, output channels)
    _CONV_STAGES = (
        (1, 2, 64),
        (2, 2, 128),
        (3, 4, 256),
        (4, 4, 512),
        (5, 4, 512),
    )

    def __init__(self, x, keepPro, classNum):
        """Store the inputs and build the network.

        Args:
            x: input image tensor fed to the first convolution.
            keepPro: keep probability used by both dropout layers.
            classNum: number of output units of the last layer.
        """
        self.X = x
        self.KEEPPRO = keepPro
        self.CLASSNUM = classNum
        self.begin_VGG_19()

    def begin_VGG_19(self):
        """build model"""
        net = self.X
        # Every convolution is 3x3 with stride 1; every stage ends with a
        # 2x2 max-pool of stride 2. Scope names (conv1_1 ... conv5_4,
        # pool1 ... pool5) match the hand-written layer list they replace.
        for stage, depth, channels in self._CONV_STAGES:
            for index in range(1, depth + 1):
                net = convLayer(net, 3, 3, 1, 1, channels, "conv%d_%d" % (stage, index))
            net = maxPoolLayer(net, 2, 2, 2, 2, "pool%d" % stage)
        pool5 = net
        print('最後一層卷積層的形狀是:', pool5.shape)

        # Flatten size derived from the static shape instead of a hard-coded
        # 4*4*512, which is only correct for 100x100 inputs.
        flat_dim = 1
        for dim in pool5.get_shape().as_list()[1:]:
            flat_dim *= dim
        fcIn = tf.reshape(pool5, [-1, flat_dim])

        fc6 = fcLayer(fcIn, flat_dim, 4096, True, "fc6")
        dropout1 = dropout(fc6, self.KEEPPRO)
        fc7 = fcLayer(dropout1, 4096, 4096, True, "fc7")
        dropout2 = dropout(fc7, self.KEEPPRO)
        # No ReLU on the output layer: the scores feed a softmax, and clamping
        # them to be non-negative would prevent the network from pushing a
        # class score below zero.
        self.fc8 = fcLayer(dropout2, 4096, self.CLASSNUM, False, "fc8")
VGG-19增加batch normalize: 親測是可以使用的,但是需要將batch_size修改成32不然GPU顯存溢出
# -*- coding: utf-8 -*-
# @Time : 2019/7/2 16:57
# @Author : YYLin
# @Email : [email protected]
# @File : VGG_19_BN.py
import tensorflow as tf
# 相對於第一個版本 增加的批量正則化 2019 7 2
def bn(x, is_training):
    """Batch-normalize ``x`` via ``tf.layers.batch_normalization``.

    Args:
        x: input tensor.
        is_training: forwarded as the ``training`` argument; selects between
            batch statistics and the moving averages.

    Returns:
        The normalized tensor.

    NOTE(review): per the TensorFlow documentation the moving-average updates
    are placed in ``tf.GraphKeys.UPDATE_OPS`` and must be run together with
    the train op - confirm the training script does so.
    """
    normalized = tf.layers.batch_normalization(inputs=x, training=is_training)
    return normalized
def maxPoolLayer(x, kHeight, kWidth, strideX, strideY, name, padding="SAME"):
    """Max-pool ``x`` with a ``kHeight`` x ``kWidth`` window.

    Args:
        x: input tensor passed to ``tf.nn.max_pool``.
        kHeight: pooling window height.
        kWidth: pooling window width.
        strideX: stride along the width axis.
        strideY: stride along the height axis.
        name: name of the pooling op.
        padding: padding mode forwarded to ``tf.nn.max_pool``.

    Returns:
        The pooled tensor.
    """
    # The stride vector follows the same [1, height, width, 1] layout as
    # ``ksize`` and as the strides used by convLayer, so strideY goes first.
    return tf.nn.max_pool(x, ksize=[1, kHeight, kWidth, 1],
                          strides=[1, strideY, strideX, 1], padding=padding, name=name)
def dropout(x, keepPro, name=None):
    """Apply dropout to ``x``.

    Args:
        x: input tensor.
        keepPro: probability that each element is kept.
        name: optional name for the op.

    Returns:
        The tensor produced by ``tf.nn.dropout``.
    """
    # Keyword arguments are required here: the third positional parameter of
    # tf.nn.dropout is ``noise_shape``, so a positional ``name`` would be
    # interpreted as a noise shape instead of an op name.
    return tf.nn.dropout(x, keep_prob=keepPro, name=name)
def fcLayer(x, inputD, outputD, reluFlag, name):
    """Fully connected layer ``x @ w + b`` with an optional ReLU.

    Args:
        x: 2-D input tensor whose second dimension is ``inputD``.
        inputD: number of input features.
        outputD: number of output features.
        reluFlag: when truthy, ReLU is applied to the result.
        name: variable scope that holds the ``w`` and ``b`` variables.

    Returns:
        The layer output tensor.
    """
    with tf.variable_scope(name) as scope:
        weights = tf.get_variable("w", shape=[inputD, outputD], dtype="float")
        biases = tf.get_variable("b", [outputD], dtype="float")
        affine = tf.nn.xw_plus_b(x, weights, biases, name=scope.name)
        return tf.nn.relu(affine) if reluFlag else affine
def convLayer(x, kHeight, kWidth, strideX, strideY, featureNum, name, padding="SAME"):
    """2-D convolution followed by a bias add and ReLU.

    Args:
        x: input tensor; its last dimension is taken as the channel count.
        kHeight: kernel height.
        kWidth: kernel width.
        strideX: stride along the width axis.
        strideY: stride along the height axis.
        featureNum: number of output feature maps.
        name: variable scope that holds the ``w`` and ``b`` variables.
        padding: padding mode forwarded to ``tf.nn.conv2d``.

    Returns:
        The activated feature map.
    """
    channel = int(x.get_shape()[-1])
    with tf.variable_scope(name) as scope:
        w = tf.get_variable("w", shape=[kHeight, kWidth, channel, featureNum])
        b = tf.get_variable("b", shape=[featureNum])
        featureMap = tf.nn.conv2d(x, w, strides=[1, strideY, strideX, 1], padding=padding)
        out = tf.nn.bias_add(featureMap, b)
        # bias_add keeps the shape of featureMap, so no reshape is needed.
        # Reshaping to the static shape would also fail whenever the batch
        # dimension is unknown (None).
        return tf.nn.relu(out, name=scope.name)
class VGG19(object):
    """VGG-19 graph with batch normalization after every convolution layer.

    The graph is built in the constructor. After construction ``fc8`` holds
    the un-activated class scores (logits) of shape ``[batch, classNum]``.
    """

    # (stage number, convolutions in the stage, output channels)
    _CONV_STAGES = (
        (1, 2, 64),
        (2, 2, 128),
        (3, 4, 256),
        (4, 4, 512),
        (5, 4, 512),
    )

    def __init__(self, x, keepPro, classNum, is_training):
        """Store the inputs and build the network.

        Args:
            x: input image tensor fed to the first convolution.
            keepPro: keep probability used by both dropout layers.
            classNum: number of output units of the last layer.
            is_training: flag forwarded to every batch-normalization layer.
        """
        self.X = x
        self.KEEPPRO = keepPro
        self.CLASSNUM = classNum
        self.is_training = is_training
        self.begin_VGG_19()

    def begin_VGG_19(self):
        """build model"""
        net = self.X
        # Every convolution is 3x3 with stride 1 and is followed by batch
        # normalization; every stage ends with a 2x2 max-pool of stride 2.
        # Layers are created in the same order and with the same scope names
        # (conv1_1 ... conv5_4, pool1 ... pool5) as the hand-written list
        # they replace.
        for stage, depth, channels in self._CONV_STAGES:
            for index in range(1, depth + 1):
                net = convLayer(net, 3, 3, 1, 1, channels, "conv%d_%d" % (stage, index))
                net = bn(net, self.is_training)
            net = maxPoolLayer(net, 2, 2, 2, 2, "pool%d" % stage)
        pool5 = net
        print('最後一層卷積層的形狀是:', pool5.shape)

        # Flatten size derived from the static shape instead of a hard-coded
        # 4*4*512, which is only correct for 100x100 inputs.
        flat_dim = 1
        for dim in pool5.get_shape().as_list()[1:]:
            flat_dim *= dim
        fcIn = tf.reshape(pool5, [-1, flat_dim])

        fc6 = fcLayer(fcIn, flat_dim, 4096, True, "fc6")
        dropout1 = dropout(fc6, self.KEEPPRO)
        fc7 = fcLayer(dropout1, 4096, 4096, True, "fc7")
        dropout2 = dropout(fc7, self.KEEPPRO)
        # No ReLU on the output layer: the scores feed a softmax, and clamping
        # them to be non-negative would prevent the network from pushing a
        # class score below zero.
        self.fc8 = fcLayer(dropout2, 4096, self.CLASSNUM, False, "fc8")
VGG-19模型運行的結果分析:
VGG-19 增加BN之後的結果分析: