使用了VGG19的模型遷移到貓狗識別中,並且在最後添加了兩層FC全連接層用於分類。
並且網絡中添加了學習率衰減以及平均滑動模型
其中 train_image(25000張圖片)存放訓練樣本,test1(256張圖片)存放測試樣本
train_image:
test1:
VGG模型:
需要自己下載
VGG遷移學習_貓狗識別.py
VGG_PAT需要修改成自己本地保存VGG模型的地址
# Transfer-learning cat/dog classifier (training script).
# First download the VGG19 .mat weight file, then preprocess the data and feed
# it into the pre-built VGG network. Note the VGG weights are tf.constant —
# the pretrained weights are used as-is — and only the newly appended
# fully-connected layers are trained.
import tensorflow as tf
import numpy as np
import get_files
import get_batch
import VGG_net
import model
import os
# Hyper-parameters and paths as command-line flags.
# (Help strings are runtime literals and are left untranslated.)
tf.app.flags.DEFINE_integer('image_size', 224, '圖片尺寸')  # VGG19 expects 224x224 input
tf.app.flags.DEFINE_integer('batch_size', 32, '每次訓練圖片的張數')  # images per training step
tf.app.flags.DEFINE_integer('capacity', 256, '隊列中最多容納元素的個數')  # input-queue capacity
tf.app.flags.DEFINE_float('learning_rate_base', 0.0001, '基礎的學習率用於指數衰減的學習率中')  # base LR for exponential decay
tf.app.flags.DEFINE_float('learning_rate_decay', 0.99, '學習率的衰減率')  # LR decay rate per decay period
tf.app.flags.DEFINE_float('moving_average_decay', 0.99, '滑動平均的衰減率')  # EMA decay for trainable variables
tf.app.flags.DEFINE_integer('training_steps', 6000, '訓練的輪數')  # number of training steps
tf.app.flags.DEFINE_integer('n_class', 2, '類別數目')  # number of output classes (cat/dog)
tf.app.flags.DEFINE_integer('all_number', 25000, '訓練樣本總數')  # total training samples
tf.app.flags.DEFINE_string('train_dir', './train_image', '數據存放地址')  # training image directory
tf.app.flags.DEFINE_string('logs_train_dir', './logs_train_dir/', '訓練集輸出日誌保存的路徑')  # TensorBoard summary dir
tf.app.flags.DEFINE_string('save_dir', './save/', '模型保存的路徑')  # checkpoint directory
tf.app.flags.DEFINE_string('VGG_PATH', '../FCN.tensorflow-master/Model_zoo/imagenet-vgg-verydeep-19.mat', 'VGG網絡參數')  # edit to your local copy of the pretrained weights
FLAGS = tf.app.flags.FLAGS
def main(argv=None):
    """Build the training graph (frozen VGG19 features + two trainable FC
    layers) and run the training loop with summaries and checkpoints."""
    print('獲取圖片和標籤集中')
    train, train_label = get_files.get_files(FLAGS.train_dir)
    print('生成批次中')
    train_batch, train_label_batch = get_batch.get_batch(train, train_label, FLAGS.image_size, FLAGS.image_size, FLAGS.batch_size, FLAGS.capacity)
    print('train_batch', train_batch.shape)
    # Run the batch through pretrained VGG19; returns a dict of every layer's output.
    nets = VGG_net.net(FLAGS.VGG_PATH, train_batch)
    # New classification head: two fully-connected layers on top of relu5_4.
    with tf.variable_scope("dense1"):
        # Flatten the relu5_4 activations; only these FC layers have trainable weights.
        image = tf.reshape(nets["relu5_4"], [FLAGS.batch_size, -1])
        # assumes relu5_4 output is 14x14x512 for 224x224 input — TODO confirm
        weights = tf.Variable(tf.random_normal(shape=[14*14*512, 10], stddev=0.1))
        bias = tf.Variable(tf.zeros(shape=[10]) + 0.1)
        dense1 = tf.nn.tanh(tf.matmul(image, weights) + bias)
    with tf.variable_scope("out"):
        weights = tf.Variable(tf.random_normal(shape=[10, FLAGS.n_class], stddev=0.1))
        bias = tf.Variable(tf.zeros(shape=[FLAGS.n_class]) + 0.1)
        out = tf.matmul(dense1, weights) + bias
    loss = model.loss(logits=out, labels=train_label_batch)
    op = model.train(learning_rate_base=FLAGS.learning_rate_base, loss=loss,
                     learning_rate_decay=FLAGS.learning_rate_decay, all_number=FLAGS.all_number, batch_size=FLAGS.batch_size)
    # Final train op: gradient step combined with the moving-average update.
    train_end = model.moving(moving_average_decay=FLAGS.moving_average_decay, train_step=op)
    accuracy = model.accuracy(out=out, train_label_batch=train_label_batch)
    saver = tf.train.Saver()  # checkpoint saver
    summary_op = tf.summary.merge_all()  # merge all registered summaries
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())  # initialize variables
        train_writer = tf.summary.FileWriter(FLAGS.logs_train_dir, sess.graph)  # summary writer
        coord = tf.train.Coordinator()  # coordinates the input-queue threads
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)  # start queue threads
        try:
            for step in range(FLAGS.training_steps):
                summary, _, tra_loss, tra_acc = sess.run([summary_op, train_end, loss, accuracy])
                # Every 50 steps: write summaries and print loss / accuracy.
                if step % 50 == 0 and step != 0:
                    train_writer.add_summary(summary, step)
                    print("step", step, "loss", tra_loss, "acc", tra_acc * 100.0)
                # Every 2000 steps (and on the last step) save a checkpoint.
                if step % 2000 == 0 or (step + 1) == FLAGS.training_steps:
                    saver.save(sess, os.path.join(FLAGS.save_dir, 'model.ckpt'), global_step=step)
        except tf.errors.OutOfRangeError:
            print('訓練出現出錯')
        finally:  # always stop and join the queue threads
            coord.request_stop()
            coord.join(threads)
if __name__ == "__main__":
    # tf.app.run() parses the flags defined above, then calls main().
    tf.app.run()
VGG_net.py
import tensorflow as tf
import numpy as np
import scipy.io as scio
def _conv_layer(input, weights, bias):
    """Stride-1 SAME convolution with frozen (constant) pretrained weights,
    followed by a bias add."""
    kernel = tf.constant(weights)  # weights are not trainable
    conv_out = tf.nn.conv2d(input, kernel, strides=[1, 1, 1, 1], padding="SAME")
    return tf.nn.bias_add(conv_out, bias)
def _pool_layer(input):
    """2x2 max-pooling with stride 2 — halves each spatial dimension."""
    return tf.nn.max_pool(input,
                          ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1],
                          padding="SAME")
def net(data_path, input_image):
    """Run `input_image` through the pretrained VGG19 convolutional stack.

    Args:
        data_path: path to the 'imagenet-vgg-verydeep-19.mat' weight file.
        input_image: 4-D float image batch tensor.

    Returns:
        Dict mapping each layer name in `layers` to its output tensor
        (every intermediate activation is kept for downstream use).
    """
    # The five convolutional stages of VGG19 (fully-connected layers omitted).
    layers = (
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3', 'relu3_3', 'conv3_4', 'relu3_4', 'pool3',
        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3', 'relu4_3', 'conv4_4', 'relu4_4', 'pool4',
        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3', 'relu5_3', 'conv5_4', 'relu5_4'
    )
    data = scio.loadmat(data_path)  # MATLAB dict holding the pretrained parameters
    # NOTE(review): the original code also read data['normalization'] to compute
    # the ImageNet mean pixel but never used it (dead code removed here); input
    # standardization is instead performed in the input pipeline (get_batch).
    weights = data['layers'][0]  # per-layer parameter records
    net = {}
    current = input_image
    for i, name in enumerate(layers):
        kind = name[:4]
        if kind == 'conv':
            kernels, bias = weights[i][0][0][0][0]
            # The .mat file stores kernels as (width, height, in, out);
            # tf.nn.conv2d wants (height, width, in, out) — swap the first two axes.
            kernels = np.transpose(kernels, [1, 0, 2, 3])
            bias = bias.reshape(-1)
            current = _conv_layer(current, kernels, bias)
        elif kind == 'relu':
            current = tf.nn.relu(current)  # activation
        elif kind == "pool":
            current = _pool_layer(current)  # 2x2 max-pool
        net[name] = current  # record every layer's forward-pass output
    assert len(net) == len(layers)
    return net
model.py
import tensorflow as tf
def loss(logits, labels):
    """Mean sparse softmax cross-entropy between `logits` and integer `labels`,
    with a scalar summary attached."""
    with tf.variable_scope('loss') as scope:
        per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=labels)
        mean_loss = tf.reduce_mean(per_example)
        tf.summary.scalar(scope.name + '/loss', mean_loss)
        return mean_loss
def train(learning_rate_base, loss, learning_rate_decay, all_number, batch_size):
    """Create an Adam training op with an exponentially decaying learning rate.

    Args:
        learning_rate_base: initial learning rate.
        loss: scalar loss tensor to minimize.
        learning_rate_decay: multiplicative decay applied once per epoch.
        all_number: total number of training samples.
        batch_size: samples per step (all_number / batch_size = steps per epoch).

    Returns:
        The training op; running it performs one gradient step and
        increments the global step.
    """
    with tf.variable_scope('optimizer') as scope:
        # One shared step counter for both the decay schedule and the optimizer.
        # Bug fix: the original passed a *separate*, never-incremented
        # tf.Variable(0) to exponential_decay, so the learning rate never
        # actually decayed.
        global_step = tf.Variable(0, trainable=False, name='global_step')
        learning_rate = tf.train.exponential_decay(
            learning_rate_base, global_step, all_number / batch_size,
            learning_rate_decay)
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        # minimize() increments global_step, which in turn drives the decay.
        train_op = optimizer.minimize(loss, global_step=global_step, name=scope.name)
        return train_op
def moving(moving_average_decay, train_step):
    """Combine the gradient step with an exponential-moving-average update
    over all trainable variables.

    Args:
        moving_average_decay: EMA decay rate (e.g. 0.99).
        train_step: the optimizer's training op to run alongside the EMA.

    Returns:
        A no-op that, when run, executes both the gradient step and the
        moving-average update.

    Bug fix: the original passed a never-incremented tf.Variable(0) as
    `num_updates`, which caps the effective decay at
    min(decay, (1+0)/(10+0)) = 0.1 forever. Omitting num_updates uses the
    configured decay rate directly.
    """
    with tf.variable_scope('moving_optimizer'):
        variable_averages = tf.train.ExponentialMovingAverage(moving_average_decay)
        # Maintain shadow averages of every trainable variable.
        variable_averages_op = variable_averages.apply(tf.trainable_variables())
        # Group backprop and the EMA update into a single op.
        with tf.control_dependencies([train_step, variable_averages_op]):
            train_op = tf.no_op(name='moving_optimizer')
        return train_op
def accuracy(out, train_label_batch):
    """Fraction of the batch whose top-1 prediction matches the label,
    with a scalar summary attached."""
    with tf.variable_scope('accuracy') as scope:
        correct = tf.nn.in_top_k(out, train_label_batch, 1)
        acc = tf.reduce_mean(tf.cast(correct, tf.float32))
        tf.summary.scalar(scope.name + '/accuracy', acc)
        return acc
get_files.py
import os
import numpy as np
def get_files(file_dir):
    """Collect cat/dog image paths under `file_dir` and matching labels.

    Files whose name before the first '.' contains 'cat' get label 0; those
    containing 'dog' get label 1; anything else is ignored. The pairs are
    shuffled together before being returned.

    Args:
        file_dir: directory containing files named like 'cat.123.jpg'.

    Returns:
        (image_list, label_list): aligned lists of file paths and int labels,
        in a random order.
    """
    cats, label_cats = [], []
    dogs, label_dogs = [], []
    for file in os.listdir(file_dir):
        name = file.split(sep='.')
        # Bug fix: use os.path.join instead of the hard-coded '\\' separator
        # so the paths are valid on non-Windows systems too.
        if 'cat' in name[0]:
            cats.append(os.path.join(file_dir, file))
            label_cats.append(0)
        elif 'dog' in name[0]:
            dogs.append(os.path.join(file_dir, file))
            label_dogs.append(1)
    image_list = np.hstack((cats, dogs))
    label_list = np.hstack((label_cats, label_dogs))
    # Stack paths and labels into pairs, shuffle the pairs together,
    # then split them back apart.
    temp = np.array([image_list, label_list])
    temp = temp.transpose()
    np.random.shuffle(temp)
    image_list = list(temp[:, 0])
    # Labels became strings inside the numpy object array; convert back to int.
    label_list = [int(i) for i in temp[:, 1]]
    return image_list, label_list
get_batch.py
import tensorflow as tf
# image_W, image_H: target image size; batch_size: images read per batch;
# capacity: maximum number of elements held in the input queue.
def get_batch(image, label, image_W, image_H, batch_size, capacity):
    """Turn lists of file paths and labels into a batched TF input pipeline.

    Returns (image_batch, label_batch): float32 [batch, H, W, 3] images and
    int32 [batch] labels, produced by queue runners (must be started with
    tf.train.start_queue_runners).
    """
    # Convert the Python lists into tensors TF can consume.
    image = tf.cast(image, tf.string)
    label = tf.cast(label, tf.int32)
    # Put images and labels into an input queue; slice_input_producer
    # shuffles by default, which provides the randomization for training.
    input_queue = tf.train.slice_input_producer([image, label])
    label = input_queue[1]
    # Read the raw bytes of one image file.
    image_contents = tf.read_file(input_queue[0])
    # Decode the JPEG; channels=3 forces 3-channel (RGB) output even for
    # grayscale sources.
    image = tf.image.decode_jpeg(image_contents, channels=3)
    # Center-crop or zero-pad to exactly image_W x image_H.
    image = tf.image.resize_image_with_crop_or_pad(image, image_W, image_H)
    # Per-image standardization: subtract the mean, divide by the stddev.
    image = tf.image.per_image_standardization(image)
    # Assemble batches. NOTE: tf.train.batch does NOT shuffle — the shuffling
    # already happened in slice_input_producer above. num_threads should be
    # tuned to the machine.
    image_batch, label_batch = tf.train.batch([image, label], batch_size=batch_size, num_threads=64, capacity=capacity)
    # Flatten the label batch to shape [batch_size].
    label_batch = tf.reshape(label_batch, [batch_size])
    # Convert images to float32 for the conv layers.
    image_batch = tf.cast(image_batch, tf.float32)
    return image_batch, label_batch
predict_one.py
測試
import tensorflow as tf
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import model
import os
import VGG_net
import cv2
# 從指定目錄中選取一張圖片
def get_one_image(train):
    """Pick one random image file from the directory `train`, display it,
    and return it resized to 224x224 as a numpy array."""
    files = os.listdir(train)
    pick = np.random.randint(0, len(files))  # random index into the listing
    img_path = os.path.join(train, files[pick])
    img = Image.open(img_path)
    plt.imshow(img)  # show the chosen image (blocks until the window closes)
    plt.show()
    # Match the training input size before converting to an array.
    img = img.resize([224, 224])
    return np.array(img)
def evaluate_one_image():
    """Load one random test image, rebuild the VGG19 + FC graph, restore the
    latest checkpoint, and print cat/dog probabilities.

    NOTE(review): this function relies on a *global* `sess`
    (tf.InteractiveSession created in the __main__ block) for saver.restore,
    sess.run and image.eval(); calling it without that global will fail.
    """
    # Directory of downloaded test images to sample from.
    train = './test1/'
    image_array = get_one_image(train)
    BATCH_SIZE = 1  # single image, so batch size 1 (declared but unused below)
    N_CLASSES = 2  # two output neurons, [cat, dog] (declared but unused below)
    # Resize to the training input size; conv2d expects float32 input.
    # (get_one_image already resized to 224x224, so this is a redundant no-op resize.)
    im = cv2.resize(image_array, (224, 224), interpolation=cv2.INTER_CUBIC)
    image = tf.cast(im, tf.float32)
    # Reshape the 3-D image into a 4-D tensor [1, 224, 224, 3].
    image = tf.reshape(image, [1, 224, 224, 3])
    # Placeholder fed below via feed_dict.
    x = tf.placeholder(tf.float32, shape=[1,224, 224, 3])
    # Frozen VGG19 feature extractor; returns every layer's output.
    nets = VGG_net.net('../FCN.tensorflow-master/Model_zoo/imagenet-vgg-verydeep-19.mat', x)
    # Rebuild the same two fully-connected layers added during training.
    with tf.variable_scope("dense1"):
        i = tf.reshape(nets["relu5_4"], [1, -1])  # flatten relu5_4 features
        weights = tf.Variable(tf.random_normal(shape=[14 * 14 * 512, 10], stddev=0.1))
        bias = tf.Variable(tf.zeros(shape=[10]) + 0.1)
        dense1 = tf.nn.tanh(tf.matmul(i, weights) + bias)
    with tf.variable_scope("out"):
        weights = tf.Variable(tf.random_normal(shape=[10, 2], stddev=0.1))
        bias = tf.Variable(tf.zeros(shape=[2]) + 0.1)
        out = tf.matmul(dense1, weights) + bias
    # The logits have no activation; apply softmax to obtain probabilities.
    logit = tf.nn.softmax(out)
    # Checkpoint directory written by the training script.
    logs_train_dir = 'save/'
    saver = tf.train.Saver()
    # Locate the latest checkpoint under logs_train_dir.
    ckpt = tf.train.get_checkpoint_state(logs_train_dir)
    if ckpt and ckpt.model_checkpoint_path:
        # The step number is the suffix after the last '-' in the checkpoint path.
        global_step = ckpt.model_checkpoint_path.split('-')[-1]
        saver.restore(sess, ckpt.model_checkpoint_path)
        print('模型加載成功, 訓練的步數爲 %s' % global_step)
    else:
        print('模型加載失敗,,,文件沒有找到')
    # Feed the prepared image through the restored model.
    prediction = sess.run(logit, feed_dict={x: image.eval()})
    print('貓的概率 %.6f' % prediction[:, 0])
    print('狗的概率 %.6f' % prediction[:, 1])
    if prediction[:, 0]>=prediction[:, 1]:
        print('圖片爲貓')
    else:
        print('圖片爲狗')
if __name__ == "__main__":
    # Run a single-image prediction.
    print("正在檢測")
    # Create a default session; evaluate_one_image() depends on this global
    # `sess` for saver.restore / sess.run / image.eval().
    sess = tf.InteractiveSession()
    evaluate_one_image()
結果: