遷移學習VGG_貓狗識別

使用了VGG19的模型遷移到貓狗識別中,並且在最後添加了兩層FC全連接層用於分類。

並且網絡中添加了學習率衰減以及滑動平均模型

 

其中train_image(2w5張圖片)存放訓練樣本 , test1(256張圖片)存放測試樣本

train_image:

 test1:

 VGG模型:

需要自己下載

VGG遷移學習_貓狗識別.py

VGG_PATH需要修改成自己本地保存VGG模型的地址

#遷移學習貓狗識別
#首先下載vgg19網絡的mat文件,然後對數據進行預處理
#直接送到了我們預先展開的vgg網絡中,注意這裏權重是constant,直接把別人訓練好的權重拿來初始化,然後最後改一下全連接層
import tensorflow as tf
import numpy as np
import get_files
import get_batch
import VGG_net
import model
import os
# Command-line flags (TF1 flags API): training hyper-parameters and paths.
tf.app.flags.DEFINE_integer('image_size', 224, '圖片尺寸')  # input side length (VGG19 expects 224x224)
tf.app.flags.DEFINE_integer('batch_size', 32, '每次訓練圖片的張數')  # images per training step
tf.app.flags.DEFINE_integer('capacity', 256, '隊列中最多容納元素的個數')  # max elements held by the input queue
tf.app.flags.DEFINE_float('learning_rate_base', 0.0001, '基礎的學習率用於指數衰減的學習率中')  # base LR fed to exponential decay
tf.app.flags.DEFINE_float('learning_rate_decay', 0.99, '學習率的衰減率')  # LR decay rate
tf.app.flags.DEFINE_float('moving_average_decay', 0.99, '滑動平均的衰減率')  # EMA decay rate
tf.app.flags.DEFINE_integer('training_steps', 6000, '訓練的輪數')  # total optimization steps
tf.app.flags.DEFINE_integer('n_class', 2, '類別數目')  # number of classes (cat / dog)
tf.app.flags.DEFINE_integer('all_number', 25000, '訓練樣本總數')  # total number of training samples
tf.app.flags.DEFINE_string('train_dir', './train_image', '數據存放地址')  # directory with training images
tf.app.flags.DEFINE_string('logs_train_dir', './logs_train_dir/', '訓練集輸出日誌保存的路徑')  # TensorBoard log directory
tf.app.flags.DEFINE_string('save_dir', './save/', '模型保存的路徑')  # checkpoint directory
tf.app.flags.DEFINE_string('VGG_PATH', '../FCN.tensorflow-master/Model_zoo/imagenet-vgg-verydeep-19.mat', 'VGG網絡參數')  # path to the pre-trained VGG19 .mat file
FLAGS = tf.app.flags.FLAGS



def main(argv=None):
    """Train a cat/dog classifier by transfer learning on a frozen VGG19.

    Builds the input pipeline, runs batches through the pre-trained
    (constant-weight) VGG19 network, adds two trainable fully-connected
    layers on top of relu5_4, and trains them with Adam, learning-rate
    decay and an exponential moving average of the variables.
    """
    print('獲取圖片和標籤集中')
    train, train_label = get_files.get_files(FLAGS.train_dir)
    print('生成批次中')
    train_batch, train_label_batch =get_batch.get_batch(train,train_label,FLAGS.image_size,FLAGS.image_size,FLAGS.batch_size,FLAGS.capacity)
    print('train_batch',train_batch.shape)
    nets=VGG_net.net(FLAGS.VGG_PATH,train_batch)# run the frozen VGG19; returns a dict of every layer's output tensor

    # Replace VGG's classifier: two new fully-connected layers on top of relu5_4.
    with tf.variable_scope("dense1"):
        image=tf.reshape(nets["relu5_4"],[FLAGS.batch_size,-1])# flatten relu5_4 features; fine-tuning starts from here
        weights=tf.Variable(tf.random_normal(shape=[14*14*512,10],stddev=0.1))# 14*14*512 presumably matches relu5_4 for 224x224 input after 4 pools — TODO confirm
        bias=tf.Variable(tf.zeros(shape=[10])+0.1)
        dense1=tf.nn.tanh(tf.matmul(image,weights)+bias)

    with tf.variable_scope("out"):
        weights=tf.Variable(tf.random_normal(shape=[10,FLAGS.n_class],stddev=0.1))
        bias=tf.Variable(tf.zeros(shape=[FLAGS.n_class])+0.1)
        out=tf.matmul(dense1,weights)+bias



    loss=model.loss(logits=out,labels=train_label_batch)

    op=model.train(learning_rate_base=FLAGS.learning_rate_base,loss=loss,
                  learning_rate_decay=FLAGS.learning_rate_decay,all_number=FLAGS.all_number,batch_size=FLAGS.batch_size)

    train_end = model.moving(moving_average_decay=FLAGS.moving_average_decay, train_step=op)  # train op that also updates the moving averages


    accuracy=model.accuracy(out=out,train_label_batch=train_label_batch)


    saver = tf.train.Saver()  # checkpoint saver
    summary_op = tf.summary.merge_all()  # merge all TensorBoard summaries into one op

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())# initialize all variables
        train_writer = tf.summary.FileWriter(FLAGS.logs_train_dir, sess.graph)  # writer for training summaries
        coord = tf.train.Coordinator()# coordinates the input-queue threads
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)# start the queue-runner threads
        try:
            for step in range(FLAGS.training_steps):# one optimization step per iteration
                summary,_,tra_loss, tra_acc = sess.run([summary_op,train_end, loss, accuracy])

                if step % 50 == 0 and step != 0:
                    train_writer.add_summary(summary, step)  # log summaries every 50 steps
                    print("step", step, "loss", tra_loss, "acc", tra_acc * 100.0)

                if step % 2000 == 0 or (step + 1) == FLAGS.training_steps:# checkpoint every 2000 steps and at the end
                    saver.save(sess, os.path.join(FLAGS.save_dir, 'model.ckpt'), global_step=step)

        except tf.errors.OutOfRangeError:
            print('訓練出現出錯')
        finally:  # stop the queue threads
            coord.request_stop()
        coord.join(threads)


if __name__=="__main__":
    tf.app.run()

 

VGG_net.py

import tensorflow as tf
import numpy as np
import scipy.io as scio
def _conv_layer(input, weights, bias):
    """Stride-1 SAME convolution with fixed (constant) kernels, plus bias."""
    kernel = tf.constant(weights)  # freeze the pre-trained weights
    feature_map = tf.nn.conv2d(input, kernel, strides=[1, 1, 1, 1], padding="SAME")
    return tf.nn.bias_add(feature_map, bias)
def _pool_layer(input):
    """2x2 max-pool with stride 2 (SAME padding), halving spatial dims."""
    window = (1, 2, 2, 1)
    return tf.nn.max_pool(input, ksize=window, strides=(1, 2, 2, 1,), padding="SAME")

def net(data_path,input_image):
    """Build the VGG19 forward pass from pre-trained weights in a .mat file.

    The convolution weights are wrapped in tf.constant (see _conv_layer), so
    the whole VGG part is frozen; only layers added on top are trainable.

    Args:
        data_path: path to imagenet-vgg-verydeep-19.mat.
        input_image: 4-D image tensor fed into conv1_1.

    Returns:
        Dict mapping each name in `layers` to that layer's output tensor.
    """
    layers=('conv1_1','relu1_1','conv1_2','relu1_2','pool1',# the five convolutional stages of VGG19 (fc layers omitted)
            'conv2_1','relu2_1','conv2_2','relu2_2','pool2',
            'conv3_1','relu3_1','conv3_2','relu3_2','conv3_3','relu3_3','conv3_4','relu3_4','pool3',
            'conv4_1','relu4_1','conv4_2','relu4_2','conv4_3','relu4_3','conv4_4','relu4_4','pool4',
            'conv5_1', 'relu5_1','conv5_2','relu5_2','conv5_3','relu5_3','conv5_4','relu5_4'
            )
    data=scio.loadmat(data_path)# load the pre-trained model contents
    mean=data['normalization'][0][0][0]# mean image, per the imagenet-vgg-verydeep-19.mat layout — TODO confirm
    mean_pixel=np.mean(mean,axis=(0,1))# per-channel (RGB) mean; NOTE(review): computed but never subtracted from input_image — confirm intended
    weights=data['layers'][0]# per-layer parameter records from the .mat file
    net={}
    current=input_image# running activation tensor
    for i,name in enumerate(layers):
        kind=name[:4]
        if kind=='conv':
            kernels,bias=weights[i][0][0][0][0]# unwrap the nested .mat cell structure — kernels and bias for this layer
            kernels=np.transpose(kernels,[1,0,2,3])# presumably .mat stores (width, height, in, out); TF wants (height, width, in, out) — verify
            bias=bias.reshape(-1)# flatten to a 1-D bias vector
            current=_conv_layer(current,kernels,bias)
        elif kind=='relu':
            current=tf.nn.relu(current)# activation
        elif kind=="pool":
            current=_pool_layer(current)# 2x2 max pooling
        net[name]=current# record every layer's forward-pass output
    assert len(net)==len(layers)
    return net

 

model.py

import tensorflow as tf
def loss(logits,labels):
    """Mean sparse softmax cross-entropy over the batch, logged to TensorBoard."""
    with tf.variable_scope('loss') as scope:
        per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels)
        loss = tf.reduce_mean(per_example)
        tf.summary.scalar(scope.name + '/loss', loss)
    return loss

def train(learning_rate_base,loss,learning_rate_decay,all_number,batch_size):
    """Build the training op: Adam with an exponentially decaying learning rate.

    Args:
        learning_rate_base: initial learning rate.
        loss: scalar loss tensor to minimize.
        learning_rate_decay: decay rate applied once per decay period.
        all_number: total number of training samples.
        batch_size: samples per step; all_number / batch_size steps per epoch.

    Returns:
        The train op; each run performs one optimization step.
    """
    with tf.variable_scope('optimizer') as scope:
        # One shared step counter: minimize() increments it AND
        # exponential_decay() reads it, so the learning rate actually decays.
        # (Bug fixed: previously a separate, never-incremented Variable was
        # passed to exponential_decay, so the LR stayed constant forever.)
        global_step = tf.Variable(0, trainable=False, name='global_step')
        learning_rate = tf.train.exponential_decay(
            learning_rate_base, global_step, all_number / batch_size,
            learning_rate_decay)
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        train_op = optimizer.minimize(loss, global_step=global_step, name=scope.name)

    return train_op

def moving(moving_average_decay,train_step):
    """Wrap train_step so each run also updates EMA shadows of all trainables.

    Bug fixed: a fresh, never-incremented Variable was passed as
    ExponentialMovingAverage's num_updates, which caps the effective decay at
    min(decay, (1+0)/(10+0)) = 0.1 permanently — the configured decay rate was
    never used. Omitting num_updates applies the configured decay directly.

    Args:
        moving_average_decay: EMA decay rate (e.g. 0.99).
        train_step: the optimizer's train op to run alongside the EMA update.

    Returns:
        A no-op that depends on both the gradient step and the EMA update.
    """
    with tf.variable_scope('moving_optimizer') as scope:
        variable_averages = tf.train.ExponentialMovingAverage(moving_average_decay)  # EMA over all trainable variables
        variable_averages_op = variable_averages.apply(tf.trainable_variables())
        # Group backprop and the shadow-variable update into a single op.
        with tf.control_dependencies([train_step, variable_averages_op]):
            train_op = tf.no_op(name='moving_optimizer')

    return train_op

def accuracy(out,train_label_batch):
    """Fraction of the batch whose top-1 logit matches the label, logged to TensorBoard."""
    with tf.variable_scope('accuracy') as scope:
        hits = tf.nn.in_top_k(out, train_label_batch, 1)
        accuracy = tf.reduce_mean(tf.cast(hits, tf.float32))
        tf.summary.scalar(scope.name + '/accuracy', accuracy)
    return accuracy

 

get_files.py

import os
import numpy as np
def get_files(file_dir):
    """Collect cat/dog image paths under file_dir with labels, shuffled in unison.

    Files are expected to be named like 'cat.0.jpg' / 'dog.0.jpg'; anything
    whose stem contains neither 'cat' nor 'dog' is ignored.

    Bugs fixed:
      * np.hstack was rebuilt inside the loop (O(n^2)), and raised NameError
        when the directory was empty;
      * paths were joined with a hard-coded "\\", breaking on non-Windows;
      * labels round-tripped through a numpy string array.

    Args:
        file_dir: directory containing the training images.

    Returns:
        (image_list, label_list): paths and int labels (cat=0, dog=1),
        shuffled with matching order.
    """
    cats, label_cats = [], []
    dogs, label_dogs = [], []
    for file in os.listdir(file_dir):
        name = file.split(sep='.')
        if 'cat' in name[0]:
            # os.path.join is portable (the old "\\" concat broke on Linux)
            cats.append(os.path.join(file_dir, file))
            label_cats.append(0)
        elif 'dog' in name[0]:
            dogs.append(os.path.join(file_dir, file))
            label_dogs.append(1)
    # Merge once, after the loop (previously rebuilt on every iteration).
    image_list = cats + dogs
    label_list = label_cats + label_dogs
    # Shuffle images and labels with one shared permutation so pairs stay aligned.
    order = np.random.permutation(len(image_list))
    image_list = [image_list[i] for i in order]
    label_list = [int(label_list[i]) for i in order]
    return image_list, label_list

 

get_batch.py

import tensorflow as tf
# image_W ,image_H 指定圖片大小,batch_size 每批讀取的個數 ,capacity隊列中 最多容納元素的個數
def get_batch(image, label, image_W, image_H, batch_size, capacity):
    """Turn lists of image paths and int labels into batched tensors.

    image_W / image_H give the target spatial size, batch_size the number of
    images per batch, and capacity the maximum number of elements the
    prefetch queue may hold.
    """
    # Cast the Python lists into tensors the TF queue machinery understands.
    image = tf.cast(image, tf.string)
    label = tf.cast(label, tf.int32)

    # Queue that yields one (path, label) pair at a time.
    pair_queue = tf.train.slice_input_producer([image, label])
    label = pair_queue[1]
    # Read the raw file bytes and decode as a 3-channel (RGB) JPEG.
    raw_bytes = tf.read_file(pair_queue[0])
    image = tf.image.decode_jpeg(raw_bytes, channels=3)
    # Center-crop or pad so every image is exactly image_W x image_H.
    image = tf.image.resize_image_with_crop_or_pad(image, image_W, image_H)
    # Standardize per image: subtract its mean, divide by its stddev.
    image = tf.image.per_image_standardization(image)
    # Assemble fixed-size batches; num_threads controls reader parallelism.
    image_batch, label_batch = tf.train.batch([image, label], batch_size=batch_size, num_threads=64, capacity=capacity)

    # Flatten the label batch to shape [batch_size].
    label_batch = tf.reshape(label_batch, [batch_size])
    # Convert pixels to float32 for the conv layers downstream.
    image_batch = tf.cast(image_batch, tf.float32)
    return image_batch, label_batch

 

predict_one.py

測試

import tensorflow as tf
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import model
import os
import VGG_net
import cv2
# 從指定目錄中選取一張圖片
def get_one_image(train):
    """Pick a random image from directory `train`, display it, and return it
    resized to 224x224 (the training input size) as a numpy array."""
    files = os.listdir(train)
    ind = np.random.randint(0, len(files))  # random index into the directory listing
    img_dir = os.path.join(train, files[ind])
    picked = Image.open(img_dir)
    plt.imshow(picked)  # show the chosen image to the user
    plt.show()
    picked = picked.resize([224, 224])
    return np.array(picked)


def evaluate_one_image():
    """Classify one random image from ./test1/ with the trained model.

    Rebuilds the same VGG19 + two-FC-layer graph used in training, restores
    the latest checkpoint from 'save/', and prints cat/dog probabilities.

    NOTE(review): relies on a global `sess` created in the __main__ block
    (tf.InteractiveSession) instead of opening its own session — confirm the
    call site always provides it.
    """
    # Directory of test images to sample from
    train = './test1/'
    image_array = get_one_image(train)
    BATCH_SIZE = 1  # predicting a single image, so batch size is 1
    N_CLASSES = 2  # two output neurons: cat and dog
    # conv2d() requires float32 input, so resize then cast
    im = cv2.resize(image_array, (224, 224), interpolation=cv2.INTER_CUBIC)  # force 224x224, matching the training input size
    image = tf.cast(im, tf.float32)
    # Add a batch dimension: [224, 224, 3] -> [1, 224, 224, 3]
    image = tf.reshape(image, [1, 224, 224, 3])

    # Placeholder through which the image is fed into the graph
    x = tf.placeholder(tf.float32, shape=[1,224, 224, 3])

    nets = VGG_net.net('../FCN.tensorflow-master/Model_zoo/imagenet-vgg-verydeep-19.mat', x)  # frozen VGG19 forward pass; returns every layer's output
    # Same two fully-connected layers that were added on top during training
    with tf.variable_scope("dense1"):
        i = tf.reshape(nets["relu5_4"], [1, -1])  # flatten relu5_4 features
        weights = tf.Variable(tf.random_normal(shape=[14 * 14 * 512, 10], stddev=0.1))
        bias = tf.Variable(tf.zeros(shape=[10]) + 0.1)
        dense1 = tf.nn.tanh(tf.matmul(i, weights) + bias)

    with tf.variable_scope("out"):
        weights = tf.Variable(tf.random_normal(shape=[10, 2], stddev=0.1))
        bias = tf.Variable(tf.zeros(shape=[2]) + 0.1)
        out = tf.matmul(dense1, weights) + bias



    # The logits are not activated, so apply softmax here to get probabilities
    logit = tf.nn.softmax(out)


    # Checkpoint directory
    logs_train_dir = 'save/'
    # Saver used to restore the trained weights
    saver = tf.train.Saver()
    # Locate the latest checkpoint
    ckpt = tf.train.get_checkpoint_state(logs_train_dir)  # reads the checkpoint state file under logs_train_dir
    if ckpt and ckpt.model_checkpoint_path:
        # The global step is the suffix after the last '-' of the checkpoint name
        global_step = ckpt.model_checkpoint_path.split('-')[-1]
        saver.restore(sess, ckpt.model_checkpoint_path)
        print('模型加載成功, 訓練的步數爲 %s' % global_step)
    else:
        print('模型加載失敗,,,文件沒有找到')
    # Run the graph; image.eval() materializes the preprocessed tensor to feed
    prediction = sess.run(logit, feed_dict={x: image.eval()})
    print('貓的概率 %.6f' % prediction[:, 0])
    print('狗的概率 %.6f' % prediction[:, 1])
    if prediction[:, 0]>=prediction[:, 1]:
        print('圖片爲貓')
    else:
        print('圖片爲狗')



if __name__=="__main__":

    # Single-image prediction smoke test
    print("正在檢測")
    sess = tf.InteractiveSession()  # installed as the default session; evaluate_one_image() uses this global
    evaluate_one_image()

 

結果:

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章