Complete TensorFlow Cats vs. Dogs implementation with detailed comments

Runtime environment:

Windows 10, TensorFlow 1.10 (GPU build), Python 3.6, PyCharm as the editor

File descriptions
(1) The data folder contains two subfolders, test and train, which hold the test data and the training data respectively; just unpack the dataset downloaded from the official site into the corresponding folder.
(2) The venv folder holds the Anaconda virtual environment.
(3) input_data.py reads the data and produces batches.
(4) model.py implements the neural network model.
(5) training.py trains and evaluates the model.
(6) The log folder stores the training results and model parameters (checkpoints).

(7) test-1.py tests one randomly chosen image from the test folder. The resulting directory layout is sketched below.
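A rough sketch of the project layout, assuming the structure described above (the individual image file names are only examples):

Project/
├── data/
│   ├── train/        # cat.0.jpg, dog.0.jpg, ... (labelled images from the official download)
│   └── test/         # 1.jpg, 2.jpg, ... (unlabelled test images)
├── venv/             # Anaconda / virtual environment
├── log/              # checkpoints and TensorBoard event files
├── input_data.py
├── model.py
├── training.py
└── test-1.py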

The code:

Full input_data.py code with comments:

import tensorflow as tf
import numpy as np
import os


def get_files(file_dir):
    """
    輸入: 存放訓練照片的文件地址
    返回:  圖像列表, 標籤列表
    """
    # 建立空列表
    cats = []
    label_cats = []
    dogs = []
    label_dogs = []

    # 讀取標記好的圖像和加入標籤
    for file in os.listdir(file_dir):   # file就是要讀取的照片
        name = file.split(sep='.')      # 因爲照片的格式是cat.1.jpg/cat.2.jpg
        if name[0] == 'cat':            # 所以只用讀取 . 前面這個字符串
            cats.append(file_dir + file)
            label_cats.append(0)        # 把圖像和標籤加入列表
        else:
            dogs.append(file_dir + file)
            label_dogs.append(1)
    print('There are %d cats\nThere are %d dogs' % (len(cats), len(dogs)))

    image_list = np.hstack((cats, dogs))  # 在水平方向平鋪合成一個行向量
    label_list = np.hstack((label_cats, label_dogs))

    temp = np.array([image_list, label_list])  # 生成一個兩行數組列表,大小是2 X 25000
    temp = temp.transpose()   # 轉置向量,大小變成25000 X 2
    np.random.shuffle(temp)   # 亂序,打亂這25000個例子的順序

    image_list = list(temp[:, 0])  # 所有行,列=0
    label_list = list(temp[:, 1])  # 所有行,列=1
    label_list = [int(float(i)) for i in label_list]  # 把標籤列表轉化爲int類型

    return image_list, label_list


def get_batch(image, label, image_W, image_H, batch_size, capacity):
    """
    輸入:
    image,label :要生成batch的圖像和標籤
    image_W,image_H: 圖像的寬度和高度
    batch_size: 每個batch(小批次)有多少張圖片數據
    capacity: 隊列的最大容量
    返回:
    image_batch: 4D tensor [batch_size, width, height, 3], dtype=tf.float32
    label_batch: 1D tensor [batch_size], dtype=tf.int32
    """
    # 將列表轉換成tf能夠識別的格式
    image = tf.cast(image, tf.string)
    label = tf.cast(label, tf.int32)

    # 生成隊列(牽扯到線程概念,便於batch訓練)
    """
    隊列的理解:每次訓練時,從隊列中取一個batch送到網絡進行訓練,
               然後又有新的圖片從訓練庫中注入隊列,這樣循環往復。
               隊列相當於起到了訓練庫到網絡模型間數據管道的作用,
               訓練數據通過隊列送入網絡。
    """
    input_queue = tf.train.slice_input_producer([image, label])

    # 圖像的讀取需要tf.read_file(),標籤則可以直接賦值
    image_contents = tf.read_file(input_queue[0])
    image = tf.image.decode_jpeg(image_contents, channels=3)  # 解碼彩色的.jpg圖像
    label = input_queue[1]

    # 統一圖片大小
    image = tf.image.resize_images(image, [image_H, image_W], method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    image = tf.cast(image, tf.float32)
    image = tf.image.per_image_standardization(image)  # 標準化圖片,因爲前兩行代碼已經處理過了,所以可要可不要

    # assemble batches of the requested size
    image_batch, label_batch = tf.train.batch([image, label],
                                              batch_size=batch_size,
                                              num_threads=64,  # reader threads that feed the queue
                                              capacity=capacity)

    # the two casts below are, strictly speaking, redundant; they are kept to make sure the dtypes are right
    image_batch = tf.cast(image_batch, tf.float32)
    label_batch = tf.cast(label_batch, tf.int32)

    return image_batch, label_batch
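A minimal usage sketch of how the two functions above fit together (the relative path is just a placeholder; training.py below does the same thing with its own constants):

import tensorflow as tf
import input_data

train_dir = './data/train/'  # placeholder path; point this at your own train folder

image_list, label_list = input_data.get_files(train_dir)
image_batch, label_batch = input_data.get_batch(image_list, label_list,
                                                image_W=208, image_H=208,
                                                batch_size=16, capacity=2000)

with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    imgs, labs = sess.run([image_batch, label_batch])   # pull one batch of preprocessed images
    print(imgs.shape, labs)                              # (16, 208, 208, 3) and 16 labels of 0 (cat) / 1 (dog)
    coord.request_stop()
    coord.join(threads)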

Full model.py code with comments:

import tensorflow as tf


def cnn_inference(images, batch_size, n_classes):
    """
    輸入
    images      輸入的圖像
    batch_size  每個批次的大小
    n_classes   n分類
    返回
    softmax_linear 還差一個softmax
    """
    # 第一層的卷積層conv1,卷積核爲3X3,有16個
    with tf.variable_scope('conv1') as scope:
        # 建立weights和biases的共享變量
        # conv1, shape = [kernel size, kernel size, channels, kernel numbers]
        weights = tf.get_variable('weights',
                                  shape=[3, 3, 3, 16],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.1,dtype=tf.float32))  # stddev標準差
        biases = tf.get_variable('biases',
                                 shape=[16],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        # 卷積層 strides = [1, x_movement, y_movement, 1], padding填充周圍有valid和same可選擇
        conv = tf.nn.conv2d(images, weights, strides=[1, 1, 1, 1], padding='SAME')
        pre_activation = tf.nn.bias_add(conv, biases)         # 加入偏差
        conv1 = tf.nn.relu(pre_activation, name=scope.name)  # 加上激活函數非線性化處理,且是在conv1的命名空間

    # 第一層的池化層pool1和規範化norm1(特徵縮放)
    with tf.variable_scope('pooling1_lrn') as scope:
        pool1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1],strides=[1, 2, 2, 1],
                               padding='SAME', name='pooling1')
        norm1 = tf.nn.lrn(pool1, depth_radius=4, bias=1.0, alpha=0.001/9.0,
                          beta=0.75,name='norm1')
        # ksize是池化窗口的大小=[1,height,width,1],一般height=width=池化窗口的步長
        # 池化窗口的步長一般是比卷積核多移動一位
        # tf.nn.lrn是Local Response Normalization,(局部響應歸一化)

    # second convolutional layer conv2; its variable scope differs from the first layer's, so the variable names can be reused
    with tf.variable_scope('conv2') as scope:
        weights = tf.get_variable('weights',
                                  shape=[3, 3, 16, 16],  # only the third number (16) has to match the depth of the previous tensor
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=[16],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        conv = tf.nn.conv2d(norm1, weights, strides=[1, 1, 1, 1], padding='SAME')
        pre_activation = tf.nn.bias_add(conv, biases)
        conv2 = tf.nn.relu(pre_activation, name='conv2')

    # second pooling layer pool2 and normalization norm2
    with tf.variable_scope('pooling2_lrn') as scope:
        norm2 = tf.nn.lrn(conv2, depth_radius=4, bias=1.0, alpha=0.001/9.0,
                          beta=0.75, name='norm2')
        pool2 = tf.nn.max_pool(norm2, ksize=[1, 2, 2, 1], strides=[1, 1, 1, 1],
                               padding='SAME', name='pooling2')
        # here the normalization is applied before the pooling

    # third layer: fully connected layer local3
    with tf.variable_scope('local3') as scope:
        # flatten: reshape the convolved multi-dimensional tensor into a 2-D tensor (matrix)
        reshape = tf.reshape(pool2, shape=[batch_size, -1])  # batch_size says how many samples there are

        dim = reshape.get_shape()[1].value  # find out what the -1 ("whatever fits") dimension actually is
        weights = tf.get_variable('weights',
                                  shape=[dim, 256],  # connect to 256 neurons
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.005, dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=[256],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        local3 = tf.nn.relu(tf.matmul(reshape, weights) + biases, name=scope.name)  # matrix multiplication plus bias

    # fourth layer: fully connected layer local4
    with tf.variable_scope('local4') as scope:
        weights = tf.get_variable('weights',
                                  shape=[256, 512],  # connect to another 512 neurons
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.005, dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=[512],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        local4 = tf.nn.relu(tf.matmul(local3, weights) + biases, name='local4')

    # fifth layer: the output layer softmax_linear
    with tf.variable_scope('softmax_linear') as scope:
        weights = tf.get_variable('weights',
                                  shape=[512, n_classes],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.005, dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=[n_classes],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        softmax_linear = tf.add(tf.matmul(local4, weights), biases, name='softmax_linear')
        # the op is only *named* softmax_linear; the actual softmax is applied inside losses() below,
        # fused with the cross entropy, which is faster.
        # softmax_linear has one row per sample in local4 and one column per class (= columns of weights = length of biases).
        # the softmax then maps these outputs into the (0, 1) range, so they can be read as class probabilities.

    return softmax_linear
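For the 208 x 208 inputs used in training.py, the tensor shapes flow roughly as follows (a hand-worked trace of the code above, not something the program prints):

# images:          [batch_size, 208, 208, 3]
# conv1:           [batch_size, 208, 208, 16]   (3x3 kernels, stride 1, SAME padding)
# pool1 / norm1:   [batch_size, 104, 104, 16]   (2x2 window, stride 2)
# conv2 / norm2:   [batch_size, 104, 104, 16]
# pool2:           [batch_size, 104, 104, 16]   (stride 1, so no further downsampling)
# reshape:         [batch_size, 104*104*16] = [batch_size, 173056]
# local3:          [batch_size, 256]
# local4:          [batch_size, 512]
# softmax_linear:  [batch_size, n_classes]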


def losses(logits, labels):
    """
    輸入
    logits: 經過cnn_inference處理過的tensor
    labels: 對應的標籤
    返回
    loss: 損失函數(交叉熵)
    """
    with tf.variable_scope('loss') as scope:
        # 下面把交叉熵和softmax合到一起寫是爲了通過spares提高計算速度
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels, name='loss_per_eg')
        loss = tf.reduce_mean(cross_entropy, name='loss')  # 求所有樣本的平均loss
    return loss
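As a quick sanity check on what the fused op computes, the NumPy sketch below reproduces the per-example loss, -log(softmax(logits)[label]); it is only an illustration, not part of the project:

import numpy as np

def manual_sparse_softmax_xent(logits, labels):
    """Per-example cross entropy: -log(softmax(logits)[label])."""
    logits = logits - logits.max(axis=1, keepdims=True)                 # shift for numerical stability
    probs = np.exp(logits) / np.exp(logits).sum(axis=1, keepdims=True)  # row-wise softmax
    return -np.log(probs[np.arange(len(labels)), labels])

# two samples, two classes (cat = 0, dog = 1)
print(manual_sparse_softmax_xent(np.array([[2.0, 0.5], [0.1, 1.2]]),
                                 np.array([0, 1])))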


def training(loss, learning_rate):
    """
    輸入
    loss: 損失函數(交叉熵)
    learning_rate: 學習率
    返回
    train_op: 訓練的最優值
    """
    with tf.name_scope('optimizer'):
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        # global_step不是共享變量,初始值爲0,設定trainable=False 可以防止該變量被數據流圖的 GraphKeys.TRAINABLE_VARIABLES 收集,
        # 這樣我們就不會在訓練的時候嘗試更新它的值。
        global_step = tf.Variable(0, name='global_step', trainable=False)
        train_op = optimizer.minimize(loss, global_step= global_step)
    return train_op


def evaluation(logits, labels):
    """
     輸入
    logits: 經過cnn_inference處理過的tensor
    labels:
    返回
    accuracy:正確率
    """
    with tf.variable_scope('accuracy') as scope:
        prediction = tf.nn.softmax(logits)  # 這個logits有n_classes列
        # prediction每行的最大元素(1)的索引和label的值相同則爲1 否則爲0。
        correct = tf.nn.in_top_k(prediction, labels, 1)
        # correct = tf.nn.in_top_k(logits, labels, 1)   也可以不需要prediction過渡,因爲最大值的索引沒變,這裏這樣寫是爲了更好理解
        correct = tf.cast(correct, tf.float16)  # 記得要轉換格式
        accuracy = tf.reduce_mean(correct)
    return accuracy
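To see what the accuracy op does on a concrete batch, here is a small made-up example, written with NumPy since in_top_k with k=1 reduces to an argmax comparison:

import numpy as np

# toy softmax outputs for a batch of 3 images (columns: cat, dog)
prediction = np.array([[0.9, 0.1],   # predicted cat
                       [0.3, 0.7],   # predicted dog
                       [0.6, 0.4]])  # predicted cat
labels = np.array([0, 1, 1])         # true labels: cat, dog, dog

correct = (np.argmax(prediction, axis=1) == labels).astype(np.float16)
print(correct.mean())                # 2 of 3 correct -> accuracy ~ 0.667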

Full training.py code with comments:

import os
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

import input_data
import model


N_CLASSES = 2  # cats and dogs
IMG_W = 208  # resize the images; larger images make training take longer
IMG_H = 208
BATCH_SIZE = 16
CAPACITY = 2000
MAX_STEP = 10000  # usually 5k~10k steps
learning_rate = 0.0001  # usually 0.0001 or smaller

train_dir = 'D:/python/deep-learning/CatVsDog/Project/data/train/'
logs_train_dir = 'D:/python/deep-learning/CatVsDog/Project/log/'  # where the training log and the saved model go

train, train_label = input_data.get_files(train_dir)
train_batch, train_label_batch = input_data.get_batch(train,
                                                      train_label,
                                                      IMG_W,
                                                      IMG_H,
                                                      BATCH_SIZE,
                                                      CAPACITY)

train_logits = model.cnn_inference(train_batch, BATCH_SIZE, N_CLASSES)
train_loss = model.losses(train_logits, train_label_batch)
train_op = model.training(train_loss, learning_rate)
train__acc = model.evaluation(train_logits, train_label_batch)

tf.summary.scalar('train_loss', train_loss)      # without at least one summary, merge_all() would return None
tf.summary.scalar('train_accuracy', train__acc)
summary_op = tf.summary.merge_all()  # merge all summaries into a single op for the log

# matplotlib setup for the accuracy and loss curves
step_list = list(range(100))  # one point every 100 steps -> MAX_STEP / 100 = 100 points
cnn_list1 = []
cnn_list2 = []
fig = plt.figure()  # the figure that holds both plots
ax = fig.add_subplot(1, 2, 1)  # rows, columns, position of the subplot
ax.yaxis.grid(True)
ax.set_title('cnn_accuracy ', fontsize=14, y=1.02)
ax.set_xlabel('step')
ax.set_ylabel('accuracy')
bx = fig.add_subplot(1, 2, 2)
bx.yaxis.grid(True)
bx.set_title('cnn_loss ', fontsize=14, y=1.02)
bx.set_xlabel('step')
bx.set_ylabel('loss')


# initialization; indispensable whenever the graph contains variables
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # create a writer that writes the log files
    train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)
    # create a saver that stores the trained model
    saver = tf.train.Saver()

    # queue coordination
    # batch training relies on queues here; a placeholder-based feed would also work
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    try:
        # run MAX_STEP training steps, one batch per step
        for step in np.arange(MAX_STEP):
            if coord.should_stop():
                break
            # run the training op; sess.run() returns None for train_op, so bind it to a throwaway name
            _op, tra_loss, tra_acc = sess.run([train_op, train_loss, train__acc])
            # every 50 steps, print the current loss and accuracy and write a summary to the log
            if step % 50 == 0:
                print('Step %d, train loss = %.2f, train accuracy = %.2f%%' % (step, tra_loss, tra_acc * 100.0))
                summary_str = sess.run(summary_op)
                train_writer.add_summary(summary_str, step)
            # every 100 steps, record a point for the plots
            if step % 100 == 0:
                cnn_list1.append(tra_acc)
                cnn_list2.append(tra_loss)
            # every 5000 steps (and at the last step), save the trained model
            if step % 5000 == 0 or (step + 1) == MAX_STEP:
                checkpoint_path = os.path.join(logs_train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)

        ax.plot(step_list, cnn_list1)
        bx.plot(step_list, cnn_list2)
        plt.show()

    except tf.errors.OutOfRangeError:
        print('Done training -- epoch limit reached')
    finally:
        coord.request_stop()
        coord.join(threads)  # wait for the queue-runner threads to shut down cleanly
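Because training.py writes the graph and the scalar summaries to logs_train_dir, the run can be inspected with TensorBoard by pointing it at the same directory, e.g.:

tensorboard --logdir=D:/python/deep-learning/CatVsDog/Project/log/

and opening the address it prints in a browser.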


Full test-1.py code with comments:

from PIL import Image
import matplotlib.pyplot as plt
import input_data
import model
import os
import numpy as np
import tensorflow as tf


def get_one_image(train):
    '''Randomly pick one image from training data
    Return: ndarray
    '''
    n = len(train)
    ind = np.random.randint(0, n)
    img_dir = train[ind]

    image = Image.open(img_dir)
    plt.imshow(image)
    image = image.resize([208, 208])
    image = np.array(image)
    return image


def evaluate_one_image():
    train_dir = 'D:/python/deep-learning/CatVsDog/Project/data/test/'
    train, train_label = input_data.get_files(train_dir)  # only the list of image paths is needed here; the labels are not used
    image_array = get_one_image(train)

    with tf.Graph().as_default():
        BATCH_SIZE = 1
        N_CLASSES = 2

        # the placeholder is the graph input; the image goes through the same preprocessing as in training
        x = tf.placeholder(tf.float32, shape=[208, 208, 3])
        image = tf.image.per_image_standardization(x)
        image = tf.reshape(image, [1, 208, 208, 3])
        logit = model.cnn_inference(image, BATCH_SIZE, N_CLASSES)

        logit = tf.nn.softmax(logit)

        # you need to change the directories to yours.
        logs_train_dir = 'D:/python/deep-learning/CatVsDog/Project/log/'

        saver = tf.train.Saver()

        with tf.Session() as sess:

            print("Reading checkpoints...")
            ckpt = tf.train.get_checkpoint_state(logs_train_dir)
            if ckpt and ckpt.model_checkpoint_path:
                global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
                saver.restore(sess, ckpt.model_checkpoint_path)
                print('Loading success, global_step is %s' % global_step)
            else:
                print('No checkpoint file found')

            prediction = sess.run(logit, feed_dict={x: image_array})
            max_index = np.argmax(prediction)
            if max_index == 0:
                print('This is a cat with possibility %.6f' % prediction[0, 0])
            else:
                print('This is a dog with possibility %.6f' % prediction[0, 1])
    plt.imshow(image_array)
    plt.show()


evaluate_one_image()

 
