TensorFlow: Quickly Building a Handwritten Digit Recognition Network (with source code, clear comments, accuracy above 98%!)

A recent course assignment required implementing handwritten digit recognition, so I studied the related programs available online and wrote a TensorFlow version of the task; it reaches over 98% accuracy. I also wrote a PyTorch version earlier, which you are welcome to use as well. I hope we can exchange ideas!

 

For the PyTorch implementation of handwritten digit recognition, see: https://blog.csdn.net/qq_33302004/article/details/106339687

 

Results: the console output of do_train shows the test accuracy exceeding 98%.

Program walkthrough:

The program is built around three functions:

print_activations (prints the size of each feature map)

inference (builds the network structure)

do_train (runs network training)

The network is modeled on LeNet-5 with a few modifications (a quick size sanity check follows the list):

1. INPUT (input layer): 28*28 input image
2. C1 (convolution): kernel 5*5*32, output feature map 24*24*32
3. S2 (pooling): output 12*12*32
4. C3 (convolution): kernel 5*5*64, output feature map 8*8*64
5. S4 (pooling): output 4*4*64
6. C5 (convolution): kernel 4*4*256, output feature map 1*1*256 (a 256-dimensional vector; implemented as a fully connected layer in the code below)
7. F6 (fully connected layer): 256 -> 128
8. Output (output layer): 128 -> 10
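As a quick sanity check of the sizes above (this snippet is only an illustration, not part of the training program): a VALID 5*5 convolution shrinks each spatial side by 4, and a 2*2 pooling halves it.

# Sanity check of the feature-map sizes listed above (illustrative only).
def valid_conv(size, kernel):      # VALID padding, stride 1
    return size - kernel + 1

def pool2(size):                   # 2x2 max pooling, stride 2
    return size // 2

s = 28                  # INPUT
s = valid_conv(s, 5)    # C1: 24
s = pool2(s)            # S2: 12
s = valid_conv(s, 5)    # C3: 8
s = pool2(s)            # S4: 4
print(s * s * 64)       # 1024 = 4*4*64, the vector fed into the fully connected layers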

The overall layout mirrors the classic LeNet-5 architecture.

Dropout is used (see the inference function).
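Note that in the TensorFlow 1.x API used here, the second argument of tf.nn.dropout is the keep probability, so DROPOUT_RATE = 0.50 keeps half of the activations during training. A minimal standalone sketch:

# Standalone dropout sketch (TF 1.x API): the second argument is the KEEP probability.
import tensorflow as tf

h = tf.ones([1, 4])
h_drop = tf.nn.dropout(h, 0.5)        # each unit kept with probability 0.5 and scaled by 1/0.5
with tf.Session() as sess:
    print(sess.run(h_drop))           # surviving entries print as 2.0, dropped entries as 0.0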

Moving averages, learning-rate decay, and the loss function (see the do_train function).
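With staircase=True, tf.train.exponential_decay multiplies the base learning rate by the decay factor once every DELAY_steps steps. A rough sketch of the schedule, using the hyperparameter values from the code below:

# Rough sketch of the staircase learning-rate schedule (values taken from the hyperparameters below).
def decayed_lr(step, base=0.0015, decay=0.95, decay_steps=1000):
    return base * decay ** (step // decay_steps)

print(decayed_lr(0))        # 0.0015
print(decayed_lr(999))      # 0.0015   (still on the first "stair")
print(decayed_lr(1000))     # 0.001425 (multiplied by 0.95 after 1000 steps)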

Full source code:

# from sklearn.datasets import fetch_mldata
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data 
import matplotlib.pyplot as plt
import numpy as np
from math import floor, ceil

# Hyperparameters (this set also reaches over 98% accuracy)
MNIST_data_path = 'MNIST/'          # data path
REGULARIZATION_RATE = 0.000001      # regularization coefficient
MOVING_AVERAGE_DECAY = 0.999        # moving-average decay
DROPOUT_RATE = 0.50                 # dropout keep probability (passed to tf.nn.dropout)
LEARNING_RATE_BASE = 0.0015         # initial learning rate
LEARNING_RATE_DECAY = 0.95          # learning-rate decay factor
DELAY_steps = 1000                  # decay the learning rate every 1000 steps
BATCH_SIZE = 400                    # batch size
TRAINING_STEPS = 2000               # number of training iterations

def print_activations(t):  # print the name and shape of a conv/pool layer's output tensor
    print(t.op.name, '', t.get_shape().as_list())
    return

# Build the model; dropout_rate is the keep probability for dropout, regularizer optionally adds L2 penalties
def inference(x, dropout_rate, regularizer):
    print_activations(x)

    # First convolutional layer: 1 input channel, 32 output channels, 5*5 kernel
    with tf.variable_scope('layer1-conv1', reuse = tf.AUTO_REUSE):
        conv1_weights = tf.get_variable(
            "weight", [5, 5, 1, 32],
            initializer=tf.truncated_normal_initializer(stddev=0.1))    # weights drawn from a truncated normal with stddev 0.1
        conv1_biases = tf.get_variable("bias", [32], initializer=tf.constant_initializer(0.0))   # biases initialized to the constant 0
        conv1_wx = tf.nn.conv2d(x, conv1_weights, strides=[1, 1, 1, 1], padding='VALID')        # convolution; strides sets the stride in each image dimension, VALID means no padding
        conv1 = tf.nn.relu(tf.nn.bias_add(conv1_wx, conv1_biases))       # add the bias to the convolution result, then apply ReLU
        print_activations(conv1)
        conv1 = tf.nn.max_pool(conv1, ksize=[1,2,2,1], strides=[1,2,2,1], padding='VALID', name='max_pooling')        # 2x2 max pooling
        print_activations(conv1)

    # Second convolutional layer: 32 input channels, 64 output channels, 5*5 kernel
    with tf.variable_scope("layer2-conv2", reuse = tf.AUTO_REUSE):
        conv2_weights = tf.get_variable(
            "weight", [5, 5, 32, 64],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        conv2_biases = tf.get_variable("bias", [64], initializer=tf.constant_initializer(0.0))
        conv2_wx = tf.nn.conv2d(conv1, conv2_weights, strides=[1, 1, 1, 1], padding='VALID')
        conv2 = tf.nn.relu(tf.nn.bias_add(conv2_wx, conv2_biases))
        print_activations(conv2)
        conv2=tf.nn.max_pool(conv2, ksize=[1,2,2,1], strides=[1,2,2,1], padding='VALID',name='max_pooling')
        print_activations(conv2)
        reshaped = tf.reshape(conv2, [-1, 64*4*4])       # flatten into a (batch, 64*4*4) vector

    # Fully connected layer 1 (plays the role of C5): 64*4*4 -> 256
    with tf.variable_scope('layer-fc1', reuse = tf.AUTO_REUSE):
        fc1_weights = tf.get_variable("weight", [64*4*4, 256],
                                      initializer=tf.truncated_normal_initializer(stddev=0.1))
        if regularizer != None: tf.add_to_collection('losses', regularizer(fc1_weights))        # add the L2 penalty for these weights to the 'losses' collection
        fc1_biases = tf.get_variable("bias", [256], initializer=tf.constant_initializer(0.1))

        fc1 = tf.nn.relu(tf.matmul(reshaped, fc1_weights) + fc1_biases)
        fc1 = tf.nn.dropout(fc1, dropout_rate)          # dropout (dropout_rate is the keep probability)
        print_activations(fc1)

    # Fully connected layer 2 (F6): 256 -> 128
    with tf.variable_scope('layer-fc2', reuse=tf.AUTO_REUSE):
        fc2_weights = tf.get_variable("weight", [256, 128],
                                      initializer=tf.truncated_normal_initializer(stddev=0.1))
        if regularizer != None: tf.add_to_collection('losses', regularizer(fc2_weights))
        fc2_biases = tf.get_variable("bias", [128], initializer=tf.constant_initializer(0.1))
        fc2 = tf.nn.relu(tf.matmul(fc1, fc2_weights) + fc2_biases)
        fc2 = tf.nn.dropout(fc2, dropout_rate)
        print_activations(fc2)

    # Fully connected layer 3 (output): 128 -> 10
    with tf.variable_scope('layer-fc3', reuse=tf.AUTO_REUSE):
        fc3_weights = tf.get_variable("weight", [128, 10],
                                      initializer=tf.truncated_normal_initializer(stddev=0.1))
        # if regularizer != None: tf.add_to_collection('losses', regularizer(fc3_weights))
        fc3_biases = tf.get_variable("bias", [10], initializer=tf.constant_initializer(0.1))
        # fc3 = tf.nn.relu(tf.matmul(fc2, fc3_weights) + fc3_biases)
        # Return the raw logits; softmax is applied inside the loss,
        # since tf.nn.softmax_cross_entropy_with_logits expects unscaled logits.
        fc3 = tf.matmul(fc2, fc3_weights) + fc3_biases
        print_activations(fc3)
    
    return fc3

# Train the network
def do_train(train_x, train_y, test_x, test_y):
    tf.reset_default_graph()        # clear the default graph stack and reset the global default graph
    test_y_labelNumber = np.argmax(test_y, axis=1)          # convert the one-hot test labels to class indices
    NUM_TEST = len(test_y)
    NUM_TRAIN = len(train_y)

    with tf.name_scope('input'):
        x = tf.placeholder(tf.float32, [None, 28, 28, 1], name='x-input')
        y_ = tf.placeholder(tf.float32, [None, 10], name='y-input')
        # tf.placeholder() only reserves a slot in the graph when it is built; no data is passed in yet and
        # only the necessary memory is allocated. After the session is created, data is fed to the
        # placeholders through feed_dict when the model runs.
    
    # regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    y = inference(x, dropout_rate=DROPOUT_RATE, regularizer=None)  # y is the forward-pass output (logits); dropout (and optionally the L2 regularizer) guards against overfitting
    global_step = tf.Variable(0, trainable=False)

    # Define the moving-average op, loss function, learning rate, and training op.
    with tf.variable_scope("moving_average", reuse = tf.AUTO_REUSE):  # namespace for the moving average
        variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)        # maintain exponential moving averages of all trainable variables
        variables_averages_op = variable_averages.apply(tf.trainable_variables())
    
    with tf.variable_scope("loss_function", reuse = tf.AUTO_REUSE): # 計算損失函數的命名空間。
        # loss = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))   # 交叉熵
        loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y,labels=y_))        # 交叉熵
        # loss = -tf.reduce_sum(y_ * tf.log(y))
        tf.summary.scalar("loss", loss)  # 記錄loss

    with tf.variable_scope("train_step", reuse = tf.AUTO_REUSE):# 定義學習率、優化方法及每一輪執行訓練的操作的命名空間。
        #exponential_decay()學習率衰減函數
        learning_rate = tf.train.exponential_decay(LEARNING_RATE_BASE,global_step,DELAY_steps,LEARNING_RATE_DECAY,staircase=True)  #每1000(DELAY_steps)輪訓練後要乘以學習率的衰減值   #train.num_examples / BATCH_SIZE, 
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss, global_step=global_step)
    
        with tf.control_dependencies([train_step, variables_averages_op]):
            train_op = tf.no_op(name='train') 

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True                      # allow GPU memory usage to grow as needed
    config.gpu_options.per_process_gpu_memory_fraction = 0.9    # cap GPU memory usage at 90%

    with tf.Session(config = config) as sess:
 
        tf.global_variables_initializer().run()   # initialize all variables (same as sess.run(tf.global_variables_initializer()))
        
        for i in range(TRAINING_STEPS + 1):
            # randomly sample BATCH_SIZE training examples
            index_batch = np.random.randint(NUM_TRAIN, size=BATCH_SIZE)
            xs, ys = train_x[index_batch], train_y[index_batch]   # training batch
            re_xs = np.reshape(xs, [-1, 28, 28, 1])
            re_ys = np.reshape(ys, [-1, 10])
            # feed dict for the training batch
            train_feed={x: re_xs, y_: re_ys}  
            # run one training step (train_op also triggers the moving-average update)
            _, loss_value, step = sess.run([train_op, loss, global_step], feed_dict=train_feed)
            
            if i % (100) == 0:
                # evaluate on the full test set
                # index_batch = np.random.randint(NUM_TEST, size=BATCH_SIZE)
                # xs_, ys_ = test_x[index_batch], test_y[index_batch]
                xs_, ys_ = test_x, test_y
                re_xs_ = np.reshape(xs_, [-1, 28, 28, 1])
                re_ys_ = np.reshape(ys_, [-1, 10])
                # feed dict for the test set
                test_feed = {x: re_xs_, y_: re_ys_} 

                loss_value_, y_pre = sess.run([loss, y], feed_dict=test_feed)
                y_pre_labelNumber = np.argmax(y_pre, axis=1)
                correct = 0
                for j in range(0, NUM_TEST):
                    if y_pre_labelNumber[j] == test_y_labelNumber[j]:
                        correct += 1

                print("訓練過程中, 當Train_Step = %d 時, trainloss = %g, testloss = %g, 測試準確率爲 = %g%%" % (step, loss_value, loss_value_, float(correct)*100/NUM_TEST))
                print("-" * 50)
            
    return

if __name__ == '__main__': 
    
    mnist = input_data.read_data_sets(MNIST_data_path, one_hot=True)      # load the MNIST dataset (one-hot labels)
    train_x, train_y, test_x, test_y = mnist.train.images, mnist.train.labels, mnist.test.images, mnist.test.labels     # split into train/test arrays
    do_train(train_x, train_y, test_x, test_y)

 
