MNIST_whole_net_XXX

Training a three-layer TensorFlow network on MNIST

First, the source code:

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# about MNIST dataset
INPUT_NODE = 784
OUTPUT_NODE = 10

# about the Network
LAYER1_NODE = 500
BATCH_SIZE = 100
LEARNING_RATE_BASE = 0.8
LEARNING_RATE_DECAY = 0.99
REGULARIZATION_RATE = 0.0001
TRAINING_STEPS = 30000
MOVING_AVERAGE_DECAY = 0.99

# A helper function: given the input and the parameters, compute the forward propagation.
def inference(input_tensor, avg_class, weights1, biases1, weights2, biases2):
    # If no ExponentialMovingAverage class is supplied, use the current parameter values directly.
    if avg_class is None:
        layer1 = tf.nn.relu(tf.matmul(input_tensor, weights1) + biases1)
        return tf.matmul(layer1, weights2) + biases2
    # Otherwise fetch the shadow (EMA) values via avg_class.average() and use them for the forward pass.
    else:
        layer1 = tf.nn.relu(tf.matmul(input_tensor, avg_class.average(weights1)) + avg_class.average(biases1))
        return tf.matmul(layer1, avg_class.average(weights2)) + avg_class.average(biases2)




# training processes
def train(mnist):
    x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input')
    y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y-input')

    # hidden layers parameters
    weights1 = tf.Variable(tf.truncated_normal([INPUT_NODE,LAYER1_NODE], stddev = 0.1))
    biases1 = tf.Variable(tf.constant(0.1,shape=[LAYER1_NODE]))

    # output layers parameters
    weights2 = tf.Variable(tf.truncated_normal([LAYER1_NODE,OUTPUT_NODE], stddev = 0.1))
    biases2 = tf.Variable(tf.constant(0.1,shape=[OUTPUT_NODE]))

    # forward propagation without the EMA
    y = inference(x, None, weights1, biases1, weights2, biases2)

    # counter of training steps taken
    global_step = tf.Variable(0, trainable=False)

    # passing global_step (num_updates) lowers the effective decay early in training, so the averages catch up faster
    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    # tf.trainable_variables() gives collection of GraphKeys.TRAINABLE_VARIABLES
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    # forward propagation using the EMA
    average_y = inference(x, variable_averages, weights1, biases1, weights2, biases2)

    # cross-entropy loss function
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)

    # l2 regularize loss
    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    regularization = regularizer(weights1) + regularizer(weights2)
    loss = cross_entropy_mean + regularization

    # exponentially decaying learning rate:
    #   global_step: the current training step
    #   mnist.train.num_examples / BATCH_SIZE: the number of steps needed to go through all training data once
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE, global_step,
        mnist.train.num_examples / BATCH_SIZE, LEARNING_RATE_DECAY)

    # optimize loss function
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)

    # update the weights and their exponential moving averages in one step;
    # equivalent to:
    # train_op = tf.group(train_step, variables_averages_op)
    with tf.control_dependencies([train_step, variables_averages_op]):
        train_op = tf.no_op(name='train')

    # check whether each prediction matches the label
    correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))
    # accuracy on this batch
    accuracy = tf.reduce_mean( tf.cast(correct_prediction, tf.float32) )

    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        train_feed = {x: mnist.train.images, y_: mnist.train.labels}
        validate_feed = {x: mnist.validation.images, y_: mnist.validation.labels}
        test_feed = {x: mnist.test.images, y_: mnist.test.labels}

        # debug output: inspect the three feed dictionaries
        print(test_feed, "\n\n\n")
        print(validate_feed, "\n\n\n")
        print(train_feed, "\n\n\n")

        # iteratively train NN
        for i in range(TRAINING_STEPS):
            if i % 100 == 0:
                validate_acc = sess.run(accuracy, feed_dict = validate_feed)
                print("After %d training step(s), validation accuracy using average model is %g " % (i, validate_acc))

            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            sess.run(train_op, feed_dict={x: xs, y_: ys})

        # after training, validate final precision in testing dataset
        test_acc = sess.run(accuracy, feed_dict=test_feed)
        print("After %d training step(s), test accuracy using average model is %g" % (TRAINING_STEPS, test_acc) )

def main(argv=None):
    mnist = input_data.read_data_sets("/home/user9/DATA/MNIST_manual", one_hot = True)
    train(mnist)

# TensorFlow application entry point: tf.app.run() parses flags and then calls main()
if __name__ == '__main__':
    tf.app.run()

Understanding the code

  • First, a high-level overview

From these constants we can see that the network has only three layers: an input layer, one hidden layer, and an output layer:

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Parameters
INPUT_NODE = 784
OUTPUT_NODE = 10
LAYER1_NODE = 500

The dataset is processed in batches of 100 examples:

BATCH_SIZE = 100 

These set the parameters of the exponentially decaying learning rate (the base learning rate and its decay rate):

LEARNING_RATE_BASE = 0.8
LEARNING_RATE_DECAY = 0.99
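
For intuition, here is a minimal sketch in plain Python (not the TensorFlow implementation) of what tf.train.exponential_decay computes with the default staircase=False; the decay_steps value of 550 is an assumption based on the usual 55000-image MNIST train split divided by BATCH_SIZE = 100:

LEARNING_RATE_BASE = 0.8
LEARNING_RATE_DECAY = 0.99

def decayed_learning_rate(global_step, decay_steps=550.0):
    # decayed rate = base_rate * decay_rate ** (global_step / decay_steps)
    return LEARNING_RATE_BASE * LEARNING_RATE_DECAY ** (global_step / decay_steps)

print(decayed_learning_rate(0))      # 0.8
print(decayed_learning_rate(5500))   # about 0.8 * 0.99**10, roughly 0.72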

The lambda of the weight-regularization term in the loss function; this value is usually chosen via cross-validation so that accuracy is optimal:

REGULARIZATION_RATE = 0.0001
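
As a rough sketch (assuming tf.contrib.layers.l2_regularizer(scale) behaves like scale * tf.nn.l2_loss(w), i.e. scale * sum(w**2) / 2), a plain-NumPy equivalent looks like this:

import numpy as np

REGULARIZATION_RATE = 0.0001

def l2_regularizer(scale):
    # returns a function computing scale * sum(w ** 2) / 2
    def regularize(w):
        return scale * np.sum(np.square(w)) / 2.0
    return regularize

regularizer = l2_regularizer(REGULARIZATION_RATE)
w = np.array([[1.0, -2.0], [3.0, 0.5]])
print(regularizer(w))   # 0.0001 * 14.25 / 2 = 0.0007125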

Set the total number of training steps:

TRAINING_STEPS = 30000

The moving average can be thought of as averaging the weights over the course of training (a kind of pooling of parameter values across steps).
A model trained with the moving-average technique tends to be more robust on test data, i.e. it also performs well on unseen, "in the wild" data.
The decay rate of the moving-average model:

MOVING_AVERAGE_DECAY = 0.99
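
For reference, tf.train.ExponentialMovingAverage keeps a "shadow" copy of each variable and updates it on every apply step; when a num_updates argument (global_step above) is supplied, the effective decay is min(MOVING_AVERAGE_DECAY, (1 + num_updates) / (10 + num_updates)). A minimal sketch of that update rule, assuming this behaviour:

MOVING_AVERAGE_DECAY = 0.99

def ema_update(shadow, variable, num_updates):
    # early in training the effective decay is small, so the shadow value tracks the variable closely
    decay = min(MOVING_AVERAGE_DECAY, (1.0 + num_updates) / (10.0 + num_updates))
    return decay * shadow + (1.0 - decay) * variable

print(ema_update(shadow=0.0, variable=1.0, num_updates=0))      # decay = 0.1, shadow jumps to 0.9
print(ema_update(shadow=0.0, variable=1.0, num_updates=10000))  # decay = 0.99, shadow moves to only 0.01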

The helper function that builds the forward pass (arguments: the input, an optional moving-average class, w1, b1, w2, b2; it lets you choose whether or not to use the moving-average model):

def inference(input_tensor, avg_class, weights1, biases1, weights2, biases2)

Then the training function (it takes the dataset as input):

def train(mnist):

The main function, used when the file is run as a standalone script:

def main(argv=None):
    mnist = input_data.read_data_sets( "/home/user9/DATA/MNIST_manual", one_hot = True)
    train(mnist)

When the file is run directly as a script, main() is executed; when it is imported as a module, main() is not executed.

if __name__ == '__main__':
    tf.app.run()
  • Now let's look inside train(mnist).

Next is my own simplified version (essentially loose pseudocode with no strict syntax, just to make the logic easier to follow):

train():
    x = placeholder()
    y_ = placeholder()
    [W1, B1, W2, B2] = random()


# two forward-propagation variants
    # y -- without the moving average
    y = inference(avr_cls = None)


    # average_y -- with the moving average
        # for clarity global_step is written as cur_step here; it is simply the current iteration count (one batch per step)
    cur_step = 0
        # create the moving-average class
    var_aver = new_aver_class(MA_decay, cur_step)
        # the op that actually maintains the moving averages
    var_aver_op = var_aver -> apply( {W1, B1, W2, B2} )
        # with the moving-average class passed in, inference() calls "var_aver -> average()", i.e. the EMA version of the forward pass
    average_y = inference(avr_cls = var_aver)


# backpropagation
    # mean cross-entropy loss term
    cross_entropy = softmax_cross_entropy(y, y_)
    cross_entropy_mean = mean( cross_entropy )

    # weight-regularization loss term
    regularizer = l2_regularizer( RGL_RATE )  # build the regularization function
    regularization = regularizer( W1, W2 )

    # total loss function
    loss = cross_entropy_mean + regularization


# optimize with the exponentially decaying learning rate described above
    # trick: exponentially decaying learning rate
    learning_rate = exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        mnist.train.num_examples / BATCH_SIZE,
        LEARNING_RATE_DECAY)
    # the training op
    train_step = Optimizer( learning_rate ).minimize( loss, global_step )


# bundle the training op and the moving-average op into a single new op: train_op
    train_op <-- [ train_step, var_aver_op ]

# define the accuracy op, which yields the accuracy on one batch (see the NumPy sketch after this pseudocode)
    correct_prediction = equal( argmax(average_y), argmax(y_) )
    accuracy = mean( correct_prediction )

# create the Session and start training
    with tf.Session() as sess:
        initialize all variables: W1, W2, B1, B2
        validate_feed = ...
        test_feed = ...

        # iterative training phase
        for i in range(TR_STEPS):
            # every 100 iterations, compute the validation accuracy
            validate_acc = sess.run( accuracy, feed_dict = validate_feed )
            print validate_acc

            # draw BATCH_SIZE training examples from the mnist train split
            xs, ys = mnist.train.next_batch( BATCH_SIZE )
            # run train_op
            sess.run( train_op, feed_dict = {x: xs, y_: ys} )

        # after training, compute the accuracy on the test set
        test_acc = sess.run( accuracy, feed_dict = test_feed )
        print test_acc
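
To make the accuracy op concrete, here is a small NumPy sketch of what the argmax / equal / cast / reduce_mean chain computes on a batch of one-hot labels (the values below are made up for illustration):

import numpy as np

# three examples, ten classes; y holds the network outputs, y_ holds one-hot labels
y = np.array([[0.1] * 9 + [2.0],               # predicts class 9
              [3.0] + [0.0] * 9,               # predicts class 0
              [0.0] * 5 + [1.5] + [0.0] * 4])  # predicts class 5
y_ = np.eye(10)[[9, 0, 3]]                     # true labels: 9, 0, 3

correct_prediction = np.equal(np.argmax(y, 1), np.argmax(y_, 1))
accuracy = np.mean(correct_prediction.astype(np.float32))
print(accuracy)   # 2 of 3 predictions correct, so about 0.667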