MNIST訓練TensorFlow三層網絡
首先是源代碼:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
# --- Dataset dimensions (MNIST) ---
INPUT_NODE = 784            # width of the input layer
OUTPUT_NODE = 10            # width of the output layer (one unit per class)

# --- Network architecture ---
LAYER1_NODE = 500           # width of the single hidden layer

# --- Optimisation hyper-parameters ---
BATCH_SIZE = 100            # examples drawn per training step
TRAINING_STEPS = 30000      # total number of training steps
LEARNING_RATE_BASE = 0.8    # initial learning rate for exponential decay
LEARNING_RATE_DECAY = 0.99  # decay rate applied to the learning rate
REGULARIZATION_RATE = 0.0001   # coefficient of the L2 weight penalty
MOVING_AVERAGE_DECAY = 0.99    # decay of the exponential moving average
# Helper: given an input and the network parameters, build the forward pass.
def inference(input_tensor, avg_class, weights1, biases1, weights2, biases2):
    """Build the forward pass of the one-hidden-layer network.

    Args:
        input_tensor: tensor multiplied by ``weights1`` (the network input).
        avg_class: ``None`` to use the current parameter values, or an object
            whose ``average(var)`` method returns the averaged value of a
            variable (here a ``tf.train.ExponentialMovingAverage``).
        weights1, biases1: hidden-layer parameters.
        weights2, biases2: output-layer parameters.

    Returns:
        The output-layer logits; no softmax is applied here.
    """
    # Identity check: `== None` would go through any overloaded __eq__.
    if avg_class is None:
        # Use the current values of the parameters directly.
        layer1 = tf.nn.relu(tf.matmul(input_tensor, weights1) + biases1)
        return tf.matmul(layer1, weights2) + biases2

    # Otherwise look up the moving average of every parameter and run the
    # same forward pass with those averaged values.
    layer1 = tf.nn.relu(
        tf.matmul(input_tensor, avg_class.average(weights1))
        + avg_class.average(biases1))
    return (tf.matmul(layer1, avg_class.average(weights2))
            + avg_class.average(biases2))
# Training procedure.
def train(mnist):
    """Build the graph for the three-layer network and train it on MNIST.

    Args:
        mnist: dataset object exposing ``train``, ``validation`` and ``test``
            splits. ``train`` must provide ``num_examples`` and
            ``next_batch``; ``validation`` and ``test`` must provide
            ``images`` and ``labels``. Labels are fed into a
            ``[None, OUTPUT_NODE]`` placeholder and reduced with argmax, so
            they are expected to be one-hot rows.

    Prints the validation accuracy of the moving-average model every 100
    steps and its test accuracy once training has finished. Returns nothing.
    """
    x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input')
    # NOTE(review): the op name 'add_1' is kept unchanged from the original;
    # it looks like a leftover experiment -- confirm nothing looks it up by
    # name before renaming it.
    y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='add_1')

    # Hidden-layer parameters.
    weights1 = tf.Variable(
        tf.truncated_normal([INPUT_NODE, LAYER1_NODE], stddev=0.1))
    biases1 = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]))
    # Output-layer parameters.
    weights2 = tf.Variable(
        tf.truncated_normal([LAYER1_NODE, OUTPUT_NODE], stddev=0.1))
    biases2 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))

    # Forward pass on the current parameter values (used for the loss).
    y = inference(x, None, weights1, biases1, weights2, biases2)

    # Step counter; not trainable, so it is excluded from the moving averages
    # and is incremented by minimize() below.
    global_step = tf.Variable(0, trainable=False)

    # Passing the step counter lets the averaging class use a smaller
    # effective decay early on, so the averages move faster at the start.
    variable_averages = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY, global_step)
    # tf.trainable_variables() returns the GraphKeys.TRAINABLE_VARIABLES
    # collection, i.e. the four weight/bias variables created above.
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    # Forward pass on the averaged parameters (used for evaluation).
    average_y = inference(
        x, variable_averages, weights1, biases1, weights2, biases2)

    # Cross-entropy loss. The sparse variant takes class indices, hence the
    # argmax over the one-hot labels. Arguments are passed by keyword because
    # TensorFlow >= 1.0 rejects positional logits/labels.
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=y, labels=tf.argmax(y_, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)

    # L2 penalty on the weights only (biases are not regularised).
    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    regularization = regularizer(weights1) + regularizer(weights2)
    loss = cross_entropy_mean + regularization

    # Exponentially decayed learning rate. The decay period is the number of
    # steps needed to go through the whole training set once.
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        mnist.train.num_examples / BATCH_SIZE,
        LEARNING_RATE_DECAY)

    # Minimise the loss; this also increments global_step.
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(
        loss, global_step=global_step)

    # Update the weights and their moving averages in a single op.
    # Equivalent to: train_op = tf.group(train_step, variables_averages_op)
    with tf.control_dependencies([train_step, variables_averages_op]):
        train_op = tf.no_op(name='train')

    # Accuracy of the averaged model on whatever data is fed in.
    correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    with tf.Session() as sess:
        tf.global_variables_initializer().run()

        # Labels must be fed through the y_ placeholder; y is the logits
        # tensor computed by the network, not an input.
        validate_feed = {x: mnist.validation.images,
                         y_: mnist.validation.labels}
        test_feed = {x: mnist.test.images, y_: mnist.test.labels}

        # Iteratively train the network.
        for i in range(TRAINING_STEPS):
            if i % 100 == 0:
                validate_acc = sess.run(accuracy, feed_dict=validate_feed)
                print("After %d training step(s), validation accuracy using average model is %g " % (i, validate_acc))

            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            sess.run(train_op, feed_dict={x: xs, y_: ys})

        # After training, report the final accuracy on the test set.
        test_acc = sess.run(accuracy, feed_dict=test_feed)
        print("After %d training step(s), test accuracy using average model is %g" % (TRAINING_STEPS, test_acc))
def main(argv=None):
    """Load the MNIST data set with one-hot labels and start training.

    ``argv`` is accepted but not used in this function.
    """
    data_dir = "/home/user9/DATA/MNIST_manual"
    dataset = input_data.read_data_sets(data_dir, one_hot=True)
    train(dataset)
# Script entry point: hand control to tf.app.run, which in turn calls main().
if __name__ == '__main__':
    tf.app.run(main=main)
代碼理解
- 首先是整體概覽
從這裏可以看出網絡只有三層:輸入層、隱層、輸出層:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
# Parameters
INPUT_NODE = 784
OUTPUT_NODE = 10
LAYER1_NODE = 500
把數據集按照每100個分成一組:
BATCH_SIZE = 100
這裏設定了指數衰減學習率的參數(基礎學習率、學習率的衰減率):
LEARNING_RATE_BASE = 0.8
LEARNING_RATE_DECAY = 0.99
損失函數中權重正則項所佔的lambda,這個值一般通過cross_validation來選取,使得正確率達到最優:
REGULARIZATION_RATE = 0.0001
設定總訓練次數:
TRAINING_STEPS = 30000
滑動平均——可以看作在訓練過程中,對每個權重的歷史取值做指數加權平均:每個變量另外維護一個影子變量,每次更新時 shadow = decay × shadow + (1 − decay) × variable。
使用滑動平均模型訓練得到的最終模型,在測試數據上通常具有更好的健壯性——即在未見過的(wild)數據上也能取得較好的效果。
滑動平均模型的衰減率:
MOVING_AVERAGE_DECAY = 0.99
構建前向網絡的輔助函數(參數:輸入,滑動平均類——可選,w1,b1,w2,b2. 可以支持選擇是否使用滑動平均模型):
def inference(input_tensor, avg_class, weights1, biases1, weights2, biases2)
然後是訓練函數(需要輸入數據庫):
def train(mnist):
主函數,用於作爲單文件運行的時候:
def main(argv=None):
mnist = input_data.read_data_sets( "/home/user9/DATA/MNIST_manual", one_hot = True)
train(mnist)
如果是單文件運行時,執行main();否則不執行main(),作爲函數文件被調用。
if __name__ == '__main__':
tf.app.run()
- 然後看train(mnist)的內部。
接下來是我自己總結的簡化版本(相當於無語法規範的僞代碼,只是爲了方便理解。):
train():
x = placeholder()
y_ = placeholder()
[W1, B1, W2, B2] = random()
# 兩種前向傳播方式
# y ——不採用滑動平均
y = inference(avr_cls = None)
# average_y ——採用滑動平均
#爲方便理解,這裏把global_step寫成cur_step,其實就是當前迭代的步數:一步輸入一個batch
cur_step = 0
# 建立 滑動平均函數(類)
var_aver = new_aver_class(MA_decay, cur_step)
# 實現 滑動平均 的操作
var_aver_op = var_aver -> apply( {W1, B1, W2, B2} )
# 輸入了 滑動平均類,調用 "var_aver -> average()",從而採用 EMA 版本的前向傳播
average_y = inference(avr_cls = var_aver)
# 後向傳播
# 平均交叉熵 loss項
cross_entropy = softmax_cross_entropy( y, argmax(y_, 1) )
cross_entropy_mean = mean( cross_entropy )
# 權重 loss項
regularizer = l2_regularizer( RGL_RATE )# 建立 正則函數
regularization = regularizer( W1, W2 )
# 總的 loss function
loss = cross_entropy_mean + regularization
# 採用上述 指數下降的學習率 優化
# 技巧:指數下降的學習率
learning_rate = exponential_decay(
LEARNING_RATE_BASE,
global_step,
mnist.train.num_examples / BATCH_SIZE,
LEARNING_RATE_DECAY)
# 訓練 操作
train_step = Optimizer( learning_rate ).minimize( loss, global_step )
# 訓練 和 滑動平均 操作 放在一塊,定義成新的操作:train_op
train_op <--[ train_step, var_aver_op]
# 定義 accuracy 運算,得到的是一個 batch上 的正確率
accuracy = mean( correct_prediction )
# 初始化 Session,開始訓練
with tf.Session() as sess:
initialize all variables: W1,W2,B1,B2
validate_feed = ...
test_feed = ...
# 迭代訓練階段
for i in range(TR_STEPS):
# 每100次迭代 計算一次 accuracy(對應源代碼中的 if i % 100 == 0)
validate_acc = sess.run( accuracy ,feed_dict = validate_feed )
print validate_acc
# 從 mnist 的 train 數據集上 選取 batch_size 個訓練數據
xs, ys = mnist.train.next_batch( BATCH_SIZE )
# 開始運行 train_op
sess.run( train_op , feed_dict = {x: xs; y_: ys})
# 訓練完成後,在測試集上計算 正確率
test_acc = sess.run( accuracy , feed_dict = test_feed )
print test_acc