Tensorflow 學習筆記(三):MNIST數字識別
前言
MNIST手寫數字識別是一個非常經典入門的深度學習的實驗,跟着《 Tensorflow:實戰Google深度學習框架》第五章學習如何利用Tensorflow框架完成這個實驗。
我發現每章內容知識點繁多,如果只是刷一遍不用也忘了,決定以小節爲單位慢慢刷書,後面要用到哪個再慢慢看。
最近還有各種算法要學,得趕快投入學理論知識。
慢慢更新…
MNIST數據
下載MNIST數據
-
首先你可以選擇去YannLeCun教授的網站下載用來訓練和測試的MNIST數據集。
-
或者利用Tensorflow提供的一個類下載並處理MNIST數據。
詳見下面代碼.
MNIST數據的處理
from tensorflow.examples.tutorials.mnist import input_data
# Load the data from ./data/; if it is not there it is downloaded first from the site mentioned above (be patient, this takes a while).
minist = input_data.read_data_sets("./data/" , one_hot = True)
# Print the size of the training set
print("Traninig data size: ", minist.train.num_examples)
# Print the size of the validation set
print("Validating data size: ", minist.validation.num_examples)
# Print the size of the test set
print("Testing data size: ",minist.test.num_examples)
# Print the first sample of the training set
print("Example training data : ",minist.train.images[0])
# Print the label of the first training sample
print("Example labels data : ",minist.train.labels[0])
上面代碼,通過input_data.read_data_sets
函數生成的類會自動將MNIST數據分成train
,validation
,test
三個數據集。MNIST數據集是由大小爲28x28的手寫數字圖片構成,所以處理後的每一張圖片變成一個長度爲784(28x28)的一維數組。
同時爲了方便使用隨機梯度下降進行優化求解,input_data.read_data_sets
函數生成的類提供了mnist.train.next_batch(batch_size)
函數。可以從所有訓練數據裏讀取指定batch_size
大小的數據作爲一個訓練batch。
神經網絡模型訓練及不同模型結果的對比
【滑動平均模型】,【指數衰減模型的學習率】,【使用正則化】帶來的正確率提升不是很明顯。這是由於滑動平均模型和指數衰減模型的學習率在一定程度上都是在限制神經網絡中的參數更新速度,然而在MNIST數據上,因爲模型收斂速度很快,所以這兩種優化對最終模型的影響不大。
Tensorflow訓練神經網絡
這個代碼裏有上一章提到的所有的優化的模塊。說實話我看到這個代碼,【滑動平均模型】,【指數衰減模型的學習率】由於原理不是很懂,看到是很懵逼的,不管了,先當黑盒學習吧(弱)。其他的仔細閱讀註釋問題不大。
#!/usr/bin/env python
# encoding: utf-8
'''
@author: MrYx
@github: https://github.com/MrYxJ
@file: 全模型.py
@time: 18-12-15 下午10:25
'''
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
INPUT_NODE = 784 # number of input nodes
OUTPUT_NODE = 10 # number of output nodes
LAYER1_NODE = 800 # number of neurons in the hidden layer
BATCH_SIZE = 100 # number of samples packed into each training batch
# Model-related hyperparameters
# Initial learning rate handed to tf.train.exponential_decay.
LEARNING_RATE_BASE = 0.8
# Decay rate handed to tf.train.exponential_decay.
LEARNING_RATE_DECAY = 0.99
# Scale passed to the L2 regularizer applied to both weight matrices.
REGULARAZTION_RATE = 0.0001
# Number of iterations of the training loop.
TRAINING_STEPS = 50000
# Decay passed to tf.train.ExponentialMovingAverage.
MOVING_AVERAGE_DECAY = 0.99
def inference(input_tensor, avg_class, weights1, biases1, weights2, biases2):
    """Build the forward pass of the two-layer fully connected network.

    Args:
        input_tensor: Tensor multiplied with the first weight matrix.
        avg_class: Object exposing ``average(variable)`` (the caller passes a
            ``tf.train.ExponentialMovingAverage``), or ``None`` to use the
            raw variables directly.
        weights1: Weight variable of the hidden layer.
        biases1: Bias variable of the hidden layer.
        weights2: Weight variable of the output layer.
        biases2: Bias variable of the output layer.

    Returns:
        The output of the second layer. No softmax is applied here.
    """
    # Identity check against None: `==` may be overloaded by arbitrary objects.
    if avg_class is None:
        # Plain forward pass on the current parameter values.
        w1, b1, w2, b2 = weights1, biases1, weights2, biases2
    else:
        # Forward pass on the averaged (shadow) values of each parameter.
        w1 = avg_class.average(weights1)
        b1 = avg_class.average(biases1)
        w2 = avg_class.average(weights2)
        b2 = avg_class.average(biases2)

    layer1 = tf.nn.relu(tf.matmul(input_tensor, w1) + b1)
    return tf.matmul(layer1, w2) + b2
def train(mnist):
    """Build, train and evaluate the fully optimised two-layer MNIST network.

    The graph combines L2 regularisation of both weight matrices, a staircase
    exponentially decaying learning rate and an exponential moving average of
    the trainable variables. Validation accuracy, computed with the averaged
    parameters, is printed every 1000 steps; test accuracy is printed once
    after the last step.

    Args:
        mnist: Dataset object exposing ``train``, ``validation`` and ``test``
            splits (the caller passes the result of
            ``input_data.read_data_sets``).
    """
    x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input')
    y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y-input')

    # Hidden-layer parameters first, then output-layer parameters.
    hidden_w = tf.Variable(tf.truncated_normal([INPUT_NODE, LAYER1_NODE], stddev=0.1))
    hidden_b = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]))
    out_w = tf.Variable(tf.truncated_normal([LAYER1_NODE, OUTPUT_NODE], stddev=0.1))
    out_b = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))

    # Forward pass on the raw variables; this output drives the loss.
    logits = inference(x, None, hidden_w, hidden_b, out_w, out_b)

    # Step counter (excluded from training) and the moving-average machinery.
    step_counter = tf.Variable(0, trainable=False)
    ema = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, step_counter)
    ema_update = ema.apply(tf.trainable_variables())
    averaged_logits = inference(x, ema, hidden_w, hidden_b, out_w, out_b)

    # Mean cross entropy plus the L2 penalty on both weight matrices.
    per_example_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits,
        labels=tf.argmax(y_, 1),
    )
    mean_xent = tf.reduce_mean(per_example_xent)
    l2 = tf.contrib.layers.l2_regularizer(REGULARAZTION_RATE)
    loss = mean_xent + (l2(hidden_w) + l2(out_w))

    # Exponentially decaying learning rate, stepped once per pass over the
    # training set (num_examples / BATCH_SIZE steps).
    decayed_lr = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        step_counter,
        mnist.train.num_examples / BATCH_SIZE,
        LEARNING_RATE_DECAY,
        staircase=True,
    )
    optimizer = tf.train.GradientDescentOptimizer(decayed_lr)
    gradient_step = optimizer.minimize(loss, global_step=step_counter)

    # One op that runs both back-propagation and the moving-average update.
    with tf.control_dependencies([gradient_step, ema_update]):
        train_op = tf.no_op(name='train')

    # Accuracy is measured on the output of the averaged model.
    hits = tf.equal(tf.argmax(averaged_logits, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(hits, tf.float32))

    # Open a session and run the training loop.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        validation_inputs = {x: mnist.validation.images, y_: mnist.validation.labels}
        test_inputs = {x: mnist.test.images, y_: mnist.test.labels}

        for step in range(TRAINING_STEPS):
            if step % 1000 == 0:
                validation_accuracy = sess.run(accuracy, feed_dict=validation_inputs)
                print("After %d training step(s), validation accuracy using average model is %g " % (step, validation_accuracy))
            batch_images, batch_labels = mnist.train.next_batch(BATCH_SIZE)
            sess.run(train_op, feed_dict={x: batch_images, y_: batch_labels})

        test_accuracy = sess.run(accuracy, feed_dict=test_inputs)
        print(("After %d training step(s), test accuracy using average model is %g" % (TRAINING_STEPS, test_accuracy)))
def main(argv=None):
    """Load MNIST from ``data/`` with one-hot labels and train on it.

    Args:
        argv: Unused.
    """
    train(input_data.read_data_sets("data/", one_hot=True))


if __name__ == '__main__':
    main()
下面是去除【滑動平均模型】和隱藏層的代碼,可以運行起來和上面對比一下結果差異:
#!/usr/bin/env python
# encoding: utf-8
'''
@author: MrYx
@github: https://github.com/MrYxJ
@file: 無滑動模型優化-激活函數-sigmoid.py
@time: 19-1-4 下午4:15
'''
#!/usr/bin/env python
# encoding: utf-8
'''
@author: MrYx
@github: https://github.com/MrYxJ
@file: 無滑動模型優化.py
@time: 18-12-19 下午9:52
'''
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
INPUT_NODE = 784 # number of input nodes
OUTPUT_NODE = 10 # number of output nodes
LAYER1_NODE = 800 # number of hidden-layer neurons; only referenced by commented-out code in this variant
BATCH_SIZE = 100 # number of samples packed into each training batch
# Model-related hyperparameters
# Learning rate handed directly to the gradient-descent optimizer (no decay in this variant).
LEARNING_RATE_BASE = 0.8
# Scale passed to the L2 regularizer.
REGULARAZTION_RATE = 0.0001
# Number of iterations of the training loop.
TRAINING_STEPS = 50000
def train(mnist):
    """Train a single-layer sigmoid model on MNIST with a constant learning rate.

    This variant has no hidden layer, no moving average and no learning-rate
    decay. Validation accuracy is printed every 1000 steps and test accuracy
    once after the last step.

    Args:
        mnist: Dataset object exposing ``train``, ``validation`` and ``test``
            splits (the caller passes the result of
            ``input_data.read_data_sets``).
    """
    x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input')
    y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y-input')

    # Single layer mapping the inputs straight to the outputs.
    weights1 = tf.Variable(tf.truncated_normal([INPUT_NODE, OUTPUT_NODE], stddev=0.1))
    biases1 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))
    layer1 = tf.nn.sigmoid(tf.matmul(x, weights1) + biases1)
    # NOTE(review): y has already been squashed by sigmoid but is fed below as
    # `logits`; kept because this looks like the intended experiment - confirm.
    y = layer1

    # Step counter incremented by the optimizer; excluded from training.
    global_step = tf.Variable(0, trainable=False)

    # Mean cross entropy over the batch.
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)

    # L2 penalty on the only weight matrix. The previous code added
    # regularizer(weights1) twice, silently doubling the penalty.
    regularizer = tf.contrib.layers.l2_regularizer(REGULARAZTION_RATE)
    regularaztion = regularizer(weights1)
    loss = cross_entropy_mean + regularaztion

    # Exponential learning-rate decay is deliberately not used in this
    # variant: the optimizer gets the constant LEARNING_RATE_BASE.
    train_step = tf.train.GradientDescentOptimizer(LEARNING_RATE_BASE).minimize(loss, global_step=global_step)

    # Accuracy of the model output against the one-hot labels.
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Open a session and run the training loop.
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        validate_feed = {x: mnist.validation.images, y_: mnist.validation.labels}
        test_feed = {x: mnist.test.images, y_: mnist.test.labels}

        for i in range(TRAINING_STEPS):
            if i % 1000 == 0:
                validate_acc = sess.run(accuracy, feed_dict=validate_feed)
                # Message no longer claims an "average model": none exists here.
                print("After %d training step(s), validation accuracy is %g " % (i, validate_acc))
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            sess.run(train_step, feed_dict={x: xs, y_: ys})

        test_acc = sess.run(accuracy, feed_dict=test_feed)
        print("After %d training step(s), test accuracy is %g" % (TRAINING_STEPS, test_acc))
def main(argv=None):
    """Load MNIST from ``data/`` with one-hot labels and train on it.

    Args:
        argv: Unused.
    """
    train(input_data.read_data_sets("data/", one_hot=True))


if __name__ == '__main__':
    main()
變量管理
主要利用命名空間:variable_scope()
和get_variable('')
函數來管理獲取變量,達到簡化函數參數的寫法,大大提高了程序的可讀性。
get_variable()
和Variable()
創建變量的過程是一樣的。
# The two definitions below are equivalent.
# (The keyword is `initializer`; the former spelling `initalizer` is not an
# accepted argument and made the call fail.)
v = tf.get_variable("v", shape=[1], initializer=tf.constant_initializer(1.0))
v = tf.Variable(tf.constant(1.0, shape=[1]), name="v")
他兩最大的區別在於,指定變量名稱的參數。對於Variable
函數變量名稱是一個可選的參數,但是對於get_variable
變量名稱是一個必填的參數,它會根據這個名字去創建或者獲取變量。
如果是獲取一個已創建的變量需要通過variable_scope
函數來生成一個上下文管理器,並明確指定在這個上下文管理器裏,並注意其參數reuse
來決定獲取是否是已經創建好的變量。
import tensorflow as tf
# Enter the "layer1" scope with reuse disabled, then request the variable
# 'v' with shape [1] inside it.
layer1_scope = tf.variable_scope("layer1", reuse=False)
with layer1_scope:
    v1 = tf.get_variable('v', [1])
    print(v1.name)  # layer1/v:0
在這裏定義了一個名稱爲'layer1'的命名空間,reuse
參數的bool值決定tf.get_variable()
獲取的是已經聲明的變量還是新建變量,reuse
默認爲False,表示創建一個新的變量。
TensorFlow模型持久化
神經網絡的訓練往往時間很長,Tensorflow提供一個非常簡單的API來保存和還原神經網絡的模型,實現邊訓練邊測試效果。這個API 就是tf.train.Saver
類。
Tensorflow通過saver.save()
函數將模型保存到本地後綴爲.ckpt
的文件中,雖然路徑只指名了一個文件路徑,但文件目錄下會出現四個文件。
這是因爲TensorFlow會將計算圖的結構和圖上參數取值分開保存。
import tensorflow as tf
# Two variables initialised from seeded random normals, and their sum.
v1 = tf.Variable(tf.random_normal([1], stddev=1, seed=1))
v2 = tf.Variable(tf.random_normal([1], stddev=1, seed=1))
result = v1 + v2

init_op = tf.global_variables_initializer()
saver = tf.train.Saver()

# First session: initialise the variables and save them to
# Saved_model/model.ckpt.
with tf.Session() as save_sess:
    save_sess.run(init_op)
    saver.save(save_sess, "Saved_model/model.ckpt")

# Second session: restore the saved values (no initialisation is run here)
# and evaluate the sum.
with tf.Session() as restore_sess:
    saver.restore(restore_sess, "Saved_model/model.ckpt")
    print(restore_sess.run(result))
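本地Saved_model
文件夾內會生成以下文件(舊版checkpoint格式爲三個文件;在採用新版checkpoint格式的TensorFlow中,model.ckpt會被拆分爲model.ckpt.index和model.ckpt.data-*兩個文件,因此共四個文件):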
model.ckpt.meta
保存了Tensorflow計算圖結構。model.ckpt
保存了程序中每個變量的取值。checkpoint
文件保存了一個目錄下所有模型文件的列表。
持久化原理及數據格式
當調用saver.save
函數時,Tensorflow程序會自動生成四個文件。TensorFlow模型持久化就是通過這四個文件完成的。首先Tensorflow是通過元圖(MetaGraph)來記錄計算圖中的節點的信息以及運行計算圖中節點所需要的元數據。
(待補充)
加入持久化的MNIST實踐樣例程序
上一節已經給出加入所有優化的完整Mnist手寫識別實驗的程序。然而沒有加入持久化過程,當程序中途退出時,之前沒有保存的模型再也無法使用。一般神經網絡訓練時間比較長,保存訓練中間的結果是非常有必要。我們將上一節程序分成訓練和測試兩個獨立的過程,這樣使得每一個組件更加靈活。比如訓練模型可以持續輸出訓練的模型,測試模型每隔一段時間檢驗最新的模型的正確率。
重構代碼分成3個程序:
第一個是mnist_inference.py
程序,定義了前向傳播以及神經網絡中參數初始化的過程。
#!/usr/bin/env python
# encoding: utf-8
'''
@author: MrYx
@github: https://github.com/MrYxJ
@file: mnist_inference.py
@time: 19-1-17 上午10:28
'''
import tensorflow as tf
# Layer sizes used by inference() to shape the weight and bias variables.
INPUT_NODE = 784  # width of the input layer
OUTPUT_NODE = 10  # width of the output layer
LAYER1_NODE = 500  # width of the hidden layer
def get_weight_variable(shape, regularizer):
    """Create or fetch the ``weights`` variable of the current variable scope.

    Args:
        shape: Shape of the weight variable.
        regularizer: Callable mapping the weight variable to a regularisation
            term, or ``None`` to skip regularisation.

    Returns:
        The ``weights`` variable, initialised from a truncated normal with
        standard deviation 0.1.
    """
    weights = tf.get_variable(
        "weights",
        shape,
        initializer=tf.truncated_normal_initializer(stddev=0.1),
    )
    # Identity check against None rather than `!=`, which may be overloaded.
    if regularizer is not None:
        # Collect the penalty so the training script can sum the 'losses'
        # collection into its total loss.
        tf.add_to_collection('losses', regularizer(weights))
    return weights
def inference(input_tensor, regularizer):
    """Define the forward pass of the two-layer network.

    Variables are created (or fetched) under the scopes ``layer1`` and
    ``layer2`` with the names ``weights`` and ``biases``.

    Args:
        input_tensor: Tensor multiplied with the first weight matrix.
        regularizer: Forwarded to ``get_weight_variable`` for both weight
            matrices; may be ``None``.

    Returns:
        The output of the second layer. No softmax is applied here.
    """
    zero_init = tf.constant_initializer(0.0)

    # Hidden layer: affine transform followed by ReLU.
    with tf.variable_scope('layer1'):
        hidden_weights = get_weight_variable([INPUT_NODE, LAYER1_NODE], regularizer)
        hidden_biases = tf.get_variable("biases", [LAYER1_NODE], initializer=zero_init)
        hidden_output = tf.nn.relu(tf.matmul(input_tensor, hidden_weights) + hidden_biases)

    # Output layer: affine transform only.
    with tf.variable_scope('layer2'):
        output_weights = get_weight_variable([LAYER1_NODE, OUTPUT_NODE], regularizer)
        output_biases = tf.get_variable("biases", [OUTPUT_NODE], initializer=zero_init)
        return tf.matmul(hidden_output, output_weights) + output_biases
第二個是mnist_train.py
,定義了神經網絡訓練的過程。
#!/usr/bin/env python
# encoding: utf-8
import os

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

import mnist_inference
# Network-shape constants. train() below reads INPUT_NODE / OUTPUT_NODE from
# mnist_inference instead, so these three are not referenced by it; they are
# kept only so existing references to this module's names keep working.
# NOTE(review): LAYER1_NODE here (800) differs from mnist_inference (500).
INPUT_NODE = 784  # number of input nodes
OUTPUT_NODE = 10  # number of output nodes
LAYER1_NODE = 800  # number of neurons in the hidden layer

# Model-related hyperparameters.
BATCH_SIZE = 100  # samples per training batch (was assigned twice before)
LEARNING_RATE_BASE = 0.8  # initial learning rate for exponential decay
LEARNING_RATE_DECAY = 0.99  # decay rate of the learning rate
REGULARIZATION_RATE = 0.0001  # scale of the L2 regularizer
TRAINING_STEPS = 30000  # iterations of the training loop
MOVING_AVERAGE_DECAY = 0.99  # decay of the exponential moving average

# Checkpoint location: directory and file-name prefix.
MODEL_SAVE_PATH = "MNIST_model/"
MODEL_NAME = "mnist_model"
def train(mnist):
    """Train the MNIST network and write a checkpoint every 1000 steps.

    The forward pass comes from ``mnist_inference.inference``. The loss is
    the mean cross entropy plus every term in the ``'losses'`` collection.
    Each training step also updates the moving averages of the trainable
    variables.

    Args:
        mnist: Dataset object exposing a ``train`` split with
            ``num_examples`` and ``next_batch`` (the caller passes the result
            of ``input_data.read_data_sets``).
    """
    # Input and label placeholders.
    x = tf.placeholder(tf.float32, [None, mnist_inference.INPUT_NODE], name='x-input')
    y_ = tf.placeholder(tf.float32, [None, mnist_inference.OUTPUT_NODE], name='y-input')

    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    y = mnist_inference.inference(x, regularizer)
    global_step = tf.Variable(0, trainable=False)

    # Loss, learning rate, moving-average op and the combined training op.
    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    loss = cross_entropy_mean + tf.add_n(tf.get_collection('losses'))
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        mnist.train.num_examples / BATCH_SIZE,
        LEARNING_RATE_DECAY,
        staircase=True)
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
    # Running train_op runs both the gradient step and the average update.
    with tf.control_dependencies([train_step, variables_averages_op]):
        train_op = tf.no_op(name='train')

    # Saver used to persist the model during training.
    saver = tf.train.Saver()
    # Make sure the checkpoint directory exists before the first save;
    # MakeDirs succeeds when the directory is already there.
    tf.gfile.MakeDirs(MODEL_SAVE_PATH)
    checkpoint_prefix = os.path.join(MODEL_SAVE_PATH, MODEL_NAME)

    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        for i in range(TRAINING_STEPS):
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            _, loss_value, step = sess.run([train_op, loss, global_step], feed_dict={x: xs, y_: ys})
            if i % 1000 == 0:
                print("After %d training step(s), loss on training batch is %g." % (step, loss_value))
                # Passing global_step appends the step number to the file name.
                saver.save(sess, checkpoint_prefix, global_step=global_step)
def main(argv=None):
    """Load MNIST from ``data/`` with one-hot labels and train on it.

    Args:
        argv: Unused.
    """
    train(input_data.read_data_sets("data/", one_hot=True))


if __name__ == '__main__':
    tf.app.run()
第三個是mnist_eval.py
#!/usr/bin/env python
# encoding: utf-8
'''
@author: MrYx
@github: https://github.com/MrYxJ
@file: mnist_eval.py
@time: 19-1-17 下午8:30
'''
import time
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import mnist_inference
import mnist_train
# Seconds to sleep between two consecutive checkpoint loads in evaluate().
EVAL_INTERVAL_SECS = 10
def evaluate(mnist):
    """Repeatedly evaluate the latest checkpoint on the validation set.

    A fresh graph is built with the forward pass from ``mnist_inference``.
    The saver maps the moving-average values stored in the checkpoint onto
    the model variables. Every ``EVAL_INTERVAL_SECS`` seconds the newest
    checkpoint under ``mnist_train.MODEL_SAVE_PATH`` is restored and its
    validation accuracy printed. The function returns as soon as no
    checkpoint can be found.

    Args:
        mnist: Dataset object exposing a ``validation`` split with ``images``
            and ``labels``.
    """
    with tf.Graph().as_default():
        x = tf.placeholder(tf.float32, [None, mnist_inference.INPUT_NODE], name='x-input')
        y_ = tf.placeholder(tf.float32, [None, mnist_inference.OUTPUT_NODE], name='y-input')
        validation_inputs = {x: mnist.validation.images, y_: mnist.validation.labels}

        # No regularizer is passed: only the forward pass is evaluated.
        logits = mnist_inference.inference(x, None)
        hits = tf.equal(tf.argmax(logits, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(hits, tf.float32))

        # Restore variables through the moving-average name mapping.
        ema = tf.train.ExponentialMovingAverage(mnist_train.MOVING_AVERAGE_DECAY)
        saver = tf.train.Saver(ema.variables_to_restore())

        while True:
            with tf.Session() as sess:
                ckpt = tf.train.get_checkpoint_state(mnist_train.MODEL_SAVE_PATH)
                if not (ckpt and ckpt.model_checkpoint_path):
                    print('No checkpoint file found')
                    return
                saver.restore(sess, ckpt.model_checkpoint_path)
                # The step count is the text after the last '-' of the
                # checkpoint file name.
                checkpoint_file = ckpt.model_checkpoint_path.split('/')[-1]
                global_step = checkpoint_file.split('-')[-1]
                accuracy_score = sess.run(accuracy, feed_dict=validation_inputs)
                print("After %s training step(s), validation accuracy = %g" % (global_step, accuracy_score))
            time.sleep(EVAL_INTERVAL_SECS)
def main(argv=None):
    """Load MNIST with one-hot labels and start the evaluation loop.

    Args:
        argv: Unused.
    """
    evaluate(input_data.read_data_sets("../../../datasets/MNIST_data", one_hot=True))


if __name__ == '__main__':
    tf.app.run()