說明:
- 本文主要是將TensorFlow例程拿過來解釋了一下
- 首先是數據的下載:自動下載時可能會出錯,此時可以改用網上其他可用的下載網址取得數據集
- 1 加載mnist數據集
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
- 2 清除圖形堆棧
tf.reset_default_graph()
- 3 參數和輸入輸出佔位
tf.Variable和tf.placeholder
- 4 配置前向輸出
tf.nn.softmax(tf.matmul(x, W) + b)
- 5 配置交叉熵(或loss)與梯度下降
tf.reduce_mean, tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
- 6 設置訓練參數
- 7 啓動訓練
- 8 測試
- 9 保存訓練模型
- 10 加載訓練模型
- 11 重新測試
一、數據集mnist下載
import tensorflow as tf #導入tensorflow庫
from tensorflow.examples.tutorials.mnist import input_data
import pylab
# Download and load the MNIST data set; labels are one-hot encoded.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

train_images = mnist.train.images
# print('訓練集:', train_images)  # dump the raw training image array
print('訓練集shape:', train_images.shape)  # sample run printed (55000, 784)

# Reshape the second training sample into rows of 28 values so that it can
# be displayed as an image.
im = train_images[1].reshape(-1, 28)
pylab.imshow(im)
pylab.show()

print('測試集shape:', mnist.test.images.shape)  # sample run printed (10000, 784)
# NOTE(review): the label reads "input data shape", but the value printed is
# the shape of the validation split.
print('輸入數據打印shape:', mnist.validation.images.shape)  # (5000, 784)
print("訓練數據總數:", mnist.train.num_examples)  # 55000
print("測試數據總數:", mnist.test.num_examples)  # 10000
Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
訓練集shape: (55000, 784)
測試集shape: (10000, 784)
輸入數據打印shape: (5000, 784)
訓練數據總數: 55000
測試數據總數: 10000
二、0-9分類訓練
import tensorflow as tf #導入tensorflow庫
from tensorflow.examples.tutorials.mnist import input_data
import pylab
# 1 Load the data set (labels one-hot encoded) and report the split sizes.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
print("訓練數據總數:", mnist.train.num_examples)
# This line used to read `mnist.test.nu`, a truncated attribute name that
# raises AttributeError. `num_examples` is the attribute already used for the
# training split on the line above.
print("測試數據總數:", mnist.test.num_examples)
# 2 Clear the default graph stack and reset the global default graph.
tf.reset_default_graph()

# 3 Graph inputs. The leading None leaves the batch dimension open.
x = tf.placeholder(tf.float32, shape=[None, 784])  # flattened 28*28 = 784 pixels
y = tf.placeholder(tf.float32, shape=[None, 10])   # digits 0-9 => 10 classes

# 4 Model parameters: weights drawn from a normal distribution, biases zero.
W = tf.Variable(tf.random_normal([784, 10]))
b = tf.Variable(tf.zeros([10]))

# 5 Forward pass: affine map followed by softmax, so every row of `pred`
#   holds class probabilities that sum to 1.
logits = tf.matmul(x, W) + b
pred = tf.nn.softmax(logits)

# 6 Cross-entropy between the labels `y` and the predictions, summed over the
#   class axis and then averaged over the batch. This is the error of one
#   forward pass; gradient descent updates W and b to reduce it.
per_example_loss = -tf.reduce_sum(y * tf.log(pred), reduction_indices=1)
cost = tf.reduce_mean(per_example_loss)

# 7 Gradient-descent optimizer; `learning_rate` is its step size.
learning_rate = 0.01
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

# 8 Training settings.
training_epochs = 25
batch_size = 100
display_step = 1  # report the average cost every `display_step` epochs

# Checkpoint target and the saver that writes/reads it.
model_path = "log/521model.ckpt"
saver = tf.train.Saver()
# 9 Train, evaluate and checkpoint the model inside one session.
with tf.Session() as sess:
    # Run the variable initializer op before the first training step.
    sess.run(tf.global_variables_initializer())

    # Mini-batches per epoch (55000 // 100 = 550 in the sample run). The value
    # does not change between epochs, so it is computed once up front.
    total_batch = mnist.train.num_examples // batch_size

    # Training loop: `training_epochs` passes over the training set.
    for epoch in range(training_epochs):
        avg_cost = 0.
        # One pass over the data, `batch_size` samples at a time.
        for _ in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            # Run one optimisation step and fetch the loss of this batch.
            _, c = sess.run([optimizer, cost],
                            feed_dict={x: batch_xs, y: batch_ys})
            # Accumulate the mean loss over the epoch.
            avg_cost += c / total_batch
        # Report progress every `display_step` epochs.
        if (epoch + 1) % display_step == 0:
            print("Epoch:", '%04d' % (epoch + 1),
                  "cost=", "{:.9f}".format(avg_cost))
    print(" Finished!")

    # Test the model: a prediction counts as correct when its arg-max class
    # equals the arg-max of the one-hot label.
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    # Accuracy is the mean of the 0/1 correctness values.
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    print("Accuracy:",
          accuracy.eval({x: mnist.test.images, y: mnist.test.labels}))

    # Save model weights to disk.
    save_path = saver.save(sess, model_path)
    print("Model saved in file: %s" % save_path)
Epoch: 0001 cost= 8.740313732
...
Epoch: 0025 cost= 0.849729373
Finished!
Accuracy: 0.8294
Model saved in file: log/521model.ckpt
# The triple-quoted string below is a bare string expression, so nothing in
# it is executed: it holds the disabled "second session" snippet. As written,
# that snippet opens a new session, restores the checkpoint at `model_path`
# through `saver`, prints the test-set accuracy again, then runs the model on
# two samples taken from the training set and displays each one as an image
# with rows of 28 values.
# NOTE(review): the indentation of the `with tf.Session() as sess:` body was
# lost in this paste; it must be re-indented before the snippet is re-enabled.
'''
#讀取模型
print("Starting 2nd session...")
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
saver.restore(sess, model_path)
# 測試 model
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
# 計算準確率
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print ("Accuracy:", accuracy.eval({x: mnist.test.images, y: mnist.test.labels}))
output = tf.argmax(pred, 1)
batch_xs, batch_ys = mnist.train.next_batch(2)
outputval,predv = sess.run([output,pred], feed_dict={x: batch_xs})
print(outputval,predv,batch_ys)
im = batch_xs[0]
im = im.reshape(-1,28)
pylab.imshow(im)
pylab.show()
im = batch_xs[1]
im = im.reshape(-1,28)
pylab.imshow(im)
pylab.show()
'''