《TensorFlow實戰Google深度學習架構》——MNIST數字識別問題勘誤

最近在學習《TensorFlow實戰Google深度學習架構》這本書，上面的代碼大部分都跑不通，真是一言難盡啊...

搜了好多資料，好容易把MNIST數字識別問題這段代碼改好了，自己寫個筆記紀念一下吧。

-----------------------------------------

# 導入必要的庫
import tensorflow as tf
import os
from tensorflow.examples.tutorials.mnist import input_data

# Load the MNIST data set and print some basic information about it.
# NOTE(review): read_data_sets is expected to download the files into the
# given directory when they are missing -- confirm for the installed version.
# one_hot=True is requested, so each label is expected to be a one-hot vector.
mnist = input_data.read_data_sets('/MNIST_data/', one_hot=True)
print("Training data size: ", mnist.train.num_examples)
print("Validating data size: ", mnist.validation.num_examples)
print("Testing data size: ", mnist.test.num_examples)
print("Example training data: ", mnist.train.images[0] )
print("Example training data label: ", mnist.train.labels[0])

# Module-level hyper-parameters
INPUT_NODE = 784  # Input layer size: images are 28*28*1, one node per pixel gives 784
OUTPUT_NODE = 10  # Output layer size: the ten digits 0-9

LAYER1_NODE = 500  # Number of nodes in the first (only) hidden layer

BATCH_SIZE = 100  # Batch size: larger is closer to full gradient descent, smaller is closer to stochastic gradient descent

LEARNING_RATE_BASE = 0.8  # Base (initial) learning rate
LEARNING_RATE_DECAY = 0.99  # Decay rate applied to the learning rate

REGULARIZATION_RATE = 0.0001  # Lambda coefficient of the regularization term
TRAINING_STEPS = 30000  # Number of training steps
MOVING_AVERAGE_DECAY = 0.99  # Decay rate of the exponential moving average

def get_weight_variable(shape, regualrizer):
    """Create or fetch the "weights" variable of the current variable scope.

    The variable is obtained with ``tf.get_variable`` under the name
    "weights", so whether it is created or reused is decided by the enclosing
    ``tf.variable_scope``.  The initializer is a truncated normal distribution
    with standard deviation 0.1: samples further than two standard deviations
    from the mean are discarded and redrawn.

    Args:
        shape: Shape of the weight tensor.
        regualrizer: Callable that maps the weight variable to a
            regularization loss tensor (for example an L1 or L2 regularizer),
            or None to skip regularization.  The misspelt name is kept so
            existing keyword callers keep working.

    Returns:
        The weight variable.
    """
    weights = tf.get_variable(
        "weights", shape,
        # The truncated initializer matches the documented intent (redraw
        # values beyond 2 stddev); a plain normal initializer would not.
        initializer=tf.truncated_normal_initializer(stddev=0.1))
    if regualrizer is not None:
        # Record this weight's regularization term in the "losses" collection
        # so the caller can add every term to the total loss later.
        tf.add_to_collection("losses", regualrizer(weights))
    return weights


def inference(x, regularizer):
    """Compute the forward pass of the network and return the output logits.

    The network is fully connected: input -> ReLU hidden layer -> linear
    output layer.  Variables live in the variable scopes "layer1" and
    "layer2" under the names "weights" and "biases".

    Args:
        x: Input tensor; it is multiplied by an
            [INPUT_NODE, LAYER1_NODE] weight matrix.
        regularizer: Passed through to ``get_weight_variable`` for both
            weight matrices; may be None.

    Returns:
        The output of the second layer (no activation is applied to it).
    """
    # Hidden layer: affine transform followed by ReLU.
    with tf.variable_scope('layer1', reuse=False):
        hidden_weights = get_weight_variable([INPUT_NODE, LAYER1_NODE],
                                             regularizer)
        hidden_biases = tf.get_variable(
            "biases", [LAYER1_NODE],
            initializer=tf.constant_initializer(0.0))
        hidden_pre_activation = tf.matmul(x, hidden_weights) + hidden_biases
        hidden = tf.nn.relu(hidden_pre_activation)

    # Output layer: affine transform only.
    with tf.variable_scope('layer2', reuse=False):
        output_weights = get_weight_variable([LAYER1_NODE, OUTPUT_NODE],
                                             regularizer)
        output_biases = tf.get_variable(
            "biases", [OUTPUT_NODE],
            initializer=tf.constant_initializer(0.0))
        logits = tf.matmul(hidden, output_weights) + output_biases

    return logits


def train(mnist):
    """Build the training graph, run the training loop and report accuracy.

    The loss is the mean softmax cross entropy plus every term stored in the
    "losses" collection.  It is minimized with gradient descent using an
    exponentially decaying learning rate, and an exponential moving average
    of all trainable variables is updated together with every training step.
    Accuracy is evaluated with the moving-average (shadow) weights, as the
    printed messages state.

    Args:
        mnist: Dataset object providing ``train.num_examples``,
            ``train.next_batch(batch_size)`` and the ``images`` / ``labels``
            arrays of its ``validation`` and ``test`` splits.  Labels are
            assumed to be one-hot encoded (``tf.argmax`` is applied to them).
    """
    x = tf.placeholder(tf.float32, shape=[None, INPUT_NODE], name="x-input")
    y_ = tf.placeholder(tf.float32, shape=[None, OUTPUT_NODE], name="y-input")

    # L2 regularization function applied to the weight matrices.
    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    # Forward pass with the raw (non-averaged) weights; used for training.
    y = inference(x, regularizer)
    # Step counter.  trainable=False keeps it out of the optimizer and out of
    # the list of variables that get a moving average.
    global_step = tf.Variable(0, trainable=False)

    # Set up the moving average from the decay rate and the step counter,
    # then track every trainable variable with it.
    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY,
                                                          global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    def _averaged_getter(getter, name, *args, **kwargs):
        """Return the moving-average shadow of a variable when one exists."""
        variable = getter(name, *args, **kwargs)
        shadow = variable_averages.average(variable)
        return variable if shadow is None else shadow

    # Second forward pass that reuses the variables created above but reads
    # their moving averages.  No regularizer is passed, so nothing is added to
    # the "losses" collection a second time.
    with tf.variable_scope(tf.get_variable_scope(), reuse=True,
                           custom_getter=_averaged_getter):
        average_y = inference(x, None)

    # Loss function.
    # sparse_softmax_cross_entropy_with_logits expects class indices rather
    # than one-hot vectors, so the one-hot labels are converted with
    # tf.argmax along axis 1 (index of the largest entry of each row).
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=y, labels=tf.argmax(y_, 1))
    # Mean cross entropy over the batch.
    cross_entropy_mean = tf.reduce_mean(cross_entropy)

    # Total loss: cross entropy plus the sum of all regularization terms
    # collected in "losses".
    loss = cross_entropy_mean + tf.add_n(tf.get_collection("losses"))

    # Exponentially decaying learning rate.  decay_steps is the number of
    # batches per pass over the training set; with staircase=True the rate
    # changes in discrete steps.
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        mnist.train.num_examples / BATCH_SIZE,
        LEARNING_RATE_DECAY,
        staircase = True
    )

    # Optimization op.  Passing global_step makes minimize() increment it by
    # one on every update; the value returned is the training op.
    train_step = tf.train.GradientDescentOptimizer(
        learning_rate).minimize(loss, global_step=global_step)

    # After back-propagation updates the parameters, their moving averages
    # must be updated as well; train_op triggers both in a single run.
    with tf.control_dependencies([train_step, variables_averages_op]):
        train_op = tf.no_op(name="train")

    # average_y holds the averaged model's predictions and y_ the correct
    # answers; argmax yields the digit, and equal() compares them per example.
    correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))
    # Cast the booleans to floats and average them to obtain the accuracy.
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    with tf.Session() as sess:
        # Initialize all global variables.
        tf.global_variables_initializer().run()

        validate_feed = {x: mnist.validation.images, y_: mnist.validation.labels}
        test_feed = {x: mnist.test.images, y_: mnist.test.labels}
        # Training loop.
        for i in range(TRAINING_STEPS):
            # next_batch is the data set helper that returns the next
            # BATCH_SIZE training examples and their labels.
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            sess.run(train_op, feed_dict={x:xs, y_:ys})

            if i % 1000 == 0:
                # Report accuracy on the validation set every 1000 steps.
                validate_acc = sess.run(accuracy, feed_dict=validate_feed)
                print("After %d traing times, validate accuracy using average model is %g"
                     % (i, validate_acc)) 

        # Evaluate the trained model on the test set for the final accuracy.
        test_acc = sess.run(accuracy, feed_dict=test_feed)
        print("After %d traing times, test accuracy using average model is %g" 
              % (TRAINING_STEPS, test_acc))

# Entry point

def main(argv=None):
    """Reset the default graph, load the MNIST data and start training.

    Args:
        argv: Not used by this function.
    """
    tf.reset_default_graph()
    train(input_data.read_data_sets('/MNIST_data/', one_hot=True))


# Start training only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

《TensorFlow實戰Google深度學習架構》——MNIST數字識別問題勘誤

《Sparse and Redundant Representations：From Theory to Applications in Signal and Image Processing》序論

《Sparse and Redundant Representations》第六章迭代收縮算法

《Sparse and Redundant Representations》第五章從精確解到近似解

Matlab運行時出現提示 too many input arguments問題

《Sparse and Redundant Representations》第二章：唯一性與不確定性

Mac下配置sublime實現LaTeX

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結