TensorFlow:MNIST 专家级教程



  • 读取训练/测试数据集(MNIST)
  • 定义神经网络架构
  • 定义损失函数和优化方法
  • 根据数据批次训练神经网络
  • 评估测试数据的性能




import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

def train_network(training_data, labels, output, keep_prob=None):
    """Train a network on MNIST and print batch and test accuracy.

    Builds a softmax cross-entropy loss on `output`, minimises it with Adam
    for a fixed number of mini-batch steps, prints the accuracy on the
    current batch every 100 steps and finally prints the test-set accuracy.

    Args:
        training_data: Placeholder fed with the image batches returned by
            `mnist.train.next_batch` (and with `mnist.test.images`).
        labels: Placeholder fed with the matching one-hot label batches.
        output: Logits tensor produced by the network to train.
        keep_prob: Optional placeholder for the dropout keep probability.
            It is fed 0.5 on training steps and 1.0 on every accuracy
            evaluation. When omitted, a fresh placeholder is created so
            that the feed dictionaries stay valid for networks that do
            not use dropout.
    """
    learning_rate = 1e-4
    steps_number = 1000
    batch_size = 100

    if keep_prob is None:
        # Created per call instead of as a default argument: a default would
        # be evaluated once at definition time, adding a node to the graph on
        # import and sharing it between all calls.
        keep_prob = tf.placeholder(tf.float32)

    # Read data
    mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

    # Define the loss function
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=output))

    # Training step
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

    # Accuracy calculation
    correct_prediction = tf.equal(tf.argmax(output, 1), tf.argmax(labels, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Run the training; the context manager closes the session on exit,
    # including when an exception is raised.
    with tf.Session() as sess:
        # Variables must be initialised before any op reads them. This runs
        # after minimize() so that the optimizer's own variables are
        # included as well.
        sess.run(tf.global_variables_initializer())

        for i in range(steps_number):
            # Get the next batch
            input_batch, labels_batch = mnist.train.next_batch(batch_size)

            # Print the accuracy progress on the batch every 100 steps
            if i % 100 == 0:
                train_accuracy = sess.run(
                    accuracy,
                    feed_dict={training_data: input_batch, labels: labels_batch, keep_prob: 1.0},
                )
                print("Step %d, training batch accuracy %g %%"%(i, train_accuracy*100))

            # Run the training step
            sess.run(
                train_step,
                feed_dict={training_data: input_batch, labels: labels_batch, keep_prob: 0.5},
            )

        print("The end of training!")

        # Evaluate on the test set
        test_accuracy = sess.run(
            accuracy,
            feed_dict={training_data: mnist.test.images, labels: mnist.test.labels, keep_prob: 1.0},
        )
        print("Test accuracy: %g %%"%(test_accuracy*100))

上面的训练代码保存为 training.py,下面的各个脚本通过 import training 调用其中的 train_network 函数。其中,dense.py 包含一个只有输出层的简单网络,与初学者教程中介绍的网络相同;在本场景的后续部分,我们将创建更复杂的模型。dense.py 的代码如下:

import tensorflow as tf

# Image side length, number of classes, and length of one flattened image
image_size = 28
labels_size = 10
flat_image_size = image_size*image_size

# Graph inputs; the leading None leaves the batch dimension unspecified
training_data = tf.placeholder(tf.float32, shape=[None, flat_image_size])
labels = tf.placeholder(tf.float32, shape=[None, labels_size])

# Trainable parameters of the single (output) layer
initial_weights = tf.truncated_normal([flat_image_size, labels_size], stddev=0.1)
W = tf.Variable(initial_weights)
initial_bias = tf.constant(0.1, shape=[labels_size])
b = tf.Variable(initial_bias)

# The whole network: one affine map from pixels to class scores
weighted_input = tf.matmul(training_data, W)
output = weighted_input + b

# Hand the graph over to the shared training / evaluation routine
import training
training.train_network(training_data, labels, output)


python dense.py

# Weights and biases of the hidden layer (one statement per line; the two
# assignments were previously fused on a single line, which is not valid Python)
W_h = tf.Variable(tf.truncated_normal([image_size*image_size, hidden_size], stddev=0.1))
b_h = tf.Variable(tf.constant(0.1, shape=[hidden_size]))

# Hidden layer with ReLU activation
hidden = tf.nn.relu(tf.matmul(training_data, W_h) + b_h)

# Weights and biases of the output layer
W = tf.Variable(tf.truncated_normal([hidden_size, labels_size], stddev=0.1))
b = tf.Variable(tf.constant(0.1, shape=[labels_size]))

# Connect the hidden layer to the output layer
output = tf.matmul(hidden, W) + b


python hidden.py

import tensorflow as tf

# Image side length, number of classes, hidden-layer width, flattened image length
image_size = 28
labels_size = 10
hidden_size = 1024
flat_image_size = image_size*image_size

# Graph inputs; the leading None leaves the batch dimension unspecified
training_data = tf.placeholder(tf.float32, shape=[None, flat_image_size])
labels = tf.placeholder(tf.float32, shape=[None, labels_size])

# Hidden-layer parameters
hidden_weights_init = tf.truncated_normal([flat_image_size, hidden_size], stddev=0.1)
W_h = tf.Variable(hidden_weights_init)
hidden_bias_init = tf.constant(0.1, shape=[hidden_size])
b_h = tf.Variable(hidden_bias_init)

# Hidden layer: affine map followed by ReLU
hidden_pre_activation = tf.matmul(training_data, W_h) + b_h
hidden = tf.nn.relu(hidden_pre_activation)

# Output-layer parameters
output_weights_init = tf.truncated_normal([hidden_size, labels_size], stddev=0.1)
W = tf.Variable(output_weights_init)
output_bias_init = tf.constant(0.1, shape=[labels_size])
b = tf.Variable(output_bias_init)

# Output layer: class scores computed from the hidden activations
weighted_hidden = tf.matmul(hidden, W)
output = weighted_hidden + b

# Hand the graph over to the shared training / evaluation routine
import training
training.train_network(training_data, labels, output)


接下来要添加的两层是卷积网络的组成部分。它们的工作方式与全连接(密集)层不同,在二维或更高维的输入上表现尤其出色。卷积层的参数是卷积窗口的大小和步幅。将填充设置为 'SAME' 表示在步幅为 1 时,输出与输入的空间尺寸相同。在这一步之后,我们应用最大池化(max pooling)。我们将构建两个卷积层,并将其连接到全连接的隐藏层。最终的网络结构如下所示:



# Placeholders; the flat input is reshaped to [batch, height, width, channels]
# for the convolutions. (Each statement is on its own line; they were
# previously fused together, which is not valid Python.)
training_data = tf.placeholder(tf.float32, [None, image_size*image_size])
training_images = tf.reshape(training_data, [-1, image_size, image_size, 1])
labels = tf.placeholder(tf.float32, [None, labels_size])

# 1st convolutional layer: 5x5 window, 1 input channel, 32 output channels
W_conv1 = tf.Variable(tf.truncated_normal([5, 5, 1, 32], stddev=0.1))
b_conv1 = tf.Variable(tf.constant(0.1, shape=[32]))

# 1st convolution & 2x2 max pooling
conv1 = tf.nn.relu(tf.nn.conv2d(training_images, W_conv1, strides=[1, 1, 1, 1], padding='SAME') + b_conv1)
pool1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

# 2nd convolutional layer: 5x5 window, 32 input channels, 64 output channels
W_conv2 = tf.Variable(tf.truncated_normal([5, 5, 32, 64], stddev=0.1))
b_conv2 = tf.Variable(tf.constant(0.1, shape=[64]))

# 2nd convolution & 2x2 max pooling
conv2 = tf.nn.relu(tf.nn.conv2d(pool1, W_conv2, strides=[1, 1, 1, 1], padding='SAME') + b_conv2)
pool2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

# Flatten the pooled feature maps for the dense layer
pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])

# Dense hidden layer
W_h = tf.Variable(tf.truncated_normal([7 * 7 * 64, hidden_size], stddev=0.1))
b_h = tf.Variable(tf.constant(0.1, shape=[hidden_size]))

hidden = tf.nn.relu(tf.matmul(pool2_flat, W_h) + b_h)

# Dropout on the hidden layer; keep_prob is supplied at run time
keep_prob = tf.placeholder(tf.float32)
hidden_drop = tf.nn.dropout(hidden, keep_prob)

# Output layer
W = tf.Variable(tf.truncated_normal([hidden_size, labels_size], stddev=0.1))
b = tf.Variable(tf.constant(0.1, shape=[labels_size]))

output = tf.matmul(hidden_drop, W) + b


python convolutional.py
import tensorflow as tf

# Image side length, number of classes and width of the dense hidden layer
image_size = 28
labels_size = 10
hidden_size = 1024

# Settings shared by both convolution / pooling stages
conv_strides = [1, 1, 1, 1]
pool_window = [1, 2, 2, 1]
flat_features = 7 * 7 * 64


def weight_variable(shape):
    """Return a variable of `shape` initialised from a truncated normal (stddev 0.1)."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))


def bias_variable(size):
    """Return a bias variable of length `size` initialised to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=[size]))


# Graph inputs; the flat pixel vectors are reshaped into a 4-D image tensor
training_data = tf.placeholder(tf.float32, shape=[None, image_size*image_size])
training_images = tf.reshape(training_data, [-1, image_size, image_size, 1])

labels = tf.placeholder(tf.float32, shape=[None, labels_size])

# Stage 1: 5x5 convolution to 32 channels, ReLU, then max pooling
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable(32)
conv1_pre_activation = tf.nn.conv2d(training_images, W_conv1, strides=conv_strides, padding='SAME') + b_conv1
conv1 = tf.nn.relu(conv1_pre_activation)
pool1 = tf.nn.max_pool(conv1, ksize=pool_window, strides=pool_window, padding='SAME')

# Stage 2: 5x5 convolution from 32 to 64 channels, ReLU, then max pooling
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable(64)
conv2_pre_activation = tf.nn.conv2d(pool1, W_conv2, strides=conv_strides, padding='SAME') + b_conv2
conv2 = tf.nn.relu(conv2_pre_activation)
pool2 = tf.nn.max_pool(conv2, ksize=pool_window, strides=pool_window, padding='SAME')

# Collapse the pooled feature maps into one vector per example
pool2_flat = tf.reshape(pool2, [-1, flat_features])

# Dense hidden layer with ReLU activation
W_h = weight_variable([flat_features, hidden_size])
b_h = bias_variable(hidden_size)
hidden_pre_activation = tf.matmul(pool2_flat, W_h) + b_h
hidden = tf.nn.relu(hidden_pre_activation)

# Dropout; the keep probability is supplied through a placeholder at run time
keep_prob = tf.placeholder(tf.float32)
hidden_drop = tf.nn.dropout(hidden, keep_prob)

# Output layer producing one score per class
W = weight_variable([hidden_size, labels_size])
b = bias_variable(labels_size)
weighted_hidden = tf.matmul(hidden_drop, W)
output = weighted_hidden + b

# Hand the graph and the dropout placeholder to the shared training routine
import training
training.train_network(training_data, labels, output, keep_prob)



