Section 4.4 classifies the MNIST dataset with a convolutional neural network, whereas Section 2.4 earlier built a simple network out of fully connected layers.
import time

import numpy as np
import tensorflow as tf
import tensorlayer as tl
# Load MNIST already shaped for convolutional input: (N, 28, 28, 1)
X_train, y_train, X_val, y_val, X_test, y_test = \
    tl.files.load_mnist_dataset(shape=(-1, 28, 28, 1))

sess = tf.InteractiveSession()

batch_size = 128
x = tf.placeholder(tf.float32, shape=[batch_size, 28, 28, 1])  # image batches
y_ = tf.placeholder(tf.int64, shape=[batch_size])              # integer class labels
network = tl.layers.InputLayer(x, name='input')
network = tl.layers.Conv2d(network, 32, (5, 5), (1, 1), act=tf.nn.relu, padding='SAME', name='cnn1')
network = tl.layers.MaxPool2d(network, (2, 2), (2, 2), padding='SAME', name='pool1')
network = tl.layers.Conv2d(network, 64, (5, 5), (1, 1), act=tf.nn.relu, padding='SAME', name='cnn2')
network = tl.layers.MaxPool2d(network, (2, 2), (2, 2), padding='SAME', name='pool2')
network = tl.layers.FlattenLayer(network, name='flatten')
network = tl.layers.DropoutLayer(network, keep=0.5, name='drop1')
network = tl.layers.DenseLayer(network, 256, act=tf.nn.relu, name='relu1')
network = tl.layers.DropoutLayer(network, keep=0.5, name='drop2')
network = tl.layers.DenseLayer(network, 10, act=tf.identity, name='output')
y = network.outputs
cost = tl.cost.cross_entropy(y, y_, 'cost')
correct_prediction = tf.equal(tf.argmax(y, 1), y_)
acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# n_epoch = 200 in the original tutorial; each epoch takes ~5 minutes here, so train for 10
n_epoch = 10
learning_rate = 0.0001
print_freq = 2
train_params = network.all_params
train_op = tf.train.AdamOptimizer(learning_rate).minimize(cost, var_list=train_params)
tl.layers.initialize_global_variables(sess)
network.print_params()
network.print_layers()
print(' learning_rate: %f' % learning_rate)
print(' batch_size: %d' % batch_size)
print('~~~~~~~~~~~training~~~~~~~~~~~')
for epoch in range(n_epoch):
    start_time = time.time()
    for X_train_a, y_train_a in tl.iterate.minibatches(X_train, y_train, batch_size, shuffle=True):
        feed_dict = {x: X_train_a, y_: y_train_a}
        feed_dict.update(network.all_drop)  # enable noise (dropout) layers for training
        sess.run(train_op, feed_dict=feed_dict)

    if epoch + 1 == 1 or (epoch + 1) % print_freq == 0:
        print("Epoch %d of %d took %fs" % (epoch + 1, n_epoch, time.time() - start_time))
        train_loss, train_acc, n_batch = 0, 0, 0
        for X_train_a, y_train_a in tl.iterate.minibatches(X_train, y_train, batch_size, shuffle=True):
            dp_dict = tl.utils.dict_to_one(network.all_drop)  # disable noise layers
            feed_dict = {x: X_train_a, y_: y_train_a}
            feed_dict.update(dp_dict)
            err, ac = sess.run([cost, acc], feed_dict=feed_dict)
            train_loss += err
            train_acc += ac
            n_batch += 1
        print(" train loss: %f" % (train_loss / n_batch))
        print(" train acc: %f" % (train_acc / n_batch))
        val_loss, val_acc, n_batch = 0, 0, 0
        for X_val_a, y_val_a in tl.iterate.minibatches(X_val, y_val, batch_size, shuffle=True):
            dp_dict = tl.utils.dict_to_one(network.all_drop)
            feed_dict = {x: X_val_a, y_: y_val_a}
            feed_dict.update(dp_dict)
            err, ac = sess.run([cost, acc], feed_dict=feed_dict)
            val_loss += err
            val_acc += ac
            n_batch += 1
        print(" val loss: %f" % (val_loss / n_batch))
        print(" val acc: %f" % (val_acc / n_batch))
        try:
            # save the 32 first-layer 5x5 kernels to an image every print epoch
            tl.vis.CNN2d(network.all_params[0].eval(), second=50, saveable=True, name='cnn1_' + str(epoch + 1), fig_idx=2012)
        except Exception:
            print("You should change vis.CNN(), if you want to save the feature images for different dataset")
print('~~~~~~~~~~~~Evaluation~~~~~~~~~~~~~~~~~~')
test_loss, test_acc, n_batch = 0, 0, 0
for X_test_a, y_test_a in tl.iterate.minibatches(X_test, y_test, batch_size, shuffle=True):
    dp_dict = tl.utils.dict_to_one(network.all_drop)
    feed_dict = {x: X_test_a, y_: y_test_a}
    feed_dict.update(dp_dict)
    err, ac = sess.run([cost, acc], feed_dict=feed_dict)
    test_loss += err
    test_acc += ac
    n_batch += 1
print(" test loss: %f" % (test_loss / n_batch))
print(" test acc: %f" % (test_acc / n_batch))
Once again I had to shrink the training run; no way around it, since each epoch takes about five minutes, 10 epochs is all I can manage and 200 would be unbearable. The output of the run is below:
[TL] Load or Download MNIST > data\mnist
[TL] data\mnist\train-images-idx3-ubyte.gz
[TL] data\mnist\t10k-images-idx3-ubyte.gz
[TL] InputLayer input: (128, 28, 28, 1)
[TL] Conv2dLayer cnn1: shape:(5, 5, 1, 32) strides:(1, 1, 1, 1) pad:SAME act:relu
[TL] PoolLayer pool1: ksize:[1, 2, 2, 1] strides:[1, 2, 2, 1] padding:SAME pool:max_pool
[TL] Conv2dLayer cnn2: shape:(5, 5, 32, 64) strides:(1, 1, 1, 1) pad:SAME act:relu
[TL] PoolLayer pool2: ksize:[1, 2, 2, 1] strides:[1, 2, 2, 1] padding:SAME pool:max_pool
[TL] FlattenLayer flatten: 3136
[TL] DropoutLayer drop1: keep:0.500000 is_fix:False
[TL] DenseLayer relu1: 256 relu
[TL] DropoutLayer drop2: keep:0.500000 is_fix:False
[TL] DenseLayer output: 10 identity
[TL] param 0: cnn1/W_conv2d:0 (5, 5, 1, 32) float32_ref (mean: -0.0009739032248035073, median: -0.0012249670689925551, std: 0.017941096797585487)
[TL] param 1: cnn1/b_conv2d:0 (32,) float32_ref (mean: 0.0 , median: 0.0 , std: 0.0 )
[TL] param 2: cnn2/W_conv2d:0 (5, 5, 32, 64) float32_ref (mean: 0.0001029430641210638, median: 5.76816892134957e-05, std: 0.017625369131565094)
[TL] param 3: cnn2/b_conv2d:0 (64,) float32_ref (mean: 0.0 , median: 0.0 , std: 0.0 )
[TL] param 4: relu1/W:0 (3136, 256) float32_ref (mean: -2.735738962655887e-05, median: -5.070196493761614e-05, std: 0.08805979043245316)
[TL] param 5: relu1/b:0 (256,) float32_ref (mean: 0.0 , median: 0.0 , std: 0.0 )
[TL] param 6: output/W:0 (256, 10) float32_ref (mean: 0.003307575825601816, median: 0.002707334700971842, std: 0.08724205940961838)
[TL] param 7: output/b:0 (10,) float32_ref (mean: 0.0 , median: 0.0 , std: 0.0 )
[TL] num of params: 857738
[TL] layer 0: cnn1/Relu:0 (128, 28, 28, 32) float32
[TL] layer 1: pool1:0 (128, 14, 14, 32) float32
[TL] layer 2: cnn2/Relu:0 (128, 14, 14, 64) float32
[TL] layer 3: pool2:0 (128, 7, 7, 64) float32
[TL] layer 4: flatten:0 (128, 3136) float32
[TL] layer 5: drop1/mul:0 (128, 3136) float32
[TL] layer 6: relu1/Relu:0 (128, 256) float32
[TL] layer 7: drop2/mul:0 (128, 256) float32
[TL] layer 8: output/Identity:0 (128, 10) float32
learning_rate: 0.000100
batch_size: 128
~~~~~~~~~~~training~~~~~~~~~~~
Epoch 1 of 10 took 345.618607s
train loss: 0.265381
train acc: 0.924700
val loss: 0.235933
val acc: 0.933393
Epoch 2 of 10 took 334.714188s
train loss: 0.156615
train acc: 0.953886
val loss: 0.138608
val acc: 0.961138
C:\Program Files\Anaconda3\lib\site-packages\matplotlib\cbook\deprecation.py:107: MatplotlibDeprecationWarning: Adding an axes using the same arguments as a previous axes currently reuses the earlier instance. In a future version, a new instance will always be created and returned. Meanwhile, this warning can be suppressed, and the future behavior ensured, by passing a unique label to each axes instance.
warnings.warn(message, mplDeprecation, stacklevel=1)
Epoch 4 of 10 took 333.122985s
train loss: 0.097069
train acc: 0.970573
val loss: 0.089125
val acc: 0.974159
Epoch 6 of 10 took 334.979388s
train loss: 0.073271
train acc: 0.976743
val loss: 0.068358
val acc: 0.979768
Epoch 8 of 10 took 335.088589s
train loss: 0.060125
train acc: 0.981050
val loss: 0.057651
val acc: 0.981871
Epoch 10 of 10 took 337.054192s
train loss: 0.051058
train acc: 0.983834
val loss: 0.051008
val acc: 0.984776
~~~~~~~~~~~~Evaluation~~~~~~~~~~~~~~~~~~
test loss: 0.046510
test acc: 0.983273
[Finished in 4250.1s]
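The reported num of params: 857738 is easy to verify from the shapes in the log: two SAME-padded 2x2 max-poolings shrink 28x28 to 14x14 and then 7x7, so the flatten layer outputs 7*7*64 = 3136 features. Counting weights plus biases per layer (plain arithmetic, no extra APIs):

conv1 = 5 * 5 * 1 * 32 + 32    # cnn1: (5, 5, 1, 32) kernel + biases = 832
conv2 = 5 * 5 * 32 * 64 + 64   # cnn2: (5, 5, 32, 64) kernel + biases = 51264
dense1 = 3136 * 256 + 256      # relu1 weights + biases = 803072
dense2 = 256 * 10 + 10         # output weights + biases = 2570
print(conv1 + conv2 + dense1 + dense2)  # 857738, matching print_params()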
A few PDFs were saved as well, but since I trained for so few steps I can't make out any pattern in them.
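For reference, those PDFs come from the tl.vis.CNN2d call in the loop, which tiles the 32 first-layer 5x5 kernels into one figure per print epoch. The same filters can be rendered by hand with matplotlib; the sketch below is my own (not the library's routine) and assumes the trained sess and network above are still alive:

import matplotlib.pyplot as plt

W = network.all_params[0].eval(session=sess)  # trained cnn1 kernels, shape (5, 5, 1, 32)
fig, axes = plt.subplots(4, 8, figsize=(8, 4))
for i, ax in enumerate(axes.flat):
    ax.imshow(W[:, :, 0, i], cmap='gray')  # one 5x5 filter per panel
    ax.axis('off')
plt.savefig('cnn1_filters.png')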