TensorLayer learning log 10_chapter4_4.4

Section 4.4 classifies the MNIST dataset with a convolutional neural network, whereas Section 2.4 used a simple network built only from fully connected layers.
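For contrast, the Section 2.4 model looked roughly like the sketch below. This is a minimal reconstruction following TensorLayer's tutorial_mnist.py; the 800-unit layer widths and keep probabilities are my assumption, not necessarily the book's exact values, and the data would be loaded flat with shape=(-1, 784) instead of as images:

# Sketch of a Section-2.4-style fully connected MNIST classifier (assumed sizes).
x_flat = tf.placeholder(tf.float32, shape=[None, 784])   # flattened 28*28 pixels
y_flat_ = tf.placeholder(tf.int64, shape=[None])
net = tl.layers.InputLayer(x_flat, name='input')
net = tl.layers.DropoutLayer(net, keep=0.8, name='drop1')
net = tl.layers.DenseLayer(net, 800, act=tf.nn.relu, name='relu1')
net = tl.layers.DropoutLayer(net, keep=0.5, name='drop2')
net = tl.layers.DenseLayer(net, 800, act=tf.nn.relu, name='relu2')
net = tl.layers.DropoutLayer(net, keep=0.5, name='drop3')
net = tl.layers.DenseLayer(net, 10, act=tf.identity, name='output')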

import numpy as np
import tensorflow as tf
import tensorlayer as tl
from tensorlayer.layers.core import set_keep  # not actually used in this script
import time

X_train, y_train, X_val, y_val, X_test, y_test = \
                tl.files.load_mnist_dataset(shape=(-1, 28, 28, 1))
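# Shapes after loading: X_train (50000, 28, 28, 1), X_val (10000, 28, 28, 1),
# X_test (10000, 28, 28, 1); y_* are integer label vectors of matching length.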

sess = tf.InteractiveSession()
batch_size = 128

# Placeholders with a fixed batch dimension: every batch fed in must
# contain exactly batch_size examples.
x = tf.placeholder(tf.float32, shape=[batch_size, 28, 28, 1])
y_ = tf.placeholder(tf.int64, shape=[batch_size])

# Two conv + max-pool blocks, then flatten -> dropout -> dense -> dropout -> logits.
network = tl.layers.InputLayer(x, name='input')
network = tl.layers.Conv2d(network, 32, (5, 5), (1, 1), act=tf.nn.relu, padding='SAME', name='cnn1')  # (128, 28, 28, 32)
network = tl.layers.MaxPool2d(network, (2, 2), (2, 2), padding='SAME', name='pool1')                  # (128, 14, 14, 32)
network = tl.layers.Conv2d(network, 64, (5, 5), (1, 1), act=tf.nn.relu, padding='SAME', name='cnn2')  # (128, 14, 14, 64)
network = tl.layers.MaxPool2d(network, (2, 2), (2, 2), padding='SAME', name='pool2')                  # (128, 7, 7, 64)

network = tl.layers.FlattenLayer(network, name='flatten')                   # (128, 3136)
network = tl.layers.DropoutLayer(network, keep=0.5, name='drop1')
network = tl.layers.DenseLayer(network, 256, act=tf.nn.relu, name='relu1')
network = tl.layers.DropoutLayer(network, keep=0.5, name='drop2')
network = tl.layers.DenseLayer(network, 10, act=tf.identity, name='output') # logits
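# Sanity check on the parameter count that network.print_params() reports
# below ("num of params: 857738"):
#   cnn1:   5*5*1*32   + 32  =    832
#   cnn2:   5*5*32*64  + 64  =  51264
#   relu1:  3136*256   + 256 = 803072
#   output: 256*10     + 10  =   2570
#   total                    = 857738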

y = network.outputs  # logits, shape (batch_size, 10)

# Softmax cross-entropy between the logits and the integer labels.
cost = tl.cost.cross_entropy(y, y_, 'cost')

correct_prediction = tf.equal(tf.argmax(y, 1), y_)
acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# n_epoch = 200
n_epoch = 10  # reduced from the book's 200; each epoch takes ~5 minutes here
learning_rate = 0.0001
print_freq = 2

train_params = network.all_params  # train every parameter in the network
train_op = tf.train.AdamOptimizer(learning_rate).minimize(cost, var_list=train_params)

tl.layers.initialize_global_variables(sess)
network.print_params()
network.print_layers()

print('   learning_rate: %f' % learning_rate)
print('   batch_size: %d' % batch_size)
print('~~~~~~~~~~~training~~~~~~~~~~~')

for epoch in range(n_epoch):
    start_time = time.time()
    for X_train_a, y_train_a in tl.iterate.minibatches(X_train, y_train, batch_size, shuffle=True):
        feed_dict = {x: X_train_a, y_: y_train_a}
        feed_dict.update(network.all_drop)  # enable dropout during training
        sess.run(train_op, feed_dict=feed_dict)

    if epoch + 1 == 1 or (epoch + 1) % print_freq == 0:
        print("Epoch %d of %d took %fs" % (epoch + 1, n_epoch, time.time() - start_time))
        train_loss, train_acc, n_batch = 0, 0, 0
        for X_train_a, y_train_a in tl.iterate.minibatches(X_train, y_train, batch_size, shuffle=True):
            dp_dict = tl.utils.dict_to_one(network.all_drop)  # disable noise layers
            feed_dict = {x: X_train_a, y_: y_train_a}
            feed_dict.update(dp_dict)
            err, ac = sess.run([cost, acc], feed_dict=feed_dict)
            train_loss += err
            train_acc += ac
            n_batch += 1
        print("   train loss: %f" % (train_loss / n_batch))
        print("   train acc: %f" % (train_acc / n_batch))

        val_loss, val_acc, n_batch = 0, 0, 0
        for X_val_a, y_val_a in tl.iterate.minibatches(X_val, y_val, batch_size, shuffle=True):
            dp_dict = tl.utils.dict_to_one(network.all_drop)  # disable noise layers
            feed_dict = {x: X_val_a, y_: y_val_a}
            feed_dict.update(dp_dict)
            err, ac = sess.run([cost, acc], feed_dict=feed_dict)
            val_loss += err
            val_acc += ac
            n_batch += 1
        print("   val loss: %f" % (val_loss / n_batch))
        print("   val acc: %f" % (val_acc / n_batch))
        try:
            # Save the first conv layer's 5x5 filters as an image (writes a PDF).
            tl.vis.CNN2d(network.all_params[0].eval(), second=50, saveable=True, name='cnn1_' + str(epoch + 1), fig_idx=2012)
        except Exception:
            print("You should change vis.CNN2d() if you want to save the feature images for a different dataset")

print('~~~~~~~~~~~~Evaluation~~~~~~~~~~~~~~~~~~')
test_loss, test_acc, n_batch = 0, 0, 0
for X_test_a, y_test_a in tl.iterate.minibatches(X_test, y_test, batch_size, shuffle=True):
    dp_dict = tl.utils.dict_to_one(network.all_drop)  # disable noise layers
    feed_dict = {x: X_test_a, y_: y_test_a}
    feed_dict.update(dp_dict)
    err, ac = sess.run([cost, acc], feed_dict=feed_dict)
    test_loss += err
    test_acc += ac
    n_batch += 1
print("   test loss: %f" % (test_loss / n_batch))
print("   test acc: %f" % (test_acc / n_batch))

Once again I had to shrink the training: each epoch takes about 5 minutes, so 10 epochs is all I can manage, and 200 would be unbearable. The output is as follows:

[TL] Load or Download MNIST > data\mnist
[TL] data\mnist\train-images-idx3-ubyte.gz
[TL] data\mnist\t10k-images-idx3-ubyte.gz
[TL] InputLayer  input: (128, 28, 28, 1)
[TL] Conv2dLayer cnn1: shape:(5, 5, 1, 32) strides:(1, 1, 1, 1) pad:SAME act:relu
[TL] PoolLayer   pool1: ksize:[1, 2, 2, 1] strides:[1, 2, 2, 1] padding:SAME pool:max_pool
[TL] Conv2dLayer cnn2: shape:(5, 5, 32, 64) strides:(1, 1, 1, 1) pad:SAME act:relu
[TL] PoolLayer   pool2: ksize:[1, 2, 2, 1] strides:[1, 2, 2, 1] padding:SAME pool:max_pool
[TL] FlattenLayer flatten: 3136
[TL] DropoutLayer drop1: keep:0.500000 is_fix:False
[TL] DenseLayer  relu1: 256 relu
[TL] DropoutLayer drop2: keep:0.500000 is_fix:False
[TL] DenseLayer  output: 10 identity
[TL]   param   0: cnn1/W_conv2d:0      (5, 5, 1, 32)      float32_ref (mean: -0.0009739032248035073, median: -0.0012249670689925551, std: 0.017941096797585487)   
[TL]   param   1: cnn1/b_conv2d:0      (32,)              float32_ref (mean: 0.0               , median: 0.0               , std: 0.0               )   
[TL]   param   2: cnn2/W_conv2d:0      (5, 5, 32, 64)     float32_ref (mean: 0.0001029430641210638, median: 5.76816892134957e-05, std: 0.017625369131565094)   
[TL]   param   3: cnn2/b_conv2d:0      (64,)              float32_ref (mean: 0.0               , median: 0.0               , std: 0.0               )   
[TL]   param   4: relu1/W:0            (3136, 256)        float32_ref (mean: -2.735738962655887e-05, median: -5.070196493761614e-05, std: 0.08805979043245316)   
[TL]   param   5: relu1/b:0            (256,)             float32_ref (mean: 0.0               , median: 0.0               , std: 0.0               )   
[TL]   param   6: output/W:0           (256, 10)          float32_ref (mean: 0.003307575825601816, median: 0.002707334700971842, std: 0.08724205940961838)   
[TL]   param   7: output/b:0           (10,)              float32_ref (mean: 0.0               , median: 0.0               , std: 0.0               )   
[TL]   num of params: 857738
[TL]   layer   0: cnn1/Relu:0          (128, 28, 28, 32)    float32
[TL]   layer   1: pool1:0              (128, 14, 14, 32)    float32
[TL]   layer   2: cnn2/Relu:0          (128, 14, 14, 64)    float32
[TL]   layer   3: pool2:0              (128, 7, 7, 64)    float32
[TL]   layer   4: flatten:0            (128, 3136)        float32
[TL]   layer   5: drop1/mul:0          (128, 3136)        float32
[TL]   layer   6: relu1/Relu:0         (128, 256)         float32
[TL]   layer   7: drop2/mul:0          (128, 256)         float32
[TL]   layer   8: output/Identity:0    (128, 10)          float32
   learning_rate: 0.000100
   batch_size: 128
~~~~~~~~~~~training~~~~~~~~~~~
Epoch 1 of 10 took 345.618607s
   train loss: 0.265381
   train acc: 0.924700
   val loss: 0.235933
   val acc: 0.933393
Epoch 2 of 10 took 334.714188s
   train loss: 0.156615
   train acc: 0.953886
   val loss: 0.138608
   val acc: 0.961138
C:\Program Files\Anaconda3\lib\site-packages\matplotlib\cbook\deprecation.py:107: MatplotlibDeprecationWarning: Adding an axes using the same arguments as a previous axes currently reuses the earlier instance.  In a future version, a new instance will always be created and returned.  Meanwhile, this warning can be suppressed, and the future behavior ensured, by passing a unique label to each axes instance.
  warnings.warn(message, mplDeprecation, stacklevel=1)
Epoch 4 of 10 took 333.122985s
   train loss: 0.097069
   train acc: 0.970573
   val loss: 0.089125
   val acc: 0.974159
Epoch 6 of 10 took 334.979388s
   train loss: 0.073271
   train acc: 0.976743
   val loss: 0.068358
   val acc: 0.979768
Epoch 8 of 10 took 335.088589s
   train loss: 0.060125
   train acc: 0.981050
   val loss: 0.057651
   val acc: 0.981871
Epoch 10 of 10 took 337.054192s
   train loss: 0.051058
   train acc: 0.983834
   val loss: 0.051008
   val acc: 0.984776
~~~~~~~~~~~~Evaluation~~~~~~~~~~~~~~~~~~
   test loss: 0.046510
   test acc: 0.983273
[Finished in 4250.1s]

A few PDFs (the conv-filter visualisations) were also saved, but with so few training steps I couldn't spot any pattern in them.
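Since each epoch costs about 5 minutes, next time it would be worth persisting the trained weights so a later run can restore them instead of retraining from scratch. A minimal sketch, assuming TensorLayer 1.x's tl.files helpers (the model.npz filename is my own choice):

# After training: save all parameters to an .npz file.
tl.files.save_npz(network.all_params, name='model.npz', sess=sess)

# In a later session: rebuild the same graph, initialize, then restore.
tl.layers.initialize_global_variables(sess)
tl.files.load_and_assign_npz(sess=sess, name='model.npz', network=network)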
