TensorLayer Study Log 8: Chapter 3, Section 3.6

The final example of Chapter 3: a stacked denoising autoencoder.

 

import tensorflow as tf
import tensorlayer as tl
import numpy as np
import time


# model = 'sigmoid'
model = 'relu'
# n_epoch = 200
n_epoch = 100
batch_size = 128
learning_rate = 0.0001
print_freq = 10


X_train, y_train, X_val, y_val, X_test, y_test = tl.files.load_mnist_dataset(shape=(-1, 784))

sess = tf.InteractiveSession()

if model == 'relu':
    act = tf.nn.relu
    act_recon = tf.nn.softplus
elif model == 'sigmoid':
    act = tf.nn.sigmoid
    act_recon = act

print("~~~~~~~~Build net~~~~~~~~~~")

x = tf.placeholder(tf.float32, shape=[None, 784], name='x')
y_ = tf.placeholder(tf.int64, shape=[None], name='y_')


network = tl.layers.InputLayer(x, name='input')

# Denoising layer for the autoencoder: randomly corrupts the input
network = tl.layers.DropoutLayer(network, keep=0.5, name='denoising1')
network = tl.layers.DropoutLayer(network, keep=0.8, name='drop1')

# 1st layer: the first denoising autoencoder
network = tl.layers.DenseLayer(network, n_units=800, act=act, name='dense1')
x_recon1 = network.outputs
recon_layer1 = tl.layers.ReconLayer(network, x_recon=x, n_units=784, act=act_recon, name='recon_layer1')
# 2nd layer: the second denoising autoencoder
network = tl.layers.DropoutLayer(network, keep=0.5, name='drop2')
network = tl.layers.DenseLayer(network, n_units=800, act=act_recon, name=model + '2')
recon_layer2 = tl.layers.ReconLayer(network, x_recon=x_recon1, n_units=800, act=act_recon, name='recon_layer2')
# 3rd layer: the classifier
# network = tl.layers.DropoutLayer(network, keep=0.5, name='drop3')  # the book does not include this dropout layer
network = tl.layers.DenseLayer(network, n_units=10, act=tf.identity, name='output')

# Define the fine-tuning step, i.e. the loss function
y = network.outputs
y_op = tf.argmax(tf.nn.softmax(y), 1)  # not present in the GitHub example
cost = tl.cost.cross_entropy(y, y_, name='cost')

train_params = network.all_params

train_op = tf.train.AdamOptimizer(learning_rate).minimize(cost, var_list=train_params)

# Greedy layer-wise pre-training
sess = tf.InteractiveSession()  # note: a second InteractiveSession (the first was opened above), which triggers the UserWarning seen in the output
tl.layers.initialize_global_variables(sess)

# Pre-train
print("~~~~~~~~~~預訓練前的參數~~~~~~~~~~")
network.print_params()
print("\n~~~~~~~~~~~~~Pre-train Layer 1~~~~~~~~~~~~~")
recon_layer1.pretrain(
    sess, x=x, X_train=X_train, X_val=X_val, denoise_name='denoising1', n_epoch=50, batch_size=128, print_freq=10,
    save=False, save_name='w1pre_')
print("\n~~~~~~~~~~~~~~~Pre-train Layer 2~~~~~~~~~~~")
recon_layer2.pretrain(
    sess, x=x, X_train=X_train, X_val=X_val, denoise_name='denoising1', n_epoch=50, batch_size=128, print_freq=10,
    save=False)

print("~~~~~~~~~~預訓練後的參數~~~~~~~~~~")
network.print_params()

print("\n~~~~~~~~~Fine-tune net 模型微調~~~~~~~~~~~~~~")
correct_prediction = tf.equal(tf.argmax(y, 1), y_)
acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

print('   learning_rate: %f' % learning_rate)
print('   batch_size: %d' % batch_size)

train_acc_list = []
val_acc_list = []

for epoch in range(n_epoch):
    start_time = time.time()
    for X_train_a, y_train_a in tl.iterate.minibatches(X_train, y_train, batch_size, shuffle=True):
        feed_dict = {x: X_train_a, y_: y_train_a}
        feed_dict.update(network.all_drop)  # enable the noise/dropout layers during fine-tuning
        # feed_dict[tl.layers.LayersConfig.set_keep['denoising1']] = 1  # the GitHub example's way of disabling denoising1 while fine-tuning
        # feed_dict[set_keep['denoising1']] = 1  # the book's way; neither this nor the line above would run for me, so after checking the source I changed it to the line below
        feed_dict[tl.layers.core.set_keep['denoising1']] = 1  # force the denoising layer's keep probability to 1 (disable input corruption)
        sess.run(train_op, feed_dict=feed_dict)

    # After each epoch, evaluate on the training set, same as in the MLP example
    if epoch + 1 == 1 or (epoch + 1) % print_freq == 0:
        print("Epoch %d of %d took %fs" % (epoch + 1, n_epoch, time.time() - start_time))
        train_loss, train_acc, n_batch = 0, 0, 0
        for X_train_a, y_train_a in tl.iterate.minibatches(X_train, y_train, batch_size, shuffle=True):
            dp_dict = tl.utils.dict_to_one(network.all_drop)  # disable all dropout layers
            feed_dict = {x: X_train_a, y_: y_train_a}
            feed_dict.update(dp_dict)
            err, ac = sess.run([cost, acc], feed_dict=feed_dict)
            train_loss += err
            train_acc += ac
            n_batch += 1
        print("   train loss: %f" % (train_loss / n_batch))
        print("   train acc: %f" % (train_acc / n_batch))

        train_acc_list.append(train_acc/n_batch)
        # Then evaluate on the validation set, same as in the MLP example
        val_loss, val_acc, n_batch = 0, 0, 0
        for X_val_a, y_val_a in tl.iterate.minibatches(X_val, y_val, batch_size, shuffle=True):
            dp_dict = tl.utils.dict_to_one(network.all_drop)  # disable all dropout layers
            feed_dict = {x: X_val_a, y_: y_val_a}
            feed_dict.update(dp_dict)
            err, ac = sess.run([cost, acc], feed_dict=feed_dict)
            val_loss += err
            val_acc += ac
            n_batch += 1
        print("   val loss: %f" % (val_loss / n_batch))
        print("   val acc: %f" % (val_acc / n_batch))

        val_acc_list.append(val_acc/n_batch)

        # Visualize the first layer's W during fine-tuning
        # tl.visualize.W(network.all_params[0].eval(), second=10,
        #  saveable=True, shape=[28, 28], name='w1_' + str(epoch + 1),
        #  fig_idx=2012)  # the book's version, which differs from the GitHub example;
        #                 # leaving it uncommented can clash with the plt figures below

print('~~~~~~~~~~Evaluation~~~~~~~~~~~~~~~~~')
test_loss, test_acc, n_batch = 0, 0, 0
for X_test_a, y_test_a in tl.iterate.minibatches(X_test, y_test, batch_size, shuffle=True):
    dp_dict = tl.utils.dict_to_one(network.all_drop)  # disable all dropout layers
    feed_dict = {x: X_test_a, y_: y_test_a}
    feed_dict.update(dp_dict)
    err, ac = sess.run([cost, acc], feed_dict=feed_dict)
    test_loss += err
    test_acc += ac
    n_batch += 1
print("   test loss: %f" % (test_loss / n_batch))
print("   test acc: %f" % (test_acc / n_batch))
print("   test acc_numpy: %f" % np.mean(y_test == sess.run(y_op, feed_dict=feed_dict)))

saver = tf.train.Saver()
save_path = saver.save(sess, "./model_relu3.6/relu_3.6.ckpt")
print("Model saved in file: %s" % save_path)
sess.close()

print('~~~~~~~~~~~~~~Plot~~~~~~~~~~~~')

import matplotlib.pyplot as plt

# x = range(n_epoch/print_freq+1)  # the author used Python 2; under Python 3 this must be floor division, otherwise range() raises a TypeError
x = range(n_epoch//print_freq+1)
x = [i*10 for i in x]  # i.e. i * print_freq: the epochs at which accuracy was recorded
print(train_acc_list, val_acc_list)
assert len(x) == len(train_acc_list) and len(x) == len(val_acc_list), 'not in same length'
plt.plot(x, train_acc_list, 'r', label='train')
plt.plot(x, train_acc_list, 'ro')
plt.plot(x, val_acc_list, 'b', label='validate')
plt.plot(x, val_acc_list, 'bo')
plt.title('change of accuracy during training and validation')
plt.xlabel('number of epoch')
plt.ylabel('accuracy of classification')
plt.legend()
plt.show()

print('~~~~~end~~~~~~~~~')
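
A side note on the test acc_numpy line: as the comment in the listing points out, feed_dict at that point still holds only the last test minibatch, so the full y_test array is compared against a single batch of predictions and the result collapses to 0 (that is the 0.000000 in the output below). A minimal sketch of how the whole test set could be scored instead, assuming it runs before sess.close() and that all of X_test fits in memory in one pass:

# hedged sketch: numpy accuracy over the entire test set, with dropout disabled
dp_dict = tl.utils.dict_to_one(network.all_drop)
feed_all = {x: X_test}
feed_all.update(dp_dict)
y_pred = sess.run(y_op, feed_dict=feed_all)  # predicted class for every test image
print("   test acc_numpy: %f" % np.mean(y_test == y_pred))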

There is nothing here that needs special attention, apart from two things: tl.visualize.W and plt sometimes conflict, so the two figures can end up drawn into the same window, and the range computation for the plot has to use floor division under Python 3.
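
If the tl.visualize.W call is left enabled during fine-tuning, one simple way to keep the accuracy curves from being drawn into the same window is to open a dedicated figure for them first. A minimal sketch, assuming nothing beyond standard matplotlib pyplot behaviour:

plt.figure()  # open a fresh figure so the curves do not share the visualize.W window
plt.plot(x, train_acc_list, 'r-o', label='train')
plt.plot(x, val_acc_list, 'b-o', label='validate')
plt.xlabel('number of epoch')
plt.ylabel('accuracy of classification')
plt.legend()
plt.show()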

My home machine could not handle the run, so I ran it on the office computer at work; the output is below:

[TL] Load or Download MNIST > data\mnist
[TL] data\mnist\train-images-idx3-ubyte.gz
[TL] data\mnist\t10k-images-idx3-ubyte.gz
~~~~~~~~Build net~~~~~~~~~~
[TL] InputLayer  input: (?, 784)
[TL] DropoutLayer denoising1: keep:0.500000 is_fix:False
[TL] DropoutLayer drop1: keep:0.800000 is_fix:False
[TL] DenseLayer  dense1: 800 relu
[TL] DenseLayer  recon_layer1: 784 softplus
[TL] recon_layer1 is a ReconLayer
[TL]      lambda_l2_w: 0.004000
[TL]      learning_rate: 0.000100
[TL]      use: mse, L2_w, L1_a
[TL] DropoutLayer drop2: keep:0.500000 is_fix:False
[TL] DenseLayer  relu2: 800 softplus
[TL] DenseLayer  recon_layer2: 800 softplus
[TL] recon_layer2 is a ReconLayer
[TL]      lambda_l2_w: 0.004000
[TL]      learning_rate: 0.000100
[TL]      use: mse, L2_w, L1_a
[TL] DenseLayer  output: 10 identity
C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\client\session.py:1711: UserWarning: An interactive session is already active. This can cause out-of-memory errors in some cases. You must explicitly call `InteractiveSession.close()` to release resources held by the other session(s).
  warnings.warn('An interactive session is already active. This can '
~~~~~~~~~~Parameters before pre-training~~~~~~~~~~
[TL]   param   0: dense1/W:0           (784, 800)         float32_ref (mean: 6.581177876796573e-05, median: 0.00014223418838810176, std: 0.08800094574689865)   
[TL]   param   1: dense1/b:0           (800,)             float32_ref (mean: 0.0               , median: 0.0               , std: 0.0               )   
[TL]   param   2: relu2/W:0            (800, 800)         float32_ref (mean: -1.1675120731524657e-05, median: -0.00010272458894178271, std: 0.08785033971071243)   
[TL]   param   3: relu2/b:0            (800,)             float32_ref (mean: 0.0               , median: 0.0               , std: 0.0               )   
[TL]   param   4: output/W:0           (800, 10)          float32_ref (mean: 1.0434925570734777e-05, median: -0.0008875454077497125, std: 0.08872213959693909)   
[TL]   param   5: output/b:0           (10,)              float32_ref (mean: 0.0               , median: 0.0               , std: 0.0               )   
[TL]   num of params: 1276810

~~~~~~~~~~~~~Pre-train Layer 1~~~~~~~~~~~~~
[TL]      [*] recon_layer1 start pretrain
[TL]      batch_size: 128
[TL]      denoising layer keep: 0.500000
[TL] Epoch 1 of 50 took 17.095223s
[TL]    train loss: 88.028249
[TL]    val loss: 86.245282
[TL] Epoch 10 of 50 took 19.270888s
[TL]    train loss: 36.815318
[TL]    val loss: 36.658337
[TL] Epoch 20 of 50 took 18.525132s
[TL]    train loss: 21.167209
[TL]    val loss: 21.111036
[TL] Epoch 30 of 50 took 16.915757s
[TL]    train loss: 13.224903
[TL]    val loss: 13.263642
[TL] Epoch 40 of 50 took 16.985792s
[TL]    train loss: 10.561048
[TL]    val loss: 10.640262
[TL] Epoch 50 of 50 took 16.940433s
[TL]    train loss: 9.641511
[TL]    val loss: 9.742057

~~~~~~~~~~~~~~~Pre-train Layer 2~~~~~~~~~~~
[TL]      [*] recon_layer2 start pretrain
[TL]      batch_size: 128
[TL]      denoising layer keep: 0.500000
[TL] Epoch 1 of 50 took 20.277748s
[TL]    train loss: 120.322074
[TL]    val loss: 118.862179
[TL] Epoch 10 of 50 took 20.422251s
[TL]    train loss: 29.945802
[TL]    val loss: 29.772525
[TL] Epoch 20 of 50 took 20.163207s
[TL]    train loss: 17.507878
[TL]    val loss: 17.456964
[TL] Epoch 30 of 50 took 20.230780s
[TL]    train loss: 12.182709
[TL]    val loss: 12.152257
[TL] Epoch 40 of 50 took 20.170144s
[TL]    train loss: 10.211082
[TL]    val loss: 10.185099
[TL] Epoch 50 of 50 took 20.179144s
[TL]    train loss: 9.359315
[TL]    val loss: 9.332706
~~~~~~~~~~Parameters after pre-training~~~~~~~~~~
[TL]   param   0: dense1/W:0           (784, 800)         float32_ref (mean: -0.001570492284372449, median: -3.225648334152515e-33, std: 0.02999226562678814)   
[TL]   param   1: dense1/b:0           (800,)             float32_ref (mean: 0.30943357944488525, median: 0.311123251914978 , std: 0.18920551240444183)   
[TL]   param   2: relu2/W:0            (800, 800)         float32_ref (mean: 0.00582640478387475, median: 0.004972907714545727, std: 0.041367869824171066)   
[TL]   param   3: relu2/b:0            (800,)             float32_ref (mean: 0.021519258618354797, median: 0.019698437303304672, std: 0.01693926937878132)   
[TL]   param   4: output/W:0           (800, 10)          float32_ref (mean: 1.0434925570734777e-05, median: -0.0008875454077497125, std: 0.08872213959693909)   
[TL]   param   5: output/b:0           (10,)              float32_ref (mean: 0.0               , median: 0.0               , std: 0.0               )   
[TL]   num of params: 1276810

~~~~~~~~~Fine-tune net~~~~~~~~~~~~~~
   learning_rate: 0.000100
   batch_size: 128
Epoch 1 of 100 took 16.020422s
   train loss: 0.243982
   train acc: 0.927464
   val loss: 0.218726
   val acc: 0.934395
Epoch 10 of 100 took 15.944094s
   train loss: 0.056912
   train acc: 0.983393
   val loss: 0.074763
   val acc: 0.978265
Epoch 20 of 100 took 15.978466s
   train loss: 0.027014
   train acc: 0.992428
   val loss: 0.059782
   val acc: 0.983173
Epoch 30 of 100 took 15.886161s
   train loss: 0.015485
   train acc: 0.995933
   val loss: 0.059139
   val acc: 0.983574
Epoch 40 of 100 took 15.924763s
   train loss: 0.008806
   train acc: 0.997917
   val loss: 0.058602
   val acc: 0.983774
Epoch 50 of 100 took 15.961180s
   train loss: 0.005655
   train acc: 0.999099
   val loss: 0.057137
   val acc: 0.985577
Epoch 60 of 100 took 15.715373s
   train loss: 0.004176
   train acc: 0.999079
   val loss: 0.058809
   val acc: 0.985276
Epoch 70 of 100 took 15.746618s
   train loss: 0.003008
   train acc: 0.999499
   val loss: 0.062058
   val acc: 0.985777
Epoch 80 of 100 took 15.758000s
   train loss: 0.002446
   train acc: 0.999599
   val loss: 0.063915
   val acc: 0.984675
Epoch 90 of 100 took 15.798413s
   train loss: 0.001663
   train acc: 0.999840
   val loss: 0.058098
   val acc: 0.986478
Epoch 100 of 100 took 16.090352s
   train loss: 0.001275
   train acc: 0.999920
   val loss: 0.064153
   val acc: 0.986078
~~~~~~~~~~Evaluation~~~~~~~~~~~~~~~~~
   test loss: 0.056388
   test acc: 0.985677
   test acc_numpy: 0.000000
Model saved in file: ./model_relu3.6/relu_3.6.ckpt
~~~~~~~~~~~~~~Plot~~~~~~~~~~~~
[0.9274639423076924, 0.9833934294871794, 0.9924278846153847, 0.9959334935897436, 0.9979166666666667, 0.9990985576923077, 0.9990785256410256, 0.9994991987179487, 0.9995993589743589, 0.9998397435897436, 0.9999198717948717] [0.934395032051282, 0.9782652243589743, 0.9831730769230769, 0.983573717948718, 0.9837740384615384, 0.9855769230769231, 0.9852764423076923, 0.9857772435897436, 0.9846754807692307, 0.9864783653846154, 0.9860777243589743]
~~~~~end~~~~~~~~~
[Finished in 9525.9s]

 

 
