圖像識別模型優化技巧之 dropout、學習率遞減、L1/L2 正則、batch normalization 的對比

okay,本節在上一節 mnist 圖像識別的基礎上增加一些優化算法,例如 dropout、學習率遞減、L1/L2 正則、batch normalization,用來優化上一節的模型。

 

從以下四個實驗結果我們可以知道,batch normalization 是個利器,直接將模型的分數提高了 0.5 分左右;但是其他幾個優化技巧並沒有對模型產生太大的影響。這很正常,因爲我們可以發現訓練集和測試集的分數幾乎差不多,沒有出現過擬合,而上面介紹的方法除了 batch normalization 之外,其他主要都是用於解決過擬合的問題。okay,本來想把這四個代碼合併爲一個代碼,但是感覺麻煩,就直接分成四份放到最後面了。L2 正則那個我不確定是不是正確的,僅供參考。

實驗結果之使用batch normal對模型的影響

 

 

學習率遞減圖像識別的結果:

學習率遞減

實驗結果之dropout對模型的影響:

 

實驗結果之L2正則對模型的影響:


模型中增加了batch normal的代碼

# -*- coding: utf-8 -*-
# @Time    : 2019/7/1 7:43
# @Author  : YYLin
# @Email   : [email protected]
# @File    : Image_Classification_Mnist_batch_normal.py
# Adds anti-overfitting techniques on top of the previous version: dropout, learning-rate decay, L1/L2 regularization, batch normalization
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Basic sizes: mini-batch size, image geometry and number of classes.
batch_size = 128
img_high, img_width, Channel = 28, 28, 1
label = 10
dataset_name = 'mnist'
# Accumulates one accuracy value per evaluated test batch.
avg_of_test = []
# Read the MNIST data from ../Dataset/mnist_data with one-hot encoded labels.
mnist = input_data.read_data_sets("../Dataset/mnist_data", one_hot=True)

# Placeholders for one batch of images and the matching one-hot labels.
inputs = tf.placeholder(dtype=tf.float32,
                        shape=[batch_size, img_high, img_width, Channel],
                        name='inputs')
y = tf.placeholder(tf.float32, [batch_size, label], name='label')


# Convolution layer helper; by default the kernel is 5x5 and the stride is 2x2.
def conv2d(input_, output_dim, k_h=5, k_w=5, d_h=2, d_w=2, stddev=0.02, name="conv2d"):
    """Apply a SAME-padded 2-D convolution plus bias inside variable scope `name`.

    Args:
        input_: input tensor; its last dimension is used as the input channel count.
        output_dim: number of output channels.
        k_h, k_w: kernel height and width.
        d_h, d_w: stride along height and width.
        stddev: standard deviation of the truncated-normal weight initializer.
        name: variable scope that holds the 'w' and 'biases' variables.

    Returns:
        The convolution output with the bias added.
    """
    with tf.variable_scope(name):
        w = tf.get_variable('w', [k_h, k_w, input_.get_shape()[-1], output_dim],
                            initializer=tf.truncated_normal_initializer(stddev=stddev))
        conv = tf.nn.conv2d(input_, w, strides=[1, d_h, d_w, 1], padding='SAME')

        biases = tf.get_variable('biases', [output_dim], initializer=tf.constant_initializer(0.0))
        # bias_add already returns a tensor shaped like `conv`. The former reshape to
        # conv.get_shape() was a no-op that broke when the batch dimension was unknown.
        return tf.nn.bias_add(conv, biases)


# Leaky ReLU activation.
def lrelu(x, leak=0.2, name="lrelu"):
    """Return max(x, leak * x) element-wise; `name` is accepted but not used."""
    leaky_branch = leak * x
    return tf.maximum(x, leaky_branch)


# Fully connected (dense) layer helper.
def linear(input_, output_size, scope=None, stddev=0.02, bias_start=0.0, with_w=False):
    """Compute `input_ @ Matrix + bias` inside variable scope `scope` (default "Linear").

    Args:
        input_: input tensor; the static size of its dimension 1 sets the weight rows.
        output_size: number of output units.
        scope: variable scope name; "Linear" is used when it is falsy.
        stddev: standard deviation of the normal weight initializer.
        bias_start: constant used to initialize the bias.
        with_w: when true, also return the weight matrix and the bias variable.

    Returns:
        The output tensor, or the tuple (output, Matrix, bias) when `with_w` is true.
    """
    in_features = input_.get_shape().as_list()[1]
    with tf.variable_scope(scope or "Linear"):
        matrix = tf.get_variable(name="Matrix", shape=[in_features, output_size], dtype=tf.float32,
                                 initializer=tf.random_normal_initializer(stddev=stddev))
        bias = tf.get_variable(name="bias", shape=[output_size],
                               initializer=tf.constant_initializer(bias_start))
        if not with_w:
            return tf.matmul(input_, matrix) + bias
        return tf.matmul(input_, matrix) + bias, matrix, bias


# Batch normalization, the addition relative to the first version of the model.
def bn(x, is_training, scope, axis=-1):
    """Batch-normalize `x` along `axis` (momentum 0.9, epsilon 1e-5) under the name `scope`.

    `is_training` is passed straight through as the layer's `training` argument.
    """
    return tf.layers.batch_normalization(
        x,
        axis=axis,
        momentum=0.9,
        epsilon=1e-5,
        training=is_training,
        name=scope,
    )


# Classifier: two strided conv layers and one hidden dense layer, each followed by
# batch normalization and leaky ReLU, then a 10-unit linear output layer.
def classifier(x, is_training=True):
    """Return the un-normalized class scores (logits) for the image batch `x`.

    Args:
        x: image batch tensor fed to the first convolution.
        is_training: forwarded to every batch-normalization layer.

    Returns:
        Tensor with 10 scores per example; no softmax is applied here.
    """
    conv1 = conv2d(x, 64, 4, 4, 2, 2, name='d_conv1' + '_' + dataset_name)
    net = lrelu(bn(conv1, is_training=is_training, scope='d_bn1'))

    conv2 = conv2d(net, 128, 4, 4, 2, 2, name='d_conv2' + '_' + dataset_name)
    net = lrelu(bn(conv2, is_training=is_training, scope='d_bn2'))

    # Flatten the feature maps to one vector per example for the dense layers.
    net = tf.reshape(net, [batch_size, -1])

    fc3 = linear(net, 1024, scope='d_fc3' + '_' + dataset_name)
    net = lrelu(bn(fc3, is_training=is_training, scope='d_bn3'))

    return linear(net, 10, scope='d_fc5' + '_' + dataset_name)


# Build the classifier. `is_training` defaults to True so training feeds need not set it;
# evaluation on the test set feeds False so batch normalization uses its moving averages.
is_training = tf.placeholder_with_default(True, shape=[], name='is_training')
y_pred = classifier(inputs, is_training=is_training)
y_conv = tf.nn.softmax(y_pred)

# Learning-rate decay: the rate shrinks by a factor of 0.99 per 20 optimizer steps.
global_step = tf.Variable(0, trainable=False)
initial_learning_rate = 0.01
learning_rate = tf.train.exponential_decay(initial_learning_rate,
                                           global_step,
                                           decay_steps=20, decay_rate=0.99)

# Summed cross-entropy computed from the logits. Taking log(softmax(.)) by hand
# produces NaN as soon as one probability underflows to 0.
cross_entropy = tf.reduce_sum(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=y_pred))

# tf.layers.batch_normalization registers its moving mean/variance updates in UPDATE_OPS;
# they only run if the train op depends on them. Passing global_step to minimize() makes
# the optimizer increment it, which is what actually drives the decay schedule.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy,
                                                                global_step=global_step)

correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

# Train for one pass over the training set, then score the test set batch by batch.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(mnist.train.num_examples//batch_size):
        img, img_label = mnist.train.next_batch(batch_size)
        # next_batch returns flat vectors; restore the image layout the placeholder expects.
        img = img.reshape([batch_size, 28, 28, 1])
        if i % 20 == 0:
            train_accuracy = accuracy.eval(feed_dict={inputs: img, y: img_label})
            print("step %d, training accuracy %g" % (i, train_accuracy))
        train_step.run(feed_dict={inputs: img, y: img_label})

    for test in range(mnist.test.num_examples//batch_size):
        img_test, img_test_label = mnist.test.next_batch(batch_size)
        img_test = img_test.reshape([batch_size, 28, 28, 1])
        test_score = accuracy.eval(feed_dict={inputs: img_test, y: img_test_label,
                                              is_training: False})

        avg_of_test.append(test_score)

# Mean of the per-batch test accuracies.
all_score = sum(avg_of_test)
print("使用batch normal測試集的平均分是:", (all_score/(len(avg_of_test))))

 

模型中增加了學習率遞減的代碼

# -*- coding: utf-8 -*-
# @Time    : 2019/7/1 6:11
# @Author  : YYLin
# @Email   : [email protected]
# @File    : Image_Classification_Mnist_Degrad_learning.py
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Script-wide constants: batch size, image height/width/channels, class count.
batch_size = 128
img_high = img_width = 28
Channel = 1
label = 10
dataset_name = 'mnist'
# Filled later with one accuracy value per test batch.
avg_of_test = []
# Load MNIST from ../Dataset/mnist_data; labels come back one-hot encoded.
mnist = input_data.read_data_sets("../Dataset/mnist_data", one_hot=True)

# Input placeholders: a fixed-size image batch and its one-hot labels.
inputs = tf.placeholder(
    tf.float32,
    shape=[batch_size, img_high, img_width, Channel],
    name='inputs',
)
y = tf.placeholder(
    dtype=tf.float32,
    shape=[batch_size, label],
    name='label',
)


# Convolution layer helper; by default the kernel is 5x5 and the stride is 2x2.
def conv2d(input_, output_dim, k_h=5, k_w=5, d_h=2, d_w=2, stddev=0.02, name="conv2d"):
    """Apply a SAME-padded 2-D convolution plus bias inside variable scope `name`.

    Args:
        input_: input tensor; its last dimension is used as the input channel count.
        output_dim: number of output channels.
        k_h, k_w: kernel height and width.
        d_h, d_w: stride along height and width.
        stddev: standard deviation of the truncated-normal weight initializer.
        name: variable scope that holds the 'w' and 'biases' variables.

    Returns:
        The convolution output with the bias added.
    """
    with tf.variable_scope(name):
        w = tf.get_variable('w', [k_h, k_w, input_.get_shape()[-1], output_dim],
                            initializer=tf.truncated_normal_initializer(stddev=stddev))
        conv = tf.nn.conv2d(input_, w, strides=[1, d_h, d_w, 1], padding='SAME')

        biases = tf.get_variable('biases', [output_dim], initializer=tf.constant_initializer(0.0))
        # bias_add already returns a tensor shaped like `conv`. The former reshape to
        # conv.get_shape() was a no-op that broke when the batch dimension was unknown.
        return tf.nn.bias_add(conv, biases)


# Leaky ReLU activation.
def lrelu(x, leak=0.2, name="lrelu"):
    """Element-wise maximum of `x` and `leak * x`. The `name` argument is ignored."""
    return tf.maximum(
        x,
        leak * x,
    )


# Fully connected (dense) layer helper.
def linear(input_, output_size, scope=None, stddev=0.02, bias_start=0.0, with_w=False):
    """Affine layer: multiply `input_` by a learned matrix and add a learned bias.

    Args:
        input_: input tensor; the static size of its dimension 1 sets the weight rows.
        output_size: number of output units.
        scope: variable scope name; falls back to "Linear" when falsy.
        stddev: standard deviation of the normal weight initializer.
        bias_start: constant initial value of the bias.
        with_w: if true, return the variables together with the output.

    Returns:
        The output tensor, or (output, Matrix, bias) when `with_w` is true.
    """
    dims = input_.get_shape().as_list()
    weight_init = tf.random_normal_initializer(stddev=stddev)
    bias_init = tf.constant_initializer(bias_start)
    with tf.variable_scope(scope or "Linear"):
        matrix = tf.get_variable("Matrix", [dims[1], output_size], tf.float32, weight_init)
        bias = tf.get_variable("bias", [output_size], initializer=bias_init)
        projected = tf.matmul(input_, matrix) + bias
    if with_w:
        return projected, matrix, bias
    return projected


# Classifier: two strided conv layers, one hidden dense layer, then the output layer.
def classifier(x):
    """Return the un-normalized class scores (logits) for the image batch `x`.

    Every hidden layer is followed by a leaky ReLU; the final 10-unit linear layer is
    returned without softmax.
    """
    conv1_out = conv2d(x, 64, 4, 4, 2, 2, name='d_conv1' + '_' + dataset_name)
    hidden = lrelu(conv1_out)

    conv2_out = conv2d(hidden, 128, 4, 4, 2, 2, name='d_conv2' + '_' + dataset_name)
    hidden = lrelu(conv2_out)

    # One flat feature vector per example for the dense layers.
    flat = tf.reshape(hidden, [batch_size, -1])

    dense_out = linear(flat, 1024, scope='d_fc3' + '_' + dataset_name)
    hidden = lrelu(dense_out)

    return linear(hidden, 10, scope='d_fc5' + '_' + dataset_name)


# Build the classifier and the class probabilities.
y_pred = classifier(inputs)
y_conv = tf.nn.softmax(y_pred)

# Learning-rate decay: the rate shrinks by a factor of 0.99 per 20 optimizer steps.
global_step = tf.Variable(0, trainable=False)
initial_learning_rate = 0.01
learning_rate = tf.train.exponential_decay(initial_learning_rate,
                                           global_step,
                                           decay_steps=20, decay_rate=0.99)

# Summed cross-entropy computed from the logits. Taking log(softmax(.)) by hand
# produces NaN as soon as one probability underflows to 0.
cross_entropy = tf.reduce_sum(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=y_pred))

# The optimizer must use the decayed rate and increment global_step itself.
# Previously it ran at a constant 1e-4 and global_step stayed at 0, so the schedule
# defined above had no effect on training.
train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy,
                                                            global_step=global_step)

correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

# Train for one pass over the training set, then score the test set batch by batch.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(mnist.train.num_examples//batch_size):
        img, img_label = mnist.train.next_batch(batch_size)
        # next_batch returns flat vectors; restore the image layout the placeholder expects.
        img = img.reshape([batch_size, 28, 28, 1])
        if i % 20 == 0:
            train_accuracy = accuracy.eval(feed_dict={inputs: img, y: img_label})
            print("step %d, training accuracy %g" % (i, train_accuracy))
        train_step.run(feed_dict={inputs: img, y: img_label})

    for test in range(mnist.test.num_examples//batch_size):
        img_test, img_test_label = mnist.test.next_batch(batch_size)
        img_test = img_test.reshape([batch_size, 28, 28, 1])
        test_score = accuracy.eval(feed_dict={inputs: img_test, y: img_test_label})

        avg_of_test.append(test_score)

# Mean of the per-batch test accuracies.
all_score = sum(avg_of_test)
print("使用學習率遞減之後測試集的平均分是:", (all_score/(len(avg_of_test))))

 

模型中增加了dropout的代碼

# -*- coding: utf-8 -*-
# @Time    : 2019/7/1 17:13
# @Author  : YYLin
# @Email   : [email protected]
# @File    : Image_Classification_Mnist_Dropout.py
# 相對第一版本增加了dropout技術
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Global settings: batch size, input image geometry and number of classes.
batch_size = 128
img_high, img_width = 28, 28
Channel = 1
label = 10
dataset_name = 'mnist'
# List that receives the accuracy of every test batch.
avg_of_test = []
# Load MNIST from ../Dataset/mnist_data with one-hot labels.
mnist = input_data.read_data_sets("../Dataset/mnist_data", one_hot=True)

# Placeholders: image batch, one-hot labels, and the dropout keep probability.
image_shape = [batch_size, img_high, img_width, Channel]
inputs = tf.placeholder(tf.float32, image_shape, name='inputs')
y = tf.placeholder(tf.float32, shape=[batch_size, label], name='label')
keep_prob = tf.placeholder(dtype="float")


# Convolution layer helper; by default the kernel is 5x5 and the stride is 2x2.
def conv2d(input_, output_dim, k_h=5, k_w=5, d_h=2, d_w=2, stddev=0.02, name="conv2d"):
    """Apply a SAME-padded 2-D convolution plus bias inside variable scope `name`.

    Args:
        input_: input tensor; its last dimension is used as the input channel count.
        output_dim: number of output channels.
        k_h, k_w: kernel height and width.
        d_h, d_w: stride along height and width.
        stddev: standard deviation of the truncated-normal weight initializer.
        name: variable scope that holds the 'w' and 'biases' variables.

    Returns:
        The convolution output with the bias added.
    """
    with tf.variable_scope(name):
        w = tf.get_variable('w', [k_h, k_w, input_.get_shape()[-1], output_dim],
                            initializer=tf.truncated_normal_initializer(stddev=stddev))
        conv = tf.nn.conv2d(input_, w, strides=[1, d_h, d_w, 1], padding='SAME')

        biases = tf.get_variable('biases', [output_dim], initializer=tf.constant_initializer(0.0))
        # bias_add already returns a tensor shaped like `conv`. The former reshape to
        # conv.get_shape() was a no-op that broke when the batch dimension was unknown.
        return tf.nn.bias_add(conv, biases)


# Leaky ReLU activation.
def lrelu(x, leak=0.2, name="lrelu"):
    """Keep `x` where it exceeds `leak * x`, otherwise use `leak * x`; `name` is unused."""
    scaled = leak * x
    activated = tf.maximum(x, scaled)
    return activated


# Fully connected (dense) layer helper.
def linear(input_, output_size, scope=None, stddev=0.02, bias_start=0.0, with_w=False):
    """Dense layer computing `tf.matmul(input_, Matrix) + bias`.

    Args:
        input_: input tensor; the static size of its dimension 1 sets the weight rows.
        output_size: number of output units.
        scope: variable scope name, "Linear" when not given.
        stddev: standard deviation of the normal weight initializer.
        bias_start: constant initial value of the bias.
        with_w: whether to return the weight and bias variables as well.

    Returns:
        The output tensor alone, or (output, Matrix, bias) when `with_w` is true.
    """
    _, n_in = input_.get_shape().as_list()[:2]
    with tf.variable_scope(scope or "Linear"):
        matrix = tf.get_variable(
            "Matrix", [n_in, output_size], tf.float32,
            tf.random_normal_initializer(stddev=stddev))
        bias = tf.get_variable(
            "bias", [output_size],
            initializer=tf.constant_initializer(bias_start))
        result = tf.matmul(input_, matrix) + bias
        return (result, matrix, bias) if with_w else result


# Classifier: two strided conv layers, one hidden dense layer with dropout, output layer.
def classifier(x, keep_prob=0.5):
    """Return the un-normalized class scores (logits) for the image batch `x`.

    Args:
        x: image batch tensor fed to the first convolution.
        keep_prob: keep probability handed to tf.nn.dropout after the hidden dense layer.

    Returns:
        Output of the final 10-unit linear layer; no softmax is applied here.
    """
    features = lrelu(conv2d(x, 64, 4, 4, 2, 2, name='d_conv1' + '_' + dataset_name))
    features = lrelu(conv2d(features, 128, 4, 4, 2, 2, name='d_conv2' + '_' + dataset_name))

    # One flat feature vector per example for the dense layers.
    flat = tf.reshape(features, [batch_size, -1])
    hidden = lrelu(linear(flat, 1024, scope='d_fc3' + '_' + dataset_name))

    # Dropout sits between the hidden dense layer and the output layer.
    dropped = tf.nn.dropout(hidden, keep_prob)

    return linear(dropped, 10, scope='d_fc5' + '_' + dataset_name)


# Build the classifier and the class probabilities.
y_pred = classifier(inputs, keep_prob)
y_conv = tf.nn.softmax(y_pred)

# Summed cross-entropy computed from the logits. Taking log(softmax(.)) by hand
# produces NaN as soon as one probability underflows to 0.
cross_entropy = tf.reduce_sum(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=y_pred))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

# Despite its name, dropout_rate is fed to `keep_prob`, i.e. it is the probability of
# keeping a unit during training. keep_prob_rate = 1.0 disables dropout for evaluation.
dropout_rate = 0.5
keep_prob_rate = 1.0

# Train for one pass over the training set, then score the test set batch by batch.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(mnist.train.num_examples//batch_size):
        img, img_label = mnist.train.next_batch(batch_size)
        # next_batch returns flat vectors; restore the image layout the placeholder expects.
        img = img.reshape([batch_size, 28, 28, 1])
        if i % 20 == 0:
            # Accuracy is measured with dropout switched off; with units randomly dropped
            # the reported training accuracy would understate the model.
            train_accuracy = accuracy.eval(feed_dict={inputs: img, y: img_label,
                                                      keep_prob: keep_prob_rate})
            print("step %d, training accuracy %g" % (i, train_accuracy))
        train_step.run(feed_dict={inputs: img, y: img_label, keep_prob: dropout_rate})

    for test in range(mnist.test.num_examples//batch_size):
        img_test, img_test_label = mnist.test.next_batch(batch_size)
        img_test = img_test.reshape([batch_size, 28, 28, 1])
        test_score = accuracy.eval(feed_dict={inputs: img_test, y: img_test_label,
                                              keep_prob: keep_prob_rate})

        avg_of_test.append(test_score)

# Mean of the per-batch test accuracies.
all_score = sum(avg_of_test)
print("使用dropout之後測試集的平均分是:", (all_score/(len(avg_of_test))))

 

模型中增加使用l2正則化之後的結果:

# -*- coding: utf-8 -*-
# @Time    : 2019/7/1 17:24
# @Author  : YYLin
# @Email   : [email protected]
# @File    : Image_Classification_Mnist_l2_Regular.py
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Model/data constants: batch size, image height, width, channels and class count.
batch_size = 128
img_high = 28
img_width = 28
Channel = 1
label = 10
dataset_name = 'mnist'
# Holds one accuracy value per evaluated test batch.
avg_of_test = []
# Load MNIST from ../Dataset/mnist_data with one-hot labels.
mnist = input_data.read_data_sets("../Dataset/mnist_data", one_hot=True)

# Placeholders for the image batch and its one-hot labels.
inputs = tf.placeholder(name='inputs', dtype=tf.float32,
                        shape=[batch_size, img_high, img_width, Channel])
y = tf.placeholder(name='label', dtype=tf.float32, shape=[batch_size, label])

# L2 regularizer with scale `reg`, handed to tf.get_variable when variables are created.
reg = 0.01
regularizer = tf.contrib.layers.l2_regularizer(scale=reg)


# Convolution layer helper; by default the kernel is 5x5 and the stride is 2x2.
# Both variables are created with the module-level L2 `regularizer`.
def conv2d(input_, output_dim, k_h=5, k_w=5, d_h=2, d_w=2, stddev=0.02, name="conv2d"):
    """Apply a SAME-padded 2-D convolution plus bias inside variable scope `name`.

    Args:
        input_: input tensor; its last dimension is used as the input channel count.
        output_dim: number of output channels.
        k_h, k_w: kernel height and width.
        d_h, d_w: stride along height and width.
        stddev: standard deviation of the truncated-normal weight initializer.
        name: variable scope that holds the 'w' and 'biases' variables.

    Returns:
        The convolution output with the bias added.
    """
    with tf.variable_scope(name):
        w = tf.get_variable('w', [k_h, k_w, input_.get_shape()[-1], output_dim], regularizer=regularizer,
                            initializer=tf.truncated_normal_initializer(stddev=stddev))
        conv = tf.nn.conv2d(input_, w, strides=[1, d_h, d_w, 1], padding='SAME')

        # NOTE(review): the bias is regularized as well; biases are commonly left out of
        # weight decay. Kept as in the original, confirm this is intended.
        biases = tf.get_variable('biases', [output_dim], initializer=tf.constant_initializer(0.0),
                                 regularizer=regularizer)
        # bias_add already returns a tensor shaped like `conv`. The former reshape to
        # conv.get_shape() was a no-op that broke when the batch dimension was unknown.
        return tf.nn.bias_add(conv, biases)


# Leaky ReLU activation.
def lrelu(x, leak=0.2, name="lrelu"):
    """Compute max(x, leak * x) element-wise. `name` is part of the signature but unused."""
    slope_part = leak * x
    return tf.maximum(x=x, y=slope_part)


# Fully connected (dense) layer helper; the weight matrix is L2-regularized.
def linear(input_, output_size, scope=None, stddev=0.02, bias_start=0.0, with_w=False):
    """Compute `input_ @ Matrix + bias` inside variable scope `scope` (default "Linear").

    Args:
        input_: input tensor; the static size of its dimension 1 sets the weight rows.
        output_size: number of output units.
        scope: variable scope name; "Linear" is used when it is falsy.
        stddev: standard deviation of the normal weight initializer.
        bias_start: constant used to initialize the bias.
        with_w: when true, also return the weight matrix and the bias variable.

    Returns:
        The output tensor, or the tuple (output, Matrix, bias) when `with_w` is true.
    """
    shape = input_.get_shape().as_list()
    with tf.variable_scope(scope or "Linear"):
        # Attach the module-level L2 regularizer to the dense weights as well. Before,
        # only the conv layers contributed to REGULARIZATION_LOSSES, so the dense
        # weights were never penalized.
        matrix = tf.get_variable("Matrix", [shape[1], output_size], tf.float32,
                                 tf.random_normal_initializer(stddev=stddev),
                                 regularizer=regularizer)
        bias = tf.get_variable("bias", [output_size], initializer=tf.constant_initializer(bias_start))
        output = tf.matmul(input_, matrix) + bias
        if with_w:
            return output, matrix, bias
        return output


# Classifier: two strided conv layers, one hidden dense layer, then the output layer.
def classifier(x):
    """Return the un-normalized class scores (logits) for the image batch `x`.

    Each hidden layer is followed by a leaky ReLU; the final 10-unit linear layer is
    returned without softmax.
    """
    # (output channels, variable-scope prefix) of the two 4x4, stride-2 conv layers.
    conv_specs = [(64, 'd_conv1'), (128, 'd_conv2')]
    net = x
    for channels, prefix in conv_specs:
        net = lrelu(conv2d(net, channels, 4, 4, 2, 2, name=prefix + '_' + dataset_name))

    # One flat feature vector per example for the dense layers.
    net = tf.reshape(net, [batch_size, -1])

    net = lrelu(linear(net, 1024, scope='d_fc1' + '_' + dataset_name))

    logits = linear(net, 10, scope='d_fc2' + '_' + dataset_name)
    return logits


# Build the classifier and the class probabilities.
y_pred = classifier(inputs)
y_conv = tf.nn.softmax(y_pred)

# Sum of every regularization term registered while the variables were created.
reg_set = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
regularization_loss = tf.add_n(reg_set)

# Total loss = summed cross-entropy + L2 penalty. The cross-entropy is computed from the
# logits, because log(softmax(.)) by hand produces NaN once a probability underflows to 0.
cross_entropy = tf.reduce_sum(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=y_pred)) + regularization_loss

train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

# Train for one pass over the training set, then score the test set batch by batch.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(mnist.train.num_examples//batch_size):
        img, img_label = mnist.train.next_batch(batch_size)
        # next_batch returns flat vectors; restore the image layout the placeholder expects.
        img = img.reshape([batch_size, 28, 28, 1])
        if i % 20 == 0:
            train_accuracy = accuracy.eval(feed_dict={inputs: img, y: img_label})
            print("step %d, training accuracy %g" % (i, train_accuracy))
        train_step.run(feed_dict={inputs: img, y: img_label})

    for test in range(mnist.test.num_examples//batch_size):
        img_test, img_test_label = mnist.test.next_batch(batch_size)
        img_test = img_test.reshape([batch_size, 28, 28, 1])
        test_score = accuracy.eval(feed_dict={inputs: img_test, y: img_test_label})

        avg_of_test.append(test_score)

# Mean of the per-batch test accuracies.
all_score = sum(avg_of_test)
print("增加了l2正則化之後測試集的平均分是:", (all_score/(len(avg_of_test))))




 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章