本章節我們進入卷積神經網絡的實現:LeNet-5 可以說是卷積神經網絡的入門級模型,由卷積層塊和全連接層塊兩個部分組成。
利用 TF-2.x 可以很方便地構建模型,構建模型的主要方法如下:
def model():
    """Build the LeNet-5 network: two conv+pool stages followed by
    three dense layers (120 -> 84 -> 10), all with sigmoid activations."""
    layers = tf.keras.layers
    lenet = tf.keras.Sequential([
        layers.Conv2D(filters=6, kernel_size=5,
                      activation='sigmoid', input_shape=(28, 28, 1)),
        layers.MaxPool2D(pool_size=2, strides=2),
        layers.Conv2D(filters=16, kernel_size=5, activation='sigmoid'),
        layers.MaxPool2D(pool_size=2, strides=2),
        layers.Flatten(),
        layers.Dense(120, activation='sigmoid'),
        layers.Dense(84, activation='sigmoid'),
        layers.Dense(10, activation='sigmoid'),
    ])
    return lenet
本章主要使用兩種訓練方法來實現LeNet-5的調參訓練:
1)自定義方法
import tensorflow as tf
from tensorflow.keras.datasets import fashion_mnist
from tensorflow import data as tfdata
import numpy as np
# 1. Load the Fashion-MNIST dataset and scale pixel values into [0, 1].
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
# Append a single channel axis: (N, 28, 28) -> (N, 28, 28, 1).
train_images = tf.reshape(x_train, (*x_train.shape, 1))
test_images = tf.reshape(x_test, (*x_test.shape, 1))
# 2. Wrap the arrays in tf.data pipelines; shuffle the training set.
batch_size = 64
train_dataset = tfdata.Dataset.from_tensor_slices(
    (train_images, y_train)).shuffle(train_images.shape[0])
test_dataset = tfdata.Dataset.from_tensor_slices((test_images, y_test))
train_iter = train_dataset.batch(batch_size)
test_iter = test_dataset.batch(batch_size)
# Sanity check: print the shape of one batch.
for x, y in train_iter:
    print(x.shape, y.shape)
    break
# Define the LeNet-5 model.
def model():
    """Return LeNet-5 as a Keras Sequential model
    (conv-pool x2, then 120/84/10 dense, all sigmoid)."""
    lenet_layers = [
        tf.keras.layers.Conv2D(filters=6, kernel_size=5,
                               activation='sigmoid', input_shape=(28, 28, 1)),
        tf.keras.layers.MaxPool2D(pool_size=2, strides=2),
        tf.keras.layers.Conv2D(filters=16, kernel_size=5, activation='sigmoid'),
        tf.keras.layers.MaxPool2D(pool_size=2, strides=2),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(120, activation='sigmoid'),
        tf.keras.layers.Dense(84, activation='sigmoid'),
        tf.keras.layers.Dense(10, activation='sigmoid'),
    ]
    net = tf.keras.Sequential()
    for layer in lenet_layers:
        net.add(layer)
    return net

net = model()
net.summary()
'''-------第一種------'''
# Loss function for the custom training loop.
def loss_fu(y_pred, y_true):
    """Per-example sparse categorical cross-entropy.

    `y_true` holds integer class ids; `y_pred` holds the network outputs
    (treated as probabilities, since `from_logits` defaults to False).
    """
    return tf.losses.sparse_categorical_crossentropy(y_true, y_pred)
# Optimizer (NOTE(review): re-created with learning_rate=0.9 right before
# the training loop below, so this lr=0.03 instance is never used to train).
trainer = tf.keras.optimizers.SGD(learning_rate=0.03)

# Accuracy metric.
def accuracy(y_hat, y):
    """Fraction of rows in `y_hat` whose argmax matches the label in `y`."""
    predicted = tf.cast(tf.argmax(y_hat, axis=1), tf.int32)
    return np.mean(predicted == tf.cast(y, tf.int32))
# Manual mini-batch SGD update (alternative to the Keras optimizer).
def sgd(params, lr, batch_size, grads):
    """Apply one in-place SGD step: param -= lr * grad / batch_size.

    Args:
        params: sequence of variables supporting ``assign_sub``.
        lr: learning rate.
        batch_size: mini-batch size used to average the summed gradients.
        grads: gradients aligned index-by-index with ``params``.
    """
    # Fix: the original printed every parameter tensor on every step,
    # flooding stdout during training; the debug print is removed.
    for param, grad in zip(params, grads):
        param.assign_sub(lr * grad / batch_size)
# Mean accuracy over the batches of the test iterator.
def evaluate_accuracy(test_iter, net):
    """Average the per-batch accuracy of `net` over every batch in `test_iter`."""
    total, batches = 0.0, 0
    for batch_x, batch_y in test_iter:
        batch_x = tf.cast(batch_x, tf.float32)
        batch_y = tf.cast(batch_y, dtype=tf.int32)
        batches += 1
        total += accuracy(net(batch_x), batch_y)
    return total / batches
# Training with a custom loop.
# tf.GradientTape records the forward pass; tape.gradient returns the
# gradients of the loss w.r.t. each trainable variable, and
# trainer.apply_gradients performs one optimizer step.
params = net.trainable_variables  # variables updated during training
# NOTE(review): this re-creates the optimizer, discarding the lr=0.03 one
# defined earlier — lr=0.9 is what the loop below actually uses.
trainer = tf.keras.optimizers.SGD(learning_rate=0.9)
num_epochs = 50
for epoch in range(num_epochs):
    train_acc_sum, train_loss, num_count = 0.0, 0, 0
    for X, y in train_iter:
        X = tf.cast(X, dtype=tf.float32)
        y = tf.cast(y, dtype=tf.int32)
        num_count += 1  # number of batches consumed this epoch
        with tf.GradientTape() as tape:
            # watch() is redundant for trainable variables but harmless.
            tape.watch(params)
            batch_loss = tf.reduce_mean(loss_fu(net(X, training=True), y))
        # Gradients of the mean batch loss w.r.t. every trainable variable.
        grads = tape.gradient(batch_loss, params)
        # Alternative manual update: sgd(params, 0.01, batch_size, grads)
        trainer.apply_gradients(zip(grads, params))
        train_acc_sum += accuracy(net(X), y)
        train_loss += batch_loss.numpy()
    test_acc = evaluate_accuracy(test_iter, net)
    print("epoch %d, train loss:%.6f, train acc:%.5f, test acc:%.5f" % (
        epoch, train_loss / num_count, train_acc_sum / num_count, test_acc))
訓練結果:
2)Keras高階API
這種方法就比較簡單了,幾行代碼就能解決:
import tensorflow as tf
from tensorflow.keras.datasets import fashion_mnist
from tensorflow import data as tfdata
import numpy as np
# 1. Load Fashion-MNIST and normalize pixel values into [0, 1].
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
# Reshape to NHWC by appending a single channel dimension.
train_images = tf.reshape(x_train, x_train.shape + (1,))
test_images = tf.reshape(x_test, x_test.shape + (1,))
# 2. Build batched tf.data pipelines (training data is shuffled).
batch_size = 64
train_slices = tfdata.Dataset.from_tensor_slices((train_images, y_train))
train_dataset = train_slices.shuffle(train_images.shape[0])
test_dataset = tfdata.Dataset.from_tensor_slices((test_images, y_test))
train_iter = train_dataset.batch(batch_size)
test_iter = test_dataset.batch(batch_size)
# Peek at one batch to confirm the shapes.
for x, y in train_iter:
    print(x.shape, y.shape)
    break
# Define the LeNet-5 model.
def model():
    """Construct LeNet-5 as a Keras Sequential model built from a layer list."""
    net = tf.keras.Sequential([
        tf.keras.layers.Conv2D(filters=6, kernel_size=5,
                               activation='sigmoid', input_shape=(28, 28, 1)),
        tf.keras.layers.MaxPool2D(pool_size=2, strides=2),
        tf.keras.layers.Conv2D(filters=16, kernel_size=5, activation='sigmoid'),
        tf.keras.layers.MaxPool2D(pool_size=2, strides=2),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(120, activation='sigmoid'),
        tf.keras.layers.Dense(84, activation='sigmoid'),
        tf.keras.layers.Dense(10, activation='sigmoid'),
    ])
    return net

net = model()
net.summary()
# Method 2: compile + fit via the high-level Keras API.
optimizer = tf.keras.optimizers.SGD(learning_rate=0.9, momentum=0.0, nesterov=False)
net.compile(loss='sparse_categorical_crossentropy',
            optimizer=optimizer,
            metrics=['accuracy'])
# fit() batches internally (default batch_size=32) and validates on the
# held-out test split after every epoch.
net.fit(train_images, y_train, epochs=10,
        validation_data=(test_images, y_test))
訓練結果:
附上本章節所有代碼:
import tensorflow as tf
from tensorflow.keras.datasets import fashion_mnist
from tensorflow import data as tfdata
import numpy as np
# 1. Load the dataset and rescale pixels to the unit interval.
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
# Add a trailing channel axis so images become (N, 28, 28, 1).
train_images = tf.expand_dims(x_train, axis=-1)
test_images = tf.expand_dims(x_test, axis=-1)
# 2. Build the tf.data input pipelines; only the training set is shuffled.
batch_size = 64
train_dataset = tfdata.Dataset.from_tensor_slices(
    (train_images, y_train)).shuffle(train_images.shape[0])
test_dataset = tfdata.Dataset.from_tensor_slices((test_images, y_test))
train_iter = train_dataset.batch(batch_size)
test_iter = test_dataset.batch(batch_size)
# Print the shape of a single batch as a quick sanity check.
for x, y in train_iter:
    print(x.shape, y.shape)
    break
# Define the LeNet-5 model.
def model():
    """Build the classic LeNet-5 architecture with sigmoid activations."""
    L = tf.keras.layers
    net = tf.keras.Sequential()
    net.add(L.Conv2D(filters=6, kernel_size=5,
                     activation='sigmoid', input_shape=(28, 28, 1)))
    net.add(L.MaxPool2D(pool_size=2, strides=2))
    net.add(L.Conv2D(filters=16, kernel_size=5, activation='sigmoid'))
    net.add(L.MaxPool2D(pool_size=2, strides=2))
    net.add(L.Flatten())
    # Classifier head: 120 -> 84 -> 10 dense layers.
    for units in (120, 84, 10):
        net.add(L.Dense(units, activation='sigmoid'))
    return net

net = model()
net.summary()
# '''-------第一種------'''
#
# #定義損失函數
# def loss_fu(y_pred, y_true):
# loss = tf.losses.sparse_categorical_crossentropy(y_true, y_pred)
# return loss
# #定義優化器
# trainer = tf.keras.optimizers.SGD(learning_rate=0.03)
# # 計算準確率
# def accuracy(y_hat, y):
# return np.mean((tf.cast(tf.argmax(y_hat, axis=1), tf.int32) == tf.cast(y, tf.int32)))
# # SGD優化器
# def sgd(params, lr, batch_size, grads):
# for i, param in enumerate(params):
# print(param)
# param.assign_sub(lr * grads[i] / batch_size)
# # 計算測試集準確率
# def evaluate_accuracy(test_iter,net):
# acc_sum, n = 0.0, 0
# for X, y in test_iter:
# X=tf.cast(X,tf.float32)
# n += 1
# y = tf.cast(y, dtype=tf.int32)
# output = net(X)
# acc_sum += accuracy(output, y)
# return acc_sum / n
#
# #訓練
# '''
# 通過調用tf.GradientTape記錄動態圖梯度,執行tape.gradient獲得動態圖中各變量梯度。
# 通過 model.trainable_variables 找到需要更新的變量,並用 trainer.apply_gradients 更新權重,
# 完成一步訓練。
# '''
# params=net.trainable_variables #獲得模型結構中需要更新的變量參數
# trainer = tf.keras.optimizers.SGD(learning_rate=0.9)
# num_epochs = 50
# for epoch in range(0, num_epochs ):
# train_acc_sum,train_loss, num_count =0.0, 0, 0
# for X, y in train_iter:
# X=tf.cast(X,dtype=tf.float32)
# y = tf.cast(y, dtype=tf.int32)
# num_count += 1 # 一個epoch取多少次
# with tf.GradientTape() as t:
# t.watch(params)
# l = tf.reduce_mean(loss_fu(net(X,training=True), y))
#
# grads = t.gradient(l, params) # 通過調用反向函數t.gradients計算小批量隨機梯度,並調用優化算法sgd迭代模型參數
# # sgd(params, 0.01, batch_size, grads)
# trainer.apply_gradients(zip(grads, params))
# train_acc_sum += accuracy(net(X), y)
# train_loss += l.numpy()
# test_acc = evaluate_accuracy(test_iter,net)
# print("epoch %d, train loss:%.6f, train acc:%.5f, test acc:%.5f" % (
# epoch, train_loss / num_count, train_acc_sum / num_count, test_acc))
'''------第二種------'''
# Method 2: the high-level Keras API — compile, then fit.
optimizer = tf.keras.optimizers.SGD(learning_rate=0.9, momentum=0.0, nesterov=False)
net.compile(optimizer=optimizer,
            metrics=['accuracy'],
            loss='sparse_categorical_crossentropy')
# Validation on the test split runs after each of the 10 epochs.
net.fit(train_images, y_train, epochs=10,
        validation_data=(test_images, y_test))