本章節對應MXNET深度學習框架-05-從0開始的多分類邏輯迴歸。
1、數據集下載及讀取
本文使用Fashion-MNIST數據集,可以通過tensorflow進行下載和讀取:
from tensorflow import data as tfdata  # needed for tfdata.Dataset below
from tensorflow.keras.datasets import fashion_mnist

# Load Fashion-MNIST as (features, labels) pairs for the train and test splits.
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
print(x_train.shape, y_train.shape)

# Pair every feature array with its label, then split into mini-batches.
batch_size = 256
# The shuffle buffer spans the whole training set.
train_dataset = tfdata.Dataset.from_tensor_slices((x_train, y_train)).shuffle(x_train.shape[0])
test_dataset = tfdata.Dataset.from_tensor_slices((x_test, y_test))
train_iter = train_dataset.batch(batch_size)
test_iter = test_dataset.batch(batch_size)
結果:
從結果可以看出,這份數據集中的圖像是大小爲28*28的單通道圖像,訓練數據共計60000張。
2、顯示部分數據集圖片和標籤
def show_image(image):  # display images
    """Draw every image in ``image`` side by side in a single row.

    Args:
        image: Array whose first dimension indexes the images; each
            ``image[i]`` must support ``.reshape((28, 28))``.
    """
    n = image.shape[0]
    if n == 0:
        # A subplot grid needs at least one column; nothing to draw.
        return
    # squeeze=False always returns a 2-D array of axes, so a single image
    # (n == 1) is indexed the same way as several.
    _, figs = plt.subplots(1, n, figsize=(15, 15), squeeze=False)
    for i in range(n):
        figs[0][i].imshow(image[i].reshape((28, 28)))
    plt.show()
def get_fashion_mnist_labels(labels):  # map numeric labels to names
    """Translate numeric Fashion-MNIST class ids into their text names.

    Args:
        labels: Iterable of class ids; each one is converted with ``int``
            and used as an index into the ten class names.

    Returns:
        A list with one class name per entry of ``labels``.
    """
    names = (
        't-shirt', 'trouser', 'pullover', 'dress', 'coat',
        'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot',
    )
    result = []
    for class_id in labels:
        result.append(names[int(class_id)])
    return result
# Preview the first nine training images together with their class names.
sample_images, sample_labels = x_train[:9], y_train[:9]
show_image(sample_images)
print(get_fashion_mnist_labels(sample_labels))
結果:
3、數據歸一化
def normlize(data):
    """Cast ``data`` to float32 and divide it by 255.0.

    Args:
        data: Values convertible by ``tf.cast``.

    Returns:
        A float32 tensor holding ``data / 255.0``.
    """
    as_float = tf.cast(data, dtype=tf.float32)
    return as_float / 255.0
4、初始化參數
# Model dimensions: one input per pixel of a 28x28 image, ten output classes.
num_inputs = 28 * 28
num_output = 10

# Weight matrix drawn from a truncated normal with a small spread.
w = tf.Variable(
    tf.random.truncated_normal(
        shape=[num_inputs, num_output], stddev=0.01, dtype=tf.float32
    )
)
# Bias vector; uses truncated_normal's default standard deviation.
b = tf.Variable(
    tf.random.truncated_normal(shape=[num_output], dtype=tf.float32)
)
5、softmax分類器
在下面定義函數中,矩陣logits的行數是樣本數,列數是輸出個數。爲了表達樣本預測各個輸出的概率,softmax運算會先通過exp函數對每個元素做指數運算,再對exp矩陣同行元素求和,最後令矩陣每行各元素與該行元素之和相除。這樣一來,最終得到的矩陣每行元素和爲1且非負。因此,該矩陣每行都是合法的概率分佈。softmax運算的輸出矩陣中的任意一行元素代表了一個樣本在各個輸出類別上的預測概率。
def softmax(logits, axis=-1):
    """Turn ``logits`` into probabilities along ``axis``.

    Args:
        logits: Tensor of unnormalised scores.
        axis: Dimension that is normalised to sum to one (default: last).

    Returns:
        Tensor with the same shape as ``logits``; entries are non-negative
        and sum to 1 along ``axis``.
    """
    # Softmax is invariant to subtracting a constant per row; removing the
    # row maximum keeps exp() from overflowing on large logits.
    shifted = logits - tf.reduce_max(logits, axis=axis, keepdims=True)
    exp_scores = tf.exp(shifted)  # computed once and reused below
    return exp_scores / tf.reduce_sum(exp_scores, axis=axis, keepdims=True)
6、定義模型
def net(x):
    """Run the softmax-regression forward pass.

    Args:
        x: Batch of inputs that can be reshaped to ``[-1, 28 * 28]``.

    Returns:
        The result of ``softmax`` applied to the linear scores, one row per
        sample.
    """
    flattened = tf.reshape(x, [-1, 28 * 28])
    # Linear model f(x) = Xw + b, using the module-level parameters w and b.
    scores = tf.matmul(flattened, w) + b
    return softmax(scores)
7、定義交叉熵損失函數
def cross_entropy(y_hat, y):
    """Return the per-sample cross-entropy loss.

    Args:
        y_hat: Predicted probabilities; the last dimension indexes classes.
        y: Class labels, one per row of ``y_hat`` (cast to int32 here).

    Returns:
        1-D tensor holding ``-log(p + 1e-8)`` per sample, where ``p`` is the
        probability predicted for that sample's true class.
    """
    num_classes = y_hat.shape[-1]
    labels = tf.cast(tf.reshape(y, shape=[-1]), dtype=tf.int32)
    # tf.boolean_mask is specified for boolean masks, so turn the one-hot
    # encoding into booleans instead of passing an int32 tensor.
    true_class_mask = tf.cast(tf.one_hot(labels, depth=num_classes), dtype=tf.bool)
    # The 1e-8 offset keeps log() finite when a probability is exactly 0.
    return -tf.math.log(tf.boolean_mask(y_hat, true_class_mask) + 1e-8)
8、計算準確率
def accuracy(y_hat, y):
    """Return the fraction of samples whose arg-max prediction equals ``y``.

    Args:
        y_hat: Per-class scores; the arg-max is taken along axis 1.
        y: True class labels.

    Returns:
        Mean of the element-wise prediction/label matches.
    """
    predicted = tf.cast(tf.argmax(y_hat, axis=1), tf.int32)
    expected = tf.cast(y, tf.int32)
    hits = (predicted == expected)
    return np.mean(hits)
9、計算測試集準確率
def evaluate_accuracy(test_iter):
    """Return the model's accuracy over every sample yielded by ``test_iter``.

    Args:
        test_iter: Iterable of ``(X, y)`` batches.

    Returns:
        Fraction of correctly classified samples, or 0.0 when ``test_iter``
        yields no samples.
    """
    correct_sum, n = 0.0, 0
    for X, y in test_iter:
        y = tf.cast(y, dtype=tf.int32)
        batch_n = int(y.shape[0])
        output = net(normlize(X))
        # accuracy() is a per-batch mean; weight it by the batch size so a
        # smaller final batch does not count as much as a full one.
        correct_sum += accuracy(output, y) * batch_n
        n += batch_n
    if n == 0:
        return 0.0
    return correct_sum / n
10、SGD優化器
def sgd(params, lr, batch_size, grads):
    """Apply one mini-batch SGD step to every parameter in place.

    Args:
        params: Objects exposing ``assign_sub``.
        lr: Learning rate.
        batch_size: Divisor applied to each gradient.
        grads: Gradients, in the same order as ``params``.
    """
    for variable, gradient in zip(params, grads):
        step = lr * gradient / batch_size
        variable.assign_sub(step)
11、訓練
def train(num_epochs, lr):
    """Train the module-level parameters ``w`` and ``b`` with mini-batch SGD.

    Iterates over the module-level ``train_iter`` for ``num_epochs`` epochs
    and, after each epoch, prints the average training loss, the training
    accuracy and the accuracy on the module-level ``test_iter``.

    Args:
        num_epochs: Number of passes over ``train_iter``.
        lr: Learning rate handed to ``sgd``.
    """
    params = [w, b]
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            X = normlize(X)
            batch_n = int(y.shape[0])
            # w and b are tf.Variable objects, so the tape tracks them
            # without an explicit watch() call.
            with tf.GradientTape() as t:
                y_hat = net(X)
                l = tf.reduce_mean(cross_entropy(y_hat, y))
            grads = t.gradient(l, params)
            # NOTE(review): l is already a batch mean and sgd() divides by
            # batch_size again, so the effective step is lr / batch_size.
            # Left unchanged because lr was chosen with this behaviour.
            sgd(params, lr, batch_size, grads)
            # Reuse the forward pass from the tape instead of running the
            # network a second time, and weight both metrics by the batch
            # size so a smaller final batch is not over-represented.
            train_acc_sum += accuracy(y_hat, y) * batch_n
            train_l_sum += float(l.numpy()) * batch_n
            n += batch_n
        test_acc = evaluate_accuracy(test_iter)
        denom = max(n, 1)  # avoid dividing by zero on an empty train_iter
        print(str(epoch),"train loss:%.6f, train acc:%.5f, test acc:%.5f"%(train_l_sum/denom,train_acc_sum/denom,test_acc))
最終訓練結果:
12、預測
def predict():
    """Classify the first ten test images and print true vs. predicted labels.

    Reads the module-level ``x_test`` and ``y_test``, displays the selected
    images, then prints the true labels and the model's predictions, both
    as numbers and as clothing names.
    """
    first_ten = slice(None, 10)  # the first 10 test samples
    image_10 = x_test[first_ten]
    label_10 = y_test[first_ten]
    show_image(image_10)
    print("真實樣本標籤:", label_10)
    print("真實數字標籤對應的服飾名:", get_fashion_mnist_labels(label_10))
    probabilities = net(normlize(image_10))
    predict_label = tf.argmax(probabilities, 1).numpy()
    print("預測樣本標籤:", predict_label.astype("int8"))
    print("預測數字標籤對應的服飾名:", get_fashion_mnist_labels(predict_label))
預測結果:
可以看到,預測的準確率爲90%(當然有偶然性,畢竟只有10個測試數據)。
附上所有源碼:
import tensorflow as tf
from tensorflow.keras.datasets import fashion_mnist
import matplotlib.pyplot as plt
import numpy as np
from tensorflow import data as tfdata
# 1、獲取和讀取數據
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
print(x_train.shape, y_train.shape)

# Pair every feature array with its label, then split into mini-batches.
batch_size = 256
train_dataset = tfdata.Dataset.from_tensor_slices((x_train, y_train))
# The shuffle buffer spans the whole training set.
train_dataset = train_dataset.shuffle(x_train.shape[0])
test_dataset = tfdata.Dataset.from_tensor_slices((x_test, y_test))
train_iter = train_dataset.batch(batch_size)
test_iter = test_dataset.batch(batch_size)
# 2、顯示部分圖片
def show_image(image):  # display images
    """Draw every image in ``image`` side by side in a single row.

    Args:
        image: Array whose first dimension indexes the images; each
            ``image[i]`` must support ``.reshape((28, 28))``.
    """
    n = image.shape[0]
    if n == 0:
        # A subplot grid needs at least one column; nothing to draw.
        return
    # squeeze=False always returns a 2-D array of axes, so a single image
    # (n == 1) is indexed the same way as several.
    _, figs = plt.subplots(1, n, figsize=(15, 15), squeeze=False)
    for i in range(n):
        figs[0][i].imshow(image[i].reshape((28, 28)))
    plt.show()
def get_fashion_mnist_labels(labels):  # map numeric labels to names
    """Translate numeric Fashion-MNIST class ids into their text names.

    Args:
        labels: Iterable of class ids; each one is converted with ``int``
            and used as an index into the ten class names.

    Returns:
        A list with one class name per entry of ``labels``.
    """
    names = (
        't-shirt', 'trouser', 'pullover', 'dress', 'coat',
        'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot',
    )
    result = []
    for class_id in labels:
        result.append(names[int(class_id)])
    return result
# show_image(x_train[:9]) # 前9張圖片
# print(get_fashion_mnist_labels(y_train[:9]))
# 3、數據歸一化
def normlize(data):
    """Cast ``data`` to float32 and divide it by 255.0.

    Args:
        data: Values convertible by ``tf.cast``.

    Returns:
        A float32 tensor holding ``data / 255.0``.
    """
    as_float = tf.cast(data, dtype=tf.float32)
    return as_float / 255.0
# 4、初始化參數
# Model dimensions: one input per pixel of a 28x28 image, ten output classes.
num_inputs = 28 * 28
num_output = 10

# Weight matrix drawn from a truncated normal with a small spread.
w = tf.Variable(
    tf.random.truncated_normal(
        shape=[num_inputs, num_output], stddev=0.01, dtype=tf.float32
    )
)
# Bias vector; uses truncated_normal's default standard deviation.
b = tf.Variable(
    tf.random.truncated_normal(shape=[num_output], dtype=tf.float32)
)
# 5、定義softmax分類器
def softmax(logits, axis=-1):
    """Turn ``logits`` into probabilities along ``axis``.

    Args:
        logits: Tensor of unnormalised scores.
        axis: Dimension that is normalised to sum to one (default: last).

    Returns:
        Tensor with the same shape as ``logits``; entries are non-negative
        and sum to 1 along ``axis``.
    """
    # Softmax is invariant to subtracting a constant per row; removing the
    # row maximum keeps exp() from overflowing on large logits.
    shifted = logits - tf.reduce_max(logits, axis=axis, keepdims=True)
    exp_scores = tf.exp(shifted)  # computed once and reused below
    return exp_scores / tf.reduce_sum(exp_scores, axis=axis, keepdims=True)
# 6、定義模型
def net(x):
    """Run the softmax-regression forward pass.

    Args:
        x: Batch of inputs that can be reshaped to ``[-1, 28 * 28]``.

    Returns:
        The result of ``softmax`` applied to the linear scores, one row per
        sample.
    """
    flattened = tf.reshape(x, [-1, 28 * 28])
    # Linear model f(x) = Xw + b, using the module-level parameters w and b.
    scores = tf.matmul(flattened, w) + b
    return softmax(scores)
#7、定義交叉熵損失函數
def cross_entropy(y_hat, y):
    """Return the per-sample cross-entropy loss.

    Args:
        y_hat: Predicted probabilities; the last dimension indexes classes.
        y: Class labels, one per row of ``y_hat`` (cast to int32 here).

    Returns:
        1-D tensor holding ``-log(p + 1e-8)`` per sample, where ``p`` is the
        probability predicted for that sample's true class.
    """
    num_classes = y_hat.shape[-1]
    labels = tf.cast(tf.reshape(y, shape=[-1]), dtype=tf.int32)
    # tf.boolean_mask is specified for boolean masks, so turn the one-hot
    # encoding into booleans instead of passing an int32 tensor.
    true_class_mask = tf.cast(tf.one_hot(labels, depth=num_classes), dtype=tf.bool)
    # The 1e-8 offset keeps log() finite when a probability is exactly 0.
    return -tf.math.log(tf.boolean_mask(y_hat, true_class_mask) + 1e-8)
#8、計算準確率
def accuracy(y_hat, y):
    """Return the fraction of samples whose arg-max prediction equals ``y``.

    Args:
        y_hat: Per-class scores; the arg-max is taken along axis 1.
        y: True class labels.

    Returns:
        Mean of the element-wise prediction/label matches.
    """
    predicted = tf.cast(tf.argmax(y_hat, axis=1), tf.int32)
    expected = tf.cast(y, tf.int32)
    hits = (predicted == expected)
    return np.mean(hits)
#9、計算測試集準確率
def evaluate_accuracy(test_iter):
    """Return the model's accuracy over every sample yielded by ``test_iter``.

    Args:
        test_iter: Iterable of ``(X, y)`` batches.

    Returns:
        Fraction of correctly classified samples, or 0.0 when ``test_iter``
        yields no samples.
    """
    correct_sum, n = 0.0, 0
    for X, y in test_iter:
        y = tf.cast(y, dtype=tf.int32)
        batch_n = int(y.shape[0])
        output = net(normlize(X))
        # accuracy() is a per-batch mean; weight it by the batch size so a
        # smaller final batch does not count as much as a full one.
        correct_sum += accuracy(output, y) * batch_n
        n += batch_n
    if n == 0:
        return 0.0
    return correct_sum / n
#10、SGD優化器
def sgd(params, lr, batch_size, grads):
    """Apply one mini-batch SGD step to every parameter in place.

    Args:
        params: Objects exposing ``assign_sub``.
        lr: Learning rate.
        batch_size: Divisor applied to each gradient.
        grads: Gradients, in the same order as ``params``.
    """
    for variable, gradient in zip(params, grads):
        step = lr * gradient / batch_size
        variable.assign_sub(step)
#11、訓練
def train(num_epochs, lr):
    """Train the module-level parameters ``w`` and ``b`` with mini-batch SGD.

    Iterates over the module-level ``train_iter`` for ``num_epochs`` epochs
    and, after each epoch, prints the average training loss, the training
    accuracy and the accuracy on the module-level ``test_iter``.

    Args:
        num_epochs: Number of passes over ``train_iter``.
        lr: Learning rate handed to ``sgd``.
    """
    params = [w, b]
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            X = normlize(X)
            batch_n = int(y.shape[0])
            # w and b are tf.Variable objects, so the tape tracks them
            # without an explicit watch() call.
            with tf.GradientTape() as t:
                y_hat = net(X)
                l = tf.reduce_mean(cross_entropy(y_hat, y))
            grads = t.gradient(l, params)
            # NOTE(review): l is already a batch mean and sgd() divides by
            # batch_size again, so the effective step is lr / batch_size.
            # Left unchanged because lr was chosen with this behaviour.
            sgd(params, lr, batch_size, grads)
            # Reuse the forward pass from the tape instead of running the
            # network a second time, and weight both metrics by the batch
            # size so a smaller final batch is not over-represented.
            train_acc_sum += accuracy(y_hat, y) * batch_n
            train_l_sum += float(l.numpy()) * batch_n
            n += batch_n
        test_acc = evaluate_accuracy(test_iter)
        denom = max(n, 1)  # avoid dividing by zero on an empty train_iter
        print("epoch %d, train loss:%.6f, train acc:%.5f, test acc:%.5f"%(epoch,train_l_sum/denom,train_acc_sum/denom,test_acc))
# 12、預測
def predict():
    """Classify the first ten test images and print true vs. predicted labels.

    Reads the module-level ``x_test`` and ``y_test``, displays the selected
    images, then prints the true labels and the model's predictions, both
    as numbers and as clothing names.
    """
    first_ten = slice(None, 10)  # the first 10 test samples
    image_10 = x_test[first_ten]
    label_10 = y_test[first_ten]
    show_image(image_10)
    print("真實樣本標籤:", label_10)
    print("真實數字標籤對應的服飾名:", get_fashion_mnist_labels(label_10))
    probabilities = net(normlize(image_10))
    predict_label = tf.argmax(probabilities, 1).numpy()
    print("預測樣本標籤:", predict_label.astype("int8"))
    print("預測數字標籤對應的服飾名:", get_fashion_mnist_labels(predict_label))
if __name__ == "__main__":
    # Train for 50 epochs with learning rate 0.3, then run the prediction demo.
    train(num_epochs=50, lr=0.3)
    predict()