import tensorflow as tf
import numpy as np
from tensorflow.keras.datasets import fashion_mnist
batch_size = 256
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
x_train = tf.cast(x_train, tf.float32) / 255  # matrix multiplication needs float operands, so cast to float32 and rescale to [0, 1]
x_test = tf.cast(x_test, tf.float32) / 255
train_iter = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(batch_size)
test_iter = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(batch_size)
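One refinement not in the original: for stochastic gradient descent it is common to reshuffle the training examples each epoch, which tf.data supports with a shuffle buffer. A sketch (the buffer_size of 10000 is an illustrative choice):

# optional: shuffle the training set each epoch; buffer_size trades memory
# for shuffle quality (the original pipeline iterates in a fixed order)
train_iter = tf.data.Dataset.from_tensor_slices(
    (x_train, y_train)).shuffle(buffer_size=10000).batch(batch_size)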
num_inputs = 784
num_outputs = 10
W = tf.Variable(tf.random.normal(shape=(num_inputs, num_outputs), mean=0, stddev=0.01, dtype=tf.float32))
b = tf.Variable(tf.zeros(num_outputs, dtype=tf.float32))

def softmax(logits, axis=-1):  # the softmax operation
    return tf.exp(logits) / tf.reduce_sum(tf.exp(logits), axis, keepdims=True)

def net(X):  # define the model
    logits = tf.matmul(tf.reshape(X, shape=(-1, W.shape[0])), W) + b
    return softmax(logits)
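As written, softmax exponentiates the raw logits, and tf.exp can overflow for large values. A numerically stabler variant subtracts the per-row maximum first; this is a sketch (stable_softmax is an illustrative name, not part of the original), and the result is unchanged because softmax is invariant to adding a constant to every logit:

def stable_softmax(logits, axis=-1):
    # subtracting the per-row max does not change the output but keeps
    # tf.exp from overflowing on large logits
    logits = logits - tf.reduce_max(logits, axis=axis, keepdims=True)
    return tf.exp(logits) / tf.reduce_sum(tf.exp(logits), axis=axis, keepdims=True)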
def cross_entropy(y_hat, y):  # cross-entropy loss
    y = tf.cast(tf.reshape(y, shape=[-1, 1]), dtype=tf.int32)
    y = tf.one_hot(y, depth=y_hat.shape[-1])
    y = tf.cast(tf.reshape(y, shape=[-1, y_hat.shape[-1]]), dtype=tf.int32)
    # boolean_mask picks out the predicted probability of the true class;
    # the 1e-8 keeps log() away from -inf
    return -tf.math.log(tf.boolean_mask(y_hat, y) + 1e-8)
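The hand-rolled loss above is for pedagogy; TensorFlow ships an equivalent built-in. For reference, a sketch of the same computation (y holds integer class ids, y_hat holds probabilities, hence from_logits=False; cross_entropy_builtin is an illustrative name):

def cross_entropy_builtin(y_hat, y):
    # built-in equivalent of the hand-written cross-entropy above
    return tf.keras.losses.sparse_categorical_crossentropy(y, y_hat, from_logits=False)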
def accuracy(y_hat, y):  # compute classification accuracy
    return np.mean((tf.argmax(y_hat, axis=1) == y))

# Note: in TensorFlow 2 both sides of a comparison must share a dtype,
# so cast the network output and the labels to int before comparing
def evaluate_accuracy(data_iter, net):
    acc_sum, n = 0.0, 0
    for _, (X, y) in enumerate(data_iter):
        y = tf.cast(y, dtype=tf.int64)
        acc_sum += np.sum(tf.cast(tf.argmax(net(X), axis=1), dtype=tf.int64) == y)
        n += y.shape[0]
    return acc_sum / n
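As a quick sanity check (not in the original): with near-zero initial weights the predictions are roughly uniform, so the untrained model should score close to random guessing over ten balanced classes:

# accuracy before training should hover around 1/10
print(evaluate_accuracy(test_iter, net))  # ~0.1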
num_epochs, lr = 5, 0.1

def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, params=None, lr=None, trainer=None):
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            with tf.GradientTape() as tape:
                y_hat = net(X)
                l = tf.reduce_sum(loss(y_hat, y))
            grads = tape.gradient(l, params)
            if trainer is None:
                # no optimizer was passed in, so use the hand-written
                # mini-batch stochastic gradient descent
                for i, param in enumerate(params):
                    param.assign_sub(lr * grads[i] / batch_size)
            else:
                # tf.keras.optimizers.SGD applies theta(t+1) = theta(t) - learning_rate * gradient
                # directly; for mini-batch gradient descent the gradients must
                # first be divided by batch_size
                trainer.apply_gradients(zip([grad / batch_size for grad in grads], params))
            y = tf.cast(y, dtype=tf.float32)
            train_l_sum += l.numpy()
            train_acc_sum += tf.reduce_sum(tf.cast(tf.argmax(y_hat, axis=1) == tf.cast(y, dtype=tf.int64), dtype=tf.int64)).numpy()
            n += y.shape[0]
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))
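Why divide the gradients by batch_size: l is a sum (not a mean) over the batch, so tape.gradient returns summed per-example gradients, and dividing by batch_size recovers the usual mean-gradient update. A small self-contained check of this equivalence (toy names, not part of the training code):

# gradient of a summed loss equals batch_size times the gradient of the
# mean loss, so dividing by batch_size recovers the mean gradient
v = tf.Variable([1.0, 2.0])
xs = tf.constant([[1.0, 0.0], [0.0, 1.0], [1.0, 1.0]])  # toy batch of 3
with tf.GradientTape() as t:
    summed = tf.reduce_sum(tf.matmul(xs, tf.reshape(v, (2, 1))))
g_sum = t.gradient(summed, v)
with tf.GradientTape() as t:
    mean = tf.reduce_mean(tf.matmul(xs, tf.reshape(v, (2, 1))))
g_mean = t.gradient(mean, v)
print(g_sum.numpy() / 3, g_mean.numpy())  # identical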
trainer = tf.keras.optimizers.SGD(lr)
# note: trainer is created here but not passed below, so train_ch3 falls
# back to the hand-written mini-batch SGD branch
train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, batch_size, [W, b], lr)
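To route the updates through the Keras optimizer instead, the same call can pass trainer explicitly. This is a sketch of the else branch above, not the run that produced the printed results below:

# variant: let tf.keras.optimizers.SGD apply the parameter updates
train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, batch_size,
          params=[W, b], trainer=trainer)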
import matplotlib.pyplot as plt

X, y = next(iter(test_iter))  # Python 3 iterators have no .next() method; use next()

def get_fashion_mnist_labels(labels):
    text_labels = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat',
                   'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']
    return [text_labels[int(i)] for i in labels]

def show_fashion_mnist(images, labels):
    # the _ here denotes a variable we ignore (do not use)
    _, figs = plt.subplots(1, len(images), figsize=(12, 12))  # note the difference between subplot and subplots
    for f, img, lbl in zip(figs, images, labels):
        f.imshow(tf.reshape(img, shape=(28, 28)).numpy())
        f.set_title(lbl)
        f.axes.get_xaxis().set_visible(False)
        f.axes.get_yaxis().set_visible(False)
    plt.show()
true_labels = get_fashion_mnist_labels(y.numpy())
pred_labels = get_fashion_mnist_labels(tf.argmax(net(X), axis=1).numpy())
titles = [true + '\n' + pred for true, pred in zip(true_labels, pred_labels)]
show_fashion_mnist(X[0:9], titles[0:9])
Output:
epoch 1, loss 0.7841, train acc 0.750, test acc 0.793
epoch 2, loss 0.5709, train acc 0.812, test acc 0.812
epoch 3, loss 0.5257, train acc 0.825, test acc 0.819
epoch 4, loss 0.5014, train acc 0.832, test acc 0.825
epoch 5, loss 0.4856, train acc 0.836, test acc 0.827