多層感知機(multilayer perceptron,MLP)其實與邏輯迴歸沒有太大的區別,主要就是在輸入層和輸出層之間加了幾層隱層:
下面來實現一下這個網絡:
PS:與上兩章一樣,依舊使用服飾類的 Fashion-MNIST 數據集。
1、數據集獲取
# Load the Fashion-MNIST training split (train=True) and test split (train=False)
# through Gluon's vision dataset API.
mnist_train = gn.data.vision.FashionMNIST(train=True)
mnist_test = gn.data.vision.FashionMNIST(train=False)
2、數據讀取
# Number of samples per mini-batch, shared by both loaders below.
batch_size = 100
# NOTE(review): `transformer` is not referenced anywhere else in the visible
# source; normalisation is done by transform() inside the loops instead.
transformer = gn.data.vision.transforms.ToTensor()
# Training batches are shuffled; test batches keep the dataset order.
train_data = gn.data.DataLoader(dataset=mnist_train, batch_size=batch_size, shuffle=True)
test_data = gn.data.DataLoader(dataset=mnist_test, batch_size=batch_size, shuffle=False)
3、初始化模型參數
# Layer sizes: each image is flattened to 28*28*1 inputs, there is one hidden
# layer, and the output layer has one unit per class.
num_input = 28 * 28 * 1
num_output = 10
num_hidden = 256  # number of neurons in the hidden layer

# Weights are drawn from a normal distribution whose standard deviation is
# `scale` (0.01); biases start at zero.
w1 = nd.random_normal(shape=(num_input, num_hidden), scale=0.01)
b1 = nd.zeros(shape=(num_hidden,))
w2 = nd.random_normal(shape=(num_hidden, num_output), scale=0.01)
b2 = nd.zeros(shape=(num_output,))
params = [w1, b1, w2, b2]

# Allocate a gradient buffer on every parameter so that autograd can store
# the gradient in `param.grad` after backward().
for param in params:
    param.attach_grad()
4、激活函數
如果我們使用線性操作符來構造神經網絡,那麼整個模型依舊是一個線性函數:
$\hat{y} = X \cdot W_1 \cdot W_2 = X \cdot W_3$,這裏 $W_3 = W_1 \cdot W_2$。
所以爲了讓模型可以擬合非線性函數,引入一個非線性激活函數:
def relu(x):
    """Apply the ReLU activation element-wise: max(0, x)."""
    return nd.maximum(0, x)
5、定義模型
def net(x):
    """Run the forward pass of the one-hidden-layer perceptron.

    The input is reshaped to rows of `num_input` values, passed through the
    hidden layer (w1, b1) followed by ReLU, then through the output layer
    (w2, b2). The raw output-layer values are returned.
    """
    x = x.reshape(-1, num_input)
    h1 = relu(nd.dot(x, w1) + b1)
    # The last layer is normally left without an activation.
    output = nd.dot(h1, w2) + b2
    return output
6、定義準確率
# Accuracy metric
def accuracy(output, label):
    """Return the fraction of rows in `output` whose arg-max over axis 1 equals `label`.

    The result is converted to a Python scalar with `.asscalar()`.
    """
    return nd.mean(output.argmax(axis=1) == label).asscalar()
def evaluate_accuracy(data_iter, net):
    """Return the mean of the per-batch accuracies of `net` over `data_iter`.

    Each batch is passed through transform() before being fed to `net`.
    The per-batch accuracies are averaged with equal weight, so the result
    equals the per-sample accuracy only when all batches have the same size.
    `data_iter` must support len().
    """
    acc = 0
    for data, label in data_iter:
        data, label = transform(data, label)
        output = net(data)
        acc += accuracy(output, label)
    return acc / len(data_iter)
7、梯度下降優化器
def SGD(params, lr):
    """Take one gradient-descent step on every parameter, in place.

    Args:
        params: iterable of arrays, each exposing its gradient as `.grad`.
        lr: step size applied to the gradient.
    """
    for pa in params:
        # Slice assignment writes into the existing array, so the entries of
        # `params` remain the same objects after the update. Each parameter
        # moves against its gradient by lr * grad.
        pa[:] = pa - lr * pa.grad
8、訓練
(與前兩章一樣)
# NOTE(review): the complete listing at the end of this article uses lr=0.1;
# confirm which value produced the reported results.
lr = 0.5
epochs = 20
for epoch in range(epochs):
    train_loss = 0
    train_acc = 0
    for image, y in train_data:
        image, y = transform(image, y)  # cast to float32 and normalise the pixels
        # Record the forward pass so autograd can differentiate the loss.
        with ag.record():
            output = net(image)
            loss = cross_loss(output, y)
        loss.backward()
        # Average the gradient over the batch so that the learning rate is
        # less sensitive to batch_size.
        SGD(params, lr / batch_size)
        train_loss += nd.mean(loss).asscalar()
        train_acc += accuracy(output, y)
    # Evaluate on the test set once per epoch and report the epoch averages.
    test_acc = evaluate_accuracy(test_data, net)
    print("Epoch %d, Loss:%f, Train acc:%f, Test acc:%f"
          % (epoch, train_loss / len(train_data), train_acc / len(train_data), test_acc))
訓練結果:
可以看到,同樣是訓練20輪(與前兩章比較),模型的準確率已經接近90%,可以說加入了隱層,分類效果確實較好。
9、預測
(與前兩章一樣)
# After training, the model can be used to predict individual samples.
image_10,label_10=mnist_test[:10] # take the first 10 test samples
show_image(image_10)
print("真實樣本標籤:",label_10)
print("真實數字標籤對應的服飾名:",get_fashion_mnist_labels(label_10))
# Normalise the samples the same way as during training before the forward pass.
image_10,label_10=transform(image_10,label_10)
# The predicted class is the index of the largest output along axis 1.
predict_label=net(image_10).argmax(axis=1)
print("預測樣本標籤:",predict_label.astype("int8"))
print("預測數字標籤對應的服飾名:",get_fashion_mnist_labels(predict_label.asnumpy()))
預測結果:
附上所有源碼:
import mxnet.autograd as ag
import mxnet.ndarray as nd
import mxnet.gluon as gn
def transform(data, label):
    """Cast a batch to float32 and scale the data by 1/255.

    Args:
        data: array-like with an `.astype` method; its values are divided by 255.
        label: array-like with an `.astype` method.

    Returns:
        Tuple `(data, label)`, both converted to float32.
    """
    return data.astype("float32") / 255, label.astype("float32")
# Load the Fashion-MNIST training split (train=True) and test split (train=False)
# through Gluon's vision dataset API.
mnist_train = gn.data.vision.FashionMNIST(train=True)
mnist_test = gn.data.vision.FashionMNIST(train=False)
# Slice off the first nine training samples as a quick sanity check.
data, label = mnist_train[0:9]
print(data.shape, label) # show the sample shape and the labels
import matplotlib.pyplot as plt
def show_image(image):
    """Display a batch of images side by side in a single row.

    Args:
        image: batch whose first axis indexes the images; every `image[i]`
            is reshaped to (28, 28) and converted with `.asnumpy()` before
            being drawn.
    """
    n = image.shape[0]
    # squeeze=False makes subplots() always return a 2-D array of axes, so a
    # batch containing a single image can be indexed like any other.
    _, figs = plt.subplots(1, n, figsize=(15, 15), squeeze=False)
    for i, ax in enumerate(figs[0]):
        ax.imshow(image[i].reshape((28, 28)).asnumpy())
    plt.show()
def get_fashion_mnist_labels(labels):
    """Map numeric class indices to their clothing names.

    Args:
        labels: iterable of class indices; each is converted with `int()`,
            so float-valued indices are accepted.

    Returns:
        List of label names, in the same order as `labels`.
    """
    text_labels = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat',
                   'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']
    return [text_labels[int(i)] for i in labels]
#
# show_image(data)
# print(get_fashion_mnist_labels(label))
'''----數據讀取----'''
# Data loading: wrap both dataset splits in mini-batch loaders.
# Number of samples per mini-batch, shared by both loaders below.
batch_size = 100
# NOTE(review): `transformer` is not referenced anywhere else in the visible
# source; normalisation is done by transform() inside the loops instead.
transformer = gn.data.vision.transforms.ToTensor()
# Training batches are shuffled; test batches keep the dataset order.
train_data = gn.data.DataLoader(dataset=mnist_train, batch_size=batch_size, shuffle=True)
test_data = gn.data.DataLoader(dataset=mnist_test, batch_size=batch_size, shuffle=False)
'''----初始化模型參數----'''
# Layer sizes: each image is flattened to 28*28*1 inputs, there is one hidden
# layer, and the output layer has one unit per class.
num_input = 28 * 28 * 1
num_output = 10
num_hidden = 256  # number of neurons in the hidden layer

# Weights are drawn from a normal distribution whose standard deviation is
# `scale` (0.01); biases start at zero.
w1 = nd.random_normal(shape=(num_input, num_hidden), scale=0.01)
b1 = nd.zeros(shape=(num_hidden,))
w2 = nd.random_normal(shape=(num_hidden, num_output), scale=0.01)
b2 = nd.zeros(shape=(num_output,))
params = [w1, b1, w2, b2]

# Allocate a gradient buffer on every parameter so that autograd can store
# the gradient in `param.grad` after backward().
for param in params:
    param.attach_grad()
# ReLU activation function
def relu(x):
    """Apply the ReLU activation element-wise: max(0, x)."""
    return nd.maximum(0, x)
'''----定義模型----'''
# The model simply chains the fully connected layers together with ReLU.
def net(x):
    """Run the forward pass of the one-hidden-layer perceptron.

    The input is reshaped to rows of `num_input` values, passed through the
    hidden layer (w1, b1) followed by ReLU, then through the output layer
    (w2, b2). The raw output-layer values are returned.
    """
    x = x.reshape(-1, num_input)
    h1 = relu(nd.dot(x, w1) + b1)
    # The last layer is normally left without an activation.
    output = nd.dot(h1, w2) + b2
    return output
# Softmax combined with the cross-entropy loss.
# Computing the two separately is numerically unstable (compare the results in
# the previous two posts), so the API provided by Gluon is used directly.
cross_loss=gn.loss.SoftmaxCrossEntropyLoss()
# Accuracy metric
def accuracy(output, label):
    """Return the fraction of rows in `output` whose arg-max over axis 1 equals `label`.

    The result is converted to a Python scalar with `.asscalar()`.
    """
    return nd.mean(output.argmax(axis=1) == label).asscalar()
def evaluate_accuracy(data_iter, net):
    """Return the mean of the per-batch accuracies of `net` over `data_iter`.

    Each batch is passed through transform() before being fed to `net`.
    The per-batch accuracies are averaged with equal weight, so the result
    equals the per-sample accuracy only when all batches have the same size.
    `data_iter` must support len().
    """
    acc = 0
    for data, label in data_iter:
        data, label = transform(data, label)
        output = net(data)
        acc += accuracy(output, label)
    return acc / len(data_iter)
# Gradient-descent optimiser
def SGD(params, lr):
    """Take one gradient-descent step on every parameter, in place.

    Args:
        params: iterable of arrays, each exposing its gradient as `.grad`.
        lr: step size applied to the gradient.
    """
    for pa in params:
        # Slice assignment writes into the existing array, so the entries of
        # `params` remain the same objects after the update. Each parameter
        # moves against its gradient by lr * grad.
        pa[:] = pa - lr * pa.grad
# Training
# NOTE(review): the walkthrough snippet earlier in this article uses lr=0.5;
# confirm which value produced the reported results.
lr = 0.1
epochs = 20
for epoch in range(epochs):
    train_loss = 0
    train_acc = 0
    for image, y in train_data:
        image, y = transform(image, y)  # cast to float32 and normalise the pixels
        # Record the forward pass so autograd can differentiate the loss.
        with ag.record():
            output = net(image)
            loss = cross_loss(output, y)
        loss.backward()
        # Average the gradient over the batch so that the learning rate is
        # less sensitive to batch_size.
        SGD(params, lr / batch_size)
        train_loss += nd.mean(loss).asscalar()
        train_acc += accuracy(output, y)
    # Evaluate on the test set once per epoch and report the epoch averages.
    test_acc = evaluate_accuracy(test_data, net)
    print("Epoch %d, Loss:%f, Train acc:%f, Test acc:%f"
          % (epoch, train_loss / len(train_data), train_acc / len(train_data), test_acc))
'''----預測-------'''
# After training, the model can be used to predict individual samples.
image_10,label_10=mnist_test[:10] # take the first 10 test samples
show_image(image_10)
print("真實樣本標籤:",label_10)
print("真實數字標籤對應的服飾名:",get_fashion_mnist_labels(label_10))
# Normalise the samples the same way as during training before the forward pass.
image_10,label_10=transform(image_10,label_10)
# The predicted class is the index of the largest output along axis 1.
predict_label=net(image_10).argmax(axis=1)
print("預測樣本標籤:",predict_label.astype("int8"))
print("預測數字標籤對應的服飾名:",get_fashion_mnist_labels(predict_label.asnumpy()))