直接提供代碼
一共分四個文件
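DogeTrainer.py 用於接收參數並開始訓練
DogeLayer.py 用於定義每個層的屬性和功能
DogeNet.py 用於構建整個神經網絡
MLP.py 用於讀取 MNIST 數據集並進行訓練
記得把 DogeTrainer.py、DogeLayer.py、DogeNet.py 這三個文件放到同一個名爲 dogelearning 的文件夾(包)下,這樣 MLP.py 才能通過 from dogelearning.DogeNet import Net 這樣的語句調用這三個文件中的類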
DogeTrainer.py
from dogelearning.DogeLayer import Layer
from dogelearning.DogeNet import Net
from tqdm.std import trange
import numpy as np
class Trainer:
    """Mini-batch training loop for a network exposing forward/backward."""

    def train(self, net, train_loader, batch_size, optimizer='sgd', epoch_num=50):
        """Train ``net`` on ``train_loader`` and record per-batch accuracy.

        Args:
            net: Object with ``forward(data)`` returning a 2-D score array and
                ``backward(one_hot_targets)`` applying one update step.
            train_loader: Iterable yielding ``(data, target)`` pairs whose
                items expose ``.numpy()`` and ``.shape``.
            batch_size: Number of samples every processed batch must contain.
            optimizer: Accepted for interface compatibility; not used here.
            epoch_num: Number of passes over ``train_loader``.

        Returns:
            A list with one accuracy value per processed batch, in order.
        """
        accuracies = []
        for _ in trange(0, epoch_num):
            for batch_idx, (data, target) in enumerate(train_loader):
                # A short batch ends the epoch: the network is fed fixed-size
                # batches only.
                if data.shape[0] < batch_size:
                    break
                # Flatten every sample to one row; the feature width is
                # inferred instead of being hard-coded to 784.
                data = data.numpy().reshape(batch_size, -1)
                target = target.numpy()
                y_hat = net.forward(data)
                # One-hot encode the labels, sized to the network output width.
                net.backward(np.eye(y_hat.shape[1])[target])
                batch_accuracy = Accuracy(target, y_hat)
                accuracies.append(batch_accuracy)
                if batch_idx == 50:
                    print("準確率爲" + str(batch_accuracy))
        return accuracies
def Accuracy(target, y_hat):
    """Return the fraction of rows in ``y_hat`` whose arg-max equals ``target``.

    Args:
        target: 1-D array of integer class labels.
        y_hat: 2-D array of per-class scores, one row per sample.

    Returns:
        The mean of the element-wise hit/miss indicator.
    """
    predicted = y_hat.argmax(axis=1)
    # Adding 0 turns the boolean comparison result into a 0/1 integer array.
    hits = (predicted == target) + 0
    return hits.mean()
DogeNet.py
from dogelearning.DogeLayer import *
class Net:
    """Sequential container that chains layers for forward and backward passes.

    Attributes:
        layers: Layers in the order they were added (one list per instance).
        batch_size: Batch size handed to every ``Layer`` created by ``add``.
        input_num: Number of input features expected by the first layer.
    """

    batch_size = 0
    input_num = 0

    def __init__(self, batch_size, input_num):
        """Create an empty network for the given batch size and input width."""
        # The layer list is created per instance; a class-level list would be
        # shared by every Net object.
        self.layers = []
        self.batch_size = batch_size
        self.input_num = input_num

    def add(self, layer_type, node_num, activation=""):
        """Append a layer to the end of the network.

        Args:
            layer_type: ``'Softmax'`` appends a ``Softmax`` layer; any other
                value appends a ``Layer``.
            node_num: Number of output nodes of the new layer.
            activation: Activation name forwarded to ``Layer``.
        """
        if layer_type == 'Softmax':
            self.layers.append(Softmax(node_num))
            return
        # The new layer's input width is the previous layer's output width,
        # or the network input width when this is the first layer.
        last_node_num = self.layers[-1].node_num if self.layers else self.input_num
        self.layers.append(Layer(last_node_num, node_num, self.batch_size, activation))

    def forward(self, data):
        """Feed ``data`` through every layer in order and return the output."""
        for layer in self.layers:
            data = layer.forward(data)
        return data

    def backward(self, y_hat):
        """Propagate ``y_hat`` backwards through the layers in reverse order.

        Each layer's ``backward`` receives the value returned by the layer
        after it; the first call receives ``y_hat`` itself.
        """
        dydx = y_hat
        for layer in reversed(self.layers):
            dydx = layer.backward(dydx)

    def print(self):
        """Print one line per layer: the layer object, node count, activation."""
        print("網絡名 節點個數 激活函數")
        for layer in self.layers:
            print(layer, layer.node_num, layer.activation)
DogeLayer.py
# import minpy.numpy as mnp
import numpy as np
class Layer:
    """Fully connected layer with an optional element-wise activation.

    ``forward`` caches its input and activated output; ``backward`` uses them
    to compute gradients, applies one gradient-descent step with an L2 penalty
    on the weights, and returns the gradient with respect to the input.

    Supported activation names: ``"Sigmoid"``, ``"Tahn"`` (original spelling,
    ``"Tanh"`` is accepted too) and ``"Relu"``. Any other value means no
    activation.
    """

    lamda = 3  # L2 regularisation penalty coefficient
    w = 0
    b = 0
    last_node_num = 0
    node_num = 0
    batch_size = 0
    activation = ''
    learning_rate = 0.1
    x = 0
    activation_data = 0

    def __init__(self, last_node_num, node_num, batch_size, activation):
        """Initialise weights and bias.

        Args:
            last_node_num: Number of input features (previous layer's width).
            node_num: Number of output nodes.
            batch_size: Stored on the instance; no longer determines the bias
                shape.
            activation: Activation name, see the class docstring.
        """
        self.last_node_num = last_node_num
        self.node_num = node_num
        # Weights drawn from a zero-mean normal distribution with std 0.01.
        self.w = np.random.normal(scale=0.01, size=(last_node_num, node_num))
        # One bias per output node, broadcast over the batch. A per-sample
        # bias matrix would bind a bias to a position in the batch and lock
        # the layer to a single batch size.
        self.b = np.zeros((1, node_num))
        self.activation = activation
        self.batch_size = batch_size

    def forward(self, data):
        """Compute ``activation(data @ w + b)`` and cache input and output."""
        self.x = data
        data = np.dot(data, self.w) + self.b
        if self.activation == "Sigmoid":
            data = 1 / (1 + np.exp(-data))
        elif self.activation in ("Tahn", "Tanh"):
            # np.tanh stays finite where the explicit exp ratio would
            # evaluate inf / inf.
            data = np.tanh(data)
        elif self.activation == "Relu":
            data = np.maximum(data, 0)
        self.activation_data = data
        return data

    def backward(self, y):
        """Apply one update step and return the gradient w.r.t. the input.

        Args:
            y: Gradient with respect to this layer's activated output, one
                row per sample.

        Returns:
            Gradient with respect to the input passed to the last ``forward``
            call, computed with the weights as they were before the update.
        """
        if self.activation == "Sigmoid":
            y = self.activation_data * (1 - self.activation_data) * y
        elif self.activation in ("Tahn", "Tanh"):
            y = (1 - self.activation_data ** 2) * y
        elif self.activation == "Relu":
            # Use a fresh mask: the cached activation is the very array that
            # was returned from forward, so it must not be overwritten.
            y = (self.activation_data > 0) * y
        w_gradient = np.dot(self.x.T, y)
        # The bias is shared by all samples, so its gradient is the batch sum.
        b_gradient = y.sum(axis=0, keepdims=True)
        input_gradient = np.dot(y, self.w.T)
        sample_count = y.shape[0]
        self.w = self.w - (w_gradient + (self.lamda * self.w)) / sample_count * self.learning_rate
        self.b = self.b - b_gradient / sample_count * self.learning_rate
        return input_gradient
class Softmax(Layer):
    """Row-wise softmax output layer.

    ``backward`` returns ``y_hat - y``, i.e. the last softmax output minus
    the array passed in by the caller.
    """

    y_hat = []

    def __init__(self, node_num):
        """Store the number of output nodes; this layer holds no weights."""
        self.node_num = node_num

    def forward(self, data):
        """Return the softmax of each row of the 2-D array ``data``.

        The per-row maximum is subtracted before exponentiation. That leaves
        the result mathematically unchanged but keeps ``np.exp`` from
        overflowing to inf (and the ratio from becoming NaN) on large scores.
        """
        shifted = data - data.max(axis=1, keepdims=True)
        exponentials = np.exp(shifted)
        # Broadcasting divides every row by its own sum.
        self.y_hat = exponentials / exponentials.sum(axis=1, keepdims=True)
        return self.y_hat

    def backward(self, y):
        """Return the cached softmax output minus ``y``."""
        return self.y_hat - y
MLP.py
from torchvision import datasets, transforms
import torch.utils.data as Data
# import minpy.numpy as mnp
# import numpy as np
from dogelearning.DogeNet import Net
from dogelearning.DogeLayer import Layer
from dogelearning.DogeTrainer import Trainer
batch_size = 256  # mini-batch size; passed to the DataLoader, to Net and to Trainer.train below
learning_rate=0.001  # NOTE(review): not referenced in the visible code; Layer uses its own learning_rate class attribute — confirm whether this was meant to be wired in
def load_data():
    """Build a shuffled mini-batch loader over the MNIST training split.

    The dataset is stored under ``./data/`` with ``download=True``, and each
    image is converted with ``transforms.ToTensor()``. Batches contain
    ``batch_size`` samples, taken from the module-level constant.

    Returns:
        A ``DataLoader`` over the MNIST training set.
    """
    mnist_train = datasets.MNIST(
        root='./data/',
        train=True,
        transform=transforms.ToTensor(),
        download=True,
    )
    return Data.DataLoader(dataset=mnist_train, batch_size=batch_size, shuffle=True)
if __name__ == '__main__':
    # Load the data (PyTorch is used for loading here; to be replaced by a
    # hand-written NumPy loader later).
    train_loader = load_data()
    net = Net(batch_size, 784)
    print(net.batch_size)

    # Alternative architectures, kept for comparison:
    # net.add("", 256, activation="Sigmoid")
    # net.add("", 64, activation="Sigmoid")
    # net.add("", 10, activation="Sigmoid")
    # net.add("Softmax", 10)

    # net.add("", 256, activation="Tahn")
    # net.add("", 64, activation="Tahn")
    # net.add("", 10, activation="Tahn")
    # net.add("Softmax", 10)

    net.add("", 256, activation="Relu")
    net.add("", 64, activation="Relu")
    net.add("", 10, activation="Relu")
    net.add("Softmax", 10)
    net.print()

    # Call train on a Trainer instance rather than passing the loader as a
    # stand-in for ``self``.
    accuracy_history = Trainer().train(net, train_loader, batch_size, epoch_num=100)

    import matplotlib.pyplot as plt
    plt.plot(accuracy_history)
    plt.show()
最終效果如下
網絡名 節點個數 激活函數
256 Relu
64 Relu
10 Relu
10
準確率爲0.3046875
1%| | 1/100 [00:07<13:08, 7.97s/it]準確率爲0.24609375
2%|▏ | 2/100 [00:16<13:14, 8.11s/it]準確率爲0.4140625
3%|▎ | 3/100 [00:28<14:59, 9.27s/it]準確率爲0.68359375
4%|▍ | 4/100 [00:38<15:13, 9.51s/it]準確率爲0.81640625
5%|▌ | 5/100 [00:50<16:12, 10.23s/it]準確率爲0.87109375
tanh 的優化圖像
relu的優化圖像