pytorch—CNN卷積神經網絡實現mnist手寫體識別

  • 上一篇文章用線性模型做了一個實驗,這次使用 PyTorch 實現卷積神經網絡 LeNet5,用來識別 MNIST 手寫體
  • 卷積層塊裏的基本單位是卷積層後接平均池化層:卷積層用來識別圖像裏的空間模式,如線條和物體局部,之後的平均池化層則用來降低卷積層對位置的敏感性。
    科普一下LENET5:
    手寫字體識別模型LeNet5誕生於1994年,是最早的卷積神經網絡之一。
    LeNet5通過巧妙的設計,利用卷積、參數共享、池化等操作提取特徵,避免了大量的計算成本,
    最後再使用全連接神經網絡進行分類識別,這個網絡也是最近大量神經網絡架構的起點。

    在這裏插入圖片描述
  • 每層的參數個數的計算 原理 在備註中
  • 每層輸出的features map的shape 如何得出
  • 代碼中包含模型的調試信息
  • 代碼可以兼容運行在GPU或者CPU上
  • 都在備註中有體現,詳細看代碼
  • MNIST的數據集 沒有上傳,可以去Google查找,或者給我留言我郵件給你均可
  • 代碼可以直接跑起來,不能運行的代碼都是耍流氓
  • 話不多說 直接上代碼:

    #引入使用庫
    import torch
    import torch.nn as nn
    import torch.optim as optim
    import time
    import torchvision
    import torchvision.transforms as transforms
    from torchviz import make_dot
    import matplotlib.pyplot as plt

net

# Flatten operation
class Flatten(torch.nn.Module):
    """Collapse every dimension after the first one into a single dimension."""

    def forward(self, x):
        """Return ``x`` reshaped to ``(x.shape[0], -1)``."""
        # reshape (unlike view) also accepts non-contiguous tensors, e.g. the
        # result of a transpose, and is a no-copy view whenever possible.
        return x.reshape(x.shape[0], -1)

# Reshape the input batch into single-channel 28x28 images
class Reshape(torch.nn.Module):
    """Reshape any input whose element count is a multiple of 784 to (N, 1, 28, 28)."""

    def forward(self, x):
        """Return ``x`` reshaped to ``(-1, 1, 28, 28)``; N is inferred."""
        # reshape (unlike view) also accepts non-contiguous tensors.
        return x.reshape(-1, 1, 28, 28)
# Build the stacked (sequential) LeNet-5 style model.
net = torch.nn.Sequential(
    # Reshape the input batch to (N, 1, 28, 28).
    Reshape(),
    # 2-D convolution: 1 input channel -> 6 output channels, 5x5 kernel,
    # padding 2, stride 1.
    # Weights: out_channels * in_channels * k * k = 6 * 1 * 5 * 5 = 150 (+ 6 biases).
    # Output size: (input + 2 * padding - kernel) / stride + 1
    #            = (28 + 2 * 2 - 5) / 1 + 1 = 28.
    nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, padding=2),
    # Activation function (nn.Sigmoid() and nn.ReLU() were tried as alternatives).
    nn.Tanh(),
    # Average pooling lowers the convolution's sensitivity to position.
    # Output size: (28 - 2) / 2 + 1 = 14, i.e. a 14x14 feature map.
    nn.AvgPool2d(kernel_size=2, stride=2),
    # Output size: (14 - 5) / 1 + 1 = 10, i.e. 16 feature maps of 10x10.
    nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5),
    nn.Tanh(),
    # Output size: (10 - 2) / 2 + 1 = 5, i.e. 16 feature maps of 5x5.
    nn.AvgPool2d(kernel_size=2, stride=2),
    Flatten(),
    # 16 channels * 5 * 5 spatial positions = 400 input features.
    nn.Linear(in_features=16 * 5 * 5, out_features=120),
    nn.Tanh(),
    nn.Linear(120, 84),
    nn.Sigmoid(),
    # One output per class (10 classes).
    nn.Linear(84, 10),
)

讀取數據

batch_size = 256
num_workers = 4

# An earlier variant loaded the same data through d2l.load_data_fashion_mnist.
# NOTE(review): this loads FashionMNIST although the article text talks about
# MNIST digits - confirm which dataset is intended.
# download=False: the dataset files must already be present under this root.
data_root = './dataset/input/FashionMNIST2065'

mnist_train = torchvision.datasets.FashionMNIST(
    root=data_root, train=True, download=False, transform=transforms.ToTensor())
mnist_test = torchvision.datasets.FashionMNIST(
    root=data_root, train=False, download=False, transform=transforms.ToTensor())

# Only the training set is shuffled; the test set keeps its stored order.
train_iter = torch.utils.data.DataLoader(
    mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
test_iter = torch.utils.data.DataLoader(
    mnist_test, batch_size=batch_size, shuffle=False, num_workers=num_workers)

# Detect which device the code can run on
def try_gpu():
    """Return the first CUDA device when CUDA is available, otherwise the CPU.

    Returns:
        torch.device: ``cuda:0`` if ``torch.cuda.is_available()`` else ``cpu``.
    """
    # The device string needs the colon: "cuda0" is rejected by torch.device.
    if torch.cuda.is_available():
        return torch.device("cuda:0")
    return torch.device("cpu")

# Compute the classification accuracy
def evaluate_accuracy(data_iter, net, device=torch.device("cpu")):
    """Return the fraction of samples in ``data_iter`` that ``net`` classifies correctly.

    Args:
        data_iter: iterable yielding ``(X, y)`` batches of inputs and labels.
        net: model called as ``net(X)``; the predicted class is the argmax
            over dimension 1 of its output.
        device: device the batches are moved to before the forward pass.

    Returns:
        float: correct predictions divided by the number of samples seen,
        or ``0.0`` when ``data_iter`` yields no samples.
    """
    correct = torch.zeros(1, dtype=torch.float32, device=device)
    n = 0

    # Switch to eval mode once (not once per batch) and restore the caller's
    # mode afterwards so a surrounding training loop is not silently left in
    # eval mode.
    was_training = net.training
    net.eval()
    try:
        with torch.no_grad():
            for X, y in data_iter:
                X, y = X.to(device), y.to(device).long()
                correct += (torch.argmax(net(X), dim=1) == y).sum()
                n += y.shape[0]
    finally:
        net.train(was_training)

    if n == 0:
        return 0.0
    return correct.item() / n

def train_ch5(net, train_iter, test_iter, criterion, num_epochs, batch_size, device, lr=None):
    """Train ``net`` with plain SGD and print statistics after every epoch.

    Args:
        net: model to train; it is moved to ``device``.
        train_iter: iterable yielding ``(X, y)`` training batches.
        test_iter: iterable of ``(X, y)`` batches handed to ``evaluate_accuracy``.
        criterion: loss callable invoked as ``criterion(y_hat, y)``.
        num_epochs: number of passes over ``train_iter``.
        batch_size: unused; kept so existing call sites keep working.
        device: ``torch.device`` the model and batches are moved to.
        lr: SGD learning rate; must be given despite the ``None`` default.

    Raises:
        ValueError: if ``lr`` is ``None``.
    """
    if lr is None:
        raise ValueError("train_ch5 requires a learning rate (lr)")
    print("train on ", device)

    net.to(device)
    optimizer = optim.SGD(net.parameters(), lr=lr)
    # A mean-reduced loss has to be weighted by the batch size before it is
    # summed, otherwise dividing by the sample count under-reports the loss
    # by roughly a factor of the batch size. A sum-reduced loss is added as is.
    loss_is_summed = getattr(criterion, "reduction", "mean") == "sum"

    for epoch in range(num_epochs):
        # Evaluation at the end of an epoch may leave the model in eval mode.
        net.train()
        train_l_sum = torch.zeros(1, dtype=torch.float32, device=device)
        train_acc_sum = torch.zeros(1, dtype=torch.float32, device=device)
        n, start = 0, time.time()

        for X, y in train_iter:
            X, y = X.to(device), y.to(device)
            optimizer.zero_grad()
            y_hat = net(X)
            loss = criterion(y_hat, y)
            loss.backward()
            optimizer.step()

            with torch.no_grad():
                batch_n = y.shape[0]
                batch_loss = loss.detach().float()
                train_l_sum += batch_loss if loss_is_summed else batch_loss * batch_n
                train_acc_sum += (torch.argmax(y_hat, dim=1) == y.long()).sum().float()
                n += batch_n

        test_acc = evaluate_accuracy(test_iter, net, device)
        seen = max(n, 1)  # avoid dividing by zero when train_iter is empty
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, '
              'time %.1f sec'
              % (epoch + 1, train_l_sum.item() / seen, train_acc_sum.item() / seen,
                 test_acc, time.time() - start))

# Training hyperparameters: SGD learning rate and number of training epochs.
lr = 0.1
num_epochs = 10

def init_weights(m):
    """Apply Xavier-uniform initialisation to the weight of a Linear/Conv2d module.

    Meant to be passed to ``Module.apply``; modules of any other type are left
    untouched, and biases are not modified.

    Args:
        m: the module to (possibly) initialise in place.
    """
    if isinstance(m, (nn.Linear, nn.Conv2d)):
        torch.nn.init.xavier_uniform_(m.weight)

# Xavier-initialise every Linear/Conv2d layer of the network.
net.apply(init_weights)
# Cross-entropy describes the distance between two probability distributions;
# the smaller it is, the closer the two are.
criterion = nn.CrossEntropyLoss()

# `device` must be assigned before it is passed on; pick the GPU when one is
# available and fall back to the CPU otherwise.
device = try_gpu()

train_ch5(net, train_iter, test_iter, criterion, num_epochs, batch_size, device, lr)

# Take the first test batch and move it to whichever device the model's
# parameters live on, so inputs and weights are guaranteed to match.
testdata, test_label = next(iter(test_iter))
model_device = next(net.parameters()).device
testdata, test_label = testdata.to(model_device), test_label.to(model_device)

print(testdata.shape, test_label.shape)

# Inference only: eval mode, and no autograd graph is needed.
net.eval()
with torch.no_grad():
    y_pre = net(testdata)

# Predicted classes next to the true labels for the first ten samples.
print(torch.argmax(y_pre, dim=1)[:10])

print(test_label[:10])

print("well done")

  • 項目中沒有精細的調整
  • 只調整了激活函數對準確率的影響
  • 調整了lr對準確率的影響
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章