PyTorch Example: Building ResNet18 to Recognize MNIST

Autograd: Automatic Differentiation

At the center of all neural networks in PyTorch is the autograd package, which provides automatic differentiation for every operation on tensors. torch.Tensor is the core class of this package. If you set .requires_grad to True, all operations on that tensor are tracked. Once the computation is finished, calling .backward() computes all the gradients automatically, and they accumulate into the tensor's .grad attribute.

import torch

x = torch.ones(2,2,requires_grad = True)    # track all operations on x
y = x+2
z = y*y*3
out = z.mean()                              # out = mean(3*(x+2)^2)
out.backward()                              # compute d(out)/dx
print(x.grad)
"""
tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])
"""

Building a Neural Network

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super(Net,self).__init__()
        self.conv1 = nn.Conv2d(1,6,5)
        self.conv2 = nn.Conv2d(6,16,5)
        self.fc1 = nn.Linear(16*5*5,120)
        self.fc2 = nn.Linear(120,84)
        self.fc3 = nn.Linear(84,10)
        
    def forward(self,x):
        # expects a LeNet-style 1 x 32 x 32 input
        x = F.max_pool2d(F.relu(self.conv1(x)),(2,2))   # -> 6 x 14 x 14
        x = F.max_pool2d(F.relu(self.conv2(x)),2)       # -> 16 x 5 x 5
        x = x.view(-1,self.num_flat_features(x))        # flatten to 16*5*5 = 400 features
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
    
    def num_flat_features(self,x):
        size = x.size()[1:]     # all dimensions except the batch dimension
        num_features= 1
        for s in size:
            num_features*=s
        return num_features
    
net = Net()
print(net)


params = list(net.parameters())
#print(params)
input = torch.randn(1,1,32,32)      # a random batch of one 1 x 32 x 32 image
output = net(input)


target = torch.randn(10)            # a dummy target, just for demonstration
target = target.view(1,-1)          # reshape to the same shape as the output: 1 x 10
criterion = nn.MSELoss()
loss = criterion(output,target)


net.zero_grad()     # clear the gradients of all parameters
print('\nconv1.bias.grad before backward')
print(net.conv1.bias.grad)
loss.backward()
print('conv1.bias.grad after backward')
print(net.conv1.bias.grad)

optimizer = optim.SGD(net.parameters(),lr=0.01)
optimizer.zero_grad()   # clears the gradients that loss.backward() just computed
print('\n',net.conv1.bias)
optimizer.step()        # with zero gradients, plain SGD leaves the parameters unchanged
print(net.conv1.bias)

"""
Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

conv1.bias.grad before backward
None
conv1.bias.grad after backward
tensor([ 0.0152,  0.0140,  0.0029,  0.0044, -0.0089, -0.0060])

 Parameter containing:
tensor([-0.1782, -0.0445, -0.0111,  0.0683,  0.1568,  0.1192],
       requires_grad=True)
Parameter containing:
tensor([-0.1782, -0.0445, -0.0111,  0.0683,  0.1568,  0.1192],
       requires_grad=True)
"""

Why Are There Both torch.nn and torch.nn.functional?

Take conv2d as an example. In nn, Conv2d is a class, while torch.nn.functional provides a function; the nn version itself calls the conv2d function in torch.nn.functional. Layers that carry learnable parameters, such as convolutional and fully connected layers, are implemented as nn modules: otherwise you would have to define weight and bias yourself every time you set up a convolution, so nn packages this repetitive work in advance and still calls functional internally. Operations that need no trainable parameters, such as relu and max pooling, live in functional.

So both forms are needed; nn is essentially a further wrapper around functional.
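
A minimal sketch of the two styles side by side (the shapes are only for illustration): the module form creates and stores its own weight and bias, while the functional form either takes them explicitly or, for parameter-free operations like relu, needs nothing at all.

import torch
import torch.nn as nn
import torch.nn.functional as F

x = torch.randn(1,1,28,28)

# module form: the Conv2d object owns weight and bias
conv = nn.Conv2d(1,6,kernel_size=3,padding=1)
y1 = F.relu(conv(x))                # relu has no parameters, so the functional form suffices

# functional form: weight and bias are created and passed in by hand
weight = torch.randn(6,1,3,3,requires_grad=True)
bias = torch.zeros(6,requires_grad=True)
y2 = F.relu(F.conv2d(x,weight,bias,padding=1))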

Training on the GPU with torch: CUDA Operations

https://blog.csdn.net/qq_21578849/article/details/85240797

There are three main steps (a short sketch follows the list):

  1. Move the model to the GPU
  2. Move the data to the GPU
  3. Convert the results from CUDA back to numpy
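
A minimal sketch of the three steps, assuming a model and a (data, target) batch already exist:

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = model.to(device)                             # 1. move the model to the GPU
data,target = data.to(device),target.to(device)      # 2. move the data to the GPU
output = model(data)
result = output.detach().cpu().numpy()               # 3. move the result back to the CPU and convert to numpy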

Multi-GPU Training: DataParallel

https://blog.csdn.net/weixin_40087578/article/details/87186613

Parallel computation happens only in the forward pass.

The basic procedure of multi-GPU training (a short sketch follows the list):

  • First load the model onto a primary device
  • Make read-only copies of the model on the other devices
  • Split the large batch of data evenly across the devices as well
  • Finally merge the gradients computed on all devices and update the model parameters on the primary device
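
A minimal sketch of wrapping a model with DataParallel, assuming more than one GPU is visible (the Linear layer is just a stand-in for any nn.Module):

import torch
import torch.nn as nn

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = nn.Linear(10,2)                  # stand-in for any model, e.g. the ResNet defined below

if torch.cuda.device_count() > 1:
    model = nn.DataParallel(model)       # splits each batch across the visible GPUs in forward()
model = model.to(device)

# the rest of the training loop is unchanged: outputs and gradients are gathered
# back onto the primary device, where the optimizer updates the parameters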

Example: Building ResNet18 to Recognize MNIST

Environment

torch 1.0.0

torchvision 0.2.0

The example consists of two scripts, mnist.py and myNet.py.

Downloading the MNIST data requires a proxy to get around network restrictions. Alternatively, download the four files separately and replace the data-loading part of the script with the path to the directory that contains them. The data can be found in my GitHub repository:

https://github.com/AishuaiYao/PyTorch

mnist.py

import torch
import torch.nn.functional as F
import torch.optim as optim
from torch.utils import data
from torchvision import datasets,transforms
from torchsummary import summary
import myNet

batch_size = 128
epochs = 20
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

train_data = datasets.MNIST('./data', train=True, download=True,transform=transforms.Compose(
                            [transforms.ToTensor(),transforms.Normalize((0.1307,), (0.3081,))]))
test_data =  datasets.MNIST('./data', train=False, transform=transforms.Compose(
                            [transforms.ToTensor(),transforms.Normalize((0.1307,), (0.3081,))]))


train_loader = torch.utils.data.DataLoader(train_data,batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_data,batch_size=batch_size, shuffle=True)

# model = myNet.ConvNet().to(device)
model = myNet.ResNet().to(device)
summary(model,(1,28,28))
optimizer = optim.Adam(model.parameters())

def train(model,device,train_loader,optimizer,epoch):
    model.train()
    for batch_idx,(data,target) in enumerate(train_loader):
        data,target = data.to(device),target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output,target)
        loss.backward()
        optimizer.step()
        if batch_idx % 30 == 0:
            print('train epoch {} : {}/{} \t loss : {:.6f}'.format(
                epoch,batch_idx*len(data),len(train_loader.dataset),loss.item()))


def test(model,device,test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():   # if requires_grad=True but you do not want autograd to track the computation, wrap it in torch.no_grad()
        for data,target in test_loader:
            data,target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.cross_entropy(output,target,reduction='sum').item()
            pred = output.max(1,keepdim = True)[1]
            correct +=pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    print('\nvalid loss : {:.4f} \t accuracy : {:.3f}%\n'.format(
                                                    test_loss,100. * correct / len(test_loader.dataset)))

for epoch in range(epochs):
    train(model,device,train_loader,optimizer,epoch)
    test(model,device,test_loader)

myNet.py

import torch
import torch.nn as nn
import torch.nn.functional as F


class ConvNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1,10,5)
        self.conv2 = nn.Conv2d(10,20,3)
        self.fc1 = nn.Linear(20*10*10,500)
        self.fc2 = nn.Linear(500,10)

    def forward(self, x):
        in_size = x.size(0)
        out = self.conv1(x)
        out = F.relu(out)
        out = F.max_pool2d(out,2,2)

        out = self.conv2(out)
        out = F.relu(out)
        out = out.view(in_size,-1)   # torch.view reshapes a tensor, similar to numpy's reshape

        out = self.fc1(out)
        out = F.relu(out)
        out = self.fc2(out)
        # log_softmax returns log-probabilities, which pair with F.nll_loss;
        # note that mnist.py uses F.cross_entropy, which applies log_softmax itself and expects raw scores
        out = F.log_softmax(out,dim = 1)
        return out


class BasicBlock(nn.Module):
    def __init__(self,inchannel,outchannel,s = 1):
        super().__init__()
        self.left = nn.Sequential(
            nn.Conv2d(inchannel,outchannel,kernel_size=3,stride = s,padding=1),
            nn.BatchNorm2d(outchannel),
            nn.ReLU(inplace = True),
            nn.Conv2d(outchannel,outchannel,kernel_size=3,stride = 1,padding=1),
            nn.BatchNorm2d(outchannel)
        )
        self.shortcut = nn.Sequential()     # identity shortcut by default
        if s != 1 or inchannel != outchannel:
            # 1x1 convolution so the shortcut matches the shape of the residual branch
            self.shortcut = nn.Sequential(
                nn.Conv2d(inchannel,outchannel,kernel_size=1,stride =s),
                nn.BatchNorm2d(outchannel)
            )

    def forward(self,x):
        out = self.left(x)
        out += self.shortcut(x)
        out = F.relu(out)
        return out

class ResNet(nn.Module):
    def __init__(self,residualBlock=BasicBlock,n_class=10):
        super().__init__()
        self.inchannel = 64
        self.conv1  = nn.Sequential(
            nn.Conv2d(1,64,kernel_size=7,stride = 2,padding=3),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace = True),
        )
        self.pooling = nn.Sequential(nn.MaxPool2d(kernel_size=3,stride=2,padding=1))

        self.layer1 = self.maker_layer(residualBlock,64,2,s = 1)
        self.layer2 = self.maker_layer(residualBlock,128,2,s = 2)
        self.layer3 = self.maker_layer(residualBlock,256,2,s = 2)
        self.layer4 = self.maker_layer(residualBlock,512,2,s = 2)
        self.fc = nn.Linear(512,n_class)


    def maker_layer(self,block,channels,n_blocks,s):
        # the first block of a stage may downsample with stride s; the remaining blocks use stride 1
        strides = [s]+[1]*(n_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.inchannel,channels,stride))
            self.inchannel = channels
        return nn.Sequential(*layers)


    def forward(self,x):
        out = self.conv1(x)
        out = self.pooling(out)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        # global average pooling; with 28x28 MNIST inputs the feature map is already 1x1 here,
        # and the original F.avg_pool2d(out,4) would raise a size error on such a small map
        out = F.adaptive_avg_pool2d(out,1)
        out = out.view(out.size(0),-1)
        out = self.fc(out)

        return out

 
