vs2017 VGG16處理cifar-10數據集的PyTorch實現

這是針對於博客vs2017安裝和使用教程(詳細)的VGG16-CIFAR10項目新建示例


目錄

一、說明

二、代碼

三、結果

四、注意事項


一、說明

1.網絡框架搭建教程請參看博主博客:PyTorch 入門實戰(四)——利用Torch.nn構建卷積神經網絡

2.這裏主要展示博主的代碼運行結果,希望可以幫助到正在學習PyTorch的人們

二、代碼

1.nn_module_sample.py裏面是VGG-16(帶有BatchNorm層)的網絡,注意classifier分類器部分(全連接部分)的輸入大小根據輸入圖像的尺寸而定(與batch大小無關):CIFAR-10的32×32圖像經過5次2×2池化後特徵圖爲512×1×1,展平後即爲512

import torch.nn as nn

class VGG16(nn.Module):
    """VGG-16 with batch normalisation after every convolution.

    The feature extractor is 13 blocks of (3x3 conv, padding 1) -> BatchNorm
    -> ReLU, with a 2x2/stride-2 max-pool closing each of the five stages.
    The classifier's first layer takes 512 inputs, so the flattened feature
    map must hold exactly 512 values per sample; with five halving pools
    that corresponds to a 32x32 input (512 channels x 1 x 1).

    Args:
        num_classes: Number of output logits produced by the last layer.
    """

    def __init__(self, num_classes=10):
        super(VGG16, self).__init__()

        # Output width of each conv layer in order; 'M' marks a max-pool.
        layout = (
            64, 64, 'M',
            128, 128, 'M',
            256, 256, 256, 'M',
            512, 512, 512, 'M',
            512, 512, 512, 'M',
        )

        stages = []
        channels = 3  # the first convolution consumes 3-channel images
        for width in layout:
            if width == 'M':
                stages.append(nn.MaxPool2d(kernel_size=2, stride=2))
                continue
            stages.append(nn.Conv2d(channels, width, kernel_size=3, padding=1))
            stages.append(nn.BatchNorm2d(width))
            stages.append(nn.ReLU(True))
            channels = width
        # A 1x1 window with stride 1 leaves the tensor values unchanged.
        stages.append(nn.AvgPool2d(kernel_size=1, stride=1))
        self.features = nn.Sequential(*stages)

        # Layers 14-16: two hidden fully connected layers, then the logits.
        hidden = 4096
        self.classifier = nn.Sequential(
            nn.Linear(512, hidden),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(hidden, hidden),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(hidden, num_classes),
        )

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for images x."""
        feats = self.features(x)
        # Collapse everything except the batch dimension before the FC layers.
        flat = feats.view(feats.size(0), -1)
        return self.classifier(flat)


class testNet(nn.Module):
    """Small demo CNN showing three ways to register layers on a Module.

    conv1/BN1/relu1/pool1 are plain attributes, layer2 is a Sequential
    filled through add_module (named sub-layers), and layer3 is a Sequential
    built from positional arguments.

    The classifier expects 128 features per sample. Only two 2x2 pools are
    applied, so a 32x32 input leaves a 128x8x8 map that the first Linear
    layer would reject. A global average pool therefore reduces the map to
    128x1x1 before flattening. It has no parameters (state_dict keys are
    unchanged) and is an identity on maps that are already 1x1.

    Args:
        num_classes: Number of output logits produced by the last layer.
    """

    def __init__(self, num_classes=10):
        super(testNet, self).__init__()
        # Style 1: individual layers stored as attributes.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.BN1 = nn.BatchNorm2d(64)
        self.relu1 = nn.ReLU(True)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Style 2: a Sequential populated with explicitly named sub-modules.
        layer2 = nn.Sequential()
        layer2.add_module('conv2', nn.Conv2d(64, 64, kernel_size=3, padding=1))
        layer2.add_module('BN2', nn.BatchNorm2d(64))
        layer2.add_module('relu2', nn.ReLU(True))
        layer2.add_module('pool2', nn.MaxPool2d(kernel_size=2, stride=2))
        self.layer2 = layer2

        # Style 3: a Sequential built from positional arguments.
        self.layer3 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(True),
        )

        # Collapses any HxW feature map to 1x1 so the classifier always
        # receives exactly 128 features, whatever the input resolution.
        self.global_pool = nn.AdaptiveAvgPool2d((1, 1))

        self.classifier = nn.Sequential(
            nn.Linear(128, 256),
            nn.ReLU(True),
            nn.Dropout(),

            nn.Linear(256, 256),
            nn.ReLU(True),
            nn.Dropout(),

            nn.Linear(256, num_classes),
        )

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for images x."""
        out = self.conv1(x)
        out = self.BN1(out)
        out = self.relu1(out)
        out = self.pool1(out)

        out = self.layer2(out)
        out = self.layer3(out)

        out = self.global_pool(out)
        out = out.view(out.size(0), -1)
        out = self.classifier(out)
        return out

if __name__ == '__main__':
    import torch

    # Prefer CUDA when the runtime reports a usable GPU; otherwise use CPU.
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda") if use_cuda else torch.device("cpu")

    # Build the network, move it to the chosen device and print its layers.
    net = VGG16()
    net = net.to(device)
    print(net)

2.train.py:包含參數設定、圖像預處理、數據集讀取、網絡創建、損失和優化、訓練和測試部分

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

import os
import argparse

from tensorboardX import SummaryWriter

from nn_module_sample import VGG16
from torch.autograd import Variable

# Command-line options.
def _str2bool(value):
    """Convert a command-line value such as 'true' or 'False' into a bool.

    Without this, argparse hands back the raw string, and every non-empty
    string (including 'False') is truthy.

    Raises:
        argparse.ArgumentTypeError: if the text is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    lowered = str(value).strip().lower()
    if lowered in ('true', 't', 'yes', 'y', '1'):
        return True
    if lowered in ('false', 'f', 'no', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('expected a boolean value, got %r' % (value,))


parser = argparse.ArgumentParser(description='cifar10')
# Explicit types: values given on the command line would otherwise arrive as
# strings and break the optimizer (lr) and range() (epoch / pre_epoch).
parser.add_argument('--lr', type=float, default=1e-2, help='learning rate')
parser.add_argument('--epoch', type=int, default=15, help='time for ergodic')
parser.add_argument('--pre_epoch', type=int, default=0, help='begin epoch')
# Folder that receives the per-epoch model files.
parser.add_argument('--outf', default='./model/', help='folder to output images and model checkpoints')
# Whether to resume from the saved checkpoint; pass "--pre_model False" to
# train from scratch.
parser.add_argument('--pre_model', type=_str2bool, default=True, help='use pre-model')
args = parser.parse_args()

# Select the compute device: CUDA when available, CPU otherwise.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")

# Data preprocessing and augmentation.
# Per-channel mean / std handed to Normalize for both splits.
# NOTE(review): these look like the ImageNet statistics rather than values
# measured on CIFAR-10 - confirm that this is intended.
_CHANNEL_MEAN = (0.485, 0.456, 0.406)
_CHANNEL_STD = (0.229, 0.224, 0.225)

# Steps shared by training and evaluation: tensor conversion + normalisation.
_to_normalized_tensor = [
    transforms.ToTensor(),
    transforms.Normalize(_CHANNEL_MEAN, _CHANNEL_STD),
]

# Training adds augmentation in front of the shared steps: pad 4 pixels on
# every side, take a random 32x32 crop, then flip horizontally at random.
transform_train = transforms.Compose(
    [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
    ]
    + _to_normalized_tensor
)

# Evaluation uses the shared steps only (no augmentation).
transform_test = transforms.Compose(list(_to_normalized_tensor))

# CIFAR-10 splits; the archive is downloaded into ./data when missing.
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform_train)
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform_test)

# Shuffled batches of 128 for training, fixed-order batches of 100 for
# testing; num_workers=0 loads the data in the main process.
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=128, shuffle=True, num_workers=0)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=100, shuffle=False, num_workers=0)

# Human-readable names of the ten CIFAR-10 labels.
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

# Model: instantiate VGG16 and move its parameters to the selected device.
net = VGG16()
net = net.to(device)

# Loss: cross-entropy, the usual choice for multi-class classification.
criterion = nn.CrossEntropyLoss()

# Optimizer: mini-batch SGD with momentum; weight_decay adds L2 regularisation.
optimizer = optim.SGD(
    params=net.parameters(),
    lr=args.lr,
    momentum=0.9,
    weight_decay=5e-4,
)

# Resume from the saved checkpoint when requested, otherwise start fresh.
if args.pre_model:
    print("Resume from checkpoint...")
    # Raised explicitly (same exception type and message as before) because a
    # bare `assert` is stripped when Python runs with -O.
    if not os.path.isdir('checkpoint'):
        raise AssertionError('Error: no checkpoint directory found')
    # map_location remaps the stored tensors onto the current device, so a
    # checkpoint written on a GPU machine also loads on a CPU-only one.
    state = torch.load('./checkpoint/ckpt.t7', map_location=device)
    net.load_state_dict(state['state_dict'])
    # The stored accuracy may be a 0-dim tensor; keep a plain float instead.
    best_test_acc = float(state['acc'])
    # Number of epochs already completed when the checkpoint was written.
    pre_epoch = state['epoch']
else:
    # Best test accuracy seen so far.
    best_test_acc = 0
    pre_epoch = args.pre_epoch

# Training entry point: one training pass and one test pass per epoch.
# Metrics go to stdout, log.txt / acc.txt and TensorBoard; model weights are
# written after every epoch, and the best-scoring state goes to ./checkpoint.
if __name__ == "__main__":

    # A checkpoint written by an earlier run may hold the best accuracy as a
    # 0-dim tensor; compare and log with a plain float.
    best_test_acc = float(best_test_acc)

    # torch.save does not create missing directories.
    if not os.path.isdir(args.outf):
        os.makedirs(args.outf)

    # Loop invariants.
    length = len(trainloader)  # number of batches per epoch
    # 1-based number of the last finished epoch; stays at pre_epoch when the
    # epoch range is empty, so the final message never hits an unbound name.
    finished_epoch = pre_epoch

    writer = SummaryWriter(log_dir='./log')
    print("Start Training, VGG-16...")
    # The architecture does not change between epochs, so print it once.
    print(net)
    try:
        with open("acc.txt", "w") as acc_f, open("log.txt", "w") as log_f:
            for epoch in range(pre_epoch, args.epoch):
                print('\nEpoch: %d' % (epoch + 1))

                # ---- training pass ----
                net.train()
                sum_loss = 0.0  # running sum of batch losses
                correct = 0     # correctly classified samples so far
                total = 0       # samples seen so far

                for i, (inputs, labels) in enumerate(trainloader):
                    inputs, labels = inputs.to(device), labels.to(device)

                    # Gradients accumulate across backward() calls, so clear
                    # them before every batch.
                    optimizer.zero_grad()
                    outputs = net(inputs)              # forward
                    loss = criterion(outputs, labels)
                    loss.backward()                    # backward
                    optimizer.step()                   # parameter update

                    # Running statistics, kept as Python numbers.
                    sum_loss += loss.item()
                    # Index of the highest logit in each row = predicted class.
                    _, predicted = torch.max(outputs.detach(), 1)
                    total += labels.size(0)
                    correct += predicted.eq(labels).sum().item()

                    stats = (epoch + 1, (i + 1 + epoch * length),
                             sum_loss / (i + 1), 100. * correct / total)
                    print('[epoch:%d, iter:%d] Loss: %.03f | Acc: %.3f%% ' % stats)

                    # Mirror the progress line into the log file.
                    log_f.write('[epoch:%d, iter:%d] |Loss: %.03f | Acc: %.3f%% ' % stats)
                    log_f.write('\n')
                    log_f.flush()

                # Epoch-level training metrics for TensorBoard.
                writer.add_scalar('loss/train', sum_loss / length, epoch)
                writer.add_scalar('accuracy/train', 100. * correct / total, epoch)

                # ---- test pass ----
                print("Waiting for test...")
                net.eval()  # switch once per pass, not once per batch
                correct = 0
                total = 0
                # No gradients are needed for evaluation.
                with torch.no_grad():
                    for images, labels in testloader:
                        images, labels = images.to(device), labels.to(device)
                        outputs = net(images)
                        _, predicted = torch.max(outputs, 1)
                        total += labels.size(0)
                        correct += predicted.eq(labels).sum().item()

                acc = 100. * correct / total
                print('測試準確率爲: %.3f%%' % acc)
                writer.add_scalar('accuracy/test', acc, epoch)

                # Per-epoch weights. Zero-padded epoch number: '%3d' would
                # pad with spaces and put blanks into the file name.
                print('Saving model...')
                torch.save(net.state_dict(),
                           os.path.join(args.outf, 'net_%03d.pth' % (epoch + 1)))
                acc_f.write("epoch = %03d, accuracy = %.3f%%" % (epoch + 1, acc))
                acc_f.write('\n')
                acc_f.flush()

                # Keep the best-scoring state as the resume checkpoint.
                if acc > best_test_acc:
                    print('Saving Best Model...')
                    state = {
                        'state_dict': net.state_dict(),
                        'acc': acc,  # plain float, loadable on any device
                        'epoch': epoch + 1,
                    }
                    if not os.path.isdir('checkpoint'):
                        os.mkdir('checkpoint')
                    torch.save(state, './checkpoint/ckpt.t7')
                    best_test_acc = acc
                    writer.add_scalar('best_accuracy/test', best_test_acc, epoch)

                finished_epoch = epoch + 1

        # Training finished.
        print("Training Finished, Total Epoch = %d" % finished_epoch)
    finally:
        # Flush and close the TensorBoard log even if training is interrupted.
        writer.close()



三、結果

1.打開cmd或者是Anaconda Prompt輸入指令,找到你的log目錄

tensorboard --logdir 你的文件夾目錄/log

例如博主的是這樣的

                          

然後打開最後一行的網址http://DESKTOP-xxxxxx:6006(這裏每個電腦是不一樣的),例如博主的是這樣的

最終訓練準確率89%左右,測試準確率87%左右~

2.在訓練過程中還會生成data、model、checkpoint文件夾

                          

四、注意事項

1.代碼裏參數設置部分pre_model是用來繼續訓練的,讀取的是此前測試準確率最高的那個epoch所存儲的checkpoint(./checkpoint/ckpt.t7),設置爲True即可繼續訓練,否則從頭開始訓練;注意默認值爲True,第一次訓練時還沒有checkpoint文件夾,需要先將其設置爲False

2.代碼裏參數設置部分lr學習率如果在訓練過程中準確率變化緩慢可以適當減小

3.注意如果沒有gpu,下面這段代碼會自動把device設爲cpu,此時xx.to(device)同樣可以正常運行,一般無需修改;如果仍想手動處理,則需要在代碼裏註釋掉這個部分

# Use the GPU when one is available, otherwise fall back to the CPU
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

並且所有的xx.to(device)都需要刪除;

或者不註銷上面的gpu使用,在每一個xx.to(device)之前加一句話

if use_cuda:

例如:

# Model definition: VGG16 (moved to the device only when CUDA is in use)
if use_cuda:
    net = VGG16().to(device)
else:
    net = VGG16()

返回至原博客:vs2017安裝和使用教程(詳細)

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章