這是針對於博客vs2017安裝和使用教程(詳細)的VGG16-CIFAR10項目新建示例
目錄
一、說明
1.網絡框架搭建教程請參看博主博客:PyTorch 入門實戰(四)——利用Torch.nn構建卷積神經網絡
2.這裏主要展示博主的代碼和運行結果,希望可以幫助到正在學習PyTorch的人們
二、代碼
1.nn_module_sample.py:裏面是VGG-16(帶有BatchNorm層)的網絡,注意classifier分類器部分(全連接部分)的輸入大小爲512,對應32×32的CIFAR-10圖像經過5次池化後得到的512×1×1特徵圖,與batch大小無關
import torch.nn as nn
class VGG16(nn.Module):
    """VGG-16 with batch normalization, sized for CIFAR-10 style images.

    The feature extractor is thirteen 3x3 convolutions (each followed by
    BatchNorm and ReLU) interleaved with five 2x2 max-pools, followed by a
    global average pool. The classifier is three fully connected layers.

    For 32x32 inputs the five max-pools already reduce the feature map to
    1x1, so the global average pool is an identity there. For larger inputs
    it collapses the remaining spatial grid so the classifier still receives
    exactly 512 features instead of failing with a shape mismatch.

    Args:
        num_classes: Number of output logits produced by the last layer.
    """

    # Output channels of each convolution, in order; "M" marks a 2x2 max-pool.
    _LAYOUT = (
        64, 64, "M",
        128, 128, "M",
        256, 256, 256, "M",
        512, 512, 512, "M",
        512, 512, 512, "M",
    )

    def __init__(self, num_classes=10):
        super().__init__()
        layers = []
        in_channels = 3
        for item in self._LAYOUT:
            if item == "M":
                layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
                continue
            layers.append(nn.Conv2d(in_channels, item, kernel_size=3, padding=1))
            layers.append(nn.BatchNorm2d(item))
            layers.append(nn.ReLU(True))
            in_channels = item
        # Parameter-free, so it occupies the same Sequential index as the
        # previous 1x1 average pool and leaves state_dict keys unchanged.
        layers.append(nn.AdaptiveAvgPool2d((1, 1)))
        self.features = nn.Sequential(*layers)

        self.classifier = nn.Sequential(
            # fully connected layer 14
            nn.Linear(512, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            # fully connected layer 15
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            # fully connected layer 16
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        """Return class logits for a batch of images.

        Args:
            x: Input batch with 3 channels (the first convolution takes 3
                input channels).

        Returns:
            Tensor with one row per sample and ``num_classes`` columns.
        """
        out = self.features(x)
        # Flatten everything except the batch dimension.
        out = out.view(out.size(0), -1)
        return self.classifier(out)
class testNet(nn.Module):
    """Small three-stage CNN showing three ways of registering layers.

    Stage 1 registers each module as its own attribute, stage 2 fills an
    ``nn.Sequential`` through ``add_module`` (named children), and stage 3
    passes the modules straight to the ``nn.Sequential`` constructor.

    Only the first two stages down-sample, so a 32x32 image leaves stage 3 as
    a 128x8x8 feature map. A global average pool reduces that to the 128
    features the classifier expects; without it the first ``Linear`` layer
    raises a shape mismatch on such inputs.

    Args:
        num_classes: Number of output logits produced by the last layer.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # Stage 1: individually registered modules.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.BN1 = nn.BatchNorm2d(64)
        self.relu1 = nn.ReLU(True)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 2: Sequential container with explicitly named children.
        layer2 = nn.Sequential()
        layer2.add_module('conv2', nn.Conv2d(64, 64, kernel_size=3, padding=1))
        layer2.add_module('BN2', nn.BatchNorm2d(64))
        layer2.add_module('relu2', nn.ReLU(True))
        layer2.add_module('pool2', nn.MaxPool2d(kernel_size=2, stride=2))
        self.layer2 = layer2

        # Stage 3: Sequential container with positional children.
        self.layer3 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(True),
        )

        # Parameter-free, so existing state_dict keys are unaffected.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))

        self.classifier = nn.Sequential(
            nn.Linear(128, 256),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(256, 256),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(256, num_classes),
        )

    def forward(self, x):
        """Return class logits for a batch of 3-channel images.

        Args:
            x: Input batch with 3 channels (``conv1`` takes 3 input channels).

        Returns:
            Tensor with one row per sample and ``num_classes`` columns.
        """
        out = self.conv1(x)
        out = self.BN1(out)
        out = self.relu1(out)
        out = self.pool1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        # Collapse whatever spatial grid is left to 1x1 (identity if it
        # already is 1x1) so the flattened size is always 128.
        out = self.avgpool(out)
        out = out.view(out.size(0), -1)
        return self.classifier(out)
if __name__ == '__main__':
    import torch

    # Smoke check: build the network on the GPU when CUDA is available
    # (otherwise on the CPU) and print its layer structure.
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda") if use_cuda else torch.device("cpu")
    net = VGG16()
    net = net.to(device)
    print(net)
2.train.py:包含參數設定、圖像預處理、數據集讀取、網絡創建、損失和優化、訓練和測試部分
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import os
import argparse
from tensorboardX import SummaryWriter
from nn_module_sample import VGG16
from torch.autograd import Variable
# ---------------------------------------------------------------------------
# Command-line options
# ---------------------------------------------------------------------------
def _str2bool(value):
    """Convert a command-line value such as "true" or "0" into a bool.

    Without this, argparse would hand back the raw string, and any non-empty
    string (including "False") is truthy.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ('1', 'true', 't', 'yes', 'y'):
        return True
    if text in ('0', 'false', 'f', 'no', 'n'):
        return False
    raise argparse.ArgumentTypeError('expected a boolean value, got %r' % (value,))


parser = argparse.ArgumentParser(description='cifar10')
# Explicit types: values given on the command line arrive as strings, which
# the optimizer (lr) and range() (epoch counts) cannot use directly.
parser.add_argument('--lr', default=1e-2, type=float, help='learning rate')
parser.add_argument('--epoch', default=15, type=int, help='time for ergodic')
parser.add_argument('--pre_epoch', default=0, type=int, help='begin epoch')
# Folder that receives the per-epoch weight files.
parser.add_argument('--outf', default='./model/', help='folder to output images and model checkpoints')
# Whether to resume from ./checkpoint/ckpt.t7 instead of starting from scratch.
parser.add_argument('--pre_model', default=True, type=_str2bool, help='use pre-model')
args = parser.parse_args()

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# ---------------------------------------------------------------------------
# Data pipeline
# ---------------------------------------------------------------------------
# Training images are augmented; test images are only converted and
# normalized. Both use the same normalization constants. The source kept
# (0.4914, 0.4822, 0.4465) / (0.2023, 0.1994, 0.2010) as a commented-out
# alternative; the active values are left unchanged so that existing
# checkpoints stay valid.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),   # zero-pad by 4 on each side, then crop a random 32x32 window
    transforms.RandomHorizontalFlip(),      # flip horizontally with probability 0.5
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=0)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=0)
# CIFAR-10 class names.
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

# ---------------------------------------------------------------------------
# Model, loss and optimizer
# ---------------------------------------------------------------------------
net = VGG16().to(device)
# Cross-entropy loss for multi-class classification.
criterion = nn.CrossEntropyLoss()
# Mini-batch SGD with momentum; weight_decay adds L2 regularization.
optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)

# ---------------------------------------------------------------------------
# Optional resume from the saved checkpoint
# ---------------------------------------------------------------------------
if args.pre_model:
    print("Resume from checkpoint...")
    # Raise explicitly instead of using assert, which is stripped under -O.
    if not os.path.isdir('checkpoint'):
        raise FileNotFoundError('Error: no checkpoint directory found')
    # map_location lets a checkpoint written on a GPU machine load on a
    # CPU-only one (and vice versa).
    state = torch.load('./checkpoint/ckpt.t7', map_location=device)
    net.load_state_dict(state['state_dict'])
    # The stored accuracy may be a 0-d tensor; keep a plain float.
    best_test_acc = float(state['acc'])
    pre_epoch = state['epoch']
else:
    # Start from scratch: no best accuracy recorded yet.
    best_test_acc = 0
    pre_epoch = args.pre_epoch
#訓練
# Training entry point: for every epoch, train on the training set, evaluate
# on the test set, write text and TensorBoard logs, save the epoch weights,
# and refresh ./checkpoint/ckpt.t7 whenever the test accuracy improves.
if __name__ == "__main__":
    writer = SummaryWriter(log_dir='./log')
    # torch.save does not create missing folders, so make sure the output
    # folder exists before the first per-epoch save.
    os.makedirs(args.outf, exist_ok=True)
    print("Start Training, VGG-16...")
    print(net)
    batches_per_epoch = len(trainloader)
    try:
        with open("acc.txt", "w") as acc_f, open("log.txt", "w") as log_f:
            for epoch in range(pre_epoch, args.epoch):
                print('\nEpoch: %d' % (epoch + 1))

                # ---- training pass ----
                net.train()
                sum_loss = 0.0
                correct = 0
                total = 0
                train_loss = 0.0
                train_acc = 0.0
                for i, (inputs, labels) in enumerate(trainloader):
                    inputs, labels = inputs.to(device), labels.to(device)
                    # Gradients accumulate across backward() calls, so clear
                    # them before each batch.
                    optimizer.zero_grad()
                    outputs = net(inputs)
                    loss = criterion(outputs, labels)
                    loss.backward()
                    optimizer.step()

                    # Running statistics kept as plain Python numbers.
                    sum_loss += loss.item()
                    predicted = outputs.argmax(dim=1)  # index of the highest score per row
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()
                    train_loss = sum_loss / (i + 1)
                    train_acc = 100. * correct / total
                    iteration = i + 1 + epoch * batches_per_epoch

                    print('[epoch:%d, iter:%d] Loss: %.03f | Acc: %.3f%% '
                          % (epoch + 1, iteration, train_loss, train_acc))
                    log_f.write('[epoch:%d, iter:%d] |Loss: %.03f | Acc: %.3f%% '
                                % (epoch + 1, iteration, train_loss, train_acc))
                    log_f.write('\n')
                    log_f.flush()

                # One TensorBoard point per epoch (the step is the epoch index).
                writer.add_scalar('loss/train', train_loss, epoch)
                writer.add_scalar('accuracy/train', train_acc, epoch)

                # ---- evaluation pass ----
                print("Waiting for test...")
                net.eval()
                correct = 0
                total = 0
                # No gradients are needed while evaluating.
                with torch.no_grad():
                    for images, labels in testloader:
                        images, labels = images.to(device), labels.to(device)
                        outputs = net(images)
                        predicted = outputs.argmax(dim=1)
                        total += labels.size(0)
                        correct += (predicted == labels).sum().item()
                acc = 100. * correct / total
                print('測試準確率爲: %.3f%%' % acc)
                writer.add_scalar('accuracy/test', acc, epoch)

                # ---- per-epoch weights and accuracy log ----
                print('Saving model...')
                # Zero-padded epoch number, matching the format in acc.txt.
                torch.save(net.state_dict(),
                           os.path.join(args.outf, 'net_%03d.pth' % (epoch + 1)))
                acc_f.write("epoch = %03d, accuracy = %.3f%%" % (epoch + 1, acc))
                acc_f.write('\n')
                acc_f.flush()

                # ---- best-model checkpoint ----
                if acc > best_test_acc:
                    print('Saving Best Model...')
                    state = {
                        'state_dict': net.state_dict(),
                        'acc': acc,
                        'epoch': epoch + 1,
                    }
                    os.makedirs('checkpoint', exist_ok=True)
                    torch.save(state, './checkpoint/ckpt.t7')
                    best_test_acc = acc
                writer.add_scalar('best_accuracy/test', float(best_test_acc), epoch)

            # Report the configured total; the loop variable is undefined
            # when no epoch was run (e.g. resuming an already finished run).
            print("Training Finished, Total Epoch = %d" % args.epoch)
    finally:
        # Flush and close the TensorBoard log even if training is interrupted.
        writer.close()
三、結果
1.打開cmd或者是Anaconda Prompt輸入指令,找到你的log目錄
tensorboard --logdir 你的文件夾目錄/log
例如博主的是這樣的
然後打開最後一行的網址http://DESKTOP-xxxxxx:6006(這裏每個電腦是不一樣的),例如博主的是這樣的
最終訓練準確率在89%左右,測試準確率在87%左右~
2.在訓練過程中還會生成data、model和checkpoint文件夾
四、注意事項
1.代碼裏參數設置部分pre_model是用來繼續訓練的,讀取的是此前訓練中測試準確率最高時存儲的checkpoint(./checkpoint/ckpt.t7),設置爲True即可繼續訓練,否則從頭開始訓練;其默認值爲True,首次訓練(尚無checkpoint文件夾)時需先設爲False
2.代碼裏參數設置部分lr學習率如果在訓練過程中準確率變化緩慢可以適當減小
3.代碼中的device在沒有gpu時會自動選擇cpu,xx.to(device)在cpu上同樣可用,因此沒有gpu時一般無需改動代碼;如果仍想手動關閉gpu,可以在代碼裏註釋掉這個部分
#使用gpu
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
並且所有的xx.to(device)都需要刪除;
或者不註銷上面的gpu使用,在每一個xx.to(device)之前加一句話:
if use_cuda:
例如:
#模型定義 VGG16
if use_cuda:
net = VGG16().to(device)
else:
net = VGG16()
返回至原博客:vs2017安裝和使用教程(詳細)