# Standard imports for the MNIST CNN example.
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms
# Print library versions so runs can be matched against the logged output below.
print("torch", torch.__version__)
print("torchvision", torchvision.__version__)
# Prefer GPU when available; models and batches are moved to this device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch 1.2.0+cu92
torchvision 0.4.0+cpu
class CNNModel(nn.Module):
    """Small LeNet-style CNN for 1x28x28 MNIST digits.

    Shapes through the network (per the comments in forward):
        conv1: 1x28x28 -> 20x24x24, pool -> 20x12x12
        conv2: -> 50x8x8,           pool -> 50x4x4
        fc1:   800 -> 500, fc2: 500 -> 10 log-probabilities
    """

    def __init__(self):
        super(CNNModel, self).__init__()
        # 20 filters over the single grayscale channel, 5x5 kernel, stride 1.
        self.conv1 = nn.Conv2d(in_channels=1,
                               out_channels=20,
                               kernel_size=5,
                               stride=1)
        self.conv2 = nn.Conv2d(20, 50, 5, 1)
        self.fc1 = nn.Linear(4*4*50, 500)
        self.fc2 = nn.Linear(500, 10)

    def forward(self, x):
        """Return per-class log-probabilities, shape (batch, 10)."""
        x = nn.functional.relu(self.conv1(x))  # 1*28*28 -> 20*24*24
        x = nn.functional.max_pool2d(x, kernel_size=2, stride=2)  # -> 20*12*12
        x = nn.functional.relu(self.conv2(x))  # -> 50*8*8
        x = nn.functional.max_pool2d(x, 2, 2)  # -> 50*4*4
        x = x.view(-1, 4*4*50)  # flatten for the fully-connected head
        x = nn.functional.relu(self.fc1(x))
        x = self.fc2(x)
        return nn.functional.log_softmax(x, dim=1)
# Compute the global mean and std of the raw MNIST pixels, used for the
# Normalize(...) values in the dataloaders below.
# NOTE(review): `mnist_data` is not defined in this file as shown — presumably
# a datasets.MNIST(..., transform=transforms.ToTensor()) instance created in a
# prior cell; confirm before running.
data = [d[0].data.cpu().numpy() for d in mnist_data]
print(np.mean(data))
print(np.std(data))
batch_size = 32
# One shared preprocessing pipeline for both splits: to tensor, then normalize
# with the MNIST mean/std computed above.
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.13066062,), std=(0.30810776,)),
])
train_dataloader = torch.utils.data.DataLoader(
    datasets.MNIST("./mnist_data", train=True, download=False,
                   transform=mnist_transform),
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,
    pin_memory=True,  # pin host memory to speed up host-to-device copies
)
test_dataloader = torch.utils.data.DataLoader(
    datasets.MNIST("./mnist_data", train=False, download=False,
                   transform=mnist_transform),
    batch_size=batch_size,
    shuffle=False,
    num_workers=0,
    pin_memory=True,
)
# Data loader. Combines a dataset and a sampler, and provides an iterable over
# the given dataset.
def train(model, train_loader, optimizer, epoch):
    """Run one training epoch of `model` over `train_loader`.

    Args:
        model: network producing log-probabilities (e.g. CNNModel).
        train_loader: DataLoader yielding (data, target) batches.
        optimizer: optimizer over model.parameters().
        epoch: epoch index, used only for logging.
    """
    model.train()
    for idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        pred = model(data)  # shape: batch_size * 10 (log-probabilities)
        loss = nn.functional.nll_loss(pred, target)
        # Bug fix: gradients must be cleared before backward, and step() must
        # be called — originally the optimizer never updated the weights,
        # which is why the logged loss stayed near ln(10) ≈ 2.3 all epoch.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if idx % 10 == 0:
            print(f"Train Epoch: {epoch}, iteration: {idx}, Loss: {loss.item()}")
def test(model, test_loader):
    """Evaluate `model` on `test_loader`; print mean NLL loss and accuracy (%)."""
    model.eval()
    total_loss = 0.
    correct = 0.
    with torch.no_grad():
        for idx, (data, target) in enumerate(test_loader):
            data, target = data.to(device), target.to(device)
            output = model(data)  # log-probabilities, batch_size * 10
            # Bug fix: nll_loss must be computed on the log-probabilities, not
            # on the argmax class indices, and accumulated into total_loss
            # (it was previously discarded each iteration).
            total_loss += nn.functional.nll_loss(output, target, reduction='sum').item()
            pred = output.argmax(dim=1)  # shape: batch_size
            correct += pred.eq(target.view_as(pred)).sum().item()
            # Bug fix: the original per-batch print crashed — `epoch` is not
            # defined here and `loss` was a float with no .item(); dropped.
    total_loss /= len(test_loader.dataset)
    acc = correct / len(test_loader.dataset) * 100
    print(f"Test loss: {total_loss}, Accuracy: {acc}")
# Hyperparameters for SGD with momentum.
lr = 0.01
momentum = 0.5
model = CNNModel().to(device)
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
num_epochs = 2
# Train for num_epochs, evaluating on the test split after each epoch.
for epoch in range(num_epochs):
    train(model, train_dataloader, optimizer, epoch)
    test(model, test_dataloader)
# Persist only the weights (state_dict), not the module object.
torch.save(model.state_dict(), "mnist_cnn.pytorch_model.bin")
Train Epoch: 0, iteration: 0, Loss: 2.318666934967041
Train Epoch: 0, iteration: 10, Loss: 2.2817282676696777
Train Epoch: 0, iteration: 20, Loss: 2.321716070175171
Train Epoch: 0, iteration: 30, Loss: 2.3526699542999268
Train Epoch: 0, iteration: 40, Loss: 2.3152010440826416
Train Epoch: 0, iteration: 50, Loss: 2.2989394664764404
...
Train Epoch: 0, iteration: 1870, Loss: 2.276136636734009
CNN模型的遷移學習
很多時候需要訓練一個新的圖像分類任務,我們不會完全從一個隨機的模型開始訓練,而是利用預訓練的模型加速訓練的過程。我們經常使用在ImageNet上的預訓練模型。
用兩種方法做遷移學習:
- 微調:從一個訓練模型開始,改變一些模型的架構,然後繼續訓練整個模型的參數
- 特徵提取:不再改變預訓練模型的參數,而是只更新我們改變過的部分模型參數。
構建和遷移模型的基本過程:
- 初始化預訓練模型
- 把最後一層的輸出改變成我們想要分類的類別總數
- 定義一個optimizer來更新參數
- 模型訓練
# Imports for the transfer-learning section; `models` provides the
# pretrained backbones (resnet18 below).
import numpy as np
import torchvision
from torchvision import datasets, transforms, models
import os
print(np.__version__)
print(torchvision.__version__)
import matplotlib.pyplot as plt
import time
import copy
1.18.4
0.4.0+cpu
數據
我們會使用hymenoptera_data數據集 下載地址: https://download.pytorch.org/tutorial/hymenoptera_data.zip.
這個數據集會包括兩類圖片,bees和ants,這些數據都被處理成了可以使用ImageFolder
來讀取的格式。
我們只需要把data_dir
設置成數據的根目錄,然後把model_name
設置成我們想要使用的預訓練模型:resnet, alexnet, vgg, squeezenet, densenet, inception
其它參數有:
num_classes
表示數據集的類別總數
# Transfer-learning configuration.
data_dir = "./hymenoptera_data"  # root containing train/ and val/ subfolders
model_name = "resnet"            # which pretrained backbone to use
num_classes = 2                  # ants vs. bees
batch_size = 32
num_epochs = 15
feature_extract = True           # True: freeze backbone, train only the new head
input_size = 224                 # input resolution expected by resnet18

# Training-set augmentation plus ImageNet channel statistics.
train_augmentation = transforms.Compose([
    transforms.RandomResizedCrop(input_size),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
all_imgs = datasets.ImageFolder(os.path.join(data_dir, "train"), train_augmentation)
loader = torch.utils.data.DataLoader(all_imgs, batch_size, shuffle=True)
# Per-split preprocessing: random crop/flip augmentation for training,
# deterministic resize + center crop for validation; both normalize with
# ImageNet channel statistics.
_imagenet_norm = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
data_transforms = {
    "train": transforms.Compose([
        transforms.RandomResizedCrop(input_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        _imagenet_norm,
    ]),
    "val": transforms.Compose([
        transforms.Resize(input_size),
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        _imagenet_norm,
    ]),
}
image_datasets = {
    split: datasets.ImageFolder(os.path.join(data_dir, split), data_transforms[split])
    for split in ("train", "val")
}
dataloaders_dict = {
    split: torch.utils.data.DataLoader(
        image_datasets[split], batch_size=batch_size, shuffle=True, num_workers=4)
    for split in ("train", "val")
}
unloader = transforms.ToPILImage()  # tensor (C,H,W) -> PIL image for display
plt.ion()  # interactive mode so plt.pause() refreshes the figure
def imshow(tensor, title=None):
    """Display an image tensor with pyplot.

    Args:
        tensor: image tensor; a leading singleton batch dim is squeezed away.
        title: optional figure title.
    """
    image = tensor.cpu().clone()  # clone so display never aliases training data
    image = image.squeeze(0)      # drop a leading batch dimension if present
    image = unloader(image)       # convert to PIL for plt.imshow
    plt.imshow(image)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # brief pause so the plot window updates
plt.figure()
# Bug fix: `img` was never defined — pull one augmented batch from the loader
# and display its fifth image.
images, _labels = next(iter(loader))
imshow(images[4], title="Image")
def set_parameter_requires_grad(model, feature_extract):
    """Freeze every parameter of `model` when feature_extract is True.

    Frozen parameters are skipped by autograd, so only layers replaced
    afterwards (e.g. the new fc head) receive gradient updates. When
    feature_extract is False this is a no-op (full fine-tuning).
    """
    if feature_extract:
        for param in model.parameters():
            param.requires_grad = False
def initialize_model(model_name, n_classes, feature_extract, use_pretrained=True):
    """Build a pretrained backbone with its classifier head replaced.

    Args:
        model_name: backbone identifier; only 'resnet' is implemented here.
        n_classes: output size of the new final layer.
        feature_extract: if True, freeze all pretrained weights first.
        use_pretrained: load ImageNet weights.

    Returns:
        (model, input_size) where input_size is the expected image resolution.

    Raises:
        ValueError: for an unsupported model_name (previously this fell
        through and crashed with a confusing NameError).
    """
    if model_name == 'resnet':
        model_ft = models.resnet18(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        n_features = model_ft.fc.in_features
        # The replacement Linear defaults to requires_grad=True, so only the
        # new head trains in feature-extraction mode.
        model_ft.fc = nn.Linear(n_features, n_classes)
        input_size = 224
        return model_ft, input_size
    raise ValueError(f"unsupported model_name: {model_name!r}")
# Instantiate the feature-extraction model; downloads resnet18 weights to the
# local torch cache on first run.
model_ft, input_size = initialize_model(model_name, num_classes, feature_extract, True)
# Downloading: "https://download.pytorch.org/models/resnet18-5c106cde.pth" to C:\Users\Ken/.cache\torch\checkpoints\resnet18-5c106cde.pth
def train_model(model, dataloaders, loss_fn, optimizer, n_epochs=5):
    """Train with per-epoch validation, keeping the best-validation weights.

    Args:
        model: network to train (already moved to `device`).
        dataloaders: dict with "train" and "val" DataLoaders.
        loss_fn: criterion taking (outputs, labels).
        optimizer: updates only the params it was constructed with.
        n_epochs: number of epochs to run.

    Returns:
        (model restored to its best-val-accuracy weights,
         list of per-epoch validation accuracies).
    """
    best_model_weights = copy.deepcopy(model.state_dict())
    best_acc = 0.
    val_acc_history = []
    for epoch in range(n_epochs):
        for phase in ["train", "val"]:
            running_loss = 0.
            running_corrects = 0
            if phase == "train":
                model.train()
            else:
                model.eval()
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)
                # Track gradients only during the training phase.
                with torch.autograd.set_grad_enabled(phase == "train"):
                    outputs = model(inputs)  # bsize * num_classes
                    loss = loss_fn(outputs, labels)
                print(".", end="")  # per-batch progress marker
                preds = outputs.argmax(dim=1)
                if phase == "train":
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                # Weight batch loss by batch size so epoch_loss is a true mean.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds.view(-1) == labels.view(-1)).item()
            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects / len(dataloaders[phase].dataset)
            print(f"{phase} loss: {epoch_loss}, acc:{epoch_acc}")
            # Snapshot the weights whenever validation accuracy improves.
            if phase == "val" and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_weights = copy.deepcopy(model.state_dict())
            if phase == "val":
                val_acc_history.append(epoch_acc)
    model.load_state_dict(best_model_weights)
    return model, val_acc_history
model_ft = model_ft.to(device)
# Optimize only the parameters left trainable by set_parameter_requires_grad
# (i.e. just the replaced fc head when feature_extract=True).
optimizer = optim.SGD(filter(lambda p: p.requires_grad, model_ft.parameters()), lr=0.001, momentum=0.9)
loss_fn = nn.CrossEntropyLoss()
_, val_acc_history = train_model(model_ft, dataloaders_dict, loss_fn, optimizer, n_epochs=num_epochs)
........train loss: 0.661804810899203, acc:0.6024590163934426
.....val loss: 0.5719855747191734, acc:0.7189542483660131
........train loss: 0.5182735797811727, acc:0.7622950819672131
..
# Plot the validation-accuracy curve returned by train_model.
plt.title("Validation Accuracy vs. Number of Training Epochs")
plt.xlabel("Training Epochs")
plt.ylabel("Validation Accuracy")
# Bug fixes: `ohist` was undefined — plot val_acc_history from train_model;
# plt.ticks -> plt.xticks; np.arrange -> np.arange.
plt.plot(range(1, num_epochs + 1), val_acc_history, label="Pretrained")
plt.ylim((0, 1))
plt.xticks(np.arange(1, num_epochs + 1, 1.0))
plt.legend()
plt.show()