PyTorch Learning (5) — Simple Image Classification

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms
print("torch", torch.__version__)
print("torchvision", torchvision.__version__)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch 1.2.0+cu92
torchvision 0.4.0+cpu
class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1,   # 1 input channel (grayscale MNIST images)
                               out_channels=20,  # 20 output feature maps
                               kernel_size=5,    # 5x5 convolution kernel
                               stride=1)         # slide the kernel one pixel at a time
        self.conv2 = nn.Conv2d(20, 50, 5, 1)
        self.fc1 = nn.Linear(4*4*50, 500)
        self.fc2 = nn.Linear(500, 10)
        
    def forward(self, x):
        x = nn.functional.relu(self.conv1(x))  # 1*28*28 -> 20*24*24
        x = nn.functional.max_pool2d(x, kernel_size=2, stride=2)  # -> 20*12*12
        x = nn.functional.relu(self.conv2(x))  # -> 50*8*8
        x = nn.functional.max_pool2d(x, 2, 2)  # -> 50*4*4
        x = x.view(-1, 4*4*50)  # flatten to (batch_size, 800) for the fully connected layers
        x = nn.functional.relu(self.fc1(x))
        x = self.fc2(x)
        return nn.functional.log_softmax(x, dim=1)
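The shape comments in forward can be checked with a quick dry run on a random batch (a small sketch, not part of the original notebook):

sanity_model = CNNModel()
dummy = torch.randn(4, 1, 28, 28)   # a fake batch of 4 grayscale 28x28 images
print(sanity_model(dummy).shape)    # expected: torch.Size([4, 10])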
# Compute the mean and standard deviation of the raw training images;
# the values are used as the Normalize parameters below.
mnist_data = datasets.MNIST("./mnist_data", train=True, download=True,
                            transform=transforms.ToTensor())
data = [d[0].data.cpu().numpy() for d in mnist_data]
print(np.mean(data))
print(np.std(data))
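transforms.Normalize applies (x - mean) / std element-wise, so with these statistics the normalized training set should have roughly zero mean and unit standard deviation. A quick check (a sketch, reusing the data list computed above):

normed = (np.concatenate(data) - np.mean(data)) / np.std(data)
print(normed.mean(), normed.std())  # should be close to 0 and 1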
batch_size = 32

train_dataloader = torch.utils.data.DataLoader(datasets.MNIST("./mnist_data", train=True, download=False, 
                                                    transform=transforms.Compose([
                                                        transforms.ToTensor(),
                                                        transforms.Normalize(mean=(0.13066062,),std=(0.30810776,))
                                                    ])),
                                               batch_size=batch_size, 
                                               shuffle=True, 
                                               num_workers=0, 
                                               pin_memory=True)  # keep batches in page-locked (pinned) memory to speed up host-to-GPU copies
test_dataloader = torch.utils.data.DataLoader(datasets.MNIST("./mnist_data", train=False, download=False, 
                                                    transform=transforms.Compose([
                                                        transforms.ToTensor(),
                                                        transforms.Normalize(mean=(0.13066062,),std=(0.30810776,))
                                                    ])),
                                               batch_size=batch_size, 
                                               shuffle=False, 
                                               num_workers=0, 
                                               pin_memory=True)
# Data loader. Combines a dataset and a sampler, and provides an iterable over
# the given dataset.
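One batch drawn from the iterable looks like this (a small sketch to show the shapes; with pin_memory=True and a CUDA device, the host-to-GPU copy can additionally be made asynchronous via non_blocking=True):

images, labels = next(iter(train_dataloader))
print(images.shape, labels.shape)               # torch.Size([32, 1, 28, 28]) torch.Size([32])
images = images.to(device, non_blocking=True)   # asynchronous copy when the batch sits in pinned memory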
def train(model, train_loader, optimizer, epoch):
    model.train()
    for idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        pred = model(data)  # shape: batch_size * 10
        loss = nn.functional.nll_loss(pred, target)
        optimizer.zero_grad()  # clear gradients from the previous step
        loss.backward()
        optimizer.step()       # update the parameters
        if idx % 10 == 0:
            print(f"Train Epoch: {epoch}, iteration: {idx}, Loss: {loss.item()}")
def test(model, test_loader):
    model.eval()
    total_loss = 0.
    correct = 0.
    with torch.no_grad():
        for idx, (data, target) in enumerate(test_loader):
            data, target = data.to(device), target.to(device)

            output = model(data)  # shape: batch_size * 10
            batch_loss = nn.functional.nll_loss(output, target, reduction='sum').item()
            total_loss += batch_loss
            pred = output.argmax(dim=1)  # shape: batch_size
            correct += pred.eq(target.view_as(pred)).sum().item()

            if idx % 10 == 0:
                print(f"Test iteration: {idx}, Loss: {batch_loss}")

    total_loss /= len(test_loader.dataset)
    acc = correct / len(test_loader.dataset) * 100
    print(f"Test loss: {total_loss}, Accuracy: {acc}")
lr = 0.01
momentum = 0.5
model = CNNModel().to(device)
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)

num_epochs = 2
for epoch in range(num_epochs):
    train(model, train_dataloader, optimizer, epoch)
    test(model, test_dataloader)

torch.save(model.state_dict(), "mnist_cnn.pytorch_model.bin")
Train Epoch: 0, iteration: 0, Loss: 2.318666934967041
Train Epoch: 0, iteration: 10, Loss: 2.2817282676696777
Train Epoch: 0, iteration: 20, Loss: 2.321716070175171
Train Epoch: 0, iteration: 30, Loss: 2.3526699542999268
Train Epoch: 0, iteration: 40, Loss: 2.3152010440826416
Train Epoch: 0, iteration: 50, Loss: 2.2989394664764404
...
Train Epoch: 0, iteration: 1870, Loss: 2.276136636734009
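To reuse the saved weights later, reload the state dict into a fresh model (a minimal sketch; the file name matches the torch.save call above):

model = CNNModel().to(device)
model.load_state_dict(torch.load("mnist_cnn.pytorch_model.bin", map_location=device))
model.eval()  # switch to evaluation mode before running inference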

Transfer Learning with a CNN Model

In many cases, when training a model for a new image classification task, we do not start from a randomly initialized model; instead we use a pretrained model to speed up training. Models pretrained on ImageNet are the most common choice.

There are two common ways to do transfer learning:

  • Fine-tuning: start from a pretrained model, modify part of its architecture, then continue training all of the model's parameters.
  • Feature extraction: keep the pretrained model's parameters fixed and update only the parameters of the layers we replace (see the sketch after this list).
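The practical difference shows up in which parameters are handed to the optimizer. A minimal sketch, assuming torchvision's resnet18 and a 2-class task (the variable names here are made up for illustration):

import torch.nn as nn
import torch.optim as optim
from torchvision import models

m = models.resnet18(pretrained=True)
# feature extraction: freeze every pretrained parameter first
for p in m.parameters():
    p.requires_grad = False
m.fc = nn.Linear(m.fc.in_features, 2)  # the new head's parameters default to requires_grad=True

# hand only the trainable parameters to the optimizer
opt = optim.SGD([p for p in m.parameters() if p.requires_grad], lr=0.001, momentum=0.9)
# for fine-tuning, skip the freezing loop and pass m.parameters() instead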

The basic steps for building a transfer-learning model:

  • Initialize the pretrained model
  • Change the last layer so its output size matches the total number of target classes
  • Define an optimizer to update the parameters
  • Train the model
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms, models
import os

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(np.__version__)
print(torchvision.__version__)
import matplotlib.pyplot as plt
import time
import copy
1.18.4
0.4.0+cpu

Data

We will use the hymenoptera_data dataset, which can be downloaded from https://download.pytorch.org/tutorial/hymenoptera_data.zip.

The dataset contains two classes of images, bees and ants, organized in a directory layout that can be read directly with ImageFolder.

We only need to set data_dir to the root directory of the data and set model_name to the pretrained model we want to use: resnet, alexnet, vgg, squeezenet, densenet, inception.

The other parameters are:

  • num_classes: the number of classes in the dataset (2 here: ants and bees)
  • batch_size: the batch size used for training
  • num_epochs: the number of training epochs
  • feature_extract: True for feature extraction (only the newly added layer is trained), False for fine-tuning the whole model
  • input_size: the image size the pretrained model expects as input
data_dir = "./hymenoptera_data"
model_name = "resnet"
num_classes = 2
batch_size = 32
num_epochs = 15
feature_extract = True

input_size = 224
all_imgs = datasets.ImageFolder(os.path.join(data_dir, "train"), transforms.Compose([
        transforms.RandomResizedCrop(input_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
]))
loader = torch.utils.data.DataLoader(all_imgs, batch_size, shuffle=True)
data_transforms = {
    "train": transforms.Compose([
        transforms.RandomResizedCrop(input_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    "val": transforms.Compose([
        transforms.Resize(input_size),
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}
image_datasets = {
    "train": datasets.ImageFolder(os.path.join(data_dir, "train"), data_transforms["train"]),
    "val": datasets.ImageFolder(os.path.join(data_dir, "val"), data_transforms["val"])
}
dataloaders_dict = {
    "train": torch.utils.data.DataLoader(image_datasets["train"], batch_size=batch_size, shuffle=True, num_workers=4),
    "val": torch.utils.data.DataLoader(image_datasets["val"], batch_size=batch_size, shuffle=True, num_workers=4)
}
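ImageFolder infers the class labels from the sub-directory names, so a quick check of what was loaded (a small sketch) looks like:

print(image_datasets["train"].classes)                            # ['ants', 'bees']
print(len(image_datasets["train"]), len(image_datasets["val"]))   # number of train / val images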
unloader = transforms.ToPILImage()
plt.ion()

def imshow(tensor, title=None):
    image = tensor.cpu().clone()
    image = image.squeeze(0)
    image = unloader(image)
    plt.imshow(image)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # pause briefly so the plot can update
    
img, _ = next(iter(loader))  # grab one batch from the loader defined above
plt.figure()
imshow(img[4], title="Image")

def set_parameter_requires_grad(model, feature_extract):
    # In feature-extraction mode, freeze all of the pretrained parameters
    if feature_extract:
        for param in model.parameters():
            param.requires_grad = False


def initialize_model(model_name, n_classes, feature_extract, use_pretrained=True):
    if model_name == 'resnet':
        model_ft = models.resnet18(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        # Replace the final fully connected layer with a new head for n_classes;
        # the new layer's parameters have requires_grad=True by default
        n_features = model_ft.fc.in_features
        model_ft.fc = nn.Linear(n_features, n_classes)
        input_size = 224
    else:
        raise ValueError(f"Unsupported model name: {model_name}")

    return model_ft, input_size

model_ft, input_size = initialize_model(model_name, num_classes, feature_extract, True)

# Downloading: "https://download.pytorch.org/models/resnet18-5c106cde.pth" to C:\Users\Ken/.cache\torch\checkpoints\resnet18-5c106cde.pth
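With feature_extract=True, only the newly created fc layer should remain trainable, which can be confirmed by listing the parameters that still require gradients (a quick sanity check):

for name, param in model_ft.named_parameters():
    if param.requires_grad:
        print(name)  # expected: fc.weight and fc.bias only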
def train_model(model, dataloaders, loss_fn, optimizer, n_epochs=5):
    best_model_weights = copy.deepcopy(model.state_dict())
    best_acc = 0.
    val_acc_history = []
    for epoch in range(n_epochs):
        for phase in ["train", "val"]:
            running_loss = 0.
            running_corrects = 0
            if phase=="train":
                model.train()
            else:
                model.eval()
            
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)
                
                with torch.autograd.set_grad_enabled(phase=="train"):
                    outputs = model(inputs)  # bsize * 2
                    loss = loss_fn(outputs, labels)
                    print(".", end="")
                    
                preds = outputs.argmax(dim=1)
                if phase=="train":
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds.view(-1)==labels.view(-1)).item()
                
            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects / len(dataloaders[phase].dataset)
            
            print(f"{phase} loss: {epoch_loss}, acc:{epoch_acc}")
            if phase=="val" and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_weights = copy.deepcopy(model.state_dict())
                
            if phase=="val":
                val_acc_history.append(epoch_acc)
    model.load_state_dict(best_model_weights)
    return model, val_acc_history
model_ft = model_ft.to(device)
optimizer = optim.SGD(filter(lambda p: p.requires_grad, model_ft.parameters()), lr=0.001, momentum=0.9)
loss_fn = nn.CrossEntropyLoss()
_, val_acc_history = train_model(model_ft, dataloaders_dict, loss_fn, optimizer, n_epochs=num_epochs)
........train loss: 0.661804810899203, acc:0.6024590163934426
.....val loss: 0.5719855747191734, acc:0.7189542483660131
........train loss: 0.5182735797811727, acc:0.7622950819672131
..
plt.title("Validation Accuracy vs. Number of Training Epochs")
plt.xlabel("Training Epochs")
plt.ylabel("Validation Accuracy")
plt.plot(range(1, num_epochs+1), val_acc_history, label="Pretrained")
plt.ylim((0, 1))
plt.xticks(np.arange(1, num_epochs+1, 1.0))
plt.legend()
plt.show()