《動手學深度學習》分別使用殘差和VGG訓練fashion-mnist分類模型

圖片數據集:fashion-mnist
下載地址:https://pan.baidu.com/s/1AepwYqg_wQhy9y2S4vkL5Q 密碼:aqq3
預覽:
在這裏插入圖片描述
在這裏插入圖片描述

使用殘差網絡訓練模型

導入包

import os
import sys
import time
import math
import torch
from torch import nn, optim
import torch.nn.functional as F
import torchvision
from torchvision import transforms
from tqdm import tqdm

# Select the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Report CUDA availability (True exactly when the selected device is a GPU).
print(device.type == 'cuda')

通用類和函數

# Flatten every sample of a batch into a vector.
class FlattenLayer(torch.nn.Module):
    """Reshape an input of shape (batch, *, *, ...) to (batch, features).

    ``reshape`` is used instead of ``view`` so that non-contiguous inputs
    (for example the result of a transpose/permute) are flattened rather
    than raising a RuntimeError. Contiguous inputs are still returned as a
    view without copying.
    """

    def __init__(self):
        super(FlattenLayer, self).__init__()

    def forward(self, x):  # x shape: (batch, *, *, ...)
        # Keep the batch dimension and collapse all remaining dimensions.
        return x.reshape(x.shape[0], -1)

class GlobalAvgPool2d(nn.Module):
    """Global average pooling layer.

    Implemented as an ordinary average pooling whose window shape is set to
    the dimensions of the input from index 2 onward (height and width for a
    4-D input), so each channel is reduced to a single value.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        # The pooling window covers the whole spatial extent of the input.
        window = x.shape[2:]
        return F.avg_pool2d(x, kernel_size=window)

殘差網絡

class Residual(nn.Module):
    """Residual unit: two 3x3 conv + batch-norm layers plus a shortcut.

    Args:
        in_channels: Number of channels of the input.
        out_channels: Number of channels produced by both 3x3 convolutions.
        use_1x1conv: Whether to put an extra 1x1 convolution on the shortcut
            path so the input is mapped to ``out_channels`` (and ``stride``).
            Without it the shortcut is the identity, so the input must
            already have the same shape as the main-path output.
        stride: Stride of the first 3x3 convolution and of the 1x1 shortcut
            convolution. A stride of 2 halves the spatial size, taking over
            the role of a pooling layer.
    """

    def __init__(self, in_channels, out_channels, use_1x1conv=False, stride=1):
        super(Residual, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, stride=stride)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
        if use_1x1conv:
            self.conv3 = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride)
        else:
            self.conv3 = None
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)

    def forward(self, X):
        Y = F.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        # Compare against None explicitly: relying on the truthiness of a
        # module would skip any shortcut module whose __len__ is 0.
        if self.conv3 is not None:
            X = self.conv3(X)
        return F.relu(Y + X)


def resnet_block(in_channels, out_channels, num_residuals, first_block=False):
    """Build one ResNet stage consisting of ``num_residuals`` residual units.

    Args:
        in_channels: Channel count entering the stage.
        out_channels: Channel count produced by every unit of the stage.
        num_residuals: Number of residual units in this stage.
        first_block: Whether this is the first stage of the network. The
            first stage must keep the channel count unchanged.

    Returns:
        An ``nn.Sequential`` of the units. Unless ``first_block`` is set, the
        leading unit uses a 1x1 shortcut convolution with stride 2 (channel
        conversion plus down-sampling); all other units map ``out_channels``
        to ``out_channels`` with the default stride.
    """
    # For the first stage the output channel count must equal the input's.
    assert not first_block or in_channels == out_channels

    def make_unit(index):
        if index != 0 or first_block:
            return Residual(out_channels, out_channels)
        return Residual(in_channels, out_channels, use_1x1conv=True, stride=2)

    units = [make_unit(index) for index in range(num_residuals)]
    return nn.Sequential(*units)
    
def get_resnet_net():
    """Assemble the ResNet-style classifier, print its structure, return it.

    Layout: a 3x3 conv / batch-norm / ReLU stem (1 -> 32 channels, no
    max-pooling), four residual stages of two units each
    (32 -> 32 -> 64 -> 128 -> 256 channels), global average pooling and a
    flatten + linear head with 10 outputs.
    """
    # Stem. Author's TODO notes: smaller receptive field and fewer channels;
    # the usual ReLU + MaxPool2d(2, 2) tail was left out for the same reason.
    stem = [
        nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(32),
        nn.ReLU(),
    ]
    net = nn.Sequential(*stem)

    # Four consecutive stages (author's TODO note: channels halved throughout).
    stages = (
        ("resnet_block1", 32, 32, True),
        ("resnet_block2", 32, 64, False),
        ("resnet_block3", 64, 128, False),
        ("resnet_block4", 128, 256, False),
    )
    for stage_name, c_in, c_out, is_first in stages:
        net.add_module(stage_name, resnet_block(c_in, c_out, 2, first_block=is_first))

    # Global average pooling followed by the fully connected classifier.
    net.add_module("global_avg_pool", GlobalAvgPool2d())
    head = nn.Sequential(FlattenLayer(), nn.Linear(256, 10))
    net.add_module("fc", head)

    print('打印網絡結構(主要是爲了確認如何調整)')
    print(net)

    return net

工具函數

# Build the Fashion-MNIST train/test data loaders.
def load_data_fashion_mnist(batch_size, root='/home/kesci/input/FashionMNIST2065',
                            use_normalize=False, mean=None, std=None):
    """Download the Fashion-MNIST dataset (if needed) and return data loaders.

    Args:
        batch_size: Batch size used by both loaders.
        root: Directory passed to ``torchvision.datasets.FashionMNIST``.
        use_normalize: When True, the training set gets random-crop (28,
            padding 2) and horizontal-flip augmentation and both sets are
            normalized with ``mean`` / ``std``. When False, images are only
            converted to tensors.
        mean: Pixel mean for normalization; required if ``use_normalize``.
        std: Pixel standard deviation; required if ``use_normalize``.

    Returns:
        ``(train_iter, test_iter)``: a shuffled training ``DataLoader`` and
        an unshuffled test ``DataLoader``.

    Raises:
        ValueError: If ``use_normalize`` is True but ``mean`` or ``std`` is
            not given.
    """
    if use_normalize:
        # Fail early with a clear message instead of building
        # Normalize([None], [None]), which only breaks once a batch is fetched.
        if mean is None or std is None:
            raise ValueError('mean and std must be provided when use_normalize=True')
        # Normalization plus data augmentation (augmentation on train only).
        normalize = transforms.Normalize(mean=[mean], std=[std])
        train_augs = transforms.Compose([transforms.RandomCrop(28, padding=2),
                    transforms.RandomHorizontalFlip(),
                    transforms.ToTensor(),
                    normalize])
        test_augs = transforms.Compose([transforms.ToTensor(), normalize])
    else:
        # No normalization: only convert the images to tensors.
        train_augs = transforms.Compose([transforms.ToTensor()])
        test_augs = transforms.Compose([transforms.ToTensor()])
    # Load the datasets.
    mnist_train = torchvision.datasets.FashionMNIST(root=root, train=True, download=True, transform=train_augs)
    mnist_test = torchvision.datasets.FashionMNIST(root=root, train=False, download=True, transform=test_augs)
    # 0 means no extra worker processes are used for loading (Windows case).
    num_workers = 0 if sys.platform.startswith('win') else 4
    # Wrap the datasets in iterable loaders.
    train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size,
                                            shuffle=True, num_workers=num_workers)
    test_iter = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size,
                                            shuffle=False, num_workers=num_workers)

    return train_iter, test_iter
    
def evaluate_accuracy(data_iter, net, device=None):
    """Return the classification accuracy of ``net`` over ``data_iter``.

    Args:
        data_iter: Iterable yielding ``(X, y)`` batches of inputs and labels.
        net: Model to evaluate; predictions are ``net(X).argmax(dim=1)``.
        device: Device the batches are moved to. When None and ``net`` is an
            ``nn.Module``, the device of its first parameter is used.

    Returns:
        Fraction of correctly classified samples as a float.

    Raises:
        ZeroDivisionError: If ``data_iter`` yields no samples.
    """
    if device is None and isinstance(net, torch.nn.Module):
        # Default to the device the network lives on; a parameterless
        # network leaves the batches where they are.
        first_param = next(net.parameters(), None)
        if first_param is not None:
            device = first_param.device
    # Remember the current mode so evaluation does not silently flip a
    # model that was already in eval mode back to training mode.
    was_training = getattr(net, 'training', True)
    net.eval()
    acc_sum, n = 0.0, 0
    try:
        with torch.no_grad():
            for X, y in data_iter:
                if device is not None:
                    X, y = X.to(device), y.to(device)
                acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
                n += y.shape[0]
    finally:
        # Restore the previous mode even if evaluation raised.
        net.train(was_training)
    return acc_sum / n
    
def train_model(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs, model_path):
    """Train ``net`` with cross-entropy loss and checkpoint the best model.

    After every epoch the test accuracy is computed with
    ``evaluate_accuracy``; whenever it exceeds the module-level
    ``best_test_acc``, that global is updated and the model's state dict is
    saved to ``<model_path>/model/best.pth``.

    Args:
        net: Model to train; it is moved to ``device``.
        train_iter: Iterable of ``(X, y)`` training batches.
        test_iter: Iterable of ``(X, y)`` batches used for evaluation.
        batch_size: Not used by this function; kept for interface
            compatibility.
        optimizer: Optimizer already bound to ``net``'s parameters.
        device: Device to train on.
        num_epochs: Number of passes over ``train_iter``.
        model_path: Directory prefix for the checkpoint.
    """
    global best_test_acc
    # Allow calling the function even if the caller never initialised the
    # module-level best accuracy.
    if 'best_test_acc' not in globals():
        best_test_acc = 0.0
    net = net.to(device)
    print("training on ", device)
    loss = torch.nn.CrossEntropyLoss()
    save_path = '{}/model/best.pth'.format(model_path)
    for epoch in range(num_epochs):
        # Make sure training-mode layers (batch-norm, dropout) are active
        # even if the model was handed over in eval mode.
        net.train()
        train_l_sum, train_acc_sum, n, batch_count, start = 0.0, 0.0, 0, 0, time.time()
        for X, y in train_iter:
            X = X.to(device)
            y = y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            train_l_sum += l.cpu().item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()
            n += y.shape[0]
            batch_count += 1
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.4f, test acc %.4f, time %.1f sec'
              % (epoch + 1, train_l_sum / batch_count, train_acc_sum / n, test_acc, time.time() - start))
        if test_acc > best_test_acc:
            # Report the path that is actually written, and create the
            # target directory so torch.save cannot fail on a missing folder.
            print('find best! save at {}'.format(save_path))
            best_test_acc = test_acc
            os.makedirs(os.path.dirname(save_path), exist_ok=True)
            torch.save(net.state_dict(), save_path)

圖像增廣

print('計算數據集均值標準差')
batch_size = 64
train_iter, test_iter = load_data_fashion_mnist(batch_size, root='/home/kesci/input/FashionMNIST2065',
                                                use_normalize=False)

# Single pass over the training set accumulating the pixel sum and the sum
# of squares. Every batch (including the final, smaller one) contributes in
# proportion to its pixel count, and the standard deviation is taken as the
# square root of the overall variance rather than an average of per-batch
# square roots, which would underestimate it.
# All pixels are pooled together; the images are loaded with a single
# channel here, so this equals the per-channel statistic.
pixel_sum = 0.0
pixel_sq_sum = 0.0
pixel_count = 0
for X, _ in train_iter:
    X = X.double()  # accumulate in float64 to limit rounding error
    pixel_sum += X.sum().item()
    pixel_sq_sum += (X ** 2).sum().item()
    pixel_count += X.numel()

# Mean over the whole dataset.
dataset_global_mean = pixel_sum / pixel_count
print('整個數據集的像素均值:{}'.format(dataset_global_mean))
# Standard deviation over the whole dataset: sqrt(E[x^2] - E[x]^2), clamped
# at zero to guard against tiny negative values caused by rounding.
dataset_global_std = math.sqrt(max(pixel_sq_sum / pixel_count - dataset_global_mean ** 2, 0.0))
print('整個數據集的像素標準差:{}'.format(dataset_global_std))

# Re-create the loaders, this time with normalization (and augmentation).
batch_size = 256
train_iter, test_iter = load_data_fashion_mnist(batch_size, root='/home/kesci/input/FashionMNIST2065',
                                                use_normalize=True,
                                                mean = dataset_global_mean, std = dataset_global_std)

訓練過程

net = get_resnet_net()
# Directory prefix of the checkpoint ('<model_path>/model/best.pth').
# NOTE(review): the value is taken from the previously commented-out
# assignment; adjust it if checkpoints should live elsewhere.
model_path = "2_baseline"
# To resume from an earlier checkpoint, uncomment the next line:
# net.load_state_dict(torch.load('{}/model/best.pth'.format(model_path)))
net = net.to(device)

best_test_acc = 0
# The hyperparameters must exist before the optimizer that reads ``lr``.
lr, num_epochs = 0.001, 20
# optimizer = optim.Adam(net.parameters(), lr=lr)
optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4)

# Make sure the checkpoint directory exists before training starts.
os.makedirs('{}/model'.format(model_path), exist_ok=True)
print('訓練...')
train_model(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs, model_path)

訓練模型使訓練集準確率趨近於1,大概100 epoch後:

training on  cuda
epoch 1, loss 0.0539, train acc 0.9807, test acc 0.9370, time 59.1 sec
epoch 2, loss 0.0556, train acc 0.9799, test acc 0.9354, time 59.0 sec
epoch 3, loss 0.0504, train acc 0.9820, test acc 0.9380, time 59.0 sec
epoch 4, loss 0.0536, train acc 0.9811, test acc 0.9383, time 59.0 sec
epoch 5, loss 0.0526, train acc 0.9813, test acc 0.9349, time 59.0 sec
epoch 6, loss 0.0519, train acc 0.9815, test acc 0.9399, time 59.0 sec
epoch 7, loss 0.0509, train acc 0.9821, test acc 0.9399, time 59.0 sec
epoch 8, loss 0.0496, train acc 0.9821, test acc 0.9392, time 59.0 sec
epoch 9, loss 0.0526, train acc 0.9812, test acc 0.9382, time 59.0 sec
epoch 10, loss 0.0501, train acc 0.9821, test acc 0.9393, time 59.1 sec
epoch 11, loss 0.0488, train acc 0.9827, test acc 0.9340, time 59.0 sec
epoch 12, loss 0.0512, train acc 0.9813, test acc 0.9360, time 59.1 sec
epoch 13, loss 0.0471, train acc 0.9831, test acc 0.9383, time 59.0 sec
epoch 14, loss 0.0455, train acc 0.9837, test acc 0.9404, time 59.0 sec
epoch 15, loss 0.0470, train acc 0.9836, test acc 0.9376, time 59.0 sec
epoch 16, loss 0.0470, train acc 0.9837, test acc 0.9403, time 59.0 sec
epoch 17, loss 0.0467, train acc 0.9831, test acc 0.9365, time 59.0 sec
epoch 18, loss 0.0469, train acc 0.9833, test acc 0.9392, time 59.0 sec
epoch 19, loss 0.0480, train acc 0.9825, test acc 0.9379, time 59.0 sec
epoch 20, loss 0.0459, train acc 0.9838, test acc 0.9396, time 59.1 sec

預測

# Show the labels of one batch next to the model's predictions.
# Inference must run in eval mode (batch-norm uses its running statistics
# and does not update them) and without gradient tracking.
net.eval()
with torch.no_grad():
    for X, y in train_iter:
        X = X.to(device)
        predict_y = net(X)
        print(y)
        print(predict_y.argmax(dim=1))
        break  # a single batch is enough for the preview

# predict_y.argmax(dim=1)

test_acc = evaluate_accuracy(test_iter, net)
print("test_acc", test_acc)

使用VGG網絡訓練模型

將上面殘差網絡部分的定義替換爲下面的VGG網絡定義:

def vgg_block(num_convs, in_channels, out_channels):
    """Build one VGG block.

    Args:
        num_convs: Number of 3x3 convolution layers in the block.
        in_channels: Channel count of the block's input.
        out_channels: Channel count produced by every convolution.

    Returns:
        An ``nn.Sequential`` of ``num_convs`` (Conv2d, ReLU) pairs followed
        by a 2x2 max-pooling with stride 2, which halves height and width.
    """
    layers = []
    channels_in = in_channels
    for _ in range(num_convs):
        conv = nn.Conv2d(channels_in, out_channels, kernel_size=3, padding=1)
        layers.extend([conv, nn.ReLU()])
        # Every convolution after the first consumes the block's own output.
        channels_in = out_channels
    layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
    return nn.Sequential(*layers)

def vgg(conv_arch, fc_features, fc_hidden_units=4096):
    """Assemble a VGG-style network.

    Args:
        conv_arch: Iterable of ``(num_convs, in_channels, out_channels)``
            triples, one per VGG block.
        fc_features: Number of features entering the first linear layer
            (the flattened output of the last block).
        fc_hidden_units: Width of the two hidden fully connected layers.

    Returns:
        An ``nn.Sequential`` with modules ``vgg_block_1`` ... ``vgg_block_k``
        followed by ``fc``, a classifier with 10 outputs.
    """
    net = nn.Sequential()
    # Convolutional part: each block halves the spatial size.
    for position, spec in enumerate(conv_arch, start=1):
        num_convs, in_channels, out_channels = spec
        block = vgg_block(num_convs, in_channels, out_channels)
        net.add_module("vgg_block_" + str(position), block)
    # Fully connected part.
    classifier = [
        FlattenLayer(),
        nn.Linear(fc_features, fc_hidden_units),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(fc_hidden_units, fc_hidden_units),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(fc_hidden_units, 10),
    ]
    net.add_module("fc", nn.Sequential(*classifier))
    return net
    
def get_vgg():
    """Return the small two-block VGG network used for Fashion-MNIST.

    The architecture has two VGG blocks (1 -> 64 -> 128 channels, one
    convolution each). Each block halves height and width, so a 28x28 input
    ends up as 28 / 4 = 7 per side and the classifier receives
    128 * 7 * 7 features.
    """
    conv_arch = ((1, 1, 64), (1, 64, 128))
    final_channels = conv_arch[-1][2]
    # assumes 28x28 inputs (the Fashion-MNIST image size) - TODO confirm
    side = 28 // (2 ** len(conv_arch))
    fc_features = final_channels * side * side  # c * w * h
    fc_hidden_units = 4096  # arbitrary choice

    return vgg(conv_arch, fc_features, fc_hidden_units)

然後將訓練過程的第一句修改爲下面的語句:

# Build the VGG network via get_vgg() (replaces the ResNet model in the training script).
net = get_vgg()

即可開始訓練。

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章