李宏毅機器學習課程筆記-7.4基於CNN和PyTorch的食物圖片分類

本文爲作者學習李宏毅機器學習課程時參照樣例完成homework3的記錄。

全部課程PPT、數據和代碼下載鏈接:

鏈接:https://pan.baidu.com/s/1n_N7aoaNxxwqO03EmV5Bjg 提取碼:tpmc 複製這段內容後打開百度網盤手機App,操作更方便哦

  • 任務描述

    通過CNN實現食物圖片分類,數據集已提供

  • 數據集描述

    11個圖片類別,訓練集中有9866張圖片,驗證集中有3430張圖片,測試集中有3347張圖片。

    訓練集和驗證集中圖片命名格式爲類別_編號.jpg,編號不重要。

  • 代碼

import os
import cv2
import time
import torch
import torch.nn as nn
from torch.nn.modules.activation import ReLU
from torch.nn.modules.batchnorm import BatchNorm2d
from torch.nn.modules.pooling import MaxPool1d, MaxPool2d
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms
import numpy as np


"""加載數據"""
def read_files(dir_path, size=128):
    """Load every image in *dir_path* into fixed-size uint8 arrays.

    Filenames are expected to look like "<label>_<index>.jpg"; the integer
    prefix before the first underscore is taken as the class label.

    Args:
        dir_path: directory containing the image files.
        size: side length images are resized to (default 128, matching
            the original behaviour).

    Returns:
        (x, y): x has shape (N, size, size, 3), BGR channel order
        (cv2 convention); y holds the N uint8 class labels.

    Raises:
        ValueError: if a file cannot be decoded as an image.
    """
    filenames = sorted(os.listdir(dir_path))
    x = np.zeros((len(filenames), size, size, 3), dtype=np.uint8)
    y = np.zeros((len(filenames),), dtype=np.uint8)
    for i, filename in enumerate(filenames):
        path = os.path.join(dir_path, filename)
        img = cv2.imread(path)
        if img is None:
            # cv2.imread silently returns None on failure; fail loudly here
            # instead of crashing with an opaque error inside cv2.resize.
            raise ValueError("cannot read image: %s" % path)
        x[i] = cv2.resize(img, (size, size))
        y[i] = int(filename.split("_")[0])
    return x, y

train_x, train_y = read_files("./data/training")  # 9866 training images
val_x, val_y = read_files("./data/validation")  # 3430 validation images
print("Data Loaded")
print("Size of training data : %d" % len(train_x))
print("Size of validation data : %d" % len(val_x))


"""數據變換(訓練時進行數據增強)"""
train_transform = transforms.Compose([
    transforms.ToPILImage(mode=None), # convert the (H,W,C) ndarray to a PIL image
    transforms.RandomHorizontalFlip(p=0.5), # augmentation: random horizontal flip
    transforms.RandomRotation(15), # augmentation: random rotation up to +/-15 degrees
    transforms.ToTensor(), # to a torch tensor, values scaled to [0.0, 1.0]
])
# validation uses no augmentation, only the tensor conversion
val_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor()
])


"""加載數據"""
class ImgDataset(Dataset):
    """Dataset wrapping image arrays (and optional labels) with a transform.

    When *y* is omitted the dataset yields images only (test-time use);
    otherwise each item is an (image, label) pair, with labels stored as
    a torch.LongTensor (the dtype CrossEntropyLoss expects).
    """

    def __init__(self, x, y=None, transform=None):
        self.x = x
        self.y = None if y is None else torch.LongTensor(y)
        self.transform = transform

    def __len__(self):
        return len(self.x)

    def __getitem__(self, index):
        sample = self.x[index]
        if self.transform is not None:
            sample = self.transform(sample)
        if self.y is None:
            return sample
        return sample, self.y[index]

batch_size = 4
train_set = ImgDataset(train_x, train_y, train_transform)  # with augmentation
val_set = ImgDataset(val_x, val_y, val_transform)  # no augmentation
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)  # reshuffle every epoch
val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)


"""定義模型"""
class Model(nn.Module):
    """CNN classifier for 128x128 RGB food images (11 classes).

    Five identical conv stages (conv3x3 -> BatchNorm -> ReLU -> 2x2 maxpool)
    reduce a [3, 128, 128] input to [512, 4, 4]; a three-layer MLP then
    produces the 11 class logits.
    """

    def __init__(self):
        super(Model, self).__init__()
        stages = []
        in_channels = 3
        # Output channels per stage; each stage halves the spatial size:
        # 128 -> 64 -> 32 -> 16 -> 8 -> 4
        for out_channels in (64, 128, 256, 512, 512):
            stages += [
                nn.Conv2d(in_channels, out_channels, 3, 1, 1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(),
                nn.MaxPool2d(2, 2, 0),
            ]
            in_channels = out_channels
        self.cnn = nn.Sequential(*stages)
        self.fc = nn.Sequential(
            nn.Linear(512 * 4 * 4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 11),
        )

    def forward(self, x):
        features = self.cnn(x)
        # Flatten each sample's feature maps; dim 0 is the batch dimension.
        flat = features.view(features.size(0), -1)
        return self.fc(flat)


"""訓練並測試模型"""
# Train for `epochs` epochs, validating after each one.
model = Model()  # append .cuda() here (and on the batches below) for GPU training
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
epochs = 30
for epoch in range(epochs):
    epoch_start_time = time.time()
    train_acc = 0.0
    train_loss = 0.0
    val_acc = 0.0
    val_loss = 0.0

    model.train()  # enable BatchNorm running-stat updates
    for i, data in enumerate(train_loader):
        # BUGFIX: gradients must be cleared every batch; without this,
        # .backward() accumulates gradients across batches.
        optimizer.zero_grad()
        pred = model(data[0])
        batch_loss = criterion(pred, data[1])
        batch_loss.backward()
        optimizer.step()

        # BUGFIX: pred carries grad history here, so .numpy() would raise;
        # detach before converting.
        train_acc += np.sum(np.argmax(pred.detach().numpy(), axis=1) == data[1].numpy())
        train_loss += batch_loss.item()

    model.eval()  # use BatchNorm running stats during validation
    with torch.no_grad():
        for i, data in enumerate(val_loader):
            # BUGFIX: the original indexed the model (model[data[0]]),
            # which raises TypeError; the model must be called.
            pred = model(data[0])
            batch_loss = criterion(pred, data[1])
            val_acc += np.sum(np.argmax(pred.numpy(), axis=1) == data[1].numpy())
            val_loss += batch_loss.item()

    # Report per-sample averages for the epoch.
    print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f | Val Acc: %3.6f loss: %3.6f' %
          (epoch + 1, epochs, time.time() - epoch_start_time,
           train_acc / len(train_set), train_loss / len(train_set),
           val_acc / len(val_set), val_loss / len(val_set)))


Github(github.com):@chouxianyu

Github Pages(github.io):@臭鹹魚

知乎(zhihu.com):@臭鹹魚

博客園(cnblogs.com):@臭鹹魚

B站(bilibili.com):@絕版臭鹹魚

微信公衆號:@臭鹹魚

轉載請註明出處,歡迎討論和交流!


發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章