Cat vs. Dog Classification with a Custom Dataset in PyTorch

Many introductory PyTorch articles train on the datasets bundled with torchvision.datasets, which makes it hard to move on to your own data and use PyTorch with real flexibility.

Here I use the cats-vs-dogs dataset downloaded from Kaggle and train a binary-classification network on a custom dataset.

After unzipping, the folder contains 12,500 images per class (cats and dogs), with filenames like cat.0.jpg and dog.12499.jpg.

Since this is just for practice, there is no need for a dataset that large; a small subset is enough.

Using Python's os module, I split the data into two folders, train and test, each containing cats and dogs subfolders. The train folder holds 1,000 images per class and the test folder holds 500 per class. The images themselves vary in size. One way to script the split is sketched below.
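A minimal splitting sketch might look like the following; the source folder name train_raw and the exact copy counts are assumptions, so adjust them to your own layout:

# A sketch of the split (assumes all cat.*.jpg / dog.*.jpg files sit together in one folder)
import os
import shutil

src = 'train_raw'    # assumed name of the unzipped Kaggle folder
root = 'Cats_Dogs'   # target folder used in the rest of this article

for split, start, count in [('train', 0, 1000), ('test', 1000, 500)]:
    for animal in ('cats', 'dogs'):
        dst = os.path.join(root, split, animal)
        os.makedirs(dst, exist_ok=True)
        prefix = animal[:-1]                     # 'cat' or 'dog'
        for i in range(start, start + count):
            name = '{}.{}.jpg'.format(prefix, i)
            shutil.copyfile(os.path.join(src, name), os.path.join(dst, name))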

Now for the code:

# Imports
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

# Hyperparameters
BATCH_SIZE = 50
EPOCHS = 30
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Data preprocessing
transform = transforms.Compose([
    transforms.RandomResizedCrop(150),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])
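Note that RandomResizedCrop is a random augmentation; for the test set a deterministic pipeline is usually preferred. Something like this could be used instead (a sketch only, not used in the rest of this walkthrough):

# Optional deterministic transform for the test set (a sketch, not used below)
test_transform = transforms.Compose([
    transforms.Resize((150, 150)),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])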

# Load the datasets with ImageFolder (labels come from the cats/dogs subfolder names)
root = 'Cats_Dogs'
dataset_train = datasets.ImageFolder(os.path.join(root, 'train'), transform)
dataset_test = datasets.ImageFolder(os.path.join(root, 'test'), transform)

# Wrap the datasets in DataLoaders
train_loader = torch.utils.data.DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset_test, batch_size=BATCH_SIZE, shuffle=True)
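ImageFolder assigns labels from the subfolder names in alphabetical order, so it is worth a quick sanity check that cats maps to 0 and dogs to 1, and that the counts match the split described above:

# Sanity check: ImageFolder labels subfolders alphabetically
print(dataset_train.class_to_idx)              # expected: {'cats': 0, 'dogs': 1}
print(len(dataset_train), len(dataset_test))   # expected: 2000 1000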

# Define the network
class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 3)
        self.max_pool1 = nn.MaxPool2d(2)
        self.conv2 = nn.Conv2d(32, 64, 3)
        self.max_pool2 = nn.MaxPool2d(2)
        self.conv3 = nn.Conv2d(64, 128, 3)
        self.max_pool3 = nn.MaxPool2d(2)
        self.conv4 = nn.Conv2d(128, 128, 3)
        self.max_pool4 = nn.MaxPool2d(2)
        self.fc1 = nn.Linear(6272, 512)
        self.fc2 = nn.Linear(512, 1)
        
    def forward(self, x):
        in_size = x.size(0)
        x = self.conv1(x)
        x = F.relu(x)
        x = self.max_pool1(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = self.max_pool2(x)
        x = self.conv3(x)
        x = F.relu(x)
        x = self.max_pool3(x)
        x = self.conv4(x)
        x = F.relu(x)
        x = self.max_pool4(x)
        # flatten
        x = x.view(in_size, -1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = torch.sigmoid(x)
        return x
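
The 6272 in fc1 is not arbitrary: starting from a 150x150 input, each conv layer (kernel 3, no padding) followed by a 2x2 max pool shrinks the feature map 150 -> 148 -> 74 -> 72 -> 36 -> 34 -> 17 -> 15 -> 7, leaving 128 x 7 x 7 = 6272 values per image. A quick way to verify this is to push a dummy tensor through the convolutional part:

# Verify the 6272 input size of fc1 with a dummy tensor (standalone check, runs on CPU)
_m = ConvNet()
_x = torch.randn(1, 3, 150, 150)
_x = _m.max_pool1(F.relu(_m.conv1(_x)))
_x = _m.max_pool2(F.relu(_m.conv2(_x)))
_x = _m.max_pool3(F.relu(_m.conv3(_x)))
_x = _m.max_pool4(F.relu(_m.conv4(_x)))
print(_x.shape)   # torch.Size([1, 128, 7, 7]) -> 128 * 7 * 7 = 6272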

# Instantiate the model and move it to the GPU (if available)
model = ConvNet().to(DEVICE)
# Use the simple, brute-force Adam optimizer with a lowered learning rate
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Training loop
def train(model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device).float().reshape(-1, 1)  # (-1, 1) avoids hard-coding the batch size
        optimizer.zero_grad()
        output = model(data)
        # print(output)
        loss = F.binary_cross_entropy(output, target)
        loss.backward()
        optimizer.step()
        if (batch_idx + 1) % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, (batch_idx+1) * len(data), len(train_loader.dataset),
                100. * (batch_idx+1) / len(train_loader), loss.item()))

# Evaluation loop
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device).float().reshape(-1, 1)  # (-1, 1) avoids hard-coding the batch size
            output = model(data)
            # print(output)
            test_loss += F.binary_cross_entropy(output, target, reduction='sum').item() # sum up the batch loss
            pred = (output >= 0.5).long()  # threshold the sigmoid output at 0.5
            correct += pred.eq(target.long()).sum().item()
    test_loss /= len(test_loader.dataset)  # turn the summed loss into an average
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

# Train and evaluate
for epoch in range(1, EPOCHS + 1):
    train(model, DEVICE, train_loader, optimizer, epoch)
    test(model, DEVICE, test_loader)

On just 2,000 training images, this small deep neural network reaches 72% accuracy. The result is not impressive, but it is acceptable considering that nothing was done to prevent overfitting. Adding Dropout, regularization, or data augmentation should bring a solid improvement, and we only used a small fraction of the full dataset.
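For instance, augmentation and Dropout might be wired in roughly like this (a sketch only; the flip choice and the dropout rate are assumptions, not tuned values):

# Possible data augmentation for the training transform
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(150),
    transforms.RandomHorizontalFlip(),   # cats and dogs are left-right symmetric, so flipping is safe
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])

# Possible Dropout before the classifier head, added to ConvNet:
#   in __init__:  self.dropout = nn.Dropout(0.5)
#   in forward:   x = self.dropout(x)   # right after x = x.view(in_size, -1)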
