This article shows how to implement transfer-learning-based image classification in PyTorch, and how to train and evaluate it on the cats-vs-dogs dataset.
Dataset download: https://www.kaggle.com/tongpython/cat-and-dog/data#
Code for this article: https://download.csdn.net/download/u012223913/12273428
1. Data preprocessing
The dataset already comes split into a training set and a test set, in the training_set and test_set folders respectively, so we can conveniently build the data generators with torchvision.datasets.ImageFolder and torch.utils.data.DataLoader (a quick sanity check of the resulting loaders is shown after the code below).
from __future__ import print_function, division
import numpy as np
import os
import time
import copy
import json
import torch
import torchvision
import torch.optim as optim
import torch.nn.functional as F
import torch.nn as nn
from torch.optim import lr_scheduler
from torchvision import datasets, models, transforms
from torch.utils.tensorboard import SummaryWriter
import torch.utils.data as data
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import classification_report
# hyperparameters
batch_size = 64
learning_rate = 0.003
L2reg = 0
epoch = 50
lr_decay_epoch = 7
freeze_layer = 9
class_label = ["cats", "dogs"]
class_num = len(class_label)
TRAIN_DATA_PATH = "G:/tutorial/cat-and-dog/training_set/training_set/"
TEST_DATA_PATH = "G:/tutorial/cat-and-dog/test_set/test_set/"
# output locations and TensorBoard writers referenced later in train_model / evaluate_model
# (the exact paths here are just examples; adjust them to your own setup)
res_dir = "./results"                        # directory for the evaluation reports
model_save_path = "./best_mobilenetv2.pth"   # where the best weights are saved
os.makedirs(res_dir, exist_ok=True)
train_writer = SummaryWriter("runs/train")
valid_writer = SummaryWriter("runs/valid")
# dataset preprocessing; normalize with the ImageNet mean/std that MobileNetV2 was pretrained with
transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))])
# note: no RandomResizedCrop on the test set, only a deterministic resize to a fixed 224x224
transform_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))])
# data generators
train_data = torchvision.datasets.ImageFolder(root=TRAIN_DATA_PATH, transform=transform)
trainloader = data.DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=1)
test_data = torchvision.datasets.ImageFolder(root=TEST_DATA_PATH, transform=transform_test)
testloader = data.DataLoader(test_data, batch_size=batch_size, shuffle=False, num_workers=1)
# sample counts, used later to average the per-epoch loss and accuracy
train_num = len(train_data)
test_num = len(test_data)
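Before moving on, it can help to sanity-check what ImageFolder built. A minimal optional sketch (assuming the folder layout above, where each class has its own sub-folder) that prints the inferred class-to-index mapping, the dataset sizes, and the shape of one transformed sample:
# Quick sanity check of the data pipeline (optional).
# ImageFolder assigns labels from the sub-folder names, e.g. cats -> 0, dogs -> 1.
print(train_data.class_to_idx)            # e.g. {'cats': 0, 'dogs': 1}
print(len(train_data), len(test_data))    # number of training / test images
sample_img, sample_label = train_data[0]
print(sample_img.shape, class_label[sample_label])  # torch.Size([3, 224, 224]) cats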
2. The network model
We can directly use the mobilenet_v2 model shipped with torchvision (pretrained on ImageNet), and control how many of its layers are frozen through the freeze_layer parameter to speed up training.
# load MobileNetV2 pretrained on ImageNet and replace its classifier head
model_ft = models.mobilenet_v2(pretrained=True)
num_ftrs = model_ft.last_channel
model_ft.classifier = nn.Linear(num_ftrs, class_num)
model = model_ft

def freeze_model_layer(n):
    """
    Freeze the first n blocks of model.features.
    :param n: number of feature blocks to freeze
    :return:
    """
    for name, child in model.named_children():
        print(name)
    # freeze the front feature blocks so their weights are not updated
    ct = 0
    for child in model.features:
        ct += 1
        if ct < n:
            for param in child.parameters():
                param.requires_grad = False

if freeze_layer > 0:
    freeze_model_layer(freeze_layer)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)
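To confirm how much of the network the freezing actually left trainable, a small optional check (not part of the original code) that counts frozen vs. trainable parameters:
# Optional: verify how many parameters remain trainable after freezing.
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print("total params:     %d" % total_params)
print("trainable params: %d (%.1f%%)" % (trainable_params, 100.0 * trainable_params / total_params))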
3. Choosing the optimizer and loss function
# per-class loss weights, in the order of class_label (cats, dogs)
weight_l = np.array([0.7, 0.3])
weight_loss = torch.from_numpy(weight_l).float().to(device)
criterion = nn.CrossEntropyLoss(weight=weight_loss)
# Observe that all (unfrozen) parameters are being optimized
optimizer_ft = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=L2reg)
# Multiply the LR by gamma=0.8 every lr_decay_epoch (7) epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=lr_decay_epoch, gamma=0.8)
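Note that with step_size=lr_decay_epoch (7) and gamma=0.8 the learning rate shrinks by a factor of 0.8, not 0.1. A throwaway sketch you can run separately to see the resulting schedule (it uses a dummy optimizer so the real one is untouched; get_last_lr needs a reasonably recent PyTorch):
# Illustration only: how StepLR(step_size=7, gamma=0.8) decays the learning rate.
dummy_opt = optim.SGD([torch.zeros(1, requires_grad=True)], lr=learning_rate)
dummy_sched = lr_scheduler.StepLR(dummy_opt, step_size=lr_decay_epoch, gamma=0.8)
for e in range(epoch):
    if e % lr_decay_epoch == 0:
        print("epoch %2d: lr = %.6f" % (e, dummy_sched.get_last_lr()[0]))
    dummy_opt.step()
    dummy_sched.step()
# epoch  0: lr = 0.003000, epoch  7: lr = 0.002400, epoch 14: lr = 0.001920, ...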
4. Training
def train_model(model, criterion, optimizer, scheduler, num_epochs=10):
    print("start training...")
    since = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    # evaluate the pretrained model on the test set before any fine-tuning
    print("evaluate before training")
    report = evaluate_model(model, class_label)
    with open(os.path.join(res_dir, 'origin_test_result.txt'), 'w') as f:
        json.dump(report, f)

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and a validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()   # Set model to training mode
                dataloader = trainloader
                samples_number = train_num
            else:
                model.eval()    # Set model to evaluate mode
                dataloader = testloader
                samples_number = test_num

            current_loss = 0.0
            current_corrects = 0
            running_loss = 0.0

            # Here's where the training happens
            print('Iterating through data...')
            for i, (inputs, labels) in enumerate(dataloader):
                inputs = inputs.to(device)
                labels = labels.to(device)

                # We need to zero the gradients, don't forget it
                optimizer.zero_grad()

                # Forward pass; gradients are only tracked in the training phase
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)
                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # print the running loss every 10 batches
                running_loss += loss.item()
                if i % 10 == 9:
                    print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 10))
                    running_loss = 0.0

                # accumulate loss / accuracy statistics over the whole epoch
                current_loss += loss.item() * inputs.size(0)
                current_corrects += (preds == labels).sum().item()

            # step the LR scheduler once per epoch, after the optimizer updates
            if phase == 'train':
                scheduler.step()

            epoch_loss = current_loss / samples_number
            epoch_acc = current_corrects / samples_number
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # ...log the per-epoch loss and accuracy to TensorBoard
            if phase == 'train':
                train_writer.add_scalar("loss", epoch_loss, epoch)
                train_writer.add_scalar("accuracy", epoch_acc, epoch)
            else:
                valid_writer.add_scalar("loss", epoch_loss, epoch)
                valid_writer.add_scalar("accuracy", epoch_acc, epoch)

            # Keep a copy of the model if the accuracy on the validation set has improved
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    train_writer.close()
    valid_writer.close()

    time_since = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_since // 60, time_since % 60))
    print('Best val Acc: {:.4f}'.format(best_acc))

    # Load the best weights back into the model, save them to disk, and return the model
    model.load_state_dict(best_model_wts)
    torch.save(best_model_wts, model_save_path)
    return model
5. Evaluation
We use classification_report from scikit-learn to quickly generate an evaluation report for the model, including precision, recall, and other metrics.
def evaluate_model(model, label_names):
    model.eval()   # make sure dropout / batch norm are in inference mode
    # Initialize the prediction and label lists (tensors)
    predlist = torch.zeros(0, dtype=torch.long, device='cpu')
    lbllist = torch.zeros(0, dtype=torch.long, device='cpu')
    with torch.no_grad():
        for i, (inputs, classes) in enumerate(testloader):
            inputs = inputs.to(device)
            classes = classes.to(device)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            # Append batch prediction results
            predlist = torch.cat([predlist, preds.view(-1).cpu()])
            lbllist = torch.cat([lbllist, classes.view(-1).cpu()])
    report = classification_report(lbllist.numpy(), predlist.numpy(), target_names=label_names,
                                   digits=2)
    print(report)
    return report
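The confusion_matrix function that is already imported can give a complementary view of the errors. A small optional sketch, assuming evaluate_model is extended to also return the lbllist / predlist tensors it collects:
# Optional sketch: confusion matrix from the labels/predictions gathered above.
# Assumes evaluate_model is modified to also return (lbllist, predlist).
def print_confusion_matrix(y_true, y_pred):
    cm = confusion_matrix(y_true, y_pred)
    # rows = true classes, columns = predicted classes, in class_label order
    print(class_label)
    print(cm)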
if __name__ == '__main__':
    model = train_model(model, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=epoch)
    report = evaluate_model(model, class_label)
Prediction results: after training, evaluate_model prints the per-class precision/recall report on the test set.
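To run the trained model on a single image, here is a minimal sketch; it reuses the transform_test pipeline and the weights saved at model_save_path, and the file name sample.jpg is only a placeholder:
from PIL import Image

def predict_image(image_path):
    # load the best weights saved by train_model (model_save_path defined above)
    model.load_state_dict(torch.load(model_save_path, map_location=device))
    model.eval()
    img = Image.open(image_path).convert("RGB")
    x = transform_test(img).unsqueeze(0).to(device)   # add a batch dimension
    with torch.no_grad():
        probs = F.softmax(model(x), dim=1)[0]
    pred = torch.argmax(probs).item()
    print("%s -> %s (%.2f%%)" % (image_path, class_label[pred], 100 * probs[pred].item()))

# predict_image("sample.jpg")   # 'sample.jpg' is a placeholder path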
References:
- https://stackabuse.com/image-classification-with-transfer-learning-and-pytorch/#visualization
- https://pytorch.org/tutorials/intermediate/tensorboard_tutorial.html#writing-to-tensorboard
- https://pytorch.org/tutorials/beginner/finetuning_torchvision_models_tutorial.html#load-data