pytorch 入門筆記

1.數據加載

自定義dataset

import os
import os.path as osp

from torch.utils.data import Dataset


class DogCatDataset(Dataset):
    """Minimal path-returning dataset over a train/test directory pair.

    Note: both the ``train_set`` and ``test_set`` sub-directories are scanned
    eagerly at construction time, so both must exist under ``root_path``.
    """

    def __init__(self, root_path, is_train):
        self.is_train = is_train
        self.train_set_path = osp.join(root_path, 'train_set')
        self.test_set_path = osp.join(root_path, 'test_set')
        # Cache the file listings once; __getitem__ only joins paths.
        self.train_data = os.listdir(self.train_set_path)
        self.test_data = os.listdir(self.test_set_path)

    def __len__(self):
        """Number of files in the currently selected split."""
        active = self.train_data if self.is_train else self.test_data
        return len(active)

    def __getitem__(self, idx):
        """Return the full path of the ``idx``-th file in the active split."""
        if self.is_train:
            base, names = self.train_set_path, self.train_data
        else:
            base, names = self.test_set_path, self.test_data
        return osp.join(base, names[idx])

這種方式無法實現batch、shuffle數據或通過多線程讀取,所以還可以通過DataLoader定義迭代器

from torch.utils.data import DataLoader

dataset = DogCatDataset('../data', is_train=True)
dataiter = DataLoader(dataset, batch_size=32, shuffle=True)

對於圖片數據可以使用 torchvision.datasets 下的 ImageFolder

from torchvision.datasets import ImageFolder
# transform 和 target_transform 用於數據增強
ImageFolder(root='../data/train_set',transform=None, target_transform=None)

2. nn 模組  Module(網絡)

pytorch中定義計算圖,層結構和損失函數均來自 torch.nn 模組

代碼段 小部件

3. 一個完整的LinearRegression示例

from abc import ABC

import numpy as np
from torch.autograd import Variable
from torch.nn import Module
from torch.nn import Linear
import torch
from torch.nn import MSELoss
import torch.optim as optim
import matplotlib.pyplot as plt


class LinearRegression(Module, ABC):
    """Single-feature linear model: y = w * x + b."""

    def __init__(self):
        super().__init__()
        # One input feature mapped to one output value.
        self.linear = Linear(1, 1)

    def forward(self, x):
        """Apply the affine transform to ``x`` (expected shape: (N, 1))."""
        return self.linear(x)


# Training data: 15 (x, y) pairs, shaped (N, 1) as the Linear layer expects.
x_train = np.array([[3.3], [4.4], [5.5], [6.71], [6.93], [4.168],
                    [9.779], [6.182], [7.59], [2.167], [7.042],
                    [10.791], [5.313], [7.997], [3.1]], dtype=np.float32)

y_train = np.array([[1.7], [2.76], [2.09], [3.19], [1.694], [1.573],
                    [3.366], [2.596], [2.53], [1.221], [2.827],
                    [3.465], [1.65], [2.904], [1.3]], dtype=np.float32)

x_train = torch.Tensor(x_train)
y_train = torch.Tensor(y_train)

# Pick the device once and keep model and data on it consistently.
# (The original moved only the model to CUDA, so evaluation crashed on GPU.)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = LinearRegression().to(device)

criterion = MSELoss()
optimizer = optim.SGD(model.parameters(), lr=1e-3)

num_epochs = 1000

# The inputs are constant across epochs, so move them to the device once.
# Variable is deprecated since PyTorch 0.4: plain tensors carry autograd.
inputs = x_train.to(device)
targets = y_train.to(device)

for epoch in range(num_epochs):
    # forward
    out = model(inputs)
    loss = criterion(out, targets)

    # backward
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 20 == 0:
        # loss.item() is the supported way to read a scalar loss value.
        print('Epoch[{}/{}], loss:{:6f}'.format(epoch + 1, num_epochs, loss.item()))

# evaluation
# Switch to eval mode: layers such as Dropout and BatchNorm behave
# differently at inference time than during training.
model.eval()
with torch.no_grad():
    # Run the forward pass on the model's device, then bring the result back
    # to CPU for numpy/matplotlib (.numpy() fails on a CUDA tensor).
    predict = model(x_train.to(device)).cpu().numpy()
plt.plot(x_train.numpy(), y_train.numpy(), 'ro', label='original data')
plt.plot(x_train.numpy(), predict, label='fitting line')
plt.show()

注意:在模型預測前,需要將模型切換爲 eval 模式

4. 一個簡化的 VGG 示例

import sys
from abc import ABC

from torch.utils.data import DataLoader

sys.path.append('..')

import numpy as np
import torch
from torch import nn
from torchvision.datasets import CIFAR10
from torch.nn import Module
from torch.nn import Sequential, Conv2d, MaxPool2d, ReLU, Linear
from d_cnn.utils import train


class VggLite(Module, ABC):
    """Simplified VGG for 32x32 CIFAR-10 images.

    Five conv stages (each ending in a 2x2 max-pool) reduce a 32x32 input
    to 1x1x512, followed by a two-layer classifier producing 10 logits.
    """

    def __init__(self):
        super(VggLite, self).__init__()

        def conv_relu(c_in, c_out):
            # 3x3 conv with stride 1 and padding 1 preserves spatial size.
            return [Conv2d(c_in, c_out, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
                    ReLU(True)]

        def pool():
            # Halves the spatial resolution.
            return MaxPool2d(kernel_size=(2, 2), stride=(2, 2), dilation=(1, 1))

        layers = []
        layers += conv_relu(3, 64) + [pool()]
        layers += conv_relu(64, 128) + [pool()]
        layers += conv_relu(128, 256) + conv_relu(256, 256) + [pool()]
        layers += conv_relu(256, 512) + conv_relu(512, 512) + [pool()]
        layers += conv_relu(512, 512) + conv_relu(512, 512) + [pool()]
        self.feature = Sequential(*layers)

        # After five 2x pools a 32x32 input is 1x1, so 512 flat features.
        self.classifier = Sequential(
            Linear(512, 100),
            ReLU(True),
            Linear(100, 10)
        )

    def forward(self, x):
        feats = self.feature(x)
        flat = feats.view(feats.shape[0], -1)
        return self.classifier(flat)


def data_tf(x):
    """Convert an HWC uint8 image to a normalized CHW float32 tensor in [-1, 1]."""
    arr = np.array(x, dtype='float32') / 255   # scale to [0, 1]
    arr = (arr - 0.5) / 0.5                    # normalize to [-1, 1]
    # PyTorch expects channel-first (C, H, W) layout.
    return torch.from_numpy(arr.transpose((2, 0, 1)))


# CIFAR-10 with the transform above; download=True fetches the data on first run.
train_set = CIFAR10('../data', train=True, transform=data_tf, download=True)
train_data = DataLoader(train_set, batch_size=64, shuffle=True)
test_set = CIFAR10('../data', train=False, transform=data_tf, download=True)
test_data = DataLoader(test_set, batch_size=128, shuffle=False)

net = VggLite()
# NOTE(review): lr=1e-1 is aggressive for plain SGD without momentum — confirm it converges.
optimizer = torch.optim.SGD(net.parameters(), lr=1e-1)
criterion = nn.CrossEntropyLoss()

# `train` is a project-local helper (d_cnn.utils); presumably it runs 20 epochs
# of training and evaluation — verify against its definition.
train(net, train_data, test_data, 20, optimizer, criterion)

5. 學習率衰減

# Define an optimizer with a single parameter group.
optimizer = torch.optim.SGD(net.parameters(), lr=0.01, weight_decay=1e-4)
# Multiple parameter groups: the classifier gets its own lr (1e-3),
# while the base falls back to the default lr=1e-2.
optim.SGD([{'params': model.base.parameters()},
           {'params': model.classifier.parameters(), 'lr': 1e-3}], 
            lr=1e-2, momentum=0.9)
# Print the optimizer's hyper-parameters.
print('learning rate: {}'.format(optimizer.param_groups[0]['lr']))
print('weight decay: {}'.format(optimizer.param_groups[0]['weight_decay']))
learning rate: 0.01
weight decay: 0.0001

所以可以在循環過程中指定優化器的 lr 值來衰減學習率

# Single parameter group: assign the new lr directly.
optimizer.param_groups[0]['lr'] = 1e-5
# Multiple parameter groups: update each group in a loop.
for param_group in optimizer.param_groups:
    param_group['lr'] = 1e-5

6. 正則化

L2正則化

# Setting weight_decay adds L2 regularization to the parameter updates.
optimizer = torch.optim.SGD(net.parameters(), lr=0.01, weight_decay=1e-4) 

7.數據增強

from PIL import Image
from torchvision import transforms as tfs

"""
Common data-augmentation techniques:
1. resize the image by some ratio
2. crop a random region of the image
3. random horizontal / vertical flips
4. rotate the image by a random angle
5. randomly change brightness, contrast and color
"""
im = Image.open('./cat.png')

# Resize to a fixed (height, width).
new_im = tfs.Resize((100, 200))(im)

# Random / center crops.
random_im1 = tfs.RandomCrop(100)(im)         # 100 x 100
random_im2 = tfs.RandomCrop((150, 100))(im)  # 150 x 100 (height x width)
center_im = tfs.CenterCrop(100)(im)          # central 100 x 100
# Crop a random area and aspect ratio, then resize to 150 x 150.
# RandomSizedCrop is the deprecated name of RandomResizedCrop.
random_im3 = tfs.RandomResizedCrop(150)(im)

# Random flips.
h_filp = tfs.RandomHorizontalFlip()(im)  # random horizontal flip
v_flip = tfs.RandomVerticalFlip()(im)    # random vertical flip

# Random rotation within +/- 45 degrees.
rot_im = tfs.RandomRotation(45)(im)

# Brightness, contrast and color jitter.
bright_im = tfs.ColorJitter(brightness=1)(im)  # brightness factor drawn from [0, 2]; 1 keeps the original
contrast_im = tfs.ColorJitter(contrast=1)(im)  # contrast factor drawn from [0, 2]; 1 keeps the original
color_im = tfs.ColorJitter(hue=0.5)(im)        # hue shift drawn from [-0.5, 0.5]

# Combine several transforms with torchvision.transforms.Compose().
im_aug = tfs.Compose([
    tfs.Resize(120),
    tfs.RandomHorizontalFlip(),
    tfs.RandomCrop(96),
    tfs.ColorJitter(brightness=0.5, contrast=0.5, hue=0.5)
])

im_aug(im)

8.數據標準化

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章