Deep Learning with PyTorch

PyTorch official documentation: pytorch.org/docs/stable/index.html
PyTorch handbook (Chinese): github.com/zergtant/pytorch-handbook

These are my own beginner's study notes, updated irregularly as I learn. The content may be a bit scattered since I write things down as they occur to me. Please bear with me ~



2019/08/26 Mon

import torch

# 1. Data preparation: get_data
# 2. Creating learnable parameters: get_weights
# 3. Network model: simple_network
# 4. Loss: loss_fn
# 5. Optimizer: optimize

# ============================ Data Preparation ============================ #

# 1. Scalar (0-D tensors)
# note: torch.rand(3) below actually creates a 1-D tensor of length 3;
# a true 0-D scalar is shown at the end of this block
# type: FloatTensor or LongTensor
print('------------- Scalar -------------')
x = torch.rand(3)
print(x)              # Output: tensor([0.6788, 0.3105, 0.3672])
print(x.size())       # Output: torch.Size([3])
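# For comparison, a true 0-D scalar (PyTorch >= 0.4):
scalar = torch.tensor(3.14)
# print(scalar.size())   # Output: torch.Size([])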

# 2.Vectors(1-D tensors)
print('------------- Vectors -------------')
temp = torch.FloatTensor([23, 24, 24.5, 27.2, 23.0])
print(temp)           # Output: tensor([23.0000, 24.0000, 24.5000, 27.2000, 23.0000])
print(temp.size())    # Output: torch.Size([5])

# 3.Matrix(2-D tensors)
# convert a numpy array into a torch tensor: from_numpy()
from sklearn import datasets
print('------------- Matrix -------------')
boston = datasets.load_boston()
boston_tensor = torch.from_numpy(boston.data)
print(boston_tensor.size())
print(boston_tensor[:2])

# 4. 3-D Tensors
# 3-D tensors are typically used to represent image-like data
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
print('------------- 3-D Tensors -------------')
# read a panda image from disk using a library called PIL and convert it to numpy array
panda = np.array(Image.open('panda.jpg').resize((224,224)))
panda_tensor = torch.from_numpy(panda)
print(panda_tensor.size())
# Display panda
plt.imshow(panda)
# plt.show()

# 5.Slicing tensors
print('------------- Slicing tensors -------------')
# sales[:slice_index], where slice_index represents the index where you want to slice the tensor
sales = torch.FloatTensor([1000.0, 323.2, 333.4, 444.5, 1000.0, 323.2, 333.4, 444.5])
print(sales[:5])
print(sales[:-5])

# show only one channel of the image
plt.imshow(panda_tensor[:,:,0].numpy())
#plt.show()

# show a specific region of the image
plt.imshow(panda_tensor[25:175,60:130,0].numpy())
#plt.show()

# 6. 4-D Tensors
print('------------- 4-D Tensors -------------')
# 4-D tensor usually represents a batch of images
import glob
# read cat images from disk
data_path = 'cat/'
cats = glob.glob(data_path + '*.jpg')
# convert images into numpy arrays
cat_imgs = np.array([np.array(Image.open(cat).resize((224,224))) for cat in cats[:3]])
cat_imgs = cat_imgs.reshape(-1,224,224,3)
cat_tensors = torch.from_numpy(cat_imgs)
print(cat_tensors.size())

# 7. 5-D Tensors
print('------------- 5-D Tensors -------------')
# 5-D tensor usually represents video data
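# e.g. a batch of 2 clips, 16 frames each, 224x224 RGB; the shape below is
# illustrative, not from the original notes:
video_tensor = torch.randn(2, 16, 224, 224, 3)
# print(video_tensor.size())   # Output: torch.Size([2, 16, 224, 224, 3])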

# ========================================================================== #


# ========================= Tensors on GPU and CPU ========================= #
print('------------- Tensors on GPU and CPU -------------')
# tensor addition(+)
a = torch.rand(2,2)
b = torch.rand(2,2)
c = a + b            # method 1
d = torch.add(a,b)   # method 2
e = a.add_(b)        # method 3 (in-place addition: modifies a itself)
print('a + b = ', c)
print('torch.add(a,b) = ', d)
print('a.add_(b) = ', e)

# tensor multiply(*)
c = a * b             # method 1
d = a.mul(b)          # method 2
e = a.mul_(b)         # method 3 (in-place multiplication: modifies a itself)
print('a * b = ', c)
print('a.mul(b) = ', d)
print('a.mul_(b) = ', e)

# tensor matrix multiply (compare CPU vs GPU)
import time
a = torch.rand(10000,10000)
b = torch.rand(10000,10000)
# CPU
tic = time.time()
a.matmul(b)
toc = time.time()
#print('Time taken: ', toc-tic, ' s')
# GPU (guarded so the script still runs on CPU-only machines)
if torch.cuda.is_available():
    a = a.cuda()
    b = b.cuda()
    tic = time.time()
    a.matmul(b)
    # note: CUDA ops run asynchronously; call torch.cuda.synchronize() before
    # reading the clock if you want an accurate timing
    toc = time.time()
    #print('Time taken: ', toc-tic, ' s')

# ========================================================================== #


# ================================ Variable ================================ #
# Variable class components: data, grad, grad_fn (formerly called creator)
print('------------- Variable -------------')
from torch.autograd import Variable

x = Variable(torch.ones(2,2), requires_grad=True)
y = x.mean()
y.backward()
print('x:', x)
print('x.data: ', x.data)
print('x.grad: ', x.grad)
# grad_fn: None for user-created tensors; otherwise a reference to the function that produced the tensor
print('x.grad_fn', x.grad_fn)
print('y.grad_fn', y.grad_fn)   # MeanBackward
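# Note: since PyTorch 0.4, Variable has been merged into Tensor, so the same
# example can be written without the wrapper (a minimal equivalent sketch):
x2 = torch.ones(2, 2, requires_grad=True)
y2 = x2.mean()
y2.backward()
# print(x2.grad)   # tensor([[0.2500, 0.2500], [0.2500, 0.2500]])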

# ========================================================================== #


# ============================== Neural network ============================ #
# Creating data for the neural network (fixed inputs x, y)
def get_data():
    train_X = np.asarray([3.3,4.4,5.5,6.71,6.93,4.168,9.779,6.182,7.59,2.167,7.042,
                          10.791,5.313,7.997,5.654,9.27,3.1])
    train_Y = np.asarray([1.7,2.76,2.09,3.19,1.694,1.573,3.366,2.596,2.53,1.221,2.827,
                          3.465,1.65,2.904,2.42,2.94,1.3])
    dtype = torch.FloatTensor
    x = Variable(torch.from_numpy(train_X).type(dtype), requires_grad=False).view(17,1)
    y = Variable(torch.from_numpy(train_Y).type(dtype), requires_grad=False)
    return x, y

# Creating learnable parameters (w, b)
def get_weights():
    w = Variable(torch.randn(1), requires_grad=True)
    b = Variable(torch.randn(1), requires_grad=True)
    return w, b


# Network implementation (x, w, b are module-level globals created by
# get_data() and get_weights() above)
def simple_network():
    y_pred = torch.matmul(x, w) + b
    # Much simpler alternative:
    # f = nn.Linear(17,1)
    return y_pred

# Loss function
def loss_fn(y, y_pred):
    # sum of squared errors (SSE) for a regression problem
    loss = (y_pred - y).pow(2).sum()
    for param in [w, b]:
        # clear stale gradients before the backward pass
        if param.grad is not None: param.grad.data.zero_()
    loss.backward()
    return loss.item()   # loss.data[0] fails on 0-dim tensors in PyTorch >= 0.4

# Optimize the neural network
def optimize(learning_rate):
    w.data -= learning_rate * w.grad.data
    b.data -= learning_rate * b.grad.data
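# Putting the five pieces together: a minimal training-loop sketch
# (assumed glue code; the learning rate and epoch count are illustrative)
x, y = get_data()
w, b = get_weights()
learning_rate = 1e-4
for epoch in range(500):
    y_pred = simple_network()
    loss = loss_fn(y, y_pred)
    optimize(learning_rate)
    # if epoch % 50 == 0: print(epoch, loss)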

# Dataset class
# two important function: __len__(self) and __getitem__(self, idx)
from torch.utils.data import Dataset
class DogsAndCatsDataset(Dataset):
    def __init__(self):
        pass # __init__ performs any initialization
    def __len__(self):
        pass # __len__ returns the number of elements in the dataset
    def __getitem__(self, idx):
        pass # __getitem__ returns the element at index idx each time it is called

class DogsAndCatsDataset(Dataset):
    def __init__(self, root_dir, size=(224,224)):
        self.files = glob.glob(root_dir)
        self.size = size
    def __len__(self):
        return len(self.files)
    def __getitem__(self, idx):
        img = np.asarray(Image.open(self.files[idx]).resize(self.size))
        label = self.files[idx].split('/')[-2]
        return img, label

# DataLoader class
from torch.utils.data import DataLoader
# instantiate the dataset first (the glob pattern below is an assumed example path)
dogsdset = DogsAndCatsDataset('dogsandcats/train/*/*.jpg')
dataloader = DataLoader(dogsdset, batch_size=32, num_workers=2)
for imgs, labels in dataloader:
    # apply your DL pipeline to the batch here
    pass
    # imgs contains a tensor of shape (batch_size, height, width, channels)
    
# ========================================================================== #

Output:

------------- Scalar -------------
tensor([0.8906, 0.5367, 0.2124])
torch.Size([3])
------------- Vectors -------------
tensor([23.0000, 24.0000, 24.5000, 27.2000, 23.0000])
torch.Size([5])
------------- Matrix -------------
torch.Size([506, 13])
tensor([[6.3200e-03, 1.8000e+01, 2.3100e+00, 0.0000e+00, 5.3800e-01, 6.5750e+00,
         6.5200e+01, 4.0900e+00, 1.0000e+00, 2.9600e+02, 1.5300e+01, 3.9690e+02,
         4.9800e+00],
        [2.7310e-02, 0.0000e+00, 7.0700e+00, 0.0000e+00, 4.6900e-01, 6.4210e+00,
         7.8900e+01, 4.9671e+00, 2.0000e+00, 2.4200e+02, 1.7800e+01, 3.9690e+02,
         9.1400e+00]], dtype=torch.float64)
------------- 3-D Tensors -------------
torch.Size([224, 224, 3])
QXcbConnection: Failed to initialize XRandr
------------- Slicing tensors -------------
tensor([1000.0000,  323.2000,  333.4000,  444.5000, 1000.0000])
tensor([1000.0000,  323.2000,  333.4000])
------------- 4-D Tensors -------------
torch.Size([3, 224, 224, 3])
------------- 5-D Tensors -------------
------------- Tensors on GPU and CPU -------------
a + b =  tensor([[0.8486, 1.1625],
        [0.7530, 0.2172]])
torch.add(a,b) =  tensor([[0.8486, 1.1625],
        [0.7530, 0.2172]])
a.add_(b) =  tensor([[0.8486, 1.1625],
        [0.7530, 0.2172]])
a * b =  tensor([[3.8537e-01, 1.1534e+00],
        [3.8019e-02, 9.6186e-04]])
a.mul(b) =  tensor([[3.8537e-01, 1.1534e+00],
        [3.8019e-02, 9.6186e-04]])
a.mul_(b) =  tensor([[3.8537e-01, 1.1534e+00],
        [3.8019e-02, 9.6186e-04]])
------------- Variable -------------
x: tensor([[1., 1.],
        [1., 1.]], requires_grad=True)
x.data:  tensor([[1., 1.],
        [1., 1.]])
x.grad:  tensor([[0.2500, 0.2500],
        [0.2500, 0.2500]])
x.grad_fn None
y.grad_fn <MeanBackward0 object at 0x7f1723d3ac18>

2019/09/07 Sat

【1】Setting up the GPU before training
【2】Loading a pretrained model
【3】Network definition and common modules
【4】The nn.Sequential() module
【5】The nn.ModuleList() module

【1】Setting up the GPU:

# Set gpu_id to -1 to run in CPU mode, otherwise set the id of the corresponding gpu
gpu_id = 1
device = torch.device("cuda:"+str(gpu_id) if torch.cuda.is_available() else "cpu")
if torch.cuda.is_available():
    print('Using GPU: {} '.format(gpu_id))

# Network definition
net = ...
# put both the network and the data on the GPU
net.to(device)
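net.to(device) only moves the model; input tensors have to be moved the same way inside the training loop. A minimal sketch, assuming inputs and labels come from a DataLoader:

inputs, labels = inputs.to(device), labels.to(device)
outputs = net(inputs)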

【1】torch.device represents the device on which a torch.Tensor is allocated. It can be constructed from a string, or from a string plus a device ordinal:

# from a string
>>> torch.device('cuda:0')
device(type='cuda', index=0)

>>> torch.device('cpu')
device(type='cpu')

>>> torch.device('cuda')  # current cuda device
device(type='cuda')

# from a string plus a device ordinal
>>> torch.device('cuda', 0)
device(type='cuda', index=0)

>>> torch.device('cpu', 0)
device(type='cpu', index=0)

【2】torch.cuda.is_available() checks whether PyTorch can actually use GPU acceleration; provided the installation is correct and a CUDA-capable GPU is present, it returns True:

>>> torch.cuda.is_available()
True

【2】Loading a model:

# cpu->cpu
checkpoint = torch.load('model.pth')
model.load_state_dict(checkpoint)

# cpu->gpu
torch.load('model.pth', map_location=lambda storage, loc: storage.cuda(1))

# gpu1->gpu0
torch.load('model.pth', map_location={'cuda:1':'cuda:0'})

# gpu->cpu
torch.load('model.pth', map_location=lambda storage, loc: storage)
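Since 【1】 already builds a device object, the four cases above can usually be collapsed into one call, because map_location also accepts a torch.device (a sketch reusing the device from 【1】):

checkpoint = torch.load('model.pth', map_location=device)
model.load_state_dict(checkpoint)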

【3】Defining a network:

References:
【1】The nn.Module module
【2】nn.Conv1d and nn.Conv2d in PyTorch

torch.nn is a modular interface designed specifically for neural networks. nn is built on top of autograd and can be used to define and run networks. nn.Module is a central class in nn; it holds the definitions of the network's layers plus a forward method.

When defining your own network, subclass nn.Module and implement the forward method. Layers with learnable parameters usually go in the constructor __init__(); layers without learnable parameters (e.g. ReLU) may go in the constructor or not (they can be replaced by nn.functional calls in forward). As long as forward is defined in an nn.Module subclass, the backward function is implemented automatically (via autograd).

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

class LeNet(nn.Module):
    def __init__(self):
        # a subclass of nn.Module must call the parent constructor in its own
        # constructor; this is the standard line when defining a network,
        # equivalent to nn.Module.__init__(self)
        super(LeNet, self).__init__()
 
        # nn.Conv2d returns a Conv2d object whose class implements forward;
        # calling self.conv1(input) invokes that forward function
        # output shape: (N, C_out, H_out, W_out)
        self.conv1 = nn.Conv2d(1, 6, (5, 5))   
        self.conv2 = nn.Conv2d(6, 16, (5, 5))
        self.fc1 = nn.Linear(256, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        # F.max_pool2d returns a Variable
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))
        x = x.view(x.size()[0], -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
 
        # the return value is also a Variable
        return x

def output_name_and_params(net):
    for name, parameters in net.named_parameters():
        print('name: {}, param: {}'.format(name, parameters))
 
if __name__ == '__main__':
    net = LeNet()
    print('net: {}'.format(net))
    params = net.parameters()   # generator object
    print('params: {}'.format(params))
    output_name_and_params(net)
 
    input_image = torch.FloatTensor(10, 1, 28, 28)
 
    # Unlike TensorFlow, in (pre-0.4) PyTorch the model input is a Variable,
    # and it is Variables that flow through the graph, not raw Tensors;
    # this is visible from the result of each step in forward.
    input_image = Variable(input_image)
 
    output = net(input_image)
    print('output: {}'.format(output))
    print('output.size: {}'.format(output.size()))

(1) nn.Conv2d: the "2d" means two-dimensional; it performs convolution over image data. Its basic definition is:

class torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True)

See the official documentation: conv2d

The parameters are: in_channels, the number of input channels; out_channels, the number of output channels; kernel_size, the convolution kernel size; stride, the convolution stride (default 1); padding, the amount of zero-padding (default 0, no padding); dilation, the dilation rate for atrous convolution (default 1, i.e. an ordinary convolution); groups, the number of groups for grouped convolution (default 1, no grouping); bias, whether to add a bias term (default True).

Of these, the first three (input channels, output channels, and kernel size) must be given; the rest have defaults that apply when left unset.

Suppose we have 32 x 32 image samples with channels = 1, each image belonging to one of 10 classes, and the network structure is [conv + relu + pooling] * 2 + FC * 3. The CNN is then defined as follows:

class CNN(nn.Module):
    def __init__(self):
        # call the parent constructor (nn.Module)
        super(CNN, self).__init__()

        # input channels = 1, output channels = 6, kernel size = 5
        self.conv1 = nn.Conv2d(1, 6, 5)
        # input channels = 6, output channels = 16, kernel size = 5
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(5 * 5 * 16, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        ## relu and max-pooling have no learnable parameters, so they need not be defined in __init__

    def forward(self, x):
        # input x -> conv1 -> relu -> 2x2 max pooling
        x = self.conv1(x) # stride defaults to 1
        x = F.relu(x)
        x = F.max_pool2d(x, 2) # kernel = 2
        # input x -> conv2 -> relu -> 2x2 max pooling
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)

        # view flattens x into a vector per sample (total feature count
        # unchanged), in preparation for the fully connected layers
        x = x.view(x.size()[0], -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

Note: in PyTorch, the default stride of a pooling op equals the pooling kernel size.
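Where does the 5 * 5 * 16 passed to fc1 come from? Each 5x5 conv shrinks the spatial side by 4 and each 2x2 pooling halves it: 32 -> 28 -> 14 after conv1 + pool, then 14 -> 10 -> 5 after conv2 + pool, with 16 channels. A quick sanity check of that arithmetic (a sketch reusing the CNN class and imports above):

net = CNN()
x = torch.randn(1, 1, 32, 32)              # (N, C, H, W)
x = F.max_pool2d(F.relu(net.conv1(x)), 2)
print(x.size())                            # torch.Size([1, 6, 14, 14])
x = F.max_pool2d(F.relu(net.conv2(x)), 2)
print(x.size())                            # torch.Size([1, 16, 5, 5]) -> 16 * 5 * 5 = 400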

(2) nn.BatchNorm2d is commonly used in convolutional networks to help counter vanishing or exploding gradients. Its basic definition is:

nn.BatchNorm2d(num_features, eps=1e-05, momentum=0.1, affine=True)

The parameters are: num_features, the number of input channels; eps, a value added to the denominator for numerical stability, default 1e-5; momentum, used when computing running_mean and running_var, default 0.1; affine, if True the module has learnable affine parameters.

# with learnable parameters
m = nn.BatchNorm2d(100)
# without learnable parameters
m = nn.BatchNorm2d(100, affine=False)
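Applying it works like any other module; num_features must equal the channel dimension C of an (N, C, H, W) input. A minimal sketch:

m = nn.BatchNorm2d(100)
input = torch.randn(20, 100, 35, 45)   # (N, C, H, W) with C = num_features
output = m(input)
print(output.size())                   # torch.Size([20, 100, 35, 45])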

(3) nn.ReLU has the basic definition:

nn.ReLU(inplace=True)

The inplace parameter defaults to False; when set to True, the op modifies its input data directly. inplace makes no difference to the computed result, but it saves (GPU) memory and avoids the time spent repeatedly allocating and freeing buffers. It does overwrite the original variable, so use it whenever that does not cause errors.

import torch
import torch.nn as nn

out = nn.ReLU(inplace=True)
input = torch.randn(5)

print("input:")
print(input)

output = out(input)

print("ReLU output:")
print(output)

print("input:")
print(input)

>>>
input:
tensor([-0.2954, -0.2941,  0.2327, -0.8194, -0.7024])
ReLU output:
tensor([0.0000, 0.0000, 0.2327, 0.0000, 0.0000])
input:
tensor([0.0000, 0.0000, 0.2327, 0.0000, 0.0000])

(4) nn.MaxPool2d has the basic definition:

nn.MaxPool2d(kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False)

Here kernel_size is the pooling window size; stride is the window's step, defaulting to the window size; padding is the amount of zero-padding; dilation spaces the window elements as in dilated convolution (pooling itself has no learnable parameters); return_indices, if True, also returns the indices of the maxima, which helps with upsampling; ceil_mode, if True, uses ceiling instead of the default floor when computing the output size.
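The return_indices option is what makes max-unpooling possible: the recorded indices tell nn.MaxUnpool2d where to put each value back during upsampling. A minimal sketch:

pool = nn.MaxPool2d(2, stride=2, return_indices=True)
unpool = nn.MaxUnpool2d(2, stride=2)
input = torch.randn(1, 1, 4, 4)
output, indices = pool(input)
restored = unpool(output, indices)      # zeros everywhere except at the max positions
print(output.size(), restored.size())   # torch.Size([1, 1, 2, 2]) torch.Size([1, 1, 4, 4])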

(5) When defining a network, you can also add private methods to the class to modularize common operations. For example, ResNet defines _make_layer to build the 4 blocks of the ResNet network.

Its parameters: block selects between the BasicBlock and Bottleneck classes; planes is the output channel count of the current block; blocks is the number of residual modules stacked in the current block, and the per-block counts come in as a list, e.g. in ResNet101:

model = ResNet(Bottleneck, [3, 4, 23, 3], n_classes, nInputChannels=nInputChannels,
                   classifier=classifier, dilations=dilations, strides=strides, _print=True)

Here [3, 4, 23, 3] is passed to ResNet's layers parameter, and _make_layer then creates each block:

self.layer1 = self._make_layer(block, 64, layers[0])
self.layer2 = self._make_layer(block, 128, layers[1], stride=strides[2])
self.layer3 = self._make_layer(block, 256, layers[2], stride=strides[3], dilation__=dilations[0])
self.layer4 = self._make_layer(block, 512, layers[3], stride=strides[4], dilation__=dilations[1])

The layers values above are passed to _make_layer's blocks parameter; the key code that generates each block's conv layers is in the for loop:

def _make_layer(self, block, planes, blocks, stride=1, dilation__=1):
    downsample = None
    if stride != 1 or self.inplanes != planes * block.expansion or dilation__ == 2 or dilation__ == 4:
        downsample = nn.Sequential(
            nn.Conv2d(self.inplanes, planes * block.expansion,
                      kernel_size=1, stride=stride, bias=False),
            # affine_par is a module-level flag defined elsewhere in this codebase
            nn.BatchNorm2d(planes * block.expansion, affine=affine_par),
        )
    # freeze the BN parameters of the downsample branch
    for i in downsample._modules['1'].parameters():
        i.requires_grad = False
    layers = [block(self.inplanes, planes, stride, dilation_=dilation__, downsample=downsample)]
    self.inplanes = planes * block.expansion
    for i in range(1, blocks):
        layers.append(block(self.inplanes, planes, dilation_=dilation__))

    return nn.Sequential(*layers)

_make_layer builds layers[*] Bottleneck modules for each block. Per the Bottleneck definition, each contains three conv layers, each followed by BN; after the last conv there is also a ReLU and, in the first module of a block, a downsample branch.

A look at the ResNet101 network parameters (only the first two blocks are shown, with 3 and 4 Bottleneck modules respectively): you can see the role Bottleneck plays in cutting down the parameter count ~

ResNet(
  (conv1): Conv2d(4, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): Bottleneck(
      (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
    )
    (2): Bottleneck(
      (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
    )
  )
  (layer2): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (downsample): Sequential(
        (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): Bottleneck(
      (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
    )
    (2): Bottleneck(
      (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
    )
    (3): Bottleneck(
      (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
    )
  )

In the ResNet101 parameter listing above there is this thing called Sequential() that wraps a conv layer and a norm layer. Intriguing.

【4】The nn.Sequential() module

Its documented definition: Sequential is an ordered container; neural network modules are added to the computation graph and executed in the order they are passed in. An ordered dict whose values are modules can also be passed as the argument.

In plain words: you can declare a bunch of layers inside Sequential, and the declaration order is the order the network passes data through them. If each layer is a worker with a specific job, Sequential is the factory that lines the workers up along an assembly line. It is essentially a convenience.

When using nn.Sequential(), make sure each layer's output size matches the next layer's input size. There are several ways to use the module:

# method 1: create the container first, then add layers with add_module
model = nn.Sequential()
model.add_module('conv', nn.Conv2d(3, 3, 3))
model.add_module('batchnorm', nn.BatchNorm2d(3))
model.add_module('activation_layer', nn.ReLU())

# method 2: pass the layers directly
model = nn.Sequential(
          nn.Conv2d(1,20,5),
          nn.ReLU(),
          nn.Conv2d(20,64,5),
          nn.ReLU()
        )

# method 3: combine with an OrderedDict
from collections import OrderedDict
model = nn.Sequential(OrderedDict([
          ('conv1', nn.Conv2d(1,20,5)),
          ('relu1', nn.ReLU()),
          ('conv2', nn.Conv2d(20,64,5)),
          ('relu2', nn.ReLU())
        ]))

# example
class Net(nn.Module):
    def __init__(self, in_dim, n_hidden_1, n_hidden_2, out_dim):
        super().__init__()
        self.layer = nn.Sequential(
            nn.Linear(in_dim, n_hidden_1),
            nn.ReLU(True),
            nn.Linear(n_hidden_1, n_hidden_2),
            nn.ReLU(True),
            nn.Linear(n_hidden_2, out_dim)
        )

    def forward(self, x):
        x = self.layer(x)
        return x

# to inspect a model, just print it
print('model:', model)

【5】The nn.ModuleList() module

nn.ModuleList stores an arbitrary number of nn.Module objects.

When an nn.ModuleList is added as a member of an nn.Module object (i.e. when we add modules to our network), all the parameters of the nn.Modules inside the nn.ModuleList are registered as parameters of the network too.

class MyModule(nn.Module):
    def __init__(self):
        super(MyModule, self).__init__()
        self.linears = nn.ModuleList([nn.Linear(10, 10) for i in range(10)])
 
    def forward(self, x):
        # ModuleList can act as an iterable, or be indexed using ints
        for i, l in enumerate(self.linears):
            x = self.linears[i // 2](x) + l(x)
        return x

Once an nn.ModuleList is defined, you can use extend to append another ModuleList to it, or append to add a single module:

class LinearNet(nn.Module):
    def __init__(self, input_size, num_layers, layers_size, output_size):
        super(LinearNet, self).__init__()
        self.num_layers = num_layers
        self.linears = nn.ModuleList([nn.Linear(input_size, layers_size)])
        self.linears.extend([nn.Linear(layers_size, layers_size) for i in range(1, self.num_layers - 1)])
        self.linears.append(nn.Linear(layers_size, output_size))

    def forward(self, x):
        # ModuleList has no built-in forward; chain the layers by hand
        for layer in self.linears:
            x = layer(x)
        return x

Unlike nn.Sequential, nn.ModuleList has no automatic forward behavior, so you have to define it yourself (as in the forward above).


2019/09/08 Sun

【1】torchvision.transforms
【2】__init__ and __call__

【1】torchvision.transforms

torchvision.transforms is PyTorch's image preprocessing package. transforms.Compose is usually used to chain several processing steps together, for example:

from torchvision import transforms
composed_transforms_tr = transforms.Compose([
        transforms.CenterCrop(10),
        transforms.ToTensor()
])

Other preprocessing transforms:

Resize: resize the given image to the specified size
Normalize: normalize a tensor image with mean and standard deviation
ToTensor: convert an image with pixel values in [0, 255] to a torch.Tensor with values in [0.0, 1.0]
ToPILImage: convert a tensor to a PIL image
CenterCrop: crop the central region of the image
RandomCrop: crop at a random location
RandomHorizontalFlip: horizontally flip the given PIL image with probability 0.5
RandomVerticalFlip: vertically flip the given PIL image with probability 0.5
RandomResizedCrop: crop the PIL image to a random size and aspect ratio
Grayscale: convert the image to grayscale
RandomGrayscale: convert the image to grayscale with a given probability
FiveCrop: crop the image into the four corners and the center
ColorJitter: randomly change the brightness, contrast, and saturation of the image
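A typical training pipeline chains several of these together; the mean/std values below are the commonly used ImageNet statistics, shown purely as an illustration:

train_transforms = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])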

【2】__init__ and __call__

__init__ is a class's initializer; __call__ lets an instance be called like a function.

class Cat():
    def __init__(self, name, init_age):
        super().__init__()
        self.name = name
        self.age = init_age
        print("{} is playing".format(self.name))
        print("{} is {} year-old".format(self.name, self.age))
    def __call__(self, add_age):
        cur_age = self.age + add_age
        print("Now {} is {} year-old".format(self.name, cur_age))

cat = Cat('kamiya', 2)
cat(1)

>>>
kamiya is playing
kamiya is 2 year-old
Now kamiya is 3 year-old
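
This is exactly the mechanism torchvision transforms rely on: each transform stores its configuration in __init__ and does the work in __call__, which is how transforms.Compose can simply call each step in turn. A sketch of a custom transform (the class name and logic are made up for illustration):

from torchvision import transforms

class Scale255:
    # hypothetical transform: scale a [0, 1] tensor back to [0, 255]
    def __init__(self, factor=255.0):
        self.factor = factor           # configuration is stored in __init__
    def __call__(self, tensor):
        return tensor * self.factor    # the actual work happens in __call__

composed = transforms.Compose([transforms.ToTensor(), Scale255()])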