PyTorch 官方文檔:pytorch.org/docs/stable/index.html
PyTorch 中文文檔:github.com/zergtant/pytorch-handbook
小萌新自己的學習記錄,根據學習進度不定期更新,內容可能比較亂,想到哪寫哪,各位大佬勿嫌 ~
2019/08/26 Mon
import torch
# 1. Data preparation: get_data
# 2. Creating learnable parameters: get_weights
# 3. Network model: simple_network
# 4. Loss: loss_fn
# 5. Optimizer: optimize
# ============================ Data Preparation ============================ #
# 1.Scalar(0-D tensors)
# type: FloatTensor or LongTensor
# NOTE: torch.rand(3) below builds a 1-D tensor holding three random values
# (its size prints as torch.Size([3])); it is not a true 0-D scalar tensor.
print('------------- Scalar -------------')
x = torch.rand(3)
print(x) # Output: tensor([0.6788, 0.3105, 0.3672])
print(x.size()) # Output: torch.Size([3])
# 2.Vectors(1-D tensors)
# A 1-D float tensor built from a Python list; size() reports the element count.
print('------------- Vectors -------------')
temp = torch.FloatTensor([23, 24, 24.5, 27.2, 23.0])
print(temp) # Output: tensor([23.0000, 24.0000, 24.5000, 27.2000, 23.0000])
print(temp.size()) # Output: torch.Size([5])
# 3.Matrix(2-D tensors)
# convert numpy array into a torch tensor: from_numpy()
from sklearn import datasets
print('------------- Matrix -------------')
# NOTE(review): datasets.load_boston() was removed in scikit-learn 1.2 — this
# line needs an older scikit-learn or a different dataset; confirm the installed version.
boston = datasets.load_boston()
# The recorded output shows the result is a float64 tensor of size [506, 13].
boston_tensor = torch.from_numpy(boston.data)
print(boston_tensor.size())
# Show only the first two rows.
print(boston_tensor[:2])
# 4.3-D Tensors
# 3-D tensors are used to represent image-like data
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
print('------------- 3-D Tensors -------------')
# read a panda image from disk using a library called PIL and convert it to numpy array
# (the image is resized to 224x224 first; the recorded tensor size is [224, 224, 3])
panda = np.array(Image.open('panda.jpg').resize((224,224)))
panda_tensor = torch.from_numpy(panda)
print(panda_tensor.size())
# Display panda
plt.imshow(panda)
# plt.show() is left disabled, so the figure is prepared but no window is opened
# plt.show()
# 5.Slicing tensors
print('------------- Slicing tensors -------------')
# sales[:slice_index], where slice_index represents the index where you want to slice the tensor
sales = torch.FloatTensor([1000.0, 323.2, 333.4, 444.5, 1000.0, 323.2, 333.4, 444.5])
# [:5] keeps the first five elements; [:-5] drops the last five (three remain here)
print(sales[:5])
print(sales[:-5])
# show image only one channel (index 0 of the last axis of panda_tensor)
plt.imshow(panda_tensor[:,:,0].numpy())
#plt.show()
# show image with some specific region (rows 25..174, columns 60..129, channel 0)
plt.imshow(panda_tensor[25:175,60:130,0].numpy())
#plt.show()
# 6.4-D Tensors
print('------------- 4-D Tensors -------------')
# 4-D tensor usually represents a batch of images
import glob
# read cat images from disk
data_path = 'cat/'
cats = glob.glob(data_path + '*.jpg')
# convert images into numpy arrays
# (only the first three files are used; each one is resized to 224x224)
cat_imgs = np.array([np.array(Image.open(cat).resize((224,224))) for cat in cats[:3]])
# NOTE(review): the reshape assumes every image decodes to 3 channels —
# confirm for grayscale or RGBA files.
cat_imgs = cat_imgs.reshape(-1,224,224,3)
cat_tensors = torch.from_numpy(cat_imgs)
# The recorded output is torch.Size([3, 224, 224, 3])
print(cat_tensors.size())
# 7.5-D Tensors
print('------------- 5-D Tensors -------------')
# 5-D tensor usually represents video data
# ========================================================================== #
# ========================= Tensors on GPU and CPU ========================= #
print('------------- Tensors on GPU and CPU -------------')
# tensor addition(+)
a = torch.rand(2,2)
b = torch.rand(2,2)
c = a + b # method 1
d = torch.add(a,b) # method 2
# add_ writes the sum back into `a`; from here on `a` holds a + b
e = a.add_(b) # method 3(in-place addition)
print('a + b = ', c)
print('torch.add(a,b) = ', d)
print('a.add_(b) = ', e)
# tensor multiply(*)
# NOTE: `a` was already overwritten by add_ above, so these products use the
# updated `a`, not the original random values.
c = a * b # method 1
d = a.mul(b) # method 2
# mul_ writes the product back into `a`
e = a.mul_(b) # method 3(in-place multiplication)
print('a * b = ', c)
print('a.mul(b) = ', d)
print('a.mul_(b) = ', e)
# tensor matrix multiply(compare on CPU and GPU)
import time
a = torch.rand(10000,10000)
b = torch.rand(10000,10000)
# CPU
tic = time.time()
a.matmul(b)
toc = time.time()
#print('Time taken: ', toc-tic, ' s')
# GPU
# Only run the GPU half when CUDA is usable; calling .cuda() unconditionally
# raises on CPU-only machines.
if torch.cuda.is_available():
    a = a.cuda()
    b = b.cuda()
    # CUDA kernels are launched asynchronously: wait for the host-to-device
    # copies before starting the clock, and for the matmul before stopping it,
    # otherwise only the launch overhead is measured.
    torch.cuda.synchronize()
    tic = time.time()
    a.matmul(b)
    torch.cuda.synchronize()
    toc = time.time()
    #print('Time taken: ', toc-tic, ' s')
# ========================================================================== #
# ================================ Variable ================================ #
# Variable class components: data, grad, creator
# (the code below reads the creator through the `grad_fn` attribute)
print('------------- Variable -------------')
# NOTE(review): the recorded output prints x as a plain tensor with
# requires_grad=True, i.e. Variable behaves as a thin wrapper around Tensor here.
from torch.autograd import Variable
x = Variable(torch.ones(2,2), requires_grad=True)
# y is the mean of the four elements, so backward() fills x.grad with 0.25 each
y = x.mean()
y.backward()
print('x:', x)
print('x.data: ', x.data)
print('x.grad: ', x.grad)
# grad_fn: 'None' for user created, function reference for other
print('x.grad_fn', x.grad_fn)
print('y.grad_fn', y.grad_fn) # MeanBackward
# ========================================================================== #
# ============================== Neural network ============================ #
# Creating data for neural network(fixed parameters x,y)
def get_data():
    """Return the fixed training set for the linear-regression example.

    Returns:
        A tuple ``(x, y)`` of float32 tensors that do not require gradients:
        ``x`` is a column of inputs with shape (N, 1) and ``y`` holds the N
        targets, where N is the number of samples listed below.
    """
    train_X = np.asarray([3.3,4.4,5.5,6.71,6.93,4.168,9.779,6.182,7.59,2.167,7.042,
                          10.791,5.313,7.997,5.654,9.27,3.1])
    train_Y = np.asarray([1.7,2.76,2.09,3.19,1.694,1.573,3.366,2.596,2.53,1.221,2.827,
                          3.465,1.65,2.904,2.42,2.94,1.3])
    # numpy produces float64; the model parameters are float32, so convert.
    # view(-1, 1) derives the row count from the data instead of hard-coding 17.
    x = torch.from_numpy(train_X).float().view(-1, 1)
    y = torch.from_numpy(train_Y).float()
    return x, y
# Creating learnable parameters(learnable parameters w,b)
def get_weights():
    """Create the two learnable parameters of the linear model.

    Returns:
        A tuple ``(w, b)`` of one-element tensors drawn from a standard normal
        distribution, both with ``requires_grad=True`` so autograd tracks them.
    """
    # Tensors carry requires_grad themselves; no Variable wrapper is needed.
    w = torch.randn(1, requires_grad=True)
    b = torch.randn(1, requires_grad=True)
    return w, b
# Network implementation
def simple_network():
    """Compute the linear model's prediction ``x @ w + b``.

    Reads the module-level ``x``, ``w`` and ``b`` (as produced by ``get_data``
    and ``get_weights``).

    Returns:
        The predictions; with ``x`` of shape (N, 1) and one-element ``w`` and
        ``b`` this is a 1-D tensor of N values.
    """
    y_pred = torch.matmul(x,w) + b
    # Much simpler: a single-feature linear layer does the same job
    # f = nn.Linear(1,1)
    return y_pred
# Loss function
def loss_fn(y, y_pred):
    """Compute the sum-of-squared-errors loss and back-propagate it.

    Side effects: zeroes any existing gradients on the module-level ``w`` and
    ``b``, then calls ``backward()`` so their ``.grad`` holds the new gradients.

    Args:
        y: Target values.
        y_pred: Model predictions (must be connected to ``w`` and ``b`` in the
            autograd graph).

    Returns:
        The loss as a Python float.
    """
    # sum of squared error(SSE) for regression problem
    loss = (y_pred-y).pow(2).sum()
    # Gradients accumulate across backward() calls, so clear stale ones first.
    for param in [w,b]:
        if param.grad is not None:
            param.grad.data.zero_()
    loss.backward()
    # The loss is a 0-dim tensor; indexing it with [0] raises in current
    # PyTorch, so extract the scalar with item().
    return loss.item()
# Optimize the neural network
def optimize(learning_rate):
    """Take one plain gradient-descent step on the module-level ``w`` and ``b``.

    Each parameter is updated in place through ``.data`` by subtracting
    ``learning_rate`` times its current gradient.

    Args:
        learning_rate: Step size applied to both parameters.
    """
    for param in (w, b):
        step = learning_rate * param.grad.data
        param.data -= step
# Dataset class
# two important function: __len__(self) and __getitem__(self, idx)
from torch.utils.data import Dataset
class DogsAndCatsDataset(Dataset):
    """Skeleton showing the three methods a custom Dataset provides."""

    def __init__(self,):
        # Do any one-time initialization here (e.g. collect file names).
        pass

    def __len__(self):
        # Must return the maximum number of elements in the dataset.
        # Raising makes an unfinished template fail loudly instead of
        # returning None, which len() rejects with a confusing TypeError.
        raise NotImplementedError('__len__ must return the dataset size')

    def __getitem__(self, idx):
        # Must return the element at position idx every time it is called.
        raise NotImplementedError('__getitem__ must return the element at idx')
class DogsAndCatsDataset(Dataset):
    """Image dataset whose label is the name of each file's parent directory.

    Args:
        root_dir: Passed straight to ``glob.glob``, so it is a glob pattern
            selecting the image files (wildcards are expanded).
        size: ``(width, height)`` every image is resized to when it is read.
    """

    def __init__(self, root_dir, size=(224,224)):
        self.files = glob.glob(root_dir)
        self.size = size

    def __len__(self):
        """Return the number of files matched by the pattern."""
        return len(self.files)

    def __getitem__(self, idx):
        """Return ``(image_array, label)`` for the file at position ``idx``."""
        # The original called .resuze(), which raises AttributeError.
        img = np.asarray(Image.open(self.files[idx]).resize(self.size))
        # The label is the second-to-last '/'-separated path component.
        # NOTE(review): this assumes '/' separators — confirm for Windows paths.
        label = self.files[idx].split('/')[-2]
        return img, label
# DataLoader class
from torch.utils.data import DataLoader
# The original snippet used `dogsdset` without ever creating it.
# NOTE(review): the glob pattern is a placeholder — point it at your own
# <label>/<image>.jpg directory tree.
dogsdset = DogsAndCatsDataset('dogsandcats/train/*/*.jpg')
dataloader = DataLoader(dogsdset, batch_size=32, num_workers=2)
for imgs, labels in dataloader:
    # apply your DL on the dataset
    pass
# imgs contain a tensor of shape (batch_size, height, width, channels)
# ========================================================================== #
Output:
------------- Scalar -------------
tensor([0.8906, 0.5367, 0.2124])
torch.Size([3])
------------- Vectors -------------
tensor([23.0000, 24.0000, 24.5000, 27.2000, 23.0000])
torch.Size([5])
------------- Matrix -------------
torch.Size([506, 13])
tensor([[6.3200e-03, 1.8000e+01, 2.3100e+00, 0.0000e+00, 5.3800e-01, 6.5750e+00,
6.5200e+01, 4.0900e+00, 1.0000e+00, 2.9600e+02, 1.5300e+01, 3.9690e+02,
4.9800e+00],
[2.7310e-02, 0.0000e+00, 7.0700e+00, 0.0000e+00, 4.6900e-01, 6.4210e+00,
7.8900e+01, 4.9671e+00, 2.0000e+00, 2.4200e+02, 1.7800e+01, 3.9690e+02,
9.1400e+00]], dtype=torch.float64)
------------- 3-D Tensors -------------
torch.Size([224, 224, 3])
QXcbConnection: Failed to initialize XRandr
------------- Slicing tensors -------------
tensor([1000.0000, 323.2000, 333.4000, 444.5000, 1000.0000])
tensor([1000.0000, 323.2000, 333.4000])
------------- 4-D Tensors -------------
torch.Size([3, 224, 224, 3])
------------- 5-D Tensors -------------
------------- Tensors on GPU and CPU -------------
a + b = tensor([[0.8486, 1.1625],
[0.7530, 0.2172]])
torch.add(a,b) = tensor([[0.8486, 1.1625],
[0.7530, 0.2172]])
a.add_(b) = tensor([[0.8486, 1.1625],
[0.7530, 0.2172]])
a * b = tensor([[3.8537e-01, 1.1534e+00],
[3.8019e-02, 9.6186e-04]])
a.mul(b) = tensor([[3.8537e-01, 1.1534e+00],
[3.8019e-02, 9.6186e-04]])
a.mul_(b) = tensor([[3.8537e-01, 1.1534e+00],
[3.8019e-02, 9.6186e-04]])
------------- Variable -------------
x: tensor([[1., 1.],
[1., 1.]], requires_grad=True)
x.data: tensor([[1., 1.],
[1., 1.]])
x.grad: tensor([[0.2500, 0.2500],
[0.2500, 0.2500]])
x.grad_fn None
y.grad_fn <MeanBackward0 object at 0x7f1723d3ac18>
2019/09/07 Sat
【1】在訓練前如何設置GPU
【2】加載預訓練模型
【3】網絡定義及常用模塊
【4】nn.Sequential() 模塊
【5】nn.ModuleList() 模塊
【1】設置 GPU:
def select_device(gpu_id):
    """Return the torch.device to run on for the requested ``gpu_id``.

    Args:
        gpu_id: Index of the CUDA device to use; a negative value forces CPU mode.

    Returns:
        ``torch.device("cuda:<gpu_id>")`` when ``gpu_id`` is non-negative and
        CUDA is available, otherwise ``torch.device("cpu")``.
    """
    # Without the sign check, gpu_id = -1 would build the invalid "cuda:-1".
    if gpu_id >= 0 and torch.cuda.is_available():
        return torch.device("cuda:" + str(gpu_id))
    return torch.device("cpu")


# Set gpu_id to -1 to run in CPU mode, otherwise set the id of the corresponding gpu
gpu_id = 1
device = select_device(gpu_id)
if device.type == "cuda":
    print('Using GPU: {} '.format(gpu_id))
# Network definition
net = ...
# 將網絡和數據都放到 GPU 上
net.to(device)
【1】torch.device
代表將 torch.Tensor
分配到的設備對象,可以通過字符串或字符串+設備序號來進行實現:
# 通過字符串
>>> torch.device('cuda:0')
device(type='cuda', index=0)
>>> torch.device('cpu')
device(type='cpu')
>>> torch.device('cuda') # current cuda device
device(type='cuda')
# 通過字符串和設備序號
>>> torch.device('cuda', 0)
device(type='cuda', index=0)
>>> torch.device('cpu', 0)
device(type='cpu', index=0)
【2】torch.cuda.is_available()
用於驗證 pytorch 是否能正確地使用 GPU 加速運算,只要安裝沒問題,其返回值就是 True:
>>> torch.cuda.is_available()
True
【2】 加載模型:
# NOTE: torch.load unpickles the file, so only load checkpoints from sources
# you trust.
# cpu->cpu
checkpoint = torch.load('model.pth')
model.load_state_dict(checkpoint)
# cpu->gpu (move every storage to GPU 1 while loading)
torch.load('model.pth', map_location=lambda storage, loc: storage.cuda(1))
# gpu1->gpu0 (remap tensors saved on cuda:1 onto cuda:0)
torch.load('model.pth', map_location={'cuda:1':'cuda:0'})
# gpu->cpu (keep every storage on the CPU); the original line had a stray ')'
torch.load('model.pth', map_location=lambda storage, loc: storage)
【3】定義網絡:
參考:
【1】nn.Module模塊
【2】PyTorch中的nn.Conv1d與nn.Conv2d
torch.nn
是專門爲神經網絡設計的模塊化接口。nn
構建於 autograd
之上,可以用來定義和運行神經網絡。nn.Module
是 nn
中十分重要的類,包含網絡各層的定義及 forward 方法。
在定義自己的網絡時,需要繼承 nn.Module
類,並實現 forward
方法。一般把網絡中具有可學習參數的層放在構造函數 __init__()
中,不具有可學習參數的層(如ReLU)既可放在構造函數中,也可不放在構造函數中(在forward中使用nn.functional來代替)。只要在 nn.Module
的子類中定義了 forward
函數,backward
函數就會被自動實現(利用Autograd)
import torch.nn as nn
import torch.nn.functional as F


class LeNet(nn.Module):
    """LeNet-style CNN: two conv+ReLU+max-pool stages followed by three linear layers.

    With a (N, 1, 28, 28) input the output is a (N, 10) tensor of class scores.
    """

    def __init__(self):
        # A subclass of nn.Module must run the parent constructor first; this
        # is the standard line and is equivalent to nn.Module.__init__(self).
        super(LeNet, self).__init__()
        # nn.Conv2d(...) builds a Conv2d module object; calling
        # self.conv1(input) dispatches to that module's forward().
        # output (N, C_{out}, H_{out}, W_{out})
        self.conv1 = nn.Conv2d(1, 6, (5, 5))
        self.conv2 = nn.Conv2d(6, 16, (5, 5))
        # 28 -> conv5 -> 24 -> pool2 -> 12 -> conv5 -> 8 -> pool2 -> 4,
        # so the flattened feature size is 16 * 4 * 4 = 256.
        self.fc1 = nn.Linear(256, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Map a (N, 1, 28, 28) batch to (N, 10) class scores."""
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))
        # Flatten everything except the batch dimension for the linear layers.
        x = x.view(x.size()[0], -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # No ReLU on the last layer: clamping the class scores at zero would
        # discard negative logits (the CNN example in these notes also
        # returns fc3 directly).
        x = self.fc3(x)
        return x
def output_name_and_params(net):
    """Print each named parameter of ``net`` on its own line.

    Args:
        net: Any object exposing ``named_parameters()`` (e.g. an nn.Module).
    """
    template = 'name: {}, param: {}'
    for entry in net.named_parameters():
        label, tensor = entry
        print(template.format(label, tensor))
if __name__ == '__main__':
    net = LeNet()
    print('net: {}'.format(net))
    params = net.parameters() # generator object
    print('params: {}'.format(params))
    output_name_and_params(net)
    # torch.FloatTensor(10, 1, 28, 28) only allocates memory and leaves it
    # uninitialized (it may contain NaN/inf), so draw a random batch instead.
    # A plain tensor can be fed to the model directly; no Variable wrapper is needed.
    input_image = torch.randn(10, 1, 28, 28)
    output = net(input_image)
    print('output: {}'.format(output))
    print('output.size: {}'.format(output.size()))
(1)nn.Conv2d
中 2d
就是二維,用於對圖像數據的卷積操作,其基本定義爲:
class torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True)
參考官方文檔:conv2d
輸入參數分別爲:in_channels
輸入通道數,out_channels
輸出通道數,kernel_size
卷積核大小,stride
卷積步長(默認爲1),padding
填充的圈數(zero-padding,默認不填充),dilation
帶孔卷積的擴散率(默認爲1,即普通的卷積),groups
分組卷積(默認爲1,即不分組),bias
是否加偏置項(默認True)。
其中,輸入、輸出通道數 和 卷積核大小是必須設置的,也就是前三項,而後面的參數均有默認值,如果不設置的話就使用默認值啦。
假設現在有大小爲 32 x 32
的圖片樣本,輸入樣本的 channels = 1
,該圖片可能屬於 10 個類中的某一類,網絡結構使用 [conv + relu + pooling] * 2 + FC * 3
,那麼 CNN 框架定義如下:
import torch.nn as nn
import torch.nn.functional as F


class CNN(nn.Module):
    """[conv + relu + pooling] * 2 + FC * 3 classifier for 1-channel 32x32 images.

    With a (N, 1, 32, 32) input the output is a (N, 10) tensor of class scores.
    """

    def __init__(self):
        # The original called nn.Model.__init__, which does not exist.
        nn.Module.__init__(self)
        # in_channels=1, out_channels=6, kernel_size=5
        self.conv1 = nn.Conv2d(1, 6, 5)
        # in_channels=6, out_channels=16, kernel_size=5
        self.conv2 = nn.Conv2d(6, 16, 5)
        # 32 -> conv5 -> 28 -> pool2 -> 14 -> conv5 -> 10 -> pool2 -> 5
        self.fc1 = nn.Linear(5 * 5 * 16, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        # relu and max-pooling have no learnable parameters, so they are
        # applied functionally in forward() instead of being defined here.

    def forward(self,x):
        """Map a (N, 1, 32, 32) batch to (N, 10) class scores."""
        # input x -> conv1 -> relu -> 2x2 max-pooling
        x = self.conv1(x) # stride defaults to 1
        x = F.relu(x)
        x = F.max_pool2d(x, 2) # kernel=2
        # x -> conv2 -> relu -> 2x2 max-pooling
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        # Flatten each sample to a vector (feature count unchanged) for the
        # fully connected layers.
        x = x.view(x.size()[0], -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
注意: 在 PyTorch 中,池化操作默認的 stride 大小與卷積核的大小一致。
(2)nn.BatchNorm2d
常用於卷積網絡中防止梯度消失或爆炸,其基本定義爲:
nn.BatchNorm2d(num_features, eps=1e-05,momentum=0.1,affine=True)
輸入參數分別爲:num_features
輸入通道數;eps
用於保持數據穩定性的一個參數,加在分母上,默認爲 1e-5;momentum
用於 running_mean 和 running_var 的計算,默認爲 0.1;affine
若爲 True,則該 BN 層包含可學習的仿射參數 weight(γ)和 bias(β);若爲 False,則不包含可學習參數
# with learnable parameters
m = nn.BatchNorm2d(100)
# without learnable parameters
m = nn.BatchNorm2d(100, affine=False)
(3)nn.ReLU
基本定義如下:
nn.ReLU(inplace=False)
參數 inplace
默認爲 False;當設爲 True 時,會直接在輸入張量上進行修改(覆蓋原數據)。是否使用 inplace 對計算結果沒有影響,設爲 True 可以節省內(顯)存,同時還可以省去反覆申請和釋放內存的時間。但是會覆蓋原變量,只要不帶來錯誤就可以使用。
import torch
import torch.nn as nn
# inplace=True makes the module overwrite its input tensor instead of
# allocating a new one for the result.
relu = nn.ReLU(inplace=True)
# Named `sample` rather than `input` so the builtin input() is not shadowed.
sample = torch.randn(5)
print("input:")
print(sample)
output = relu(sample)
print("ReLU output:")
print(output)
# Printing the input again shows that it now holds the ReLU result.
print("input:")
print(sample)
>>>
input:
tensor([-0.2954, -0.2941, 0.2327, -0.8194, -0.7024])
ReLU output:
tensor([0.0000, 0.0000, 0.2327, 0.0000, 0.0000])
input:
tensor([0.0000, 0.0000, 0.2327, 0.0000, 0.0000])
(4)nn.MaxPool2d
基本定義如下:
nn.MaxPool2d(kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False)
kernel_size
爲池化的窗口大小,stride
爲池化窗口的移動步長,默認等於池化窗口大小,padding
爲填充圈數(zero-padding),dilation
和帶孔卷積有關,但是池化並沒有可學習參數,return_indices
如果等於 True,會返回輸出最大值的序號,這樣對上採樣操作有幫助,ceil_mode
如果等於 True,則在計算輸出信號大小時會使用向上取整操作,默認的 False 是向下取整
(5)在定義網絡時,還可以在類中定義一些私有方法用來模塊化一些操作,比如在 ResNet 中定義了 _make_layer
來構建ResNet網絡中的4個blocks。
輸入參數:block
用於選擇 BasicBlock 還是 Bottleneck 類,planes
是當前 block 的輸出通道數,blocks
是該階段中要堆疊的 block(BasicBlock 或 Bottleneck)個數,它是一個整數;各階段的個數由列表 layers 給出,比如在 ResNet101 中定義:
model = ResNet(Bottleneck, [3, 4, 23, 3], n_classes, nInputChannels=nInputChannels,
classifier=classifier, dilations=dilations, strides=strides, _print=True)
這裏的 [3, 4, 23, 3]
傳給 ResNet 的 layers 參數,然後用 _make_layer
創建 block:
self.layer1 = self._make_layer(block, 64, layers[0])
self.layer2 = self._make_layer(block, 128, layers[1], stride=strides[2])
self.layer3 = self._make_layer(block, 256, layers[2], stride=strides[3], dilation__=dilations[0])
self.layer4 = self._make_layer(block, 512, layers[3], stride=strides[4], dilation__=dilations[1])
上面的 layers
參數值傳給 _make_layer
的 blocks
,生成每個 block 的卷積層關鍵代碼就在 for
循環中
# Build one ResNet stage: `blocks` instances of `block` chained in an nn.Sequential.
# Reads self.inplanes as the input channel count and updates it to
# planes * block.expansion after the first block has been created.
# `affine_par` is not defined in this snippet (presumably a module-level flag — verify).
# NOTE(review): indentation was lost in this listing, so it is unclear whether the
# requires_grad loop sits inside the `if`; if it is outside, it would fail when
# downsample is None — confirm against the original source before re-indenting.
def _make_layer(self, block, planes, blocks, stride=1, dilation__=1):
downsample = None
# A 1x1 conv + BN shortcut is built when the stride is not 1, the channel
# count changes, or the dilation is 2 or 4.
if stride != 1 or self.inplanes != planes * block.expansion or dilation__ == 2 or dilation__ == 4:
downsample = nn.Sequential(
nn.Conv2d(self.inplanes, planes * block.expansion,
kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(planes * block.expansion, affine=affine_par),
)
# Freeze the parameters of the shortcut's BatchNorm (sub-module '1').
for i in downsample._modules['1'].parameters():
i.requires_grad = False
# Only the first block receives the stride and the shortcut.
layers = [block(self.inplanes, planes, stride, dilation_=dilation__, downsample=downsample)]
self.inplanes = planes * block.expansion
# The remaining blocks - 1 instances use the default stride and no shortcut.
for i in range(1, blocks):
layers.append(block(self.inplanes, planes, dilation_=dilation__))
return nn.Sequential(*layers)
_make_layer
會爲每個 block 創建 layers[*]
個 Bottleneck
模塊,根據 Bottleneck
的定義,其中包含了三個卷積層,每個卷積層後面跟着一個 BN,最後一個卷積後除了 BN 還有 ReLU 和下采樣。
看一下 ResNet 101 網絡參數:這裏只放了前兩個 Block,分別有 3 個和 4 個 Bottleneck 模塊,可以看到 Bottleneck 發揮的作用,降低參數量 ~
ResNet(
(conv1): Conv2d(4, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace)
(maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
(layer1): Sequential(
(0): Bottleneck(
(conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace)
(downsample): Sequential(
(0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): Bottleneck(
(conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace)
)
(2): Bottleneck(
(conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace)
)
)
(layer2): Sequential(
(0): Bottleneck(
(conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace)
(downsample): Sequential(
(0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): Bottleneck(
(conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace)
)
(2): Bottleneck(
(conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace)
)
(3): Bottleneck(
(conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace)
)
)
在上面 ResNet101 的網絡參數定義中,看到一個叫 Sequential()
的東西,裏面包含了一個 conv 和 norm 層,很神奇。
【4】nn.Sequential() 模塊
查到它的定義是這樣的:Sequential 是一個有序的容器,神經網絡模塊將按照傳入該容器的順序依次被添加到計算圖中執行,同時以神經網絡模塊爲元素的有序字典也可以作爲傳入參數。
啊,說人話就是在 Sequential 裏可以聲明好多層,聲明的順序就是最終神經網絡參數傳遞的順序,如果把每個 layer 看作一個有特定工作的工人,那就可以把 Sequential 看作是一個工廠,把工人們按流水線的順序安排在工廠裏就可以了,這樣做算是一種簡化方式吧。
使用 nn.Sequential()
,必須確保前一層的輸出大小與下一層的輸入大小相匹配,使用該模塊有幾種方式:
# Method 1: create an empty container, then register layers one by one with add_module
model = nn.Sequential()
model.add_module('conv', nn.Conv2d(3, 3, 3))
model.add_module('batchnorm', nn.BatchNorm2d(3))
model.add_module('activation_layer', nn.ReLU())
# Method 2: pass the layers directly to the constructor, in execution order
model = nn.Sequential(
nn.Conv2d(1,20,5),
nn.ReLU(),
nn.Conv2d(20,64,5),
nn.ReLU()
)
# Method 3: pass an OrderedDict so that every layer gets an explicit name
from collections import OrderedDict
model = nn.Sequential(OrderedDict([
('conv1', nn.Conv2d(1,20,5)),
('relu1', nn.ReLU()),
('conv2', nn.Conv2d(20,64,5)),
('relu2', nn.ReLU())
]))
# 實例
class Net(nn.Module):
    """Three-layer fully connected network built with nn.Sequential.

    Args:
        inplanes: Number of input features.
        n_hidden_1: Width of the first hidden layer.
        n_hidden_2: Width of the second hidden layer.
        planes: Number of output features.
    """

    def __init__(self, inplanes, n_hidden_1, n_hidden_2, planes):
        super().__init__()
        # The original body referred to in_dim / out_dim, which are not
        # defined; the constructor arguments are inplanes / planes.
        self.layer = nn.Sequential(
            nn.Linear(inplanes, n_hidden_1),
            nn.ReLU(True),
            nn.Linear(n_hidden_1, n_hidden_2),
            nn.ReLU(True),
            nn.Linear(n_hidden_2, planes)
        )

    def forward(self, x):
        """Run ``x`` through the sequential stack and return the result."""
        x = self.layer(x)
        return x
# 查看模型直接輸出即可
print('model:', model)
【5】nn.ModuleList() 模塊
nn.ModuleList
用來存儲任意數量的 nn.Module。
當添加 nn.ModuleList
作爲 nn.Module
對象的一個成員時(即當我們添加模塊到我們的網絡時),所有 nn.ModuleList
內部的 nn.Module 的 parameter 也被添加作爲網絡的 parameter。
class MyModule(nn.Module):
    """Ten 10x10 linear layers held in an nn.ModuleList.

    The forward pass shows that a ModuleList can be both iterated over and
    indexed with integers.
    """

    def __init__(self):
        super().__init__()
        stack = [nn.Linear(10, 10) for _ in range(10)]
        self.linears = nn.ModuleList(stack)

    def forward(self, x):
        """Combine, at step i, layer i // 2 and layer i applied to the running value."""
        for position, layer in enumerate(self.linears):
            paired = self.linears[position // 2]
            x = paired(x) + layer(x)
        return x
定義了 nn.ModuleList
對象後,可以使用 extend
添加另一個 modulelist,或使用 append
向當前 modulelist 添加另一個 module
class LinearNet(nn.Module):
    """Stack of linear layers assembled with ModuleList.extend / append.

    Args:
        input_size: Number of input features of the first layer.
        num_layers: Total number of linear layers to create.
        layers_size: Width of the hidden layers.
        output_size: Number of output features of the last layer.
    """

    def __init__(self, input_size, num_layers, layers_size, output_size):
        super(LinearNet, self).__init__()
        # The original read self.num_layers without ever assigning it.
        self.num_layers = num_layers
        self.linears = nn.ModuleList([nn.Linear(input_size, layers_size)])
        # extend() adds several modules at once ...
        self.linears.extend([nn.Linear(layers_size, layers_size) for i in range(1, self.num_layers-1)])
        # ... while append() adds a single one (the original line lacked the
        # closing parenthesis).
        self.linears.append(nn.Linear(layers_size, output_size))

    def forward(self, x):
        # nn.ModuleList has no forward of its own, so how the layers are
        # chained must be written here; the notes leave this as an exercise.
        raise NotImplementedError('define how self.linears are applied to x')
和 nn.Sequential
不同的是,nn.ModuleList
沒有自動 forward 功能,所以需要自己定義。
2019/09/08 Sun
【1】torchvision.transforms
【2】__init__ 和 __call__
【1】torchvision.transforms
torchvision.transforms
是 PyTorch 中的圖像預處理包,一般會用 transforms.Compose
將多個處理步驟整合到一起,比如:
from torchvision import transforms
composed_transforms_tr = transforms.Compose([
transforms.CenterCrop(10),
transforms.ToTensor()
])
其他預處理函數:
Resize:把給定的圖片resize到指定大小
Normalize:對圖像進行標準化
ToTensor:將像素值在範圍[0,255]內的圖像轉換爲範圍在[0.0,1.0]的torch.Tensor
ToPILImage:將tensor轉換爲PIL圖像
CenterCrop:在圖片的中間區域進行裁剪
RandomCrop:在一個隨機的位置進行裁剪
RandomHorizontalFlip:以0.5的概率水平翻轉給定的PIL圖像
RandomVerticalFlip:以0.5的概率豎直翻轉給定的PIL圖像
RandomResizedCrop:將PIL圖像裁剪成任意大小和縱橫比
Grayscale:將圖像轉換爲灰度圖像
RandomGrayscale:將圖像以一定的概率轉換爲灰度圖像
FiveCrop:把圖像裁剪爲四個角和一個中心
ColorJitter:隨機改變圖像的亮度對比度和飽和度
【2】__init__ 和 __call__
__init__
類的初始化函數,__call__
使類的實例可以像函數一樣被調用。
class Cat():
    """Small example contrasting ``__init__`` (construction) with ``__call__``.

    Args:
        name: The cat's name.
        init_age: The cat's age when the object is created.
    """

    def __init__(self, name, init_age):
        # The original wrote `super().__init__` without parentheses, which
        # only looks the method up and never calls it.
        super().__init__()
        self.name = name
        self.age = init_age
        print("{} is playing".format(self.name))
        print("{} is {} year-old".format(self.name, self.age))

    def __call__(self, add_age):
        """Print the age after ``add_age`` more years; ``self.age`` is not changed."""
        cur_age = self.age + add_age
        print("Now {} is {} year-old".format(self.name, cur_age))
cat = Cat('kamiya', 2)
cat(1)
>>>
kamiya is playing
kamiya is 2 year-old
Now kamiya is 3 year-old