torch.nn
import torch.nn as nn
Translated notes based on What is torch.nn really?
This post briefly introduces the main modules of the PyTorch framework and can serve as introductory PyTorch notes. It assumes some familiarity with the basics of neural networks (or experience implementing gradient-descent code for machine learning).
PyTorch works with torch.tensor, so the data needs to be converted first:
import torch
x_train, y_train, x_valid, y_valid = map(
    torch.tensor,
    (x_train, y_train, x_valid, y_valid)
)
x_train.shape
x_train.min()
x_train.max()
The built-in map(function, iterable, ...) applies function to each element of the iterable(s) and returns a lazy iterator.
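For example, a tiny illustration (values chosen arbitrarily):
squares = map(lambda v: v * v, [1, 2, 3])
print(list(squares))  # the iterator is lazy; list() materializes it -> [1, 4, 9]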
nn.functional
import torch.nn.functional as F
Contains the function-style counterparts of the layers in the torch.nn library, along with a large number of loss and activation functions.
import torch.nn.functional as F
loss_func = F.cross_entropy
loss = loss_func(model(x), y)
loss.backward()
Here loss.backward() computes (accumulates) the gradients of the model's parameters, weights and bias included; the actual parameter update happens later.
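As a quick sanity check (a sketch, assuming the model and loss from the snippet above), the computed gradients then sit on each parameter's .grad attribute:
for name, p in model.named_parameters():
    print(name, p.grad.shape)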
What is the difference between nn and nn.functional in PyTorch?
nn.functional.xxx is the function interface, while nn.Xxx is the class wrapper around nn.functional.xxx, and every nn.Xxx inherits from the common ancestor nn.Module. On top of the functionality of nn.functional.xxx, nn.Xxx therefore carries the attributes and methods of nn.Module, e.g. train(), eval(), load_state_dict, state_dict.
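A small illustration (layer sizes are arbitrary) of those inherited nn.Module methods on an nn.Xxx instance:
conv = nn.Conv2d(3, 64, kernel_size=3)
print(list(conv.state_dict().keys()))  # ['weight', 'bias']
conv.eval()   # switch to evaluation mode
conv.train()  # switch back to training mode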
- The two are called differently
nn.Xxx: instantiate first, then call the instance like a function, passing in the data
inputs = torch.rand(64, 3, 28, 28)  # (batch, channels, H, W)
conv = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, padding=1)
out = conv(inputs)
nn.functional.xxx: pass in the data together with weight, bias, and the other parameters
weight = torch.rand(64, 3, 3, 3)  # (out_channels, in_channels, kH, kW)
bias = torch.rand(64)
out = nn.functional.conv2d(inputs, weight, bias, padding=1)
- Whether they can be combined with nn.Sequential
nn.Xxx inherits from nn.Module, so it composes naturally with nn.Sequential:
fm_layer = nn.Sequential(
    nn.Conv2d(3, 64, kernel_size=3, padding=1),
    nn.BatchNorm2d(num_features=64),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2),
    nn.Dropout(0.2)
)
whereas nn.functional.xxx cannot be used with nn.Sequential.
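A quick sketch of why: nn.Sequential only accepts nn.Module instances, so passing a plain function such as F.relu fails.
try:
    nn.Sequential(F.relu)
except TypeError as e:
    print(e)  # F.relu is a function, not a Module subclass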
- Whether you have to define and manage weight and bias parameters yourself
With nn.Xxx you do not need to define and manage the weights yourself:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        # for 1x28x28 inputs: 28 -> 24 -> 12 -> 8 -> 4, hence 4 * 4 * 32
        self.cnn1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5, padding=0)
        self.relu1 = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(kernel_size=2)
        self.cnn2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, padding=0)
        self.relu2 = nn.ReLU()
        self.maxpool2 = nn.MaxPool2d(kernel_size=2)
        self.linear1 = nn.Linear(4 * 4 * 32, 10)

    def forward(self, x):
        out = self.maxpool1(self.relu1(self.cnn1(x)))
        out = self.maxpool2(self.relu2(self.cnn2(out)))
        out = self.linear1(out.view(x.size(0), -1))
        return out
With nn.functional.xxx you must define the weights yourself and pass them in manually on every call, which hurts code reuse:
class CNN(nn.Module):
    """CNN built on nn.functional; functionally equivalent to the one above"""
    def __init__(self):
        super(CNN, self).__init__()
        self.cnn1_weight = nn.Parameter(torch.rand(16, 1, 5, 5))
        self.bias1_weight = nn.Parameter(torch.rand(16))
        self.cnn2_weight = nn.Parameter(torch.rand(32, 16, 5, 5))
        self.bias2_weight = nn.Parameter(torch.rand(32))
        # F.linear expects a weight of shape (out_features, in_features)
        self.linear1_weight = nn.Parameter(torch.rand(10, 4 * 4 * 32))
        self.bias3_weight = nn.Parameter(torch.rand(10))

    def forward(self, x):
        out = F.conv2d(x, self.cnn1_weight, self.bias1_weight)
        out = F.relu(out)
        out = F.max_pool2d(out, kernel_size=2)
        out = F.conv2d(out, self.cnn2_weight, self.bias2_weight)
        out = F.relu(out)
        out = F.max_pool2d(out, kernel_size=2)
        out = F.linear(out.view(x.size(0), -1), self.linear1_weight, self.bias3_weight)
        return out
Both definitions above yield functionally identical CNNs.
The official PyTorch recommendation:
- For layers with learnable parameters (e.g. conv2d, linear, batch_norm), use nn.Xxx.
- For operations without learnable parameters (e.g. maxpool, loss functions, activation functions), use nn.functional.xxx or nn.Xxx according to personal preference.
- Finally, for dropout, nn.Xxx is strongly recommended, because dropout normally runs only during training and is disabled during evaluation. With dropout defined via nn.Xxx, calling model.eval() turns off every dropout layer in the model; dropout defined via nn.functional.dropout is not turned off by model.eval(), so you have to write F.dropout(x, training=self.training) instead.
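A minimal sketch contrasting the two dropout styles (layer sizes are arbitrary):
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.drop = nn.Dropout(0.2)  # toggled automatically by train()/eval()

    def forward(self, x):
        x = self.drop(x)  # respects model.eval()
        # the functional form must be told explicitly whether we are training
        x = F.dropout(x, p=0.2, training=self.training)
        return x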
nn.Module & nn.Parameter
Inherit from nn.Module to build a class that stores the weights and bias and implements the forward step.
nn.Module provides many attributes and methods (e.g. .parameters() and .zero_grad()).
nn.Linear
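For example, nn.Linear registers its weight and bias automatically. A minimal sketch following the tutorial's MNIST setup (784-dimensional inputs, 10 classes):
class Mnist_Logistic(nn.Module):
    def __init__(self):
        super(Mnist_Logistic, self).__init__()
        self.lin = nn.Linear(784, 10)  # weight and bias are created and registered automatically

    def forward(self, xb):
        return self.lin(xb)

model = Mnist_Logistic()
print([p.shape for p in model.parameters()])  # [torch.Size([10, 784]), torch.Size([10])]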
torch.optim
torch.optim provides various optimization algorithms; the optimizer's step() performs the parameter update, so you don't have to update every parameter by hand:
opt.step()
opt.zero_grad()
opt.zero_grad() resets all gradients to 0; it has to be called before computing the gradients for the next batch.
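The snippets below call a get_model() helper that is not defined in this post; a minimal sketch of it, assuming the Mnist_Logistic model above and an illustrative learning rate:
from torch import optim

def get_model():
    model = Mnist_Logistic()
    return model, optim.SGD(model.parameters(), lr=0.1)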
DataLoader
TensorDataset is a Dataset that wraps tensors:
from torch.utils.data import TensorDataset
train_ds = TensorDataset(x_train, y_train)
DataLoader manages batches and makes iteration easy:
from torch.utils.data import DataLoader
train_dl = DataLoader(train_ds, batch_size=32)
The training loop:
model, opt = get_model()

for epoch in range(epochs):
    for xb, yb in train_dl:
        pred = model(xb)
        loss = loss_func(pred, yb)

        loss.backward()
        opt.step()
        opt.zero_grad()

print(loss_func(model(xb), yb))
Add Validation
Compute and print the validation loss for each epoch during training.
model, opt = get_model()

for epoch in range(epochs):
    # training
    model.train()
    for xb, yb in train_dl:
        pred = model(xb)
        loss = loss_func(pred, yb)

        loss.backward()
        opt.step()
        opt.zero_grad()

    # after training, before validation:
    # make sure nn.BatchNorm2d and nn.Dropout behave appropriately (turned off)
    model.eval()
    with torch.no_grad():
        valid_loss = sum(loss_func(model(xb), yb) for xb, yb in valid_dl)

    print(epoch, valid_loss / len(valid_dl))
To simplify the code and improve readability, factor out fit() and get_data() functions:
def get_data(train_ds, valid_ds, bs):
    return (
        DataLoader(train_ds, batch_size=bs, shuffle=True),
        DataLoader(valid_ds, batch_size=bs * 2),
    )

def loss_batch(model, loss_func, xb, yb, opt=None):
    loss = loss_func(model(xb), yb)

    if opt is not None:
        loss.backward()
        opt.step()
        opt.zero_grad()

    return loss.item(), len(xb)
import numpy as np

def fit(epochs, model, loss_func, opt, train_dl, valid_dl):
    for epoch in range(epochs):
        model.train()
        # iterate over the training batches
        for xb, yb in train_dl:
            loss_batch(model, loss_func, xb, yb, opt)

        model.eval()
        with torch.no_grad():
            losses, nums = zip(*[loss_batch(model, loss_func, xb, yb) for xb, yb in valid_dl])
        val_loss = np.sum(np.multiply(losses, nums)) / np.sum(nums)

        print(epoch, val_loss)
The main code then reduces to:
train_dl, valid_dl = get_data(train_ds, valid_ds, bs)
model, opt = get_model()
fit(epochs, model, loss_func, opt, train_dl, valid_dl)
nn.Sequential
Modeled on the Sequential model in Keras:
model = nn.Sequential(
    Lambda(preprocess),
    nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.Conv2d(16, 10, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.AvgPool2d(4),
    Lambda(lambda x: x.view(x.size(0), -1)),
)
Note that PyTorch does not provide a view layer, so one has to be built by hand (the Lambda used inside the Sequential above):
class Lambda(nn.Module):
    def __init__(self, func):
        super(Lambda, self).__init__()
        self.func = func

    def forward(self, x):
        return self.func(x)

def preprocess(x):
    return x.view(-1, 1, 28, 28)
Using GPU
Models trained on a GPU are loaded differently from models trained on a CPU, and the device has to be set explicitly.
First, check whether a GPU is available:
print(torch.cuda.is_available())
Then select the device:
dev = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
Move each batch of data to the GPU (using .to(torch.device("cuda")) or .cuda()). Note that Tensor.to returns a new tensor, so the result has to be assigned back:
xb = xb.to(dev)  # or xb = xb.cuda()
yb = yb.to(dev)  # or yb = yb.cuda()
Finally, move the model to the GPU:
model.to(dev) # model.cuda()
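Putting the pieces together, a minimal sketch of a device-aware training step (assuming train_dl, model, loss_func, and opt from the earlier sections):
for xb, yb in train_dl:
    xb, yb = xb.to(dev), yb.to(dev)  # move the batch to the selected device
    pred = model(xb)
    loss = loss_func(pred, yb)

    loss.backward()
    opt.step()
    opt.zero_grad()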