nn.Module
用nn.Module實現全連接層
import torch as t
from torch import nn
from torch.autograd import Variable as V
class Linear(nn.Module):
    """Hand-written fully connected layer: matrix product with ``w`` plus ``b``.

    Args:
        in_features: Number of rows of the weight matrix ``w``.
        out_features: Number of columns of ``w`` and length of the bias ``b``.
    """

    def __init__(self, in_features, out_features):
        super(Linear, self).__init__()
        # Wrapping a tensor in nn.Parameter registers it on the module, so it
        # is returned by parameters() / named_parameters().
        weight = t.randn(in_features, out_features)
        self.w = nn.Parameter(weight)
        print(self.w.shape)
        bias = t.randn(out_features)
        self.b = nn.Parameter(bias)

    def forward(self, x):
        """Return ``x.mm(w)`` with the bias expanded to the product's shape and added."""
        product = x.mm(self.w)
        return product + self.b.expand_as(product)
# Build a layer with a 4x3 weight and feed it a random 2x4 input.
layer = Linear(4, 3)
# NOTE: `input` shadows the Python builtin of the same name at module level.
input = V(t.randn((2, 4)))
print(input.shape)
output = layer(input)
print(output)
# List every parameter registered on the layer together with its name.
for param_name, param in layer.named_parameters():
    print(param_name, param)
- 自定義層必須繼承nn.Module,並在構造函數中調用nn.Module的構造函數,即super(Linear, self).__init__(),可利用前面自定義的層作爲當前module的子層,前面定義的Module的可學習參數在當前的模塊中也是可學習的
- 在構造函數__init__中自己定義可學習的參數,並封裝成parameter
- forward函數實現前向傳播
- 無須實現反向傳播函數
- 把layer看成數學概念中的函數
- module中的可學習的參數可通過named_parameters()或者parameters()返回迭代器
當網絡很深,包含很多層的時候利用nn.Sequential()
from torch import nn
from collections import OrderedDict

# Way 1: start from an empty container and register named sub-modules one by one.
net1 = nn.Sequential()
net1.add_module('conv', nn.Conv2d(in_channels=3, out_channels=3, kernel_size=3))
net1.add_module('batchnorm', nn.BatchNorm2d(num_features=3))
net1.add_module('activation_layer', nn.ReLU())

# Way 2: pass the modules positionally; no names are supplied here.
net2 = nn.Sequential(
    nn.Conv2d(in_channels=3, out_channels=3, kernel_size=3),
    nn.BatchNorm2d(num_features=3),
    nn.ReLU(),
)

# Way 3: pass an OrderedDict so each sub-module gets an explicit name.
net3 = nn.Sequential(
    OrderedDict(
        [
            ('conv1', nn.Conv2d(in_channels=3, out_channels=3, kernel_size=3)),
            ('bn1', nn.BatchNorm2d(num_features=3)),
            ('relu1', nn.ReLU()),
        ]
    )
)

# Print all three containers so their structure can be compared.
print('net1: ', net1)
print('net2: ', net2)
print('net3: ', net3)
初始化
from torch.nn import init
from torch import nn
import torch
# NOTE: this rebinds the module-level name `Linear`, which earlier in this
# file refers to the hand-written Linear class; from here on it is an
# nn.Linear instance. The name is kept so later references still resolve.
Linear = nn.Linear(3, 4)
# Seed the RNG right before initialising so the sampled weights are reproducible.
torch.manual_seed(1)
# Use the in-place initialiser `xavier_normal_`; the un-suffixed
# `xavier_normal` is a deprecated alias that emits a warning.
init.xavier_normal_(Linear.weight)
並行計算的兩個方法
# Approach 1: wrap the model in nn.DataParallel and call the wrapper in place
# of the original model.
# NOTE(review): `net` is not defined anywhere in the visible source —
# presumably a model built elsewhere; confirm before running this snippet.
# device_ids=[0, 1] presumably requires two CUDA devices to be available.
new_net = nn.DataParallel(net, device_ids=[0, 1])
output = new_net(input)
# Approach 2: the functional form — a single call that takes the model and
# its input directly. It overwrites `output` from approach 1.
output = nn.parallel.data_parallel(net, input, device_ids=[0, 1])
搭建ResNet
from torch.nn import init
from torch import nn
import torch as t
from torch.nn import functional as F
class ResidualBlock(nn.Module):
    """Residual unit: a conv/batch-norm main branch added to a shortcut branch.

    The main branch (``left``) is conv3x3 -> BN -> ReLU -> conv3x3 -> BN.
    The shortcut branch (``right``) is whatever module the caller supplies.

    Args:
        inchannel: Input channels of the first convolution.
        outchannel: Output channels of both convolutions.
        stride: Stride of the first convolution; the second always uses 1.
        shortcut: Optional module applied to the input before the addition.
            When ``None`` the unmodified input is added instead.
    """

    def __init__(self, inchannel, outchannel, stride=1, shortcut=None):
        super(ResidualBlock, self).__init__()
        # Modules are created in the order they run, all convolutions without bias.
        first_conv = nn.Conv2d(inchannel, outchannel, 3, stride, 1, bias=False)
        first_bn = nn.BatchNorm2d(outchannel)
        activation = nn.ReLU(inplace=True)
        second_conv = nn.Conv2d(outchannel, outchannel, 3, 1, 1, bias=False)
        second_bn = nn.BatchNorm2d(outchannel)
        self.left = nn.Sequential(first_conv, first_bn, activation, second_conv, second_bn)
        self.right = shortcut

    def forward(self, x):
        """Return ReLU(left(x) + shortcut), where shortcut is ``x`` or ``right(x)``."""
        out = self.left(x)
        if self.right is None:
            residual = x
        else:
            residual = self.right(x)
        # In-place add onto the main-branch output, followed by the final ReLU.
        out += residual
        return F.relu(out)
class ResNet(nn.Module):
    """Image classifier: conv stem, four residual stages, pooling, linear head.

    The stages hold 3, 4, 6 and 3 residual blocks and produce 128, 256, 512
    and 512 channels respectively. Stages 2-4 halve the spatial resolution.

    Args:
        num_classes: Size of the output of the final linear layer.
    """

    def __init__(self, num_classes=1000):
        super(ResNet, self).__init__()
        # Stem: 7x7 stride-2 convolution followed by a 3x3 stride-2 max-pool.
        self.pre = nn.Sequential(
            nn.Conv2d(3, 64, 7, 2, 3, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(3, 2, 1),
        )
        self.layer1 = self._make_layer(64, 128, 3)
        self.layer2 = self._make_layer(128, 256, 4, stride=2)
        self.layer3 = self._make_layer(256, 512, 6, stride=2)
        self.layer4 = self._make_layer(512, 512, 3, stride=2)
        self.fc = nn.Linear(512, num_classes)

    def _make_layer(self, inchannel, outchannel, block_num, stride=1):
        """Build one stage made of ``block_num`` residual blocks.

        Args:
            inchannel: Channels entering the stage.
            outchannel: Channels produced by every block in the stage.
            block_num: Total number of blocks in the stage.
            stride: Stride of the first block and of its shortcut convolution.

        Returns:
            An ``nn.Sequential`` holding the blocks in order.
        """
        # Only the first block changes channels/stride, so only it gets a
        # 1x1-conv shortcut; the remaining blocks add their input unchanged.
        shortcut = nn.Sequential(
            nn.Conv2d(inchannel, outchannel, 1, stride=stride, bias=False),
            nn.BatchNorm2d(outchannel),
        )
        layers = [ResidualBlock(inchannel, outchannel, stride, shortcut)]
        layers.extend(
            ResidualBlock(outchannel, outchannel) for _ in range(1, block_num)
        )
        return nn.Sequential(*layers)

    def forward(self, x):
        """Run stem, the four stages, global average pooling and the linear head.

        Args:
            x: Input batch with 3 channels (the stem convolution expects 3).

        Returns:
            Tensor with one row per input sample and ``num_classes`` columns.
        """
        x = self.pre(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        # Pool to 1x1 whatever the spatial size. For a 224x224 input the map
        # here is 7x7, so this equals the former fixed avg_pool2d(x, 7); other
        # input sizes no longer break the 512-feature linear head.
        x = F.adaptive_avg_pool2d(x, 1)
        x = x.view(x.size(0), -1)
        return self.fc(x)
# Smoke test: build the default 1000-class network and run one random
# 3-channel 224x224 image through it, then print the output shape.
model = ResNet()
# NOTE: `input` shadows the Python builtin of the same name at module level.
input = t.autograd.Variable(t.randn((1, 3, 224, 224)))
o = model(input)
print(o.size())