# 【小白學PyTorch】4 構建模型三要素與權重初始化

## 1 模型三要素

1. 必須要繼承nn.Module這個類，要讓PyTorch知道這個類是一個Module
2. 在__init__(self)中設置好需要的組件，比如conv，pooling，Linear，BatchNorm等等
3. 最後在forward(self,x)中用定義好的組件進行組裝，就像搭積木，把網絡結構搭建出來，這樣一個模型就定義好了

def __init__(self):
    """Create the layers and register them as attributes of the module."""
    super(Net, self).__init__()
    # Convolutional part: two 5x5 convolutions, each followed by 2x2 max pooling.
    self.conv1 = nn.Conv2d(3, 6, 5)
    self.pool1 = nn.MaxPool2d(2, 2)
    self.conv2 = nn.Conv2d(6, 16, 5)
    self.pool2 = nn.MaxPool2d(2, 2)
    # Fully connected part: 16*5*5 -> 120 -> 84 -> 10 features.
    self.fc1 = nn.Linear(16 * 5 * 5, 120)
    self.fc2 = nn.Linear(120, 84)
    self.fc3 = nn.Linear(84, 10)


def forward(self, x):
    """Assemble the layers created in ``__init__`` into the forward pass.

    Each convolution is followed by ReLU and then max pooling; the result is
    flattened and passed through the three linear layers. The last layer's
    output is returned without an activation.
    """
    x = self.pool1(F.relu(self.conv1(x)))
    x = self.pool2(F.relu(self.conv2(x)))
    # Flatten to rows of 16*5*5 values, the input width fc1 was built with.
    x = x.view(-1, 16 * 5 * 5)
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    return self.fc3(x)


x爲模型的輸入，第一行表示x經過conv1，然後經過激活函數relu，然後經過pool1操作

# Instantiate the model defined above.
net = Net()
# Calling the module runs its forward pass.
# NOTE: `inputs` is not defined in this snippet; assumed to be an input batch prepared elsewhere.
outputs = net(inputs)


## 2 參數初始化

# 定義權值初始化
def initialize_weights(self):
    """Re-initialise every Conv2d, BatchNorm2d and Linear submodule in place.

    * Conv2d: Xavier-normal weights, bias set to zero.
    * BatchNorm2d: weight set to one, bias set to zero.
    * Linear: weights drawn from a normal with mean 0 and std 0.01,
      bias set to zero.

    A layer built without an optional parameter (``bias=False`` or
    ``affine=False``) holds ``None`` for it; such parameters are skipped
    instead of raising ``AttributeError``.
    """
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            nn.init.xavier_normal_(m.weight)
            if m.bias is not None:
                nn.init.zeros_(m.bias)
        elif isinstance(m, nn.BatchNorm2d):
            if m.weight is not None:
                nn.init.ones_(m.weight)
            if m.bias is not None:
                nn.init.zeros_(m.bias)
        elif isinstance(m, nn.Linear):
            # Same effect as m.weight.data.normal_(0, 0.01).
            nn.init.normal_(m.weight, 0, 0.01)
            if m.bias is not None:
                nn.init.zeros_(m.bias)


# self.modules的源碼
def modules(self):
    """Yield each module produced by ``named_modules()``, dropping its name."""
    for _, module in self.named_modules():
        yield module


## 3 完整運行代碼

import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    """Small CNN: two conv/pool stages followed by three linear layers."""

    def __init__(self):
        """Create the layers and register them as attributes of the module."""
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Apply conv -> ReLU -> pool twice, flatten, then the linear layers.

        The last linear layer's output is returned without an activation.
        """
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        # Flatten to rows of 16*5*5 values, the input width fc1 was built with.
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

    def initialize_weights(self):
        """Re-initialise every Conv2d, BatchNorm2d and Linear submodule in place.

        * Conv2d: Xavier-normal weights, bias set to zero.
        * BatchNorm2d: weight set to one, bias set to zero.
        * Linear: weights drawn from a normal with mean 0 and std 0.01,
          bias set to zero.

        A layer built without an optional parameter (``bias=False`` or
        ``affine=False``) holds ``None`` for it; such parameters are skipped
        instead of raising ``AttributeError``.
        """
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.xavier_normal_(m.weight)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                if m.weight is not None:
                    nn.init.ones_(m.weight)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                # Same effect as m.weight.data.normal_(0, 0.01).
                nn.init.normal_(m.weight, 0, 0.01)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)

# Build the network and apply the custom initialisation.
net = Net()
net.initialize_weights()
# modules() returns a generator, so printing it shows only the generator object.
print(net.modules())
# Iterating the generator prints each module it yields.
for m in net.modules():
    print(m)


# 這個是print(net.modules())的輸出
<generator object Module.modules at 0x0000023BDCA23258>
# 這個是第一次從net.modules()取出來的東西，是整個網絡的結構
Net(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)
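# 從net.modules()第二次開始取的東西就是每一層了
Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
Linear(in_features=400, out_features=120, bias=True)
Linear(in_features=120, out_features=84, bias=True)
Linear(in_features=84, out_features=10, bias=True)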


# Xavier-normal initialisation of the weights, using the in-place
# `xavier_normal_` (the name without the trailing underscore is deprecated).
torch.nn.init.xavier_normal_(m.weight.data)
# Zero the bias only when the layer actually has one.
if m.bias is not None:
    m.bias.data.zero_()


## 4 尺寸計算與參數計算

net = Net()
net.initialize_weights()
layers = {}  # not used anywhere in this snippet
# Print only the first Conv2d yielded by modules(), then stop.
for m in net.modules():
    if isinstance(m, nn.Conv2d):
        print(m)
        break


Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))


【問題1：輸入特徵圖和輸出特徵圖的尺寸計算】

$$output = \left\lfloor\frac{input+2\times padding -kernel}{stride}\right\rfloor+1$$

net = Net()
net.initialize_weights()
# A tensor of ones with shape (16, 3, 10, 10); named `inputs` so the
# builtin input() is not shadowed.
inputs = torch.ones((16, 3, 10, 10))
# Apply only the first convolution to see how it changes the shape.
outputs = net.conv1(inputs)
print(inputs.shape)
print(outputs.shape)


torch.Size([16, 3, 10, 10])
torch.Size([16, 6, 6, 6])


$$\frac{10+2\times 0-5}{1}+1=6$$ 算出來的結果沒毛病。

【問題2：這個卷積層中有多少的參數？】

net = Net()
net.initialize_weights()
# Show the parameter shapes of the first Conv2d yielded by modules(), then stop.
for m in net.modules():
    if isinstance(m, nn.Conv2d):
        print(m)
        print(m.weight.shape)
        print(m.bias.shape)
        break


Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
torch.Size([6, 3, 5, 5])
torch.Size([6])