load data
import torch.utils.data as Data
torch_dataset = Data.TensorDataset(x, y)   # x, y are tensors
loader = Data.DataLoader(
    dataset=torch_dataset,   # torch TensorDataset format
    batch_size=BATCH_SIZE,   # mini-batch size
    shuffle=True,            # whether to shuffle the data (shuffling is usually better)
    num_workers=0,           # workers used to load data; must be 0 on Windows
)
for epoch in range(3):   # train over the whole dataset 3 times
    for step, (batch_x, batch_y) in enumerate(loader):   # each step the loader yields one mini-batch for training
Note: the order of x and y in Data.TensorDataset(x, y) just needs to match the batch_x and batch_y used later.
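A minimal self-contained sketch of this pattern, assuming toy tensors and BATCH_SIZE = 5 (both are illustrative choices, not from the original snippet):

import torch
import torch.utils.data as Data

BATCH_SIZE = 5                          # assumed batch size, for illustration
x = torch.linspace(1, 10, 10)           # toy inputs
y = torch.linspace(10, 1, 10)           # toy targets

torch_dataset = Data.TensorDataset(x, y)
loader = Data.DataLoader(
    dataset=torch_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,        # reshuffle every epoch
    num_workers=0,       # single-process loading (required on Windows)
)

for epoch in range(3):
    for step, (batch_x, batch_y) in enumerate(loader):
        # training would happen here; just show the batch shapes
        print(epoch, step, batch_x.shape, batch_y.shape)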
train
net = Net()
optimizer = torch.optim.SGD(net.parameters(), lr=0.5)
loss_func = torch.nn.MSELoss()

for step in range(100):
    prediction = net(input)               # forward pass
    loss = loss_func(prediction, target)  # compute loss
    optimizer.zero_grad()                 # clear gradients from the previous step
    loss.backward()                       # backpropagate
    optimizer.step()                      # update parameters
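The snippet above assumes a Net class and input/target tensors that are not defined in these notes. A hedged, runnable sketch using a small two-layer regression net and toy data (architecture and data are assumptions for illustration):

import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):                    # assumed architecture, for illustration only
    def __init__(self):
        super().__init__()
        self.hidden = nn.Linear(1, 10)
        self.predict = nn.Linear(10, 1)

    def forward(self, x):
        x = F.relu(self.hidden(x))
        return self.predict(x)

# toy regression data: y = x^2 plus noise
input = torch.unsqueeze(torch.linspace(-1, 1, 100), dim=1)
target = input.pow(2) + 0.2 * torch.rand(input.size())

net = Net()
optimizer = torch.optim.SGD(net.parameters(), lr=0.5)
loss_func = torch.nn.MSELoss()

for step in range(100):
    prediction = net(input)               # forward pass
    loss = loss_func(prediction, target)  # compute loss
    optimizer.zero_grad()                 # clear old gradients
    loss.backward()                       # backpropagate
    optimizer.step()                      # update parameters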
save & load
torch.save(net, 'net.pkl')                          # save the entire network
torch.save(net.state_dict(), 'net_params.pkl')      # save only the parameters
net = torch.load('net.pkl')                          # load the entire network
net.load_state_dict(torch.load('net_params.pkl'))   # load parameters into an existing network
Example
def save():
    net1 = torch.nn.Sequential(
        torch.nn.Linear(1, 10),
        torch.nn.ReLU(),
        torch.nn.Linear(10, 1)
    )
    optimizer = torch.optim.SGD(net1.parameters(), lr=0.5)
    loss_func = torch.nn.MSELoss()

    # train
    for t in range(100):
        prediction = net1(x)
        loss = loss_func(prediction, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    torch.save(net1, 'net.pkl')                      # save the entire network
    torch.save(net1.state_dict(), 'net_params.pkl')  # save only the parameters
def restore_net():
    net2 = torch.load('net.pkl')
    prediction = net2(x)
def restore_params():
    # first build an identical network
    net3 = torch.nn.Sequential(
        torch.nn.Linear(1, 10),
        torch.nn.ReLU(),
        torch.nn.Linear(10, 1)
    )
    net3.load_state_dict(torch.load('net_params.pkl'))  # faster than restore_net
    prediction = net3(x)
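A hedged usage sketch for the three functions; the toy tensors below are assumptions, and save()/restore_*() read x and y from module scope:

import torch

# toy regression data shared by save() and the two restore functions
x = torch.unsqueeze(torch.linspace(-1, 1, 100), dim=1)
y = x.pow(2) + 0.2 * torch.rand(x.size())

save()              # trains net1, writes net.pkl and net_params.pkl
restore_net()       # rebuilds the model by loading the whole pickled object
restore_params()    # rebuilds the model by loading only the saved parameters

Note that recent PyTorch releases default torch.load to weights_only=True, so loading a whole pickled module may require torch.load('net.pkl', weights_only=False).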
Dropout
def __init__(self):
    ...
    self.dropout = torch.nn.Dropout(0.2)  # drop 20%
    ...

def forward(self, x):
    ...
    x = self.dropout(x)
    ...
# switch to eval mode so the dropout effect is disabled
net_dropped.eval()
# switch back to train mode (dropout active again)
net_dropped.train()
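A compact, runnable sketch of a net with dropout and the train/eval switch; the name net_dropped matches the snippet above, but the layer sizes and toy input are illustrative assumptions:

import torch
import torch.nn as nn

net_dropped = nn.Sequential(
    nn.Linear(1, 100),
    nn.ReLU(),
    nn.Dropout(0.2),      # drop 20% of activations during training
    nn.Linear(100, 1),
)

x = torch.unsqueeze(torch.linspace(-1, 1, 20), dim=1)   # toy input

net_dropped.train()       # dropout active: repeated forward passes differ
out_train = net_dropped(x)

net_dropped.eval()        # dropout disabled: output is deterministic
out_eval = net_dropped(x)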
Batch Normalization
Batch normalization is usually added before each hidden layer, i.e. placed right after the raw input and after the previous hidden layer's activation function.
The output layer can be left without it.
def __init__(self):
    ...
    self.bn_input = nn.BatchNorm1d(1, momentum=0.5)
    ...

def forward(self, x):
    ...
    x = self.bn_input(x)
    ...
# set eval mode to freeze moving_mean and moving_var
net.eval()
# switch back to train mode so moving_mean and moving_var keep updating
net.train()
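A minimal sketch of this layout (the class name BNNet, layer sizes, and toy input are assumptions for illustration), including the eval/train switch:

import torch
import torch.nn as nn
import torch.nn.functional as F

class BNNet(nn.Module):                                    # hypothetical name
    def __init__(self):
        super().__init__()
        self.bn_input = nn.BatchNorm1d(1, momentum=0.5)    # BN right after the raw input
        self.hidden = nn.Linear(1, 10)
        self.bn_hidden = nn.BatchNorm1d(10, momentum=0.5)  # BN after the hidden activation
        self.predict = nn.Linear(10, 1)                    # no BN on the output layer

    def forward(self, x):
        x = self.bn_input(x)
        x = F.relu(self.hidden(x))
        x = self.bn_hidden(x)
        return self.predict(x)

net = BNNet()
x = torch.unsqueeze(torch.linspace(-1, 1, 20), dim=1)      # toy input

net.train()        # moving_mean / moving_var keep updating from each batch
out = net(x)

net.eval()         # moving_mean / moving_var frozen; running statistics are used
out = net(x)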