Loading data
# Wrap the (x, y) tensor pair in a Dataset and batch it with DataLoader.
import torch.utils.data as Data
torch_dataset = Data.TensorDataset(x, y) # x and y are torch tensors
loader = Data.DataLoader(
dataset=torch_dataset, # torch TensorDataset format
batch_size=BATCH_SIZE, # mini-batch size
shuffle=True, # shuffle the data each epoch (recommended)
num_workers=0 # loader worker processes; must be 0 on Windows
)
for epoch in range(3): # iterate over the whole dataset 3 times
for step, (batch_x, batch_y) in enumerate(loader): # each step yields one mini-batch
# NOTE(review): the training-step body is omitted in these notes
Note: in Data.TensorDataset(x, y), the order of x and y only needs to match the order of the batch_x and batch_y yielded by the loader later.
Training
# Standard SGD regression training loop.
net = Net()
optimizer = torch.optim.SGD(net.parameters(), lr=0.5)
loss_func = torch.nn.MSELoss() # mean-squared error, for regression
for step in range(100):
prediction = net(input) # forward pass; NOTE(review): `input` shadows the Python builtin — rename at call site
loss = loss_func(prediction, target) # prediction first, target second
optimizer.zero_grad() # clear gradients left over from the previous step
loss.backward() # backpropagate to compute new gradients
optimizer.step() # apply the gradient update
Saving & loading
# Two ways to persist a model, and their matching restore calls.
torch.save(net, 'net.pkl')                      # save the entire network object
torch.save(net.state_dict(), 'net_params.pkl')  # save only the parameters (smaller, preferred)
net = torch.load('net.pkl')                     # restore the whole pickled network
# Fixed typo: `load_state_dcit` -> `load_state_dict` (the original raised AttributeError).
net.load_state_dict(torch.load('net_params.pkl'))
Example
def save():
    """Build a small 1->10->1 regression net, train it on the module-level
    (x, y) tensors, and persist it to disk in two forms."""
    net1 = torch.nn.Sequential(
        torch.nn.Linear(1, 10),
        torch.nn.ReLU(),
        torch.nn.Linear(10, 1),
    )
    optimizer = torch.optim.SGD(net1.parameters(), lr=0.5)
    loss_func = torch.nn.MSELoss()

    # Plain SGD loop over the full dataset (no mini-batching here).
    for step in range(100):
        prediction = net1(x)
        loss = loss_func(prediction, y)
        optimizer.zero_grad()   # reset gradients before each backward pass
        loss.backward()
        optimizer.step()

    torch.save(net1, 'net.pkl')                      # save the whole network
    torch.save(net1.state_dict(), 'net_params.pkl')  # save parameters only
def restore_net():
    """Reload the fully pickled network from 'net.pkl' and run one forward pass."""
    restored = torch.load('net.pkl')  # deserializes the entire nn.Module object
    prediction = restored(x)
def restore_params():
    """Rebuild the architecture from scratch, then load the saved weights into it."""
    # The architecture must match the one that produced 'net_params.pkl'.
    blank_net = torch.nn.Sequential(
        torch.nn.Linear(1, 10),
        torch.nn.ReLU(),
        torch.nn.Linear(10, 1),
    )
    # Loading a state_dict is faster than unpickling the whole module (restore_net).
    blank_net.load_state_dict(torch.load('net_params.pkl'))
    prediction = blank_net(x)
Dropout
# Dropout: randomly zeroes activations during training to reduce overfitting.
# (Method fragments — the enclosing nn.Module subclass is not shown.)
def __init__(self):
...
self.dropout = torch.nn.Dropout(0.2) # drop 20% of activations at train time
...
def forward(self, x):
...
x = self.dropout(x) # apply dropout between layers
...
# Switch to eval mode: dropout is disabled, so outputs are deterministic.
net_dropped.eval()
# Switch back to train mode: dropout is active again.
net_dropped.train()
Batch Normalization
Batch normalization is usually placed before each hidden layer — i.e., right after the raw data input and after each hidden layer's activation function. The output layer can be left without it.
# Batch normalization: normalizes activations using per-batch statistics.
# (Method fragments — the enclosing nn.Module subclass is not shown.)
def __init__(self):
...
self.bn_input = nn.BatchNorm1d(1, momentum=0.5) # BN over the 1-feature input
...
def forward(self, x):
...
x = self.bn_input(x) # normalize the raw input before the hidden layers
...
# eval mode: freezes moving_mean / moving_var (uses accumulated running statistics).
net.eval()
# train mode: resumes updating moving_mean / moving_var from each batch.
net.train()