PyTorch Practical Tips
0: References
0.1: PyTorch design
PyTorch under the hood: A guide to understand PyTorch internals
PyTorch quick-start video tutorial
0.2: Automatic differentiation
Hands-On Machine Learning with Scikit-Learn & TensorFlow: Appendix D
0.3: PyTorch source code analysis
0.4: PyTorch dynamic graphs
1: Computing gradients by hand
Construct a composite function of the form y = (x + 1) * 3, z = y * y, and compute its gradients step by step. By the chain rule, dz/dx = (dz/dy) * (dy/dx) = 2y * 3 = 18(x + 1), which is 36 at x = 1.
import torch.nn as nn
import torch as th

def show_tensor(tensor, name="None"):
    print("\n***********%s*************" % name)
    print("Tensor: ", tensor)
    print("Tensor.shape: ", tensor.shape)
    print("Tensor requires gradient: ", tensor.requires_grad)
    print("Tensor.data: ", tensor.data)
    print("Tensor.grad: ", tensor.grad)

length = 10
x = th.ones(length, requires_grad=True)
grads = th.FloatTensor([i for i in range(1, length + 1)])  # separate loop variable, so x is not shadowed
z_grads = th.ones(length)
show_tensor(grads, "tmp_grad")
show_tensor(z_grads, "z_grads")
show_tensor(x, "x=1")
# x = 1
# y = (x + 1) * 3 = 6, dy/dx = 3
# z = y * y = 36,      dz/dy = 2y = 12
y = (x + 1) * 3
show_tensor(y, "y=(x+1)*3")
# detach y from the backward chain
y_t = y.detach()
y_t.requires_grad = True
show_tensor(y_t, "y_t=y")
z = y_t * y_t
show_tensor(z, "z=y*y")
# x.data *= 100  # the data of a tensor can be adjusted in place
# show_tensor(x, "x")
z.backward(z_grads)  # fills y_t.grad = 2 * y_t = 12
show_tensor(z, "z-grad with z backward")
show_tensor(y_t, "y_t-grad with z backward")
show_tensor(x, "x-grad with z backward")  # x.grad is still None: the chain was cut at y_t
y.backward(y_t.grad)  # resume the chain: x.grad = 3 * y_t.grad = 36
show_tensor(z, "z-grad with y backward")
show_tensor(y, "y-grad with y backward")
show_tensor(x, "x-grad with y backward")
On what tensor.backward does mathematically: https://discuss.pytorch.org/t/what-does-tensor-backward-do-mathematically/27953
Reference: https://www.lizenghai.com/archives/29498.html
2: Building network models
We use LeNet5 trained on MNIST as the running example.
1: By default, the standard LeNet5 model is built as follows:
class LeNet5(nn.Module):
    def __init__(self):
        super(LeNet5, self).__init__()
        self.conv1 = nn.Conv2d(1, 20, 5, 1)
        self.conv2 = nn.Conv2d(20, 50, 5, 1)
        self.fc1 = nn.Linear(4*4*50, 500)
        self.fc2 = nn.Linear(500, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))   # 24*24*20
        x = F.max_pool2d(x, 2, 2)   # 12*12*20
        x = F.relu(self.conv2(x))   # 8*8*50
        x = F.max_pool2d(x, 2, 2)   # 4*4*50
        x = x.view(-1, 4*4*50)      # 800
        x = F.relu(self.fc1(x))     # 500
        x = self.fc2(x)             # 10
        return F.log_softmax(x, dim=1)  # log_softmax = log on top of softmax, so nll_loss yields the cross-entropy
Printing the model then gives:
Model:
LeNet5(
(conv1): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
(conv2): Conv2d(20, 50, kernel_size=(5, 5), stride=(1, 1))
(fc1): Linear(in_features=800, out_features=500, bias=True)
(fc2): Linear(in_features=500, out_features=10, bias=True)
)
name: conv1.weight Shape: torch.Size([20, 1, 5, 5])
name: conv1.bias Shape: torch.Size([20])
name: conv2.weight Shape: torch.Size([50, 20, 5, 5])
name: conv2.bias Shape: torch.Size([50])
name: fc1.weight Shape: torch.Size([500, 800])
name: fc1.bias Shape: torch.Size([500])
name: fc2.weight Shape: torch.Size([10, 500])
name: fc2.bias Shape: torch.Size([10])
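As the comment in forward notes, log_softmax followed by nll_loss is numerically the cross-entropy on raw logits; a quick sketch to confirm:
import torch
import torch.nn.functional as F

logits = torch.randn(4, 10)
target = torch.randint(0, 10, (4,))
a = F.nll_loss(F.log_softmax(logits, dim=1), target)  # what LeNet5 + nll_loss computes
b = F.cross_entropy(logits, target)                   # cross-entropy on raw logits
print(torch.allclose(a, b))                           # True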
2:動態搭建模型
爲了方便構建指定層數的網絡模型,我們可以選擇使用一個列表,根據傳入的參數決定模型的構建。但是注意默認情況下,使用一個list
(也就是self.net_group=[]
)是不可以,會導致你的optimizer找不到優化的目標,因此我們可以選擇使用nn.ModuleList()
指定我們的模型list:
class LeNet5_v1(nn.Module):
    def __init__(self):
        super(LeNet5_v1, self).__init__()
        self.net_group = nn.ModuleList()  # with self.net_group = [], the submodules are not registered and no parameters are visible to the optimizer
        self.net_group.append(nn.Conv2d(1, 20, 5, 1))
        self.net_group.append(nn.Conv2d(20, 50, 5, 1))
        self.net_group.append(nn.Linear(4*4*50, 500))
        self.net_group.append(nn.Linear(500, 10))

    def forward(self, x):
        x = F.relu(self.net_group[0](x))  # 24*24*20
        x = F.max_pool2d(x, 2, 2)         # 12*12*20
        x = F.relu(self.net_group[1](x))  # 8*8*50
        x = F.max_pool2d(x, 2, 2)         # 4*4*50
        x = x.view(-1, 4*4*50)            # 800
        x = F.relu(self.net_group[2](x))  # 500
        x = self.net_group[3](x)          # 10
        return F.log_softmax(x, dim=1)    # log_softmax = log on top of softmax, so nll_loss yields the cross-entropy
Printing the model then gives:
Model:
LeNet5_v1(
(net_group): ModuleList(
(0): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
(1): Conv2d(20, 50, kernel_size=(5, 5), stride=(1, 1))
(2): Linear(in_features=800, out_features=500, bias=True)
(3): Linear(in_features=500, out_features=10, bias=True)
)
)
name: net_group.0.weight Shape: torch.Size([20, 1, 5, 5])
name: net_group.0.bias Shape: torch.Size([20])
name: net_group.1.weight Shape: torch.Size([50, 20, 5, 5])
name: net_group.1.bias Shape: torch.Size([50])
name: net_group.2.weight Shape: torch.Size([500, 800])
name: net_group.2.bias Shape: torch.Size([500])
name: net_group.3.weight Shape: torch.Size([10, 500])
name: net_group.3.bias Shape: torch.Size([10])
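To see why a plain Python list fails while nn.ModuleList works, a minimal sketch (the class names Broken and Works are illustrative only):
import torch.nn as nn

class Broken(nn.Module):
    def __init__(self):
        super(Broken, self).__init__()
        self.net_group = [nn.Linear(10, 10)]   # plain Python list: never registered

class Works(nn.Module):
    def __init__(self):
        super(Works, self).__init__()
        self.net_group = nn.ModuleList([nn.Linear(10, 10)])  # registered submodule

print(len(list(Broken().parameters())))  # 0 -- an optimizer would reject the empty parameter list
print(len(list(Works().parameters())))   # 2 (weight and bias)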
3: A complete MNIST example
Substitute either of the LeNet5 models above into the skeleton below and it will run.
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

'''
class LeNet5_v1(nn.Module):
    def __init__(self):
        # TODO: init your network
        pass

    def forward(self, x):
        # TODO: connect your network
        pass
'''
# Training function
def train(args, model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)  # move to the GPU when device is cuda
        optimizer.zero_grad()              # clear the gradients
        output = model(data)               # forward pass
        loss = F.nll_loss(output, target)  # nll_loss on log_softmax output = cross-entropy
        loss.backward()                    # backward pass
        optimizer.step()                   # parameter update
        if batch_idx % args.log_interval == 0:  # every log_interval batches, print epoch, progress, and loss
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
# Test function
def test(args, model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
def model_show(model,name=""):
print("Model: ",name," \n",model)
for name, value in model.named_parameters():
print("name: ", name, "Shape: ",value.shape)
# Main program
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs', type=int, default=10, metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--save-model', action='store_true', default=False,
                        help='For Saving the current Model')
    args = parser.parse_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    print("Using cuda? ", use_cuda)
    torch.manual_seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('./data', train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=args.batch_size, shuffle=True, **kwargs)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('./data', train=False, transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,))
        ])),
        batch_size=args.test_batch_size, shuffle=True, **kwargs)
    model = LeNet5().to(device)
    # model_v1 = LeNet5_v1().to(device)
    # model_show(model)
    # model_show(model_v1)
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
    for epoch in range(1, args.epochs + 1):
        train(args, model, device, train_loader, optimizer, epoch)
        test(args, model, device, test_loader)
    if args.save_model:
        torch.save(model.state_dict(), "mnist_cnn.pt")

if __name__ == '__main__':
    main()
3: Tensor tracing tool: torchsnooper
https://blog.csdn.net/iodjSVf8U1J7KYc/article/details/93549944
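A minimal sketch of its use (assuming pip install torchsnooper; the decorated function is illustrative): torchsnooper.snoop() logs every executed line together with the shape, dtype, and device of the tensors it produces.
import torch
import torchsnooper

@torchsnooper.snoop()   # log each line with the shapes/dtypes/devices of its tensors
def forward(x):
    w = torch.randn(3, 3)
    return x @ w

forward(torch.randn(2, 3))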
4: C++ frontend
4.1 Installing the PyTorch C++ distribution (libtorch)
See the official instructions: https://pytorch.org/cppdocs/installing.html
4.2 Setting up the C++ build environment
Use cmake to drive the build toolchain.
Write a CMakeLists.txt:
cmake_minimum_required(VERSION 3.0 FATAL_ERROR)
project(dcgan)
find_package(Torch REQUIRED)
add_executable(dcgan dcgan.cpp)
target_link_libraries(dcgan "${TORCH_LIBRARIES}")
set_property(TARGET dcgan PROPERTY CXX_STANDARD 11)
Point cmake at the PyTorch runtime with an absolute path; to print it:
cd path/to/libtorch
pwd   # prints the absolute path passed below as CMAKE_PREFIX_PATH
4.3 An end-to-end MNIST example
Create dcgan.cpp as follows:
#include <torch/torch.h>

// Define a new Module.
struct Net : torch::nn::Module
{
    Net()
    {
        // Construct and register three Linear submodules.
        fc1 = register_module("fc1", torch::nn::Linear(784, 64));
        fc2 = register_module("fc2", torch::nn::Linear(64, 32));
        fc3 = register_module("fc3", torch::nn::Linear(32, 10));
    }

    // Implement the Net's algorithm.
    torch::Tensor forward(torch::Tensor x)
    {
        // Use one of many tensor manipulation functions.
        x = torch::relu(fc1->forward(x.reshape({x.size(0), 784})));
        x = torch::dropout(x, /*p=*/0.5, /*train=*/is_training());
        x = torch::relu(fc2->forward(x));
        x = torch::log_softmax(fc3->forward(x), /*dim=*/1);
        return x;
    }

    // Use one of many "standard library" modules.
    torch::nn::Linear fc1{nullptr}, fc2{nullptr}, fc3{nullptr};
};

int main()
{
    // Create a new Net.
    auto net = std::make_shared<Net>();
    for (const auto& p : net->named_parameters())
    {
        std::cout << "Tensor Name: " << p.key() << "; "
                  << "\tTensor Size: " << p.value().nbytes()
                  << std::endl;
        //<< ", Tensor Value: \n" << p.value() << std::endl;
    }
    //std::cout << "Show the Network: \n" << *net << std::endl;

    // Create a multi-threaded data loader for the MNIST dataset.
    auto data_loader = torch::data::make_data_loader(
        torch::data::datasets::MNIST("./data").map(
            torch::data::transforms::Stack<>()),
        /*batch_size=*/64);

    // Instantiate an SGD optimization algorithm to update our Net's parameters.
    torch::optim::SGD optimizer(net->parameters(), /*lr=*/0.01);

    for (size_t epoch = 1; epoch <= 10; ++epoch)
    {
        size_t batch_index = 0;
        // Iterate the data loader to yield batches from the dataset.
        for (auto& batch : *data_loader)
        {
            // Reset gradients.
            optimizer.zero_grad();
            // Execute the model on the input data.
            torch::Tensor prediction = net->forward(batch.data);
            // Compute a loss value to judge the prediction of our model.
            torch::Tensor loss = torch::nll_loss(prediction, batch.target);
            // Compute gradients of the loss w.r.t. the parameters of our model.
            loss.backward();
            // Update the parameters based on the calculated gradients.
            optimizer.step();
            // Output the loss and checkpoint every 100 batches.
            if (++batch_index % 100 == 0)
            {
                std::cout << "Epoch: " << epoch << " | Batch: " << batch_index
                          << " | Loss: " << loss.item<float>() << std::endl;
                // Serialize your model periodically as a checkpoint.
                torch::save(net, "net.pt");
            }
        }
    }
}
4.4 Building with cmake
mkdir build && cd build
cmake -DCMAKE_PREFIX_PATH=the-abs-path-to-libtorch ..
cmake --build . --config Release
4.5 Downloading the MNIST dataset
See: http://yann.lecun.com/exdb/mnist/
Decompress the files with gzip -d into the data folder next to the executable.
4.6 Running the program
./dcgan
5: GPU stream acceleration
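A minimal sketch of queueing independent work on two CUDA streams via torch.cuda.Stream (assumes a CUDA device; overlap is only visible for sufficiently large kernels):
import torch

if torch.cuda.is_available():
    s1 = torch.cuda.Stream()
    s2 = torch.cuda.Stream()
    a = torch.randn(4096, 4096, device="cuda")
    with torch.cuda.stream(s1):   # kernels below are queued on s1
        b = a.mm(a)
    with torch.cuda.stream(s2):   # independent kernels can overlap on s2
        c = a.add(1.0)
    torch.cuda.synchronize()      # wait for both streams before reading b, c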
6: Building from source with DEBUG
6.1 Switch to sudo and install the dependencies
pip3 install -r requirements.txt
6.2 Build the source
sudo DEBUG=1 USE_OPENCV=1 USE_FFMPEG=1 USE_LMDB=1 USE_CUDA=1 USE_CUDNN=1 python3 setup.py build develop
Note: sudo is required and must come first on the command line; otherwise the AVX code generation fails.
(To build without CUDA, pass NO_CUDA=1.)
Reference: building PyTorch from source