用pytorch訓練圖像分類器模型導出ONNX測試

項目實現功能

1. 搭建一個簡單的圖片分類器，完成訓練和測試
2. 轉換pytorch的pth模型到ONNX格式，加載ONNX並測試

項目結構

images 目錄下存放訓練和測試數據集，本例使用了kaggle競賽的貓狗數據集，統一resize到了120*120大小；
TrainTestConvertOnnx.py 是訓練和測試代碼，包括了pth模型到onnx的轉換。訓練在CPU和GPU上測試ok。文件概覽：

TestOnnx.cpp 是onnx的加載和測試代碼。文件概覽：

（注：方便一鍵運行，項目把N多操作合併到了一個文件裏）

網絡搭建訓練部分參考了 JR_Chan的博客，示謝！
網絡結構很簡單，包含了3個卷積層，一個全連接層：

詳細點的結構：

訓練效果

Epoch:1/100 test Loss: 0.6443 Acc: 0.6168
Epoch:2/100 train Loss: 0.6298 Acc: 0.6421
Epoch:2/100 test Loss: 0.5762 Acc: 0.6986
……
Epoch:99/100 train Loss: 0.2731 Acc: 0.8842
Epoch:99/100 test Loss: 0.2618 Acc: 0.8936
Epoch:100/100 train Loss: 0.2757 Acc: 0.8837
Epoch:100/100 test Loss: 0.2613 Acc: 0.8926

學習率0.002，訓練100個epoch，測試集準確率大概在89%。

onnx測試效果

網絡很小，模型文件pth和cat_dog_classify.onnx大小只有63KB。通過OpenCV調用onnx，測試效果：

順便貼一下py文件和cpp文件的代碼（略長，文末有完整工程下載鏈接）

TrainTestConvertOnnx.py

# -*- coding: UTF-8 -*-
# Created by -牧野- CSDN https://blog.csdn.net/dcrmg/article/details/102807575
# 參考 https://blog.csdn.net/JR_Chan/article/details/95641758
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import time
import os
from math import ceil
import argparse
import copy
from PIL import Image
from torchvision import transforms, datasets
from torch.autograd import Variable
from tensorboardX import SummaryWriter

# 定義一個簡單的二分類網絡
class SimpleNet(nn.Module):
    """Small CNN for single-channel images: three conv stages and one linear layer.

    Every stage is ``Conv2d(kernel 3, stride 1) -> ReLU -> MaxPool2d(2)``.  For a
    90x90 input the spatial size shrinks 90 -> 44 -> 22 -> 10, so the classifier
    sees ``32 * 10 * 10`` features and emits 3 scores per image.
    """

    # Number of features handed to the linear layer (32 channels of 10x10).
    _FLAT_FEATURES = 32 * 10 * 10

    def __init__(self):
        super().__init__()
        # 1 x 90x90 -> 8 x 44x44
        self.conv1 = self._stage(1, 8, padding=0)
        # 8 x 44x44 -> 16 x 22x22
        self.conv2 = self._stage(8, 16, padding=1)
        # 16 x 22x22 -> 32 x 10x10
        self.conv3 = self._stage(16, 32, padding=0)
        # Classification head.
        self.classifier = nn.Sequential(nn.Linear(self._FLAT_FEATURES, 3))

    @staticmethod
    def _stage(in_channels, out_channels, padding):
        """Return one ``Conv2d -> ReLU -> MaxPool2d(2)`` feature-extraction stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                      kernel_size=3, stride=1, padding=padding),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )

    def forward(self, x):
        """Return the raw class scores for a batch of images."""
        features = self.conv3(self.conv2(self.conv1(x)))
        flattened = features.view(-1, self._FLAT_FEATURES)
        return self.classifier(flattened)

# 訓練模型入口
def train(args):
    """Train ``SimpleNet`` on the image folders and save the best weights.

    Reads the data through ``ImageDataset(args)``, writes the class names (one
    per line) to ``args.class_file``, optionally resumes from ``args.resume``,
    trains with SGD + ``StepLR`` via ``train_model`` and finally stores the
    returned model's weights as ``<args.save_path>/best_model.pth``.  The
    global TensorBoard ``writer`` is closed at the end.

    Args:
        args: parsed command-line options as returned by ``set_parser()``.
    """
    # read data
    dataloders, dataset_sizes, class_names = ImageDataset(args)

    with open(args.class_file, 'w') as f:
        f.writelines(name + '\n' for name in class_names)

    # use gpu or not
    use_gpu = torch.cuda.is_available()
    print("use_gpu:{}".format(use_gpu))
    device = torch.device('cuda' if use_gpu else 'cpu')

    # get model
    model = SimpleNet()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            # map_location lets a checkpoint written on a GPU machine be resumed on CPU.
            state_dict = torch.load(args.resume, map_location='cpu')
            # Checkpoints written from a DataParallel wrapper carry a "module."
            # prefix on every key; the bare network does not.
            state_dict = {(key[len('module.'):] if key.startswith('module.') else key): value
                          for key, value in state_dict.items()}
            model.load_state_dict(state_dict)
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    if use_gpu:
        model = torch.nn.DataParallel(model)
    model.to(device)

    # define loss function (cross entropy)
    criterion = nn.CrossEntropyLoss()

    # Observe that all parameters are being optimized
    optimizer_ft = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9, weight_decay=1e-4)

    # Decay LR by a factor of 0.98 every 1 epoch
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=1, gamma=0.98)

    model = train_model(args=args,
                        model=model,
                        criterion=criterion,
                        optimizer=optimizer_ft,
                        scheduler=exp_lr_scheduler,
                        num_epochs=args.num_epochs,
                        dataset_sizes=dataset_sizes,
                        use_gpu=use_gpu,
                        dataloders=dataloders)

    # Save the weights of the bare network: test() and convert_model_to_ONNX()
    # load them into a plain SimpleNet, which rejects "module."-prefixed keys.
    bare_model = model.module if isinstance(model, torch.nn.DataParallel) else model
    os.makedirs(args.save_path, exist_ok=True)
    torch.save(bare_model.state_dict(), os.path.join(args.save_path, 'best_model.pth'))
    writer.close()

# 測試單張圖片（使用pth模型）入口
def test(test_model_path, test_img_path, class_file):
    """Classify one image with a saved ``.pth`` checkpoint and print the class name.

    Args:
        test_model_path: path of a ``SimpleNet`` state-dict checkpoint.
        test_img_path: path of the image to classify.
        class_file: text file with one class name per line.

    Prints the class name for the predicted index, or ``None`` when the index
    has no entry in ``class_file`` (the network has 3 outputs).
    """
    model = SimpleNet()
    # Load onto the CPU so a checkpoint written on a GPU machine still works here.
    state_dict = torch.load(test_model_path, map_location='cpu')
    # Accept checkpoints saved from a DataParallel wrapper ("module." key prefix).
    state_dict = {(key[len('module.'):] if key.startswith('module.') else key): value
                  for key, value in state_dict.items()}
    model.load_state_dict(state_dict)
    model.eval()

    with open(class_file, 'r') as f:
        # Drop the line terminator so the printed name carries no stray newline.
        class_names = [line.rstrip('\r\n') for line in f]

    index = predict_image(model, test_img_path)
    predict_class = class_names[index] if 0 <= index < len(class_names) else 'None'
    print(predict_class)

# 轉換pytorch訓練的pth模型到ONNX模型
def convert_model_to_ONNX(input_img_size, input_pth_model, output_ONNX):
    """Export a trained ``SimpleNet`` checkpoint to an ONNX file.

    Args:
        input_img_size: height and width of the (square) network input.
        input_pth_model: path of the ``.pth`` state-dict checkpoint to convert.
        output_ONNX: path of the ONNX file to write.

    The exported graph names its input ``input_image`` and its output
    ``output_classification``.
    """
    # torch.onnx.export traces the model with this tensor.  Use a batch of one
    # single-channel image, which is what the OpenCV consumer feeds at inference.
    dummy_input = torch.randn(1, 1, input_img_size, input_img_size)
    model = SimpleNet()

    state_dict = torch.load(input_pth_model, map_location='cpu')
    # Accept checkpoints saved from a DataParallel wrapper ("module." key prefix).
    state_dict = {(key[len('module.'):] if key.startswith('module.') else key): value
                  for key, value in state_dict.items()}

    model.load_state_dict(state_dict)
    model.eval()  # put the model in inference mode before tracing (important)

    input_names = ["input_image"]
    output_names = ["output_classification"]

    torch.onnx.export(model, dummy_input, output_ONNX, verbose=True, input_names=input_names,
                      output_names=output_names)


# 訓練模型主函數
def train_model(args, model, criterion, optimizer, scheduler, num_epochs, dataset_sizes, use_gpu, dataloders):
    """Run the train/test loop and return the model loaded with its best weights.

    For every epoch in ``range(args.start_epoch, num_epochs)`` the model is
    trained on ``dataloders['train']`` and evaluated on ``dataloders['test']``.
    Per-phase loss and accuracy are appended to ``result.txt``, printed and
    sent to the global TensorBoard ``writer``.  Every ``args.save_epoch_freq``
    epochs a checkpoint ``epoch_<zero-based epoch>.pth`` is written to
    ``args.save_path``.

    Args:
        args: parsed options; reads ``start_epoch``, ``batch_size``,
            ``print_freq``, ``save_epoch_freq`` and ``save_path``.
        model: network to train (optionally wrapped in ``DataParallel``).
        criterion: loss function called as ``criterion(outputs, labels)``.
        optimizer: optimizer updating the model parameters.
        scheduler: learning-rate scheduler stepped once per epoch.
        num_epochs: exclusive upper bound of the epoch loop.
        dataset_sizes: dict with the number of samples for 'train' and 'test'.
        use_gpu: run on 'cuda' when true, otherwise on 'cpu'.
        dataloders: dict with the 'train' and 'test' data loaders.

    Returns:
        ``model`` holding the weights of the epoch with the highest test accuracy.
    """
    begin = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    device = torch.device('cuda' if use_gpu else 'cpu')

    # Checkpoints are written from the bare network: a DataParallel wrapper
    # prefixes every key with "module.", which a plain SimpleNet cannot load.
    bare_model = model.module if isinstance(model, torch.nn.DataParallel) else model

    # Last batch seen; used to trace the graph after training.
    last_inputs = None

    for epoch in range(args.start_epoch, num_epochs):
        # Start a fresh log once, before the first phase of the first epoch.
        # (Doing this inside the phase loop deleted the epoch-1 train line.)
        if epoch == 0 and os.path.exists('result.txt'):
            os.remove('result.txt')

        # Each epoch has a training and validation phase
        for phase in ['train', 'test']:
            if phase == 'train':
                # NOTE(review): recent PyTorch releases deprecate passing the
                # epoch to step(); kept so the rate for a resumed run
                # (start_epoch > 0) stays a function of the absolute epoch.
                scheduler.step(epoch)
                # Set model to training mode
                model.train()
            else:
                # Set model to evaluate mode
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            tic_batch = time.time()

            # Iterate over data
            for i, (inputs, labels) in enumerate(dataloders[phase]):
                inputs = inputs.to(device)
                labels = labels.to(device)
                last_inputs = inputs

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward; track history only in the training phase
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)
                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics (kept as plain Python numbers)
                running_loss += loss.item() * inputs.size(0)
                running_corrects += int(torch.sum(preds == labels).item())

                samples_seen = i * args.batch_size + inputs.size(0)
                batch_loss = running_loss / samples_seen
                batch_acc = float(running_corrects) / samples_seen

                if phase == 'train' and (i + 1) % args.print_freq == 0:
                    print(
                        '[Epoch {}/{}]-[batch:{}/{}] lr:{:.6f} {} Loss: {:.6f}  Acc: {:.4f}  Time: {:.4f} sec/batch'.format(
                            epoch + 1, num_epochs, i + 1, ceil(dataset_sizes[phase] / args.batch_size),
                            # Read the rate actually in use from the optimizer.
                            optimizer.param_groups[0]['lr'], phase, batch_loss, batch_acc,
                            (time.time() - tic_batch) / args.print_freq))
                    tic_batch = time.time()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = float(running_corrects) / dataset_sizes[phase]

            with open('result.txt', 'a') as f:
                f.write('Epoch:{}/{} {} Loss: {:.4f} Acc: {:.4f} \n'.format(epoch + 1, num_epochs, phase, epoch_loss,
                                                                            epoch_acc))

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            writer.add_scalar(phase + '/Loss', epoch_loss, epoch)
            writer.add_scalar(phase + '/Acc', epoch_acc, epoch)

            # Remember the weights of the best epoch on the test set.
            if phase == 'test' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        if (epoch + 1) % args.save_epoch_freq == 0:
            os.makedirs(args.save_path, exist_ok=True)
            torch.save(bare_model.state_dict(), os.path.join(args.save_path, "epoch_" + str(epoch) + ".pth"))

    # Save the model as a graph; skipped when no batch was processed.
    if last_inputs is not None:
        writer.add_graph(model, (last_inputs,))

    time_elapsed = time.time() - begin
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Accuracy: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

# 測試單張圖片主函數
def predict_image(model, image_path):
    """Return the index of the highest-scoring class for one image file.

    The image is opened as grayscale, centre-cropped to 90x90, converted to a
    tensor and normalised with mean 0.5 / std 0.5 before being passed through
    ``model`` as a batch of one.

    Args:
        model: network to evaluate; the caller chooses train/eval mode.
        image_path: path of the image file.

    Returns:
        The arg-max index over the model output, as a Python ``int``.
    """
    image = Image.open(image_path).convert('L')

    # At test time take the central 90x90 region.
    preprocess = transforms.Compose([
        transforms.CenterCrop(90),
        transforms.ToTensor(),
        transforms.Normalize([0.5], [0.5])
    ])

    # Pre-process and add the leading batch dimension the model expects.
    image_tensor = preprocess(image).float().unsqueeze(0)

    # Run on whatever device the model lives on.  (The previous
    # `image_tensor.cuda()` discarded its result, so the input never moved.)
    first_param = next(model.parameters(), None)
    if first_param is not None:
        image_tensor = image_tensor.to(first_param.device)

    # Inference only: no autograd bookkeeping needed.
    with torch.no_grad():
        output = model(image_tensor)

    return int(output.argmax().item())


# 使用PIL讀取圖片並轉換爲灰度圖
def readImg(path):
    """Open the image at ``path`` with PIL and return it converted to mode "L" (grayscale)."""
    return Image.open(path).convert("L")

# 讀取訓練和測試數據
def ImageDataset(args):
    """Build the 'train' and 'test' data loaders from ``args.data_dir``.

    Images are read with ``readImg`` from ``<data_dir>/train`` and
    ``<data_dir>/test``.  Training samples get a random crop of
    ``def_img_train_and_test_size`` (a module-level global) plus a random
    horizontal flip; test samples get a centre crop of the same size.  Both are
    then converted to tensors and normalised with mean 0.5 / std 0.5.

    Returns:
        A tuple ``(dataloaders, dataset_sizes, class_names)``: the first two are
        dicts keyed by 'train' / 'test', the last is the class list of the
        training folder.
    """
    crop_size = def_img_train_and_test_size

    def tensor_and_normalize():
        # Steps shared by both pipelines: PIL image -> tensor -> normalised tensor.
        return [transforms.ToTensor(), transforms.Normalize([0.5], [0.5])]

    data_transforms = {
        'train': transforms.Compose(
            [transforms.RandomCrop(crop_size), transforms.RandomHorizontalFlip()]
            + tensor_and_normalize()
        ),
        'test': transforms.Compose(
            [transforms.CenterCrop(crop_size)] + tensor_and_normalize()
        ),
    }

    image_datasets = {}
    dataloaders = {}
    dataset_sizes = {}
    for split in ('train', 'test'):
        folder = os.path.join(args.data_dir, split)
        dataset = datasets.ImageFolder(folder, data_transforms[split], loader=readImg)
        image_datasets[split] = dataset
        # Only the training data is shuffled.
        dataloaders[split] = torch.utils.data.DataLoader(
            dataset,
            batch_size=args.batch_size,
            shuffle=(split == 'train'),
            num_workers=args.num_workers,
        )
        dataset_sizes[split] = len(dataset)

    class_names = image_datasets['train'].classes
    return dataloaders, dataset_sizes, class_names

# 設置參數
def set_parser():
    """Define the command-line options and return the parsed arguments.

    Parsing reads ``sys.argv``; every option has a default, so the script runs
    without any argument.
    """
    option_table = (
        # Root directory of the image data; holds the train/ and test/ folders.
        ('--data-dir', {'type': str, 'default': 'images'}),
        ('--class-file', {'type': str, 'default': 'class_names.class'}),
        ('--batch-size', {'type': int, 'default': 8}),
        ('--num-epochs', {'type': int, 'default': 100}),
        ('--lr', {'type': float, 'default': 0.002}),
        ('--num-workers', {'type': int, 'default': 8}),
        ('--print-freq', {'type': int, 'default': 100}),
        ('--save-epoch-freq', {'type': int, 'default': 1}),
        ('--save-path', {'type': str, 'default': 'output'}),
        ('--resume', {'type': str, 'default': '', 'help': 'For training from one checkpoint'}),
        ('--start-epoch', {'type': int, 'default': 0, 'help': 'Corresponding to the epoch of resume'}),
    )

    parser = argparse.ArgumentParser(description='classification')
    for flag, options in option_table:
        parser.add_argument(flag, **options)
    return parser.parse_args()

if __name__ == '__main__':
    # Parse first so that --help or a bad option exits before the log directory is created.
    args = set_parser()
    writer = SummaryWriter(log_dir='log')   # global TensorBoard writer used by train()/train_model()
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    def_img_train_and_test_size = 90  # crop size used for training, testing and ONNX export

    # Train the model.
    train(args)

    # Test the model on a single image.  Paths follow --save-path / --data-dir
    # and equal the previous hard-coded ones when the defaults are used.
    best_model_path = os.path.join(args.save_path, 'best_model.pth')
    sample_image_path = os.path.join(args.data_dir, 'test', 'cat', 'cat.0.jpg')
    test(best_model_path, sample_image_path, args.class_file)

    # Convert the checkpoint of the last epoch (zero-based file name) to ONNX;
    # fall back to the best model when that epoch was not checkpointed.
    last_epoch_path = os.path.join(args.save_path, 'epoch_{}.pth'.format(args.num_epochs - 1))
    onnx_source_path = last_epoch_path if os.path.isfile(last_epoch_path) else best_model_path
    convert_model_to_ONNX(def_img_train_and_test_size, onnx_source_path, "./cat_dog_classify.onnx")

TestOnnx.cpp


// PthONNX.cpp : 基於OpenCV dnn、 onnx 的cat、dog二分類程序
// Created by -牧野- 2019年10月29日 https://blog.csdn.net/dcrmg/article/details/102807575
//

#include <iostream>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/dnn.hpp>
#include <fstream>

//ONNX 執行推理類
class PthONNX {
public:
    // Loads the class names and the ONNX network.
    //   model_path         path of the ONNX model file
    //   classes_file_path  text file with the class names, one per line
    //   input_size         spatial size of the network input
    PthONNX(const std::string &model_path, const std::string &classes_file_path, cv::Size input_size);

    // Classifies one image.
    //   input_image            image to classify, BGR format
    //   classification_output  receives the class name (0: cat, 1: dog, 2: None)
    void Classify(const cv::Mat &input_image, std::string &classification_output);

private:
    // Runs the network on an already prepared image and writes the class name.
    void ClassifyImplement(const cv::Mat &image, std::string &classification_output);

    cv::Size input_size_;                // network input size passed to blobFromImage
    cv::dnn::Net net_classify_;          // network read from the ONNX file
    std::vector<std::string> classes_;   // class names read from the classes file
};

// 構造函數
PthONNX::PthONNX(const std::string &model_path, const std::string &classes_file_path,
                 cv::Size input_size) : input_size_(input_size) {
    // Read the class names, one per line, in file order (line i names class index i).
    std::ifstream ifs(classes_file_path.c_str());
    // CV_Assert stays active in release builds, unlike assert(): a missing
    // classes file is reported here instead of leaving classes_ empty.
    CV_Assert(ifs.is_open());
    std::string line;
    while (std::getline(ifs, line)) {
        // Drop the carriage return left over when a CRLF file is read as LF text.
        if (!line.empty() && line.back() == '\r') {
            line.pop_back();
        }
        classes_.push_back(line);
    }

    // Load the network and run it with the OpenCV backend on the CPU.
    net_classify_ = cv::dnn::readNetFromONNX(model_path);
    net_classify_.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
    net_classify_.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
}

// ONNX推理入口函數
// Public inference entry point: resizes the image to the network input size,
// converts it to a single grayscale channel and runs the classifier.
//   input_image             image to classify (BGR; BGRA and grayscale are accepted too)
//   classification_results  receives the predicted class name
void PthONNX::Classify(const cv::Mat &input_image, std::string &classification_results) {
    // Active in release builds as well, so an unreadable image is reported here.
    CV_Assert(!input_image.empty());

    // resize() writes into a fresh Mat, so the caller's image is left untouched
    // without an extra clone.  Use the configured input size, not a literal 90x90.
    cv::Mat image;
    cv::resize(input_image, image, input_size_);

    // The network takes one channel; convert only when the input has colour channels.
    if (image.channels() == 3) {
        cv::cvtColor(image, image, cv::COLOR_BGR2GRAY);
    } else if (image.channels() == 4) {
        cv::cvtColor(image, image, cv::COLOR_BGRA2GRAY);
    }

    ClassifyImplement(image, classification_results);
}

//ONNX推理主函數
// Inference worker: builds the input blob, runs the network and maps the
// highest-scoring output index to a class name.
//   image                   single-channel image prepared by Classify()
//   classification_results  receives the class name, or "None" when the
//                           winning index has no entry in the classes file
void PthONNX::ClassifyImplement(const cv::Mat &image,std::string &classification_results) {
    classification_results.clear();

    //*********** pre-processing ***********
    // The training script feeds ToTensor() followed by Normalize([0.5], [0.5]),
    // i.e. (pixel / 255 - 0.5) / 0.5 = (pixel - 127.5) / 127.5.  blobFromImage
    // computes (pixel - mean) * scalefactor, so use mean 127.5 and scale 1/127.5
    // instead of passing raw 0..255 values.
    const double scale_factor = 1.0 / 127.5;
    const cv::Scalar mean_value(127.5, 127.5, 127.5);
    cv::Mat input_blob = cv::dnn::blobFromImage(image, scale_factor, input_size_, mean_value, false, false, CV_32F);

    //*********** inference ***********
    net_classify_.setInput(input_blob);
    const std::vector<cv::String> out_names = net_classify_.getUnconnectedOutLayersNames();
    cv::Mat out_tensor = net_classify_.forward(out_names[0]);

    //*********** post-processing ***********
    // Only the location of the maximum is needed; its x coordinate is used as the class index.
    cv::Point max_idx;
    cv::minMaxLoc(out_tensor, nullptr, nullptr, nullptr, &max_idx);
    const int index_class = max_idx.x;

    // Guard the lookup: the network may report an index the classes file does not name.
    const bool has_name = index_class >= 0 && index_class < static_cast<int>(classes_.size());
    classification_results = has_name ? classes_[index_class] : "None";
}

// Demo: classifies one image with the exported ONNX model, prints the class
// name and shows the image with the label drawn on it.
int main()
{
    const std::string img_path = "D:/1/1/SimpleNet-master/images/train/cat/cat.4896.jpg";
    const std::string onnx_model_path = "D:/1/1/pytorch-train-test-onnx/cat_dog_classify.onnx";
    const std::string class_names_file_path = "D:/software/VS2019_Test/PthONNX/x64/class_names.class";
    const cv::Size net_input_size(90, 90);

    cv::Mat img = cv::imread(img_path);
    // imread returns an empty Mat when the file is missing or unreadable.
    if (img.empty()) {
        std::cerr << "Failed to read image: " << img_path << std::endl;
        return 1;
    }
    std::string classify_output; // classification result

    PthONNX classifier(onnx_model_path, class_names_file_path, net_input_size);
    classifier.Classify(img, classify_output);
    std::cout << "圖片類別：" << classify_output << std::endl << std::endl;

    // FONT_HERSHEY_DUPLEX is the named constant for the font id 2 used before.
    cv::putText(img, classify_output, cv::Point(20,20), cv::FONT_HERSHEY_DUPLEX, 1.2, cv::Scalar(0, 0, 255));
    cv::imshow("classify", img);
    cv::waitKey();
    return 0;
}

完整工程（含數據集，pytorch訓練和測試，pth模型轉onnx，onnx文件加載和測試）下載鏈接：pytorch訓練圖像分類模型pth轉ONNX並測試

用pytorch訓練圖像分類器模型導出ONNX測試

項目實現功能

項目結構

訓練效果

onnx測試效果

用pytorch訓練圖像分類器模型導出ONNX測試

MacOS Xcode11 PyCharm 2019.2基礎快捷鍵

tensorflow中共享變量 tf.get_variable 和命名空間 tf.variable_scope

計算數據集的均值和方差（mean，std）

使用pytorchviz和Netron可視化pytorch網絡結構

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結