YOLOv3——TensorRT加速

工程：https://pan.baidu.com/s/1P_p46ahzDcnyBmE6Pn0cGw 提取碼：nc5l

依賴包：https://pan.baidu.com/s/1kMPzXAU2a5YIJptegp1x0g 提取碼：3ue8

1.環境搭建

解壓工程，得到deepstream-plugins文件夾，將依賴包DeepStream2.0解壓到deepstream-plugins文件夾

安裝gstreamer

sudo apt-get install gstreamer1.0-plugins-base gstreamer1.0-plugins-bad gstreamer1.0-libav gstreamer1.0-plugins-bad-videoparsers gstreamer1.0-plugins-good gstreamer1.0-plugins-ugly libgstreamer1.0-0 libgstreamer1.0-dev python3-gst-1.0

實現的配置：TensorRT-5.0.2.6、cuda9.0、opencv-4.0.0-alpha(必須帶dnn模塊)

修改根目錄的Makefile.config，其中opencv在/usr/local下

# MIT License
 
# Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved.
 
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
 
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
 
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
 
 
# Locations of the compiler and third-party packages used by the build.
# Adjust every path below to match the local installation.
CXX = g++
CUDA_VER := 9.0

# Target platform: TEGRA for Jetson boards, TESLA for discrete GPUs.
PLATFORM := TESLA

# Settings for the Tesla plugins.
OPENCV_INSTALL_DIR     := /usr/local
TENSORRT_INSTALL_DIR   := /usr/local/TensorRT-5.0.2.6
DEEPSTREAM_INSTALL_DIR := /deepstream-plugins/DeepStream_Release

# Settings for the Tegra plugins.
NVGSTIVA_APP_INSTALL_DIR := /path/to/nvgstiva-app_sources

修改./sources/lib/Makefile


# MIT License
 
# Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved.
 
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
 
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
 
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
 
 
# Load the shared build settings, stopping with an error when the settings
# file cannot be found.
CONFIG := ../../Makefile.config
ifneq ($(wildcard $(CONFIG)),)
include $(CONFIG)
else
$(error $(CONFIG) missing.)
endif

# Both settings are required by the rest of this Makefile.
ifeq "$(CUDA_VER)" ""
$(error "CUDA_VER variable is not set in Makefile.config")
endif

ifeq "$(PLATFORM)" ""
$(error PLATFORM variable is not set in Makefile.config)
endif
 
# CUDA compiler taken from the toolkit version selected by CUDA_VER.
CUCC := /usr/local/cuda-$(CUDA_VER)/bin/nvcc

# Every C++ source and CUDA kernel in this directory is part of the build.
SRCS    := $(wildcard *.cpp)
KERNELS := $(wildcard *.cu)

# One object per source/kernel, placed under BUILD_PATH.
BUILD_PATH := ./build/
OBJS = $(addprefix $(BUILD_PATH),$(SRCS:.cpp=.o)) $(addprefix $(BUILD_PATH),$(KERNELS:.cu=.o))

# Files the "deps" target lists as prerequisites.
DEPS := $(SRCS) $(KERNELS) $(wildcard *.h)

# Static archive produced by the "all" target.
TARGET := libyoloplugin.a
 
# Header search paths (INCS) and link options (LIBS) for the selected platform.
# The assignments are deliberately not tab-indented: a line that starts with a
# tab is treated as recipe text once any rule has been seen.

# TESLA: TensorRT and OpenCV are taken from the install directories named by
# TENSORRT_INSTALL_DIR and OPENCV_INSTALL_DIR.
ifeq ($(PLATFORM), TESLA)
INCS := -I"$(TENSORRT_INSTALL_DIR)/include" \
        -I"/usr/local/cuda-$(CUDA_VER)/include" \
        -I "$(OPENCV_INSTALL_DIR)/include/opencv4"

LIBS := -L"$(TENSORRT_INSTALL_DIR)/lib" -lnvinfer -lnvinfer_plugin -Wl,-rpath="$(TENSORRT_INSTALL_DIR)/lib" \
        -L"/usr/local/cuda-$(CUDA_VER)/lib64" -lcudart -lcublas -lcurand -Wl,-rpath="/usr/local/cuda-$(CUDA_VER)/lib64" \
        -L "$(OPENCV_INSTALL_DIR)/lib" -lopencv_core -lopencv_imgproc -lopencv_imgcodecs -lopencv_highgui -lopencv_dnn -Wl,-rpath="$(OPENCV_INSTALL_DIR)/lib"
endif

# TEGRA: libraries are taken from fixed system directories.
# The first include path is absolute; without the leading slash it would be
# resolved relative to the directory make runs in.
ifeq ($(PLATFORM), TEGRA)
INCS := -I"/usr/include/aarch64-linux-gnu" \
        -I"/usr/local/cuda-$(CUDA_VER)/include" \
        -I "/usr/include"

LIBS := -L "/usr/lib/aarch64-linux-gnu" -lnvinfer -lnvinfer_plugin -Wl,-rpath="/usr/lib/aarch64-linux-gnu" \
        -L "/usr/local/cuda-$(CUDA_VER)/lib64" -lcudart -lcublas -lcurand -Wl,-rpath="/usr/local/cuda-$(CUDA_VER)/lib64" \
        -L "/usr/lib" -lopencv_core -lopencv_imgproc -lopencv_imgcodecs -lopencv_highgui -lopencv_dnn -Wl,-rpath="/usr/lib"
endif
 
# Flags passed to $(CXX) for every C++ source; the glib-2.0 include flags are
# obtained from pkg-config when this line is parsed.
CXXFLAGS:= -O2 -std=c++11 -lstdc++fs -ldl -fPIC -Wall -Wunused-function -Wunused-variable -Wfatal-errors $(shell pkg-config --cflags glib-2.0)

# None of these names is a file produced by the build, so a stray file with
# the same name must never make the target look up to date.
.PHONY: all dirs clean deps clean_models clean_detections

# Default goal: build every object, then pack them into the static archive.
all: dirs deps
	ar rcs $(TARGET) $(OBJS)

# Create the working directories. mkdir -p succeeds when the directory already
# exists, so no existence test is needed.
dirs:
	mkdir -p models
	mkdir -p calibration
	mkdir -p build
	mkdir -p ./../../data/detections

# Builds all objects listed in OBJS.
deps: $(DEPS) $(OBJS)

# C++ objects. Each source needs a header of the same name and is rebuilt when
# the network configuration changes. "dirs" is order-only: it guarantees that
# build/ exists before an object is written (also under make -j) without
# forcing a recompile.
$(BUILD_PATH)%.o: %.cpp %.h network_config.h network_config.cpp | dirs
	$(CXX) $(INCS) -c -o $@ $(CXXFLAGS) $<

# CUDA kernel objects.
$(BUILD_PATH)%.o: %.cu | dirs
	$(CUCC) -c -o $@ -arch=compute_50 --shared -Xcompiler -fPIC  $<

# Remove the objects and the archive.
clean:
	rm -f ./build/*
	rm -f ./*.a

# Remove everything stored under models/.
clean_models:
	rm -rf ./models/*

# Remove everything stored under the shared detections directory.
clean_detections:
	rm -rf ../../data/detections/*

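修改./sources/apps/trt-yolo/Makefile（注意：下方列出的內容與上面的./sources/lib/Makefile完全相同，TARGET仍爲libyoloplugin.a，而後文在該目錄編譯得到的是trt-yolo-app，疑爲貼錯；請以工程壓縮包中的實際文件爲準）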

# MIT License
 
# Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved.
 
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
 
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
 
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
 
 
# NOTE(review): this listing is presented as the Makefile of
# ./sources/apps/trt-yolo/, yet its text matches the ./sources/lib/ Makefile.
# From the app directory "../../" may not reach the Makefile.config in the
# project root - confirm the path before use.

# Load the shared build settings, stopping with an error when the settings
# file cannot be found.
CONFIG := ../../Makefile.config
ifneq ($(wildcard $(CONFIG)),)
include $(CONFIG)
else
$(error $(CONFIG) missing.)
endif

# Both settings are required by the rest of this Makefile.
ifeq "$(CUDA_VER)" ""
$(error "CUDA_VER variable is not set in Makefile.config")
endif

ifeq "$(PLATFORM)" ""
$(error PLATFORM variable is not set in Makefile.config)
endif
 
# CUDA compiler taken from the toolkit version selected by CUDA_VER.
CUCC := /usr/local/cuda-$(CUDA_VER)/bin/nvcc

# Every C++ source and CUDA kernel in this directory is part of the build.
SRCS    := $(wildcard *.cpp)
KERNELS := $(wildcard *.cu)

# One object per source/kernel, placed under BUILD_PATH.
BUILD_PATH := ./build/
OBJS = $(addprefix $(BUILD_PATH),$(SRCS:.cpp=.o)) $(addprefix $(BUILD_PATH),$(KERNELS:.cu=.o))

# Files the "deps" target lists as prerequisites.
DEPS := $(SRCS) $(KERNELS) $(wildcard *.h)

# Static archive produced by the "all" target.
# NOTE(review): an archive name looks out of place for an application
# directory - confirm the intended output of this Makefile.
TARGET := libyoloplugin.a
 
# Header search paths (INCS) and link options (LIBS) for the selected platform.
# The assignments are deliberately not tab-indented: a line that starts with a
# tab is treated as recipe text once any rule has been seen.

# TESLA: TensorRT and OpenCV are taken from the install directories named by
# TENSORRT_INSTALL_DIR and OPENCV_INSTALL_DIR.
ifeq ($(PLATFORM), TESLA)
INCS := -I"$(TENSORRT_INSTALL_DIR)/include" \
        -I"/usr/local/cuda-$(CUDA_VER)/include" \
        -I "$(OPENCV_INSTALL_DIR)/include/opencv4"

LIBS := -L"$(TENSORRT_INSTALL_DIR)/lib" -lnvinfer -lnvinfer_plugin -Wl,-rpath="$(TENSORRT_INSTALL_DIR)/lib" \
        -L"/usr/local/cuda-$(CUDA_VER)/lib64" -lcudart -lcublas -lcurand -Wl,-rpath="/usr/local/cuda-$(CUDA_VER)/lib64" \
        -L "$(OPENCV_INSTALL_DIR)/lib" -lopencv_core -lopencv_imgproc -lopencv_imgcodecs -lopencv_highgui -lopencv_dnn -Wl,-rpath="$(OPENCV_INSTALL_DIR)/lib"
endif

# TEGRA: libraries are taken from fixed system directories.
# The first include path is absolute; without the leading slash it would be
# resolved relative to the directory make runs in.
ifeq ($(PLATFORM), TEGRA)
INCS := -I"/usr/include/aarch64-linux-gnu" \
        -I"/usr/local/cuda-$(CUDA_VER)/include" \
        -I "/usr/include"

LIBS := -L "/usr/lib/aarch64-linux-gnu" -lnvinfer -lnvinfer_plugin -Wl,-rpath="/usr/lib/aarch64-linux-gnu" \
        -L "/usr/local/cuda-$(CUDA_VER)/lib64" -lcudart -lcublas -lcurand -Wl,-rpath="/usr/local/cuda-$(CUDA_VER)/lib64" \
        -L "/usr/lib" -lopencv_core -lopencv_imgproc -lopencv_imgcodecs -lopencv_highgui -lopencv_dnn -Wl,-rpath="/usr/lib"
endif
 
# Flags passed to $(CXX) for every C++ source; the glib-2.0 include flags are
# obtained from pkg-config when this line is parsed.
CXXFLAGS:= -O2 -std=c++11 -lstdc++fs -ldl -fPIC -Wall -Wunused-function -Wunused-variable -Wfatal-errors $(shell pkg-config --cflags glib-2.0)

# None of these names is a file produced by the build, so a stray file with
# the same name must never make the target look up to date.
.PHONY: all dirs clean deps clean_models clean_detections

# Default goal: build every object, then pack them into the static archive.
# NOTE(review): these rules only create an archive with ar; nothing here links
# an executable - confirm this is the intended Makefile for the app directory.
all: dirs deps
	ar rcs $(TARGET) $(OBJS)

# Create the working directories. mkdir -p succeeds when the directory already
# exists, so no existence test is needed.
dirs:
	mkdir -p models
	mkdir -p calibration
	mkdir -p build
	mkdir -p ./../../data/detections

# Builds all objects listed in OBJS.
deps: $(DEPS) $(OBJS)

# C++ objects. Each source needs a header of the same name and is rebuilt when
# the network configuration changes. "dirs" is order-only: it guarantees that
# build/ exists before an object is written (also under make -j) without
# forcing a recompile.
$(BUILD_PATH)%.o: %.cpp %.h network_config.h network_config.cpp | dirs
	$(CXX) $(INCS) -c -o $@ $(CXXFLAGS) $<

# CUDA kernel objects.
$(BUILD_PATH)%.o: %.cu | dirs
	$(CUCC) -c -o $@ -arch=compute_50 --shared -Xcompiler -fPIC  $<

# Remove the objects and the archive.
clean:
	rm -f ./build/*
	rm -f ./*.a

# Remove everything stored under models/.
clean_models:
	rm -rf ./models/*

# Remove everything stored under the shared detections directory.
clean_detections:
	rm -rf ../../data/detections/*

直接在./sources/apps/trt-yolo/下編譯，出現錯誤

trt_utils.h:83:22: error: ‘nvinfer1::DimsHW YoloTinyMaxpoolPaddingFormula::compute(nvinfer1::DimsHW, nvinfer1::DimsHW, nvinfer1::DimsHW, nvinfer1::DimsHW, nvinfer1::DimsHW, const char*)’ marked ‘override’, but does not override
     nvinfer1::DimsHW compute(nvinfer1::DimsHW inputDims, nvinfer1::DimsHW kernelSize,
                      ^~~~~~~
compilation terminated due to -Wfatal-errors.

在NVIDIA Developer Forums諮詢了下nvidia的工程師，需要修改下./sources/lib/trt_utils.h

// Output-dimension formula that gives the layers registered through
// addSamePaddingLayer() a padding-based output size and every other layer the
// kernel-based ("valid") output size.
class YoloTinyMaxpoolPaddingFormula : public nvinfer1::IOutputDimensionsFormula
{

private:
    // Names of the layers that were registered for the padding-based formula.
    std::set<std::string> m_SamePaddingLayers;

    // Returns the square output size for the layer called layerName.
    // All dimension arguments are asserted to be square; dilation is not used.
    nvinfer1::DimsHW compute(nvinfer1::DimsHW inputDims, nvinfer1::DimsHW kernelSize,
                             nvinfer1::DimsHW stride, nvinfer1::DimsHW padding,
                             nvinfer1::DimsHW dilation, const char* layerName) const override
    {
        assert(inputDims.d[0] == inputDims.d[1]);
        assert(kernelSize.d[0] == kernelSize.d[1]);
        assert(stride.d[0] == stride.d[1]);
        assert(padding.d[0] == padding.d[1]);

        // Registered layers use the padding-based size, the rest use the
        // kernel-based size.
        const bool isSamePadded = m_SamePaddingLayers.count(layerName) != 0;
        const int side = isSamePadded
            ? (inputDims.d[0] + 2 * padding.d[0]) / stride.d[0]
            : (inputDims.d[0] - kernelSize.d[0]) / stride.d[0] + 1;

        return nvinfer1::DimsHW{side, side};
    }

public:
    // Registers a layer name so that compute() applies the padding-based formula to it.
    void addSamePaddingLayer(std::string input) { m_SamePaddingLayers.insert(input); }
};

修改./sources/lib/ds_image.cpp，將CV_LOAD_IMAGE_COLOR修改爲1（OpenCV 4已移除該舊宏，其值爲1，等價於cv::IMREAD_COLOR）

編譯通過。

2.網絡配置

以yolov3-voc.cfg爲例，修改./sources/lib/network_config.h 宏定義MODEL_V3

修改./sources/lib/network_config.cpp對應的參數

其中kPRECISION取kFLOAT、kINT8、kHALF分別爲fp32、int8、fp16精度

在根目錄下對應配置網絡、模型、校準圖片、測試圖片。

在./sources/apps/trt-yolo/下重新編譯得到trt-yolo-app

在根目錄執行

./sources/apps/trt-yolo/trt-yolo-app

3.推理速度比較

推理時間對比：在1080Ti上(不支持fp16)，使用以darknet53爲主幹網絡的權重，TensorRT下fp32推理時間爲9.4ms，int8(batch 1)推理時間爲5.9ms；而在darknet框架下fp32推理時間爲20.1ms，可見有較大提升。

原工程：https://github.com/NVIDIA-AI-IOT/deepstream_reference_apps

該工程還在不斷更新，目前增加SENet的tensorrt模型。有興趣可以嘗試。

本文使用的工程主要是在2018年11月左右下載，因此給出壓縮包形式。與目前的框架已有較大不同，主要是作者將網絡配置這塊單獨提煉，更方便使用，建議使用新框架。

參考：https://blog.csdn.net/cgt19910923/article/details/88847228

YOLOv3——TensorRT加速

linux安裝cuda和cudnn

模擬手機設備：使用 Playwright 實現移動端自動化測試

Mellanox網卡開啓SR-IOV

全面系統的AI學習路徑，幫助普通人也能玩轉AI

HTML 00 Tutorial

uni-app實現上拉加載

vue3編譯優化之“靜態提升”

又是一個月-20240513

flask 如何保證返回json有序

linux服務器設置ssh免密

Linux的chmod命令，對一個目錄及其子目錄所有文件添加讀寫權限

Pytorch介紹與linux、windows環境下安裝

Tensorflow下利用Deeplabv3+訓練自己的數據(超詳細完整版）

YOLOv3——TensorRT加速

Ubuntu18.04安裝nvidia顯卡驅動

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結