Hi3559 + opencv4.3.0 cross-compilation + Tengine optimization + face_detector sample test

In the 4.3.0 release, OpenCV added Tengine as an Arm backend for the DNN module, bundling it as a third-party library and improving OpenCV DNN performance on Arm platforms. This article cross-compiles opencv4.3.0 with Tengine support enabled, runs the face_detector sample from the DNN samples on a Hi3559 board, and measures the speedup.

The build server has no Internet access, so this cross-compilation is done offline and some dependencies must be downloaded by hand.

1. Cross-compiling opencv4.3.0 for Hi3559

1.1 Building Tengine

Tengine's open-source repository on GitHub: https://github.com/OAID/Tengine

It has two relevant branches. The master branch is the standalone source: once built, it runs on Arm and other platforms, converting models from other frameworks into its own tmfile format to speed up neural-network inference.

The other branch is tengine-opencv, the source meant to be built as an OpenCV third-party library; that is the one we need here.

Download the zip from https://github.com/OAID/Tengine/tree/tengine-opencv

After unpacking:

cd example_config
cp himix200_linux_cross.config himix100_linux_cross.config

#Change the configuration to use the Hi3559 cross-toolchain
#The following option is used for setting cross compiler path
EMBEDDED_CROSS_PATH=/opt/hisi-linux/x86-arm/aarch64-himix100-linux/bin/

#The following option is used for setting cross compiler prefix
CROSS_COMPILE=aarch64-himix100-linux-

#The following option is used for system architecture type. 
#option value [x86/Arm32/Arm64]
ARCH_TYPE=Arm64

By default the build produces a shared library. To get the static library instead, edit linux_build.sh and add one entry to the cmake configuration arguments:

-DBUILT_IN_OPENCV=ON \

Run the build:

./linux_build.sh example_config/himix100_linux_cross.config

If libtengine.a shows up under install/lib after the build, it succeeded.

1.2 Building opencv4.3.0

Cross-compiling opencv4.3.0 is not much different from earlier versions. Only the normal flow is covered below; if other errors come up, see the earlier article on porting opencv3.2.0 to Hi3559 (complete summary).

Download the opencv-4.3.0 source: https://github.com/opencv/opencv/tree/4.3.0

After unpacking:

mkdir output build
cd build

# replace the paths with your own
cmake -DCMAKE_BUILD_TYPE=RELEASE  \
-DCMAKE_INSTALL_PREFIX=../output  \
-DCMAKE_C_COMPILER=aarch64-himix100-linux-gcc \
-DCMAKE_CXX_COMPILER=aarch64-himix100-linux-g++ \
-DCMAKE_EXE_LINKER_FLAGS="-lrt -lpthread" \
-DOPENCV_ENABLE_NONFREE=ON \
-DBUILD_SHARED_LIBS=ON \
-DBUILD_EXAMPLES=ON \
-DBUILD_PERF_TESTS=OFF \
-DBUILD_TESTS=OFF \
-DWITH_OPENCL=OFF \
-DBUILD_DOCS=OFF \
-DWITH_CUDA=OFF \
-DWITH_IPP=OFF \
-DWITH_PNG=OFF \
-DENABLE_PROFILING=OFF \
-DOPENCV_LIBTENGINE_ROOT_DIR=/*yourpath*/Tengine-tengine-opencv/install \
-DWITH_TENGINE=ON \
-DZLIB_INCLUDE_DIR=/*yourpath*/opencv-4.3.0/3rdparty/zlib \
..

Offline, watch the output after the cmake step; you may hit an ADE download failure:

-- ADE: Download: v0.1.1f.zip
-- Try 1 failed
-- 

The fix is to find the download URL, fetch the source by hand, and point cmake at the local copy.

Open opencv-4.3.0/modules/gapi/cmake/DownloadADE.cmake

Download the zip from the GitHub address given there and rename it v0.1.1f.zip

Edit DownloadADE.cmake:

- "https://github.com/opencv/ade/archive/"
+ "file:///yourpath/"
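The same substitution can be scripted; a minimal sketch, shown on a sample line, with /yourpath standing in for wherever v0.1.1f.zip was saved (the trailing slash matters because cmake appends the archive filename to this base URL):

```shell
# Rewrite the ADE base URL into a local file:// URL (demonstrated on one line;
# apply the same sed expression to the real DownloadADE.cmake).
echo '"https://github.com/opencv/ade/archive/"' \
  | sed 's#https://github.com/opencv/ade/archive/#file:///yourpath/#'
# prints: "file:///yourpath/"
```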

Re-run cmake.

Any other download failure can be fixed the same way.

Then run:

make 
make install

If other dependency errors appear, see the earlier article.

The resulting .so and .a files land in the output directory; the dnn module in this build is the Tengine-accelerated version.

2. Testing the face_detector code

The face test set comes from the dataset below; all images were collected from the Internet.

Multi-Task Facial Landmark (MTFL) dataset

This dataset contains 12,995 face images collected from the Internet. The images are annotated with (1) five facial landmarks, (2) attributes of gender, smiling, wearing glasses, and head pose.

The model files used by face_detector are under opencv-4.3.0/samples/dnn/face_detector:

The Caffe model files are

res10_300x300_ssd_iter_140000_fp16.caffemodel

deploy.prototxt

The TensorFlow model files are

opencv_face_detector_uint8.pb

opencv_face_detector.pbtxt

With network access, the download_weights.py script in that folder fetches them.

face_detector test code:

#include <opencv2/dnn.hpp>
#include <opencv2/opencv.hpp>
#include <string.h>
#include <dirent.h>
#include <unistd.h>
using namespace cv;
using namespace cv::dnn;

#include <iostream>
#include <string>
#include <vector>
#include <cstdlib>   // exit()
using namespace std;
const size_t inWidth = 300;
const size_t inHeight = 300;
const double inScaleFactor = 1.0;
const Scalar meanVal(104.0, 177.0, 123.0);
const float confidenceThreshold = 0.6;
int readFileList(const char* basePath, vector<string> &fileList);
void face_detect_dnn();
void mtcnn_demo();
int main(int argc, char** argv)
{
    face_detect_dnn();
    //waitKey(0);
    return 0;
}

int readFileList(const char* basePath, vector<string> &fileList)
{
    DIR *dir;
    struct dirent *ptr;
    char base[1000];

    if ((dir=opendir(basePath)) == NULL)
    {
        perror("Open dir error...");
        exit(1);
    }

    while ((ptr=readdir(dir)) != NULL)
    {
        if (strcmp(ptr->d_name, ".") == 0 || strcmp(ptr->d_name, "..") == 0)    // current dir or parent dir
            continue;
        else if (ptr->d_type == DT_REG)    // regular file
            fileList.push_back(ptr->d_name);
        else if (ptr->d_type == DT_LNK)    // symbolic link
            printf("d_name:%s/%s\n", basePath, ptr->d_name);
        else if (ptr->d_type == DT_DIR)    // directory
        {
            memset(base,'\0',sizeof(base));
            strcpy(base,basePath);
            strcat(base,"/");
            strcat(base,ptr->d_name);
            readFileList(base, fileList);
        }
    }
    closedir(dir);
    return 1;
}
void face_detect_dnn() {
    //String modelDesc = "D:/projects/opencv_tutorial/data/models/resnet/deploy.prototxt";
    // String modelBinary = "D:/projects/opencv_tutorial/data/models/resnet/res10_300x300_ssd_iter_140000.caffemodel";
    //String modelBinary = "D:/opencv-4.2.0/opencv/sources/samples/dnn/face_detector/opencv_face_detector_uint8.pb";
    //String modelDesc = "D:/opencv-4.2.0/opencv/sources/samples/dnn/face_detector/opencv_face_detector.pbtxt";
    String modelBinary = "opencv_face_detector_uint8.pb";
    String modelDesc = "opencv_face_detector.pbtxt";
    // initialize the network
    // dnn::Net net = readNetFromCaffe(modelDesc, modelBinary);
    dnn::Net net = readNetFromTensorflow(modelBinary, modelDesc);

    net.setPreferableBackend(DNN_BACKEND_OPENCV);
    net.setPreferableTarget(DNN_TARGET_CPU);
    if (net.empty())
    {
        printf("could not load net...\n");
        return;
    }

#if 0
    // open the camera
    // VideoCapture capture(0);
    VideoCapture capture("D:/images/video/Boogie_Up.mp4");
    if (!capture.isOpened()) {
        printf("could not load camera...\n");
        return;
    }
#endif

    Mat frame;
    int count = 0;
    //char imagePath[100] = {};
    string inputPath = "/mnt/yuwy/opencv/lfw_5590/";
    //char outPath[100] = {};
    string outPath = "/mnt/yuwy/opencv/out_lfw_5590/";
    vector<string> imageList;
    readFileList(inputPath.c_str(), imageList);
    //while (capture.read(frame)) 
    for (size_t i = 0; i < imageList.size(); i++)
    {
        //getchar();
        //imagePath = to_string(i) + ".jpg";
        //sprintf_s(imagePath, "%d.jpg", i);
        //printf("imagePath:%s\n", imagePath);
        //frame = cv::imread("E:/opencv_4_2_0_is_installed_here/opencv/sources/samples/dnn/face_detector/0.jpg");

        string imagePath = inputPath + imageList[i];
        frame = cv::imread(imagePath);
        if (frame.empty())
        {
            printf("read test jpg error\n");
        }
        else
        {
            //cout << "get image: " << imagePath << endl;
            int64 start = getTickCount();

#if 0
            if (frame.empty())
            {
                break;
            }
#endif

            // flip horizontally
            // flip(frame, frame, 1);
            //imshow("input", frame);
            if (frame.channels() == 4)
                cvtColor(frame, frame, COLOR_BGRA2BGR);

            // prepare the input blob
            Mat inputBlob = blobFromImage(frame, inScaleFactor,
                Size(inWidth, inHeight), meanVal, false, false);
            net.setInput(inputBlob, "data");

            // run face detection
            Mat detection = net.forward("detection_out");
            vector<double> layersTimings;
            double freq = getTickFrequency() / 1000;
            double time = net.getPerfProfile(layersTimings) / freq;
            Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>());

            ostringstream ss;
            for (int j = 0; j < detectionMat.rows; j++)
            {
                // confidence in [0, 1]
                float confidence = detectionMat.at<float>(j, 2);
                if (confidence > confidenceThreshold)
                {
                    count++;
                    int xLeftBottom = static_cast<int>(detectionMat.at<float>(j, 3) * frame.cols);
                    int yLeftBottom = static_cast<int>(detectionMat.at<float>(j, 4) * frame.rows);
                    int xRightTop = static_cast<int>(detectionMat.at<float>(j, 5) * frame.cols);
                    int yRightTop = static_cast<int>(detectionMat.at<float>(j, 6) * frame.rows);

                    Rect object((int)xLeftBottom, (int)yLeftBottom,
                        (int)(xRightTop - xLeftBottom),
                        (int)(yRightTop - yLeftBottom));

                    rectangle(frame, object, Scalar(0, 255, 0));

                    ss.str("");   // reset, otherwise confidences accumulate across faces
                    ss << confidence;
                    String conf(ss.str());
                    String label = "Face: " + conf;
                    int baseLine = 0;
                    Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
                    rectangle(frame, Rect(Point(xLeftBottom, yLeftBottom - labelSize.height),
                        Size(labelSize.width, labelSize.height + baseLine)),
                        Scalar(255, 255, 255), FILLED);
                    putText(frame, label, Point(xLeftBottom, yLeftBottom),
                        FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 0, 0));
                }
            }
            float fps = getTickFrequency() / (getTickCount() - start);
            ss.str("");
            ss << "FPS: " << fps << " ; inference time: " << time << " ms";
            cout << i << " ; inference time: " << time << " ms" << endl;
            putText(frame, ss.str(), Point(20, 20), 0, 0.75, Scalar(0, 0, 255), 2, 8);
            //imshow("dnn_face_detection", frame);
            //sprintf_s(outPath, "out%d.jpg", i);
            string outimagePath =  outPath + imageList[i];
            imwrite(outimagePath, frame);
            //if (waitKey(1) >= 0) break;
            //if (waitKey(1) >= 0) return;
  
        }
    }
    cout << "total face: " << count << endl;
}

Makefile

CC = aarch64-himix100-linux-g++

#INCLUDE_PATH = -I/home/sdb/yuwy/nfs/opencv/opencv-4.3.0-original/include/opencv4 \
				-I/home/sdb/yuwy/nfs/opencv/opencv-4.3.0-original/output/include
INCLUDE_PATH = -I/home/sdb/yuwy/nfs/opencv/opencv-4.3.0-tengine/include/opencv4 \
				-I/home/sdb/yuwy/nfs/opencv/opencv-4.3.0-tengine/output/include
LIB_PATH = -Wl,-rpath-link -Wl,/home/sdc/yuwy/opencv/glib-2.47.3/output/lib \
	-L/home/sdc/yuwy/opencv/opencv-4.3.0/output/lib \
	-L/home/sdc/yuwy/opencv/opencv-4.3.0/3rdparty/lib

LD = -lopencv_highgui -lopencv_dnn -lopencv_features2d -lopencv_flann -lopencv_calib3d -lopencv_objdetect -lopencv_imgcodecs -lopencv_imgproc -lopencv_videoio -lopencv_core -lpthread -lrt -ldl -lzlib
 
all : face_detector.cpp
	$(CC) $(INCLUDE_PATH) face_detector.cpp -o face_detector $(LIB_PATH) $(LD)

clean: 
	rm face_detector

Mind the model and test-image paths in the code, and the lib/include paths in the Makefile. Once it builds, the binary can run on the Hi3559 board; remember to add the shared-library search path on the board:

export LD_LIBRARY_PATH=/mnt/yuwy/opencv/opencv-4.3.0/lib:$LD_LIBRARY_PATH
echo $LD_LIBRARY_PATH

3. Test results

With a 300x300 model input, face_detector takes about 330 ms per image without Tengine and about 170 ms per image with Tengine enabled, a substantial improvement.
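As a quick sanity check on those numbers (a back-of-envelope sketch, not a new measurement):

```shell
# 330 ms -> 170 ms per image: speedup ratio and the equivalent throughput.
awk 'BEGIN {
    printf "speedup: %.2fx\n", 330 / 170        # prints: speedup: 1.94x
    printf "throughput: %.1f -> %.1f img/s\n", 1000 / 330, 1000 / 170
}'
```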

Test result with acceleration (screenshot omitted)

Test result without acceleration (screenshot omitted)
