Tesseract5+OpenCV4（VS2017+win10）實現OCR識別

一、環境配置

較之前採用cppan進行編譯的方式，vcpkg的方式已經發生了許多變化，帶來的最大不同就是便捷。

對於在NuGet中能夠找到的Vcpkg的export，真的實現了開箱即用。

這樣的話對於普通用戶來說就夠了；而複雜的問題就交給專家來解決。

二、代碼編寫和結果展示

// ConsoleOCRDEMO.cpp : 基於vcpkg+nuget快速解決環境配置問題
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <leptonica/allheaders.h>
#include <tesseract/baseapi.h>
#include <tesseract/publictypes.h>
using namespace cv;
using namespace cv::dnn;
using namespace std;
//對east的結果進行解碼
void decode(const Mat& scores, const Mat& geometry, float scoreThresh,
    std::vector<RotatedRect>& detections, std::vector<float>& confidences)
{
    detections.clear();
    CV_Assert(scores.dims == 4); CV_Assert(geometry.dims == 4); CV_Assert(scores.size[0] == 1);
    CV_Assert(geometry.size[0] == 1); CV_Assert(scores.size[1] == 1); CV_Assert(geometry.size[1] == 5);
    CV_Assert(scores.size[2] == geometry.size[2]); CV_Assert(scores.size[3] == geometry.size[3]);
    const int height = scores.size[2];
    const int width = scores.size[3];
    for (int y = 0; y < height; ++y)
    {
        const float* scoresData = scores.ptr<float>(0, 0, y);
        const float* x0_data = geometry.ptr<float>(0, 0, y);
        const float* x1_data = geometry.ptr<float>(0, 1, y);
        const float* x2_data = geometry.ptr<float>(0, 2, y);
        const float* x3_data = geometry.ptr<float>(0, 3, y);
        const float* anglesData = geometry.ptr<float>(0, 4, y);
        for (int x = 0; x < width; ++x)
        {
            float score = scoresData[x];
            if (score < scoreThresh)
                continue;
            // Decode a prediction.
            // Multiple by 4 because feature maps are 4 time less than input image.
            float offsetX = x * 4.0f, offsetY = y * 4.0f;
            float angle = anglesData[x];
            float cosA = std::cos(angle);
            float sinA = std::sin(angle);
            float h = x0_data[x] + x2_data[x];
            float w = x1_data[x] + x3_data[x];
            Point2f offset(offsetX + cosA * x1_data[x] + sinA * x2_data[x],
                offsetY - sinA * x1_data[x] + cosA * x2_data[x]);
            Point2f p1 = Point2f(-sinA * h, -cosA * h) + offset;
            Point2f p3 = Point2f(-cosA * w, sinA * w) + offset;
            RotatedRect r(0.5f * (p1 + p3), Size2f(w, h), -angle * 180.0f / (float)CV_PI);
            detections.push_back(r);
            confidences.push_back(score);
        }
    }
}
int main()
{
    //參數和常量準備
    String model = "F:/未來項目/GOCVHelper455/GOCVHelper455/GOCVHelper455/model/frozen_east_text_detection.pb";
    std::vector<Mat> outs;
    std::vector<String> outNames(2);
    outNames[0] = "feature_fusion/Conv_7/Sigmoid";
    outNames[1] = "feature_fusion/concat_3";
    Mat  blob;
    std::vector<RotatedRect> boxes;
    std::vector<float> confidences;
    std::vector<int> indices;
    char cbuf[255];
    // 引入EAST model
    Net net = readNet(model);
    //對tesseract進行初始化操作
    tesseract::TessBaseAPI tess;
    if (tess.Init("C:/Program Files/Tesseract-OCR/tessdata", "eng"))
    {
        std::cout << "OCRTesseract: Could not initialize tesseract." << std::endl;
        return 1;
    }
    tess.SetVariable("tessedit_char_whitelist", "0123456789");
    Mat src = imread("F:/未來項目/OCR2023/2.jpg");
    Mat board = src.clone();//用於顯示圖片
    blobFromImage(src, blob, 1.0, Size(1280, 1280), Scalar(), true, false);//Scalar採用默認是設置
    net.setInput(blob);
    net.forward(outs, outNames);
    Mat scores = outs[0];
    Mat geometry = outs[1];
    decode(scores, geometry, 0.5, boxes, confidences);//注意0.5是超參數
    NMSBoxes(boxes, confidences, 0.5, 0.4, indices);
    Point2f ratio((float)src.cols / 1280, (float)src.rows / 1280);//縮放比例
    //獲得最終框選結果
    for (size_t i = 0; i < indices.size(); ++i)
    {
        RotatedRect& box = boxes[indices[i]];
        Point2f vertices[4];
        box.points(vertices);
        for (int j = 0; j < 4; ++j)
        {
            vertices[j].x *= ratio.x;
            vertices[j].y *= ratio.y;
        }
        Point2f* lastItemPointer = (vertices + sizeof vertices / sizeof vertices[0]);
        vector<Point2f> contour(vertices, lastItemPointer);
        Rect boundRect = boundingRect(Mat(contour));
        //對rect適當進行擴充
        boundRect = cv::Rect(boundRect.tl().x - 5, boundRect.tl().y, boundRect.width + 10, boundRect.height);
        if (boundRect.y < src.rows)
        {
            Mat roi = src(boundRect);
            //繪製外接邊線
            for (int j = 0; j < 4; ++j)
                line(board, vertices[j], vertices[(j + 1) % 4], Scalar(0, 255, 0), 1);
            rectangle(board, boundRect, Scalar(0, 0, 255));//繪製外接最小矩形
            //打印數據
            sprintf_s(cbuf, "F:/未來項目/OCR2023//roi/%d.jpg", i);//打印出來
            imwrite(cbuf, roi);
            ////將切割出來的圖片輸入tesseract中
            auto pixs = pixRead(cbuf);
            if (!pixs)
            {
                std::cout << "Cannot open input file: " << std::endl;
                return 1;
            }
            // recognize
            tess.SetImage(pixs);
            tess.Recognize(0);
            // get result and delete[] returned char* string
            std::cout << std::unique_ptr<char[]>(tess.GetUTF8Text()).get() << std::endl;
            string strOut = tess.GetUTF8Text();
            putText(board, std::unique_ptr<char[]>(tess.GetUTF8Text()).get(), boundRect.tl(), 1, 1.0f, Scalar(0, 255, 0));
            // cleanup
            tess.Clear();
            pixDestroy(&pixs);
        }
    }
    imshow("board", board);
    imwrite("F:/未來項目/OCR2023/roi/result.jpg", board);
    cv::waitKey();
    getchar();
    return 0;
}

三、小結與相關概念

這種環境配置方法以數量級的方式降低了環境配置的難度，而且“正好”有這樣的一個可以使用的環境。其中涉及的幾個概念說明如下：

1、VCPKG是什麼

VCPKG是微軟 C++ 團隊開發的適用於 C 和 C++ 庫的跨平臺開源軟件包管理器，它大大簡化了 Windows、Linux 和 macOS 上第三方庫相關的下載和配置操作，目前已有超過1600個第三方庫可以通過vcpkg來安裝。

自動下載開源庫源代碼。一鍵安裝第三方庫。源碼包的緩存管理和版本管理，可以依需求安裝指定的版本。

自動檢查庫的依賴關係並安裝其依賴項。

無縫集成Visual Studio，不用手動設置任何的庫相關的路徑。

Visual Studio全平臺支持，不僅支持Debug/Release、x86/x64編譯，還支持UWP、ARM平臺的編譯。

2、nuget

Nuget是一個.NET平臺下的開源的項目，它是Visual Studio的擴展。在使用Visual Studio 或.NET CLI開發基於.NET 或.NET Framework的應用時，Nuget能把在項目中添加、移除和更新引用的工作變得更加快捷方便。

3、vcpkg導出NuGet

vcpkg export --nuget [options] <package>...

在 <output-dir>/<nuget-id>.<nuget-version>.nupkg創建 NuGet 包。

包含標準集成文件以及其他 MSBuild 集成，以支持通過 NuGet 包管理器包含在 MSBuild C++ 項目（.vcxproj）中。請注意，不能將多次 export 生成的多個 NuGet 包混合使用——只能使用其中一個包。若要添加其他庫，必須創建包含完整依賴項集的新導出。

Tesseract5+OpenCV4（VS2017+win10）實現OCR識別

認知提升的方法

C#開源的兩款功能強大的錄屏神器

螞蟻面試：Springcloud核心組件的底層原理，你知道多少？

前端 Vue yarn.lock文件：詳解和使用指南

嘗試使用kimi解析體能表格

Hessian矩陣以及在血管增強中的應用——OpenCV實現【2024年更新】

基於vllm，探索產業級llm的部署

【內部項目預研】對信息分類進行探索

LCEL的具體實驗

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結