文檔圖像傾斜校正算法(3)——二維傅里葉變換法傾斜校正

文檔圖像傾斜校正算法(3)——二維傅里葉變換法傾斜校正

原理:文本行具有一定的紋理信息,通過傅里葉變換後在頻域中會凸顯出來。

適用範圍:二維傅里葉變換法傾斜校正需要有明顯的頻域信息時才能生效。爲得到更顯著的頻域特徵,在使用時一般會先對文檔圖像的二值化圖像進行文本區域融合。但是對於文字稀疏、頻域特徵不明顯的文檔,該方法會失效。該方法能彌補文本行投影法在短文本傾斜矯正時的不足:短文本,尤其是文本行存在錯行時,使用投影法難以對傾斜文檔圖像進行校正,二維傅里葉變換法採用全局信息進行傾斜矯正,能得到較爲理想的結果,但是當背景紋理過於突出時也會失效,所以建議先截取文檔圖像中的關鍵區域再進行傾斜校正。
在這裏插入圖片描述

#include <algorithm>
#include <cmath>
#include <iterator>
#include <numeric>
#include <vector>

#include <opencv2/opencv.hpp>
#include <opencv2/imgproc/imgproc.hpp>

// The definitions below refer to OpenCV and standard-library names unqualified.
using namespace cv;
using namespace std;
#define MY_SKEW 14

// Image rotation, variant 1 ("crop"): rotate about the image centre and keep
// only a reduced canvas, so the output is smaller than the input.
//   img          : input image, single- or three-channel
//   imgout       : receives the rotated image
//   degree       : rotation angle in degrees (sign is flipped internally)
//   border_value : value used for output pixels that map outside the input
// Returns 1 when img is empty, 0 otherwise.
int rotateImage1(Mat img,Mat & imgout, int degree,int border_value)
{
    if (img.empty())
    {
        return 1;
    }

    // The rotation-matrix routine treats positive angles as counter-clockwise;
    // flipping the sign makes a positive `degree` rotate clockwise.
    degree = -degree;
    const double radians = degree * CV_PI / 180.;
    const double sin_abs = fabs(sin(radians));
    const double cos_abs = fabs(cos(radians));

    const int src_w = img.cols;
    const int src_h = img.rows;

    // Reduced canvas size for the cropped result.
    int dst_w = int(src_w * cos_abs - src_h * sin_abs);
    int dst_h = int(src_h * cos_abs - src_w * sin_abs);
    if (!(dst_w > 20 && dst_h > 20))
    {
        // Too small (or negative) in either direction: fall back to 20x20.
        dst_w = 20;
        dst_h = 20;
    }

    // 2x3 affine matrix stored in a local buffer:
    //   [ m0  m1  m2 ]      [ A11  A12  b1 ]
    //   [ m3  m4  m5 ]  =>  [ A21  A22  b2 ]
    // affine_mat and affine_c are headers over `affine`, so the legacy C
    // routine fills the buffer in place.
    float affine[6];
    Mat affine_mat = Mat(2, 3, CV_32F, affine);
    CvMat affine_c = affine_mat;
    cv2DRotationMatrix(cvPoint2D32f(src_w / 2, src_h / 2), degree, 1.0, &affine_c);

    // Shift the translation terms so the rotation centre lands in the middle
    // of the new canvas.
    affine[2] += (dst_w - src_w) / 2;
    affine[5] += (dst_h - src_h) / 2;

    // Three-channel images get the fill value in every channel; otherwise only
    // the first component is set.
    const Scalar fill = (img.channels() == 3)
        ? Scalar(border_value, border_value, border_value)
        : Scalar(border_value);

    // flags = 1 (INTER_LINEAR), borderMode = 0 (BORDER_CONSTANT).
    warpAffine(img, imgout, affine_mat, Size(dst_w, dst_h), 1, 0, fill);
    return 0;
}

// Image rotation, variant 2 ("full"): rotate about the image centre and
// enlarge the canvas so the whole rotated image remains visible.
//   img          : input image, single- or three-channel
//   imgout       : receives the rotated image (same type as img)
//   degree       : rotation angle in degrees (sign is flipped internally)
//   border_value : value used for output pixels that map outside the input
// Returns 1 when img is empty, 0 otherwise.
int rotateImage2(Mat img,Mat & imgout, int degree,int border_value)
{
    if(img.empty())
        return 1;

    // The rotation-matrix routine treats positive angles as counter-clockwise;
    // flipping the sign makes a positive `degree` rotate clockwise.
    degree = -degree;
    double angle = degree  * CV_PI / 180.; // radians
    double a = sin(angle), b = cos(angle);
    int width = img.cols;
    int height = img.rows;

    // Bounding box of the rotated image.
    int width_rotate = int(width * fabs(b)+height * fabs(a));
    int height_rotate = int(height * fabs(b)+width * fabs(a));
    if(width_rotate<=20||height_rotate<=20)
    {
        // Degenerate result: fall back to a 20x20 canvas.
        width_rotate = 20;
        height_rotate = 20;
    }

    // 2x3 affine matrix stored in a local buffer:
    //   [ m0  m1  m2 ]      [ A11  A12  b1 ]
    //   [ m3  m4  m5 ]  =>  [ A21  A22  b2 ]
    // map_matrix and map_matrix2 are headers over `map`, so the legacy C
    // routine fills the buffer in place.
    float map[6];
    Mat map_matrix = Mat(2, 3, CV_32F, map);
    CvPoint2D32f center = cvPoint2D32f(width / 2, height / 2);
    CvMat map_matrix2 = map_matrix;
    cv2DRotationMatrix(center, degree, 1.0, &map_matrix2);

    // Shift the translation terms so the rotation centre lands in the middle
    // of the enlarged canvas.
    map[2] += (width_rotate - width) / 2;
    map[5] += (height_rotate - height) / 2;

    // Warp straight into imgout; the canvas is already width_rotate x
    // height_rotate, so no post-resize is needed. (Size takes width first:
    // resizing the result to Size(rows, cols) would transpose the dimensions
    // and stretch every non-square image.)
    // flags = 1 (INTER_LINEAR), borderMode = 0 (BORDER_CONSTANT).
    int chnnel =img.channels();
    if(chnnel == 3)
        warpAffine(img, imgout, map_matrix, Size(width_rotate, height_rotate), 1, 0, Scalar(border_value, border_value, border_value));
    else
        warpAffine(img, imgout, map_matrix, Size(width_rotate, height_rotate), 1, 0, border_value);
    return 0;
}

// 2-D Fourier transform of an image, returned as a displayable magnitude map.
//   image  : input image; it becomes the real part of the complex DFT input
//   imgout : receives log(1 + |DFT|) as a float matrix, with the quadrants
//            rearranged so the origin sits at the centre and the values
//            min-max normalised to the range 0..1
// Returns 1 when image is empty, 0 otherwise.
int my_fft(Mat & image,Mat & imgout)
{
    if (image.empty())
    {
        return 1;
    }

    // Zero-pad on the bottom and right up to the optimal DFT size.
    const int dft_rows = getOptimalDFTSize(image.rows);
    const int dft_cols = getOptimalDFTSize(image.cols);
    Mat padded;
    copyMakeBorder(image, padded,
                   0, dft_rows - image.rows,
                   0, dft_cols - image.cols,
                   BORDER_CONSTANT, Scalar::all(0));

    // Complex input: real plane = padded image as float, imaginary plane = 0.
    Mat planes[] = {Mat_<float>(padded), Mat::zeros(padded.size(), CV_32F)};
    Mat spectrum;
    merge(planes, 2, spectrum);
    dft(spectrum, spectrum);            // in-place transform

    // Magnitude on a logarithmic scale:
    //   log(1 + sqrt(Re(DFT)^2 + Im(DFT)^2))
    split(spectrum, planes);            // planes[0] = Re, planes[1] = Im
    magnitude(planes[0], planes[1], planes[0]);
    Mat mag = planes[0];
    mag += Scalar::all(1);
    log(mag, mag);

    // "& -2" clears the lowest bit, dropping the last row/column when the
    // size is odd so the spectrum splits into four equal quadrants.
    mag = mag(Rect(0, 0, mag.cols & -2, mag.rows & -2));

    // Swap diagonally opposite quadrants to move the origin to the centre.
    const int half_w = mag.cols / 2;
    const int half_h = mag.rows / 2;
    Mat diagonal_pairs[2][2] = {
        { Mat(mag, Rect(0,      0,      half_w, half_h)),      // top-left
          Mat(mag, Rect(half_w, half_h, half_w, half_h)) },    // bottom-right
        { Mat(mag, Rect(half_w, 0,      half_w, half_h)),      // top-right
          Mat(mag, Rect(0,      half_h, half_w, half_h)) }     // bottom-left
    };
    Mat scratch;
    for (int k = 0; k < 2; ++k)
    {
        diagonal_pairs[k][0].copyTo(scratch);
        diagonal_pairs[k][1].copyTo(diagonal_pairs[k][0]);
        scratch.copyTo(diagonal_pairs[k][1]);
    }

    // Rescale the float values into 0..1 so the matrix is viewable.
    normalize(mag, mag, 0, 1, CV_MINMAX);

    imgout = mag;
    return 0;
}

int skew_correction_shallow(const Mat rgbimgin,Mat & rgbimgout,int &theta)
{
    if(rgbimgin.empty())
        {
            return 1;
        }
    Mat imgout_crop = rgbimgin.clone();

        Mat imgout;
        float zoom_ratio = 800.0/imgout_crop.rows;
        resize(imgout_crop,imgout,Size(0,0),zoom_ratio,zoom_ratio,1);
        //imshow("1212121",imgout);

        Mat Gray;
        if (imgout.channels() == 3)
            {
                cvtColor(imgout,Gray,COLOR_RGB2GRAY);
            }
        else if (imgout.channels() == 1)
            {
                Gray = imgout.clone();
            }
        else
            return 1;
        
        Mat BW;
        adaptiveThreshold(Gray,BW,255,CV_ADAPTIVE_THRESH_MEAN_C,CV_THRESH_BINARY,111,11);
        Mat element =getStructuringElement(MORPH_RECT,Size(9,9));
        erode(BW,BW,element);
        Mat canny_img;
        Canny(BW,canny_img,50,150,3);
        
        Mat FFT_binary;
        Mat outt;
        my_fft(canny_img,FFT_binary);
        FFT_binary = FFT_binary*255;

        Scalar ss = sum(FFT_binary);
        int dot_sum = ss(0);
        int thresh = dot_sum/(FFT_binary.cols*FFT_binary.rows);
        threshold(FFT_binary,FFT_binary,1.3*thresh,255,THRESH_BINARY);//180

        outt=FFT_binary(Range(FFT_binary.rows/4,FFT_binary.rows*3/4),
            Range(FFT_binary.cols/4,FFT_binary.cols*3/4));
        Mat img_skew;
        unsigned int matrik1_1[MY_SKEW+1];
        unsigned int matrik1_2[MY_SKEW+1];
        unsigned int matrik2[2*MY_SKEW+1];
        for(int skew=-MY_SKEW;skew<=MY_SKEW;skew++)
        {
            rotateImage2(outt,img_skew,skew,0);
            //cout<<skew<<endl;
            threshold(img_skew,img_skew,180,255,THRESH_BINARY);
            vector<unsigned int> cols_pro;
            unsigned int cols_project;
            vector<unsigned int> rows_pro;
            unsigned int rows_project;

            for(int i=0;i<img_skew.rows;i++)
            {
                rows_project=0;
                for(int j=0;j<img_skew.cols;j++)
                {
                    if(img_skew.at<int>(i,j)>0)
                        rows_project++;
                }
                rows_pro.push_back(rows_project);
            }

            for(int i=0;i<img_skew.cols;i++)
            {
                cols_project=0;
                for(int j=0;j<img_skew.rows;j++)
                {
                    if(img_skew.at<int>(j,i)>0)
                        cols_project++;
                }
                cols_pro.push_back(cols_project);
            }
            if(skew<0)
            {
                unsigned int biggest1 = *max_element(begin(rows_pro),end(rows_pro));
                matrik1_1[skew+MY_SKEW] = biggest1;
                unsigned int biggest2 = *max_element(begin(cols_pro),end(cols_pro));
                matrik2[skew+MY_SKEW] = biggest2;
            }
            else if(skew == 0)
            {
                unsigned int biggest1 = *max_element(begin(rows_pro),end(rows_pro));
                matrik1_1[skew+MY_SKEW] = biggest1;
                matrik1_2[skew] = biggest1;
                unsigned int biggest2 = *max_element(begin(cols_pro),end(cols_pro));
                matrik2[skew+MY_SKEW] = biggest2;
            }
            else
            {
                unsigned int biggest1 = *max_element(begin(rows_pro),end(rows_pro));
                matrik1_2[skew] = biggest1;
                unsigned int biggest2 = *max_element(begin(cols_pro),end(cols_pro));
                matrik2[skew+MY_SKEW] = biggest2;
            }
            
        }
        int theta1_1 = (int)distance(matrik1_1,max_element(matrik1_1,matrik1_1 + sizeof(matrik1_1)/sizeof(matrik1_1[0])))-MY_SKEW;
        
        int theta1_2 = (int)distance(matrik1_2,max_element(matrik1_2,matrik1_2 + sizeof(matrik1_2)/sizeof(matrik1_2[0])));
        int theta2 = (int)distance(matrik2,max_element(matrik2,matrik2 + sizeof(matrik2)/sizeof(matrik2[0])))-MY_SKEW;
        theta = ((theta1_1+theta1_2)/2+theta2)/2;

        rotateImage1(rgbimgin,rgbimgout,theta,0);
        return 0;
}
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章