文檔圖像傾斜校正算法(3)——二維傅里葉變換法傾斜校正
原理:文本行具有一定的紋理信息,通過傅里葉變換後在頻域中會凸顯出來。
適用範圍:二維傅里葉變換法傾斜校正需要有明顯的頻域信息時才能生效。爲得到更顯著的頻域特徵,在使用時一般會先對文檔圖像的二值化圖像進行文本區域融合。但是對於文檔中文字稀疏,頻域特徵不明顯的文檔該方法會失效。該方法能彌補文本行投影法在短文本傾斜矯正時的不足:短文本,尤其是文本行存在錯行時,使用投影法難以對傾斜文檔圖像進行校正,二維傅里葉變換法採用全局信息進行傾斜矯正,能得到較爲理想的結果,但是當背景紋理過於突出時也會失效,所以建議先截取文檔圖像中的關鍵區域再進行傾斜校正。
#include <opencv2/opencv.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <vector>
#include <numeric>
#define MY_SKEW 14
// Image rotation #1: rotate and CROP — the output canvas is intentionally
// smaller than the rotated bounding box, so the corners are cut away.
// Mat img          : input image, 1 or 3 channels
// Mat & imgout     : output image
// int degree       : rotation angle in degrees (positive = clockwise)
// int border_value : fill value for pixels outside the source image
// Returns 0 on success, 1 if the input is empty.
int rotateImage1(Mat img,Mat & imgout, int degree,int border_value)
{
    if (img.empty())
        return 1;
    // warpAffine rotates counter-clockwise for positive angles; negate so a
    // positive `degree` means clockwise (the original convention).
    degree = -degree;
    double angle = degree * CV_PI / 180.;  // radians
    double a = sin(angle), b = cos(angle);
    int width = img.cols;
    int height = img.rows;
    // "Crop" size: w*|cos| - h*|sin| (the "full" variant would use '+').
    int width_rotate = int(width * fabs(b) - height * fabs(a));
    int height_rotate = int(height * fabs(b) - width * fabs(a));
    // Guard against degenerate or negative sizes at large angles.
    if (width_rotate <= 20 || height_rotate <= 20)
    {
        width_rotate = 20;
        height_rotate = 20;
    }
    // Rotation about the image centre. getRotationMatrix2D replaces the
    // deprecated C API pair cvPoint2D32f / cv2DRotationMatrix (removed in
    // OpenCV 4) and uses the same angle convention; it returns a CV_64F 2x3
    // affine matrix [A | b].
    Point2f center((float)(width / 2), (float)(height / 2));
    Mat map_matrix = getRotationMatrix2D(center, degree, 1.0);
    // Translate so the (smaller) output stays centred on the rotated content.
    map_matrix.at<double>(0, 2) += (width_rotate - width) / 2;
    map_matrix.at<double>(1, 2) += (height_rotate - height) / 2;
    // Pixels falling outside the input are filled with `border_value`
    // (replicated per channel for colour images).
    int chnnel = img.channels();
    if (chnnel == 3)
        warpAffine(img, imgout, map_matrix, Size(width_rotate, height_rotate), 1, 0, Scalar(border_value, border_value, border_value));
    else
        warpAffine(img, imgout, map_matrix, Size(width_rotate, height_rotate), 1, 0, border_value);
    return 0;
}
// Image rotation #2: rotate and EXPAND — the output canvas is the full
// bounding box of the rotated image, so no content is lost.
// Mat img          : input image, 1 or 3 channels
// Mat & imgout     : output image
// int degree       : rotation angle in degrees (positive = clockwise)
// int border_value : fill value for pixels outside the source image
// Returns 0 on success, 1 if the input is empty.
int rotateImage2(Mat img,Mat & imgout, int degree,int border_value)
{
    if (img.empty())
        return 1;
    // warpAffine rotates counter-clockwise for positive angles; negate so a
    // positive `degree` means clockwise (the original convention).
    degree = -degree;
    double angle = degree * CV_PI / 180.;  // radians
    double a = sin(angle), b = cos(angle);
    int width = img.cols;
    int height = img.rows;
    // "Full" size: w*|cos| + h*|sin| — bounding box of the rotated image.
    int width_rotate = int(width * fabs(b) + height * fabs(a));
    int height_rotate = int(height * fabs(b) + width * fabs(a));
    // Guard against degenerate sizes.
    if (width_rotate <= 20 || height_rotate <= 20)
    {
        width_rotate = 20;
        height_rotate = 20;
    }
    // Rotation about the image centre. getRotationMatrix2D replaces the
    // deprecated C API pair cvPoint2D32f / cv2DRotationMatrix (removed in
    // OpenCV 4); it returns a CV_64F 2x3 affine matrix [A | b].
    Point2f center((float)(width / 2), (float)(height / 2));
    Mat map_matrix = getRotationMatrix2D(center, degree, 1.0);
    // Translate so the (larger) output stays centred on the rotated content.
    map_matrix.at<double>(0, 2) += (width_rotate - width) / 2;
    map_matrix.at<double>(1, 2) += (height_rotate - height) / 2;
    Mat imgout_pro;
    int chnnel = img.channels();
    if (chnnel == 3)
        warpAffine(img, imgout_pro, map_matrix, Size(width_rotate, height_rotate), 1, 0, Scalar(border_value, border_value, border_value));
    else
        warpAffine(img, imgout_pro, map_matrix, Size(width_rotate, height_rotate), 1, 0, border_value);
    // BUG FIX: the original called
    //   resize(imgout_pro, imgout, Size(imgout_pro.rows, imgout_pro.cols), ...)
    // but cv::Size takes (width, height), so rows/cols were swapped and any
    // non-square image came out distorted (dimensions transposed). With the
    // argument order corrected the resize is an identity, so simply hand the
    // rotated image back.
    imgout = imgout_pro;
    return 0;
}
// 2-D Fourier transform of an image: produces the centred, log-scaled,
// [0,1]-normalised magnitude spectrum (standard OpenCV DFT tutorial flow).
// Mat & image  : single-channel input image
// Mat & imgout : output magnitude spectrum, CV_32F in [0,1], DC at centre
// Returns 0 on success, 1 if the input is empty.
int my_fft(Mat & image,Mat & imgout)
{
    Mat I = image;
    if (I.empty())
        return 1;
    // Pad to the optimal DFT size (zeros on the right/bottom borders).
    Mat padded;
    int m = getOptimalDFTSize(I.rows);
    int n = getOptimalDFTSize(I.cols);
    copyMakeBorder(I, padded, 0, m - I.rows, 0, n - I.cols, BORDER_CONSTANT, Scalar::all(0));
    // Build a 2-channel (real, imaginary) complex image and transform it.
    Mat planes[] = {Mat_<float>(padded), Mat::zeros(padded.size(), CV_32F)};
    Mat complexI;
    merge(planes, 2, complexI);
    dft(complexI, complexI);
    // Magnitude on a logarithmic scale:
    //   log(1 + sqrt(Re(DFT(I))^2 + Im(DFT(I))^2))
    split(complexI, planes);                   // planes[0] = Re, planes[1] = Im
    magnitude(planes[0], planes[1], planes[0]);
    Mat magI = planes[0];
    magI += Scalar::all(1);
    log(magI, magI);
    // Crop to even dimensions so the quadrants split cleanly.
    magI = magI(Rect(0, 0, magI.cols & -2, magI.rows & -2));
    // Rearrange the quadrants so the origin (DC) sits at the image centre.
    int cx = magI.cols/2;
    int cy = magI.rows/2;
    Mat q0(magI, Rect(0, 0, cx, cy));   // Top-Left
    Mat q1(magI, Rect(cx, 0, cx, cy));  // Top-Right
    Mat q2(magI, Rect(0, cy, cx, cy));  // Bottom-Left
    Mat q3(magI, Rect(cx, cy, cx, cy)); // Bottom-Right
    Mat tmp;                            // swap Top-Left with Bottom-Right
    q0.copyTo(tmp);
    q3.copyTo(q0);
    tmp.copyTo(q3);
    q1.copyTo(tmp);                     // swap Top-Right with Bottom-Left
    q2.copyTo(q1);
    tmp.copyTo(q2);
    // Scale to [0,1] for viewing / thresholding. NORM_MINMAX replaces the
    // deprecated CV_MINMAX alias (removed in OpenCV 4); behaviour is identical.
    normalize(magI, magI, 0, 1, NORM_MINMAX);
    imgout = magI;
    return 0;
}
int skew_correction_shallow(const Mat rgbimgin,Mat & rgbimgout,int &theta)
{
if(rgbimgin.empty())
{
return 1;
}
Mat imgout_crop = rgbimgin.clone();
Mat imgout;
float zoom_ratio = 800.0/imgout_crop.rows;
resize(imgout_crop,imgout,Size(0,0),zoom_ratio,zoom_ratio,1);
//imshow("1212121",imgout);
Mat Gray;
if (imgout.channels() == 3)
{
cvtColor(imgout,Gray,COLOR_RGB2GRAY);
}
else if (imgout.channels() == 1)
{
Gray = imgout.clone();
}
else
return 1;
Mat BW;
adaptiveThreshold(Gray,BW,255,CV_ADAPTIVE_THRESH_MEAN_C,CV_THRESH_BINARY,111,11);
Mat element =getStructuringElement(MORPH_RECT,Size(9,9));
erode(BW,BW,element);
Mat canny_img;
Canny(BW,canny_img,50,150,3);
Mat FFT_binary;
Mat outt;
my_fft(canny_img,FFT_binary);
FFT_binary = FFT_binary*255;
Scalar ss = sum(FFT_binary);
int dot_sum = ss(0);
int thresh = dot_sum/(FFT_binary.cols*FFT_binary.rows);
threshold(FFT_binary,FFT_binary,1.3*thresh,255,THRESH_BINARY);//180
outt=FFT_binary(Range(FFT_binary.rows/4,FFT_binary.rows*3/4),
Range(FFT_binary.cols/4,FFT_binary.cols*3/4));
Mat img_skew;
unsigned int matrik1_1[MY_SKEW+1];
unsigned int matrik1_2[MY_SKEW+1];
unsigned int matrik2[2*MY_SKEW+1];
for(int skew=-MY_SKEW;skew<=MY_SKEW;skew++)
{
rotateImage2(outt,img_skew,skew,0);
//cout<<skew<<endl;
threshold(img_skew,img_skew,180,255,THRESH_BINARY);
vector<unsigned int> cols_pro;
unsigned int cols_project;
vector<unsigned int> rows_pro;
unsigned int rows_project;
for(int i=0;i<img_skew.rows;i++)
{
rows_project=0;
for(int j=0;j<img_skew.cols;j++)
{
if(img_skew.at<int>(i,j)>0)
rows_project++;
}
rows_pro.push_back(rows_project);
}
for(int i=0;i<img_skew.cols;i++)
{
cols_project=0;
for(int j=0;j<img_skew.rows;j++)
{
if(img_skew.at<int>(j,i)>0)
cols_project++;
}
cols_pro.push_back(cols_project);
}
if(skew<0)
{
unsigned int biggest1 = *max_element(begin(rows_pro),end(rows_pro));
matrik1_1[skew+MY_SKEW] = biggest1;
unsigned int biggest2 = *max_element(begin(cols_pro),end(cols_pro));
matrik2[skew+MY_SKEW] = biggest2;
}
else if(skew == 0)
{
unsigned int biggest1 = *max_element(begin(rows_pro),end(rows_pro));
matrik1_1[skew+MY_SKEW] = biggest1;
matrik1_2[skew] = biggest1;
unsigned int biggest2 = *max_element(begin(cols_pro),end(cols_pro));
matrik2[skew+MY_SKEW] = biggest2;
}
else
{
unsigned int biggest1 = *max_element(begin(rows_pro),end(rows_pro));
matrik1_2[skew] = biggest1;
unsigned int biggest2 = *max_element(begin(cols_pro),end(cols_pro));
matrik2[skew+MY_SKEW] = biggest2;
}
}
int theta1_1 = (int)distance(matrik1_1,max_element(matrik1_1,matrik1_1 + sizeof(matrik1_1)/sizeof(matrik1_1[0])))-MY_SKEW;
int theta1_2 = (int)distance(matrik1_2,max_element(matrik1_2,matrik1_2 + sizeof(matrik1_2)/sizeof(matrik1_2[0])));
int theta2 = (int)distance(matrik2,max_element(matrik2,matrik2 + sizeof(matrik2)/sizeof(matrik2[0])))-MY_SKEW;
theta = ((theta1_1+theta1_2)/2+theta2)/2;
rotateImage1(rgbimgin,rgbimgout,theta,0);
return 0;
}