圖形圖像處理－之－一個複雜度爲常數的快速局部自適應算法上篇

圖形圖像處理－之－一個複雜度爲常數的快速局部自適應算法上篇
[email protected] 2008.04.12

(2009.03.10 可以到這裏下載完整的可以編譯的項目源代碼: http://cid-10fa89dec380323f.skydrive.live.com/browse.aspx/.Public?uc=4 )

tag: 圖像二值化,局部自適應,二維模板

摘要: 圖像處理中，某些算法在處理一個像素時，需要根據其周圍很多像素的綜合信息
來做處理，用以得到更好的處理效果，這類算法一般叫做局部自適應算法；但很多時候
這可能成爲一個性能瓶頸，因爲對每一個像素點都需要做大量的處理；本文將提供我使
用的一個複雜度爲常數的快速局部自適應算法。
(當然,某些二維模板不一定能夠拆解成常數算法，但很多還是可以拆解成線性算法的)

正文:
代碼使用C++,編譯器:VC2005
測試平臺:(CPU:AMD64x2 4200+(2.37G); 內存:DDR2 667(雙通道); 編譯器:VC2005)

A:像素使用ARGB32顏色類型，顏色和圖片的數據定義:

// Fixed-size unsigned integer aliases used by the pixel code.
typedef unsigned char   TUInt8;  // one color channel, range [0..255]
// TUInt32 must be exactly 32 bits wide, because whole pixels are written
// through TUInt32 pointers. `unsigned long` is 64 bits on LP64 targets
// (e.g. 64-bit Linux), so `unsigned int` is used instead; it is 32 bits on
// the common 32-bit and 64-bit data models.
typedef unsigned int    TUInt32;
// Compile-time size check that also works on compilers without static_assert:
// the array size becomes -1 (a compile error) if TUInt32 is not 4 bytes.
typedef char TUInt32_must_be_32_bits[(sizeof(TUInt32) == 4) ? 1 : -1];

// One 32-bit color value made of four 8-bit channels.
// The members are declared in the order b, g, r, a.
struct TARGB32
{
    TUInt8 b;   // blue
    TUInt8 g;   // green
    TUInt8 r;   // red
    TUInt8 a;   // alpha
};

// Describes one block of color data so that it can be passed around as a
// single argument.
struct TPicRegion
{
    TARGB32       *pdata;       // address of the first pixel of the region
    long           byte_width;  // physical width of one row, in bytes;
                                // note: abs(byte_width) may be greater than or
                                // equal to width * sizeof(TARGB32)
    unsigned long  width;       // width in pixels
    unsigned long  height;      // height in pixels
};

// Accessor for a single pixel: returns a reference to the pixel at column x
// of row y in pic. Consecutive rows are byte_width bytes apart, so the row
// start is found with byte arithmetic before indexing by column.
// No bounds checking is performed.
inline TARGB32 & Pixels( const TPicRegion & pic, const long x, const long y)
{
    TUInt8 * const rowBytes = (TUInt8 *)pic.pdata + pic.byte_width * y;
    TARGB32 * const row = (TARGB32 *)rowBytes;
    return row[x];
}

B:圖像二值化的一個簡單實現
函數的作用是將一幅彩色圖像轉化成黑白兩色的圖像；
算法很簡單，像素的亮度值大於127(也可以預先統計出源圖片的平均亮度值作
爲閾值)的轉換爲白色，否則設置爲黑色，實現如下:

     // Weights of the red, green and blue channels in the brightness formula.
     const double cs_gray_red   = 0.299;
     const double cs_gray_green = 0.587;
     const double cs_gray_blue  = 0.114;

    // Brightness of a color: the weighted sum of its r, g and b channels
    // (floating point), truncated to an integer.
    inline long getGray0( const TARGB32 & color)
    {
        const double weighted = color.r * cs_gray_red
                              + color.g * cs_gray_green
                              + color.b * cs_gray_blue;
        return ( long )weighted;
    }

void threshold0( const TPicRegion & dst, const TPicRegion & src)
{
     long width = dst.width;
     if (src.width < width) width = src.width;
     long height = dst.height;
     if   (src.height < height) height = src.height;
    TARGB32 * srcLine = src.pdata;
    TARGB32 * dstLine = dst.pdata;
     for ( long y = 0 ;y < height; ++ y)
    {
         for ( long x = 0 ;x < width; ++ x)
        {
             long light = getGray0(srcLine[x]);
             if (light >= 127 ) // 設置爲白色
            {
                dstLine[x].b = 255 ;
                dstLine[x].g = 255 ;
                dstLine[x].r = 255 ;
                dstLine[x].a = 255 ;
            }
             else    // 設置爲黑色
            {
                dstLine[x].b = 0 ;
                dstLine[x].g = 0 ;
                dstLine[x].r = 0 ;
                dstLine[x].a = 0 ;
            }
        }
        (TUInt8 *& )srcLine += src.byte_width; // 下一行顏色
        (TUInt8 *& )dstLine += dst.byte_width; // 下一行顏色
    }
}

原圖像(圖像大小: 640x480):

函數效果:

速度測試:
//////////////////////////////////////////////////////////////
//threshold0 177.1 FPS
//////////////////////////////////////////////////////////////

C:我們來簡單優化一下threshold0的速度
getGray0涉及到浮點計算和浮點數取整，可以改寫爲一個整數定點數算法(見代
碼中的getGrayInt函數)；
在取黑白值的時候涉及到一個邏輯判斷，從而生成了一個分支，可以優化掉；
在寫顏色值的時候可以一次寫入4個顏色分量；
詳細的代碼如下:

     // 16.16 fixed-point versions of the brightness weights.
     // Each weight is rounded to nearest (+ 0.5) instead of truncated:
     // truncation gives 19595 + 38469 + 7471 = 65535, which makes a gray
     // pixel (v,v,v) evaluate to v - 1; with rounding the three weights
     // sum to exactly 65536.
     const long cs_gray_red_16   = ( long )(cs_gray_red   * ( 1 << 16 ) + 0.5 );
     const long cs_gray_green_16 = ( long )(cs_gray_green * ( 1 << 16 ) + 0.5 );
     const long cs_gray_blue_16  = ( long )(cs_gray_blue  * ( 1 << 16 ) + 0.5 );

    // Integer-only brightness: weights each channel with its 16.16
    // fixed-point coefficient and then drops the 16 fractional bits.
    inline long getGrayInt( const TARGB32 & color)
    {
        long acc = color.r * cs_gray_red_16;
        acc += color.g * cs_gray_green_16;
        acc += color.b * cs_gray_blue_16;
        return acc >> 16;
    }

void threshold1( const TPicRegion & dst, const TPicRegion & src)
{
     long width = dst.width;
     if (src.width < width) width = src.width;
     long height = dst.height;
     if (src.height < height) height = src.height;
    TARGB32 * srcLine = src.pdata;
    TARGB32 * dstLine = dst.pdata;
     for ( long y = 0 ;y < height; ++ y)
    {
         for ( long x = 0 ;x < width; ++ x)
        {
             long light = getGrayInt(srcLine[x]);
            TUInt32 color = (( 127 - light) >> 31 ); // 利用了整數的編碼方式來消除了分支
            ((TUInt32 * )dstLine)[x] = color;   // 一次寫4個字節
        }
        (TUInt8 *& )srcLine += src.byte_width;
        (TUInt8 *& )dstLine += dst.byte_width;
    }
}

threshold1實現的功能和threshold0基本相同(亮度改用定點數計算，個別像素的亮度值可能相差1)；threshold1的速度爲:
//////////////////////////////////////////////////////////////
//threshold1 747.6 FPS
//////////////////////////////////////////////////////////////
(當然，該函數還可以繼續優化的，比如使用MMX、SSE等指令，可以得到更快的速度；)

D:一個局部自適應圖像二值化算法的實現
局部自適應二值化：對於某個像素p，求其周圍MxM範圍內的像素的平均亮
度I, 若像素p的亮度大於I，則該像素設置爲白色，否則設置爲黑色；
在邊界處，統計周圍亮度的時候可能會訪問到圖像以外，爲了在邊界處也得到好的效果，
可以返回一個圖像內的對應映射像素，完成該功能的函數爲getMapBorderColor；
實現如下:

     // Maps one coordinate v onto an index inside [0, extent) by mirroring:
     // a negative v becomes -v - 1, the result is then reduced by whole
     // periods of 2 * extent, and a value left in [extent, 2 * extent) is
     // reflected back as 2 * extent - v - 1.
     // extent is expected to be non-zero.
    inline long mirrorCoord( long v, unsigned long extent)
    {
        if (v < 0) v = -v - 1;
        long period = extent * 2;
        while (v >= period) v -= period;
        if (v >= extent) v = period - v - 1;
        return v;
    }

    // Bounds-checked, border-mapping pixel access: (x, y) may lie outside
    // src, in which case the mirrored pixel inside the image is returned.
    inline const TARGB32 & getMapBorderColor( const TPicRegion & src, long x, long y)
    {
        const long mappedX = mirrorCoord(x, src.width);
        const long mappedY = mirrorCoord(y, src.height);
        return Pixels(src, mappedX, mappedY);
    }

     // 返回圖片src中以(x0,y0)爲中心距離localHalfWidth以內的所有像素的亮度和
     long getLocalLight_quadratic( const TPicRegion & src, long x0, long y0, long localHalfWidth)
    {
         long sumLight = 0 ;
         for ( long y = y0 - localHalfWidth;y <= y0 + localHalfWidth; ++ y)
        {
             for ( long x = x0 - localHalfWidth;x <= x0 + localHalfWidth; ++ x)
            {
                 const TARGB32 & mapBorderColor = getMapBorderColor(src,x,y);
                sumLight += getGrayInt(mapBorderColor);
            }
        }
         return sumLight;
    }


void localAdaptiveThreshold_quadratic( const TPicRegion & dst, const TPicRegion & src, long localWidth)
{
     long width = dst.width;
     if (src.width < width) width = src.width;
     long height = dst.height;
     if (src.height < height) height = src.height;
    TARGB32 * srcLine = src.pdata;
    TARGB32 * dstLine = dst.pdata;
     long localHalfWidth = localWidth / 2 ;
     long tLocalWidth = localHalfWidth * 2 + 1 ;
     long tLocalWidthSqr = tLocalWidth * tLocalWidth;
     for ( long y = 0 ;y < height; ++ y)
    {
         for ( long x = 0 ;x < width; ++ x)
        {
             long sumLight = getLocalLight_quadratic(src,x,y,localHalfWidth);
             long light = getGrayInt(srcLine[x]);
                  // localWidth^2*255<=(2^31-1) =>  localWidth<=2901
            TUInt32 color = ((sumLight - light * tLocalWidthSqr) >> 31 );
            ((TUInt32 * )dstLine)[x] = color;
        }
        (TUInt8 *& )srcLine += src.byte_width;
        (TUInt8 *& )dstLine += dst.byte_width;
    }
}

函數效果:
localWidth=151

localWidth=51

localWidth=17

localWidth=5

嗯,效果不錯:)

速度測試:
//////////////////////////////////////////////////////////////
// localWidth= 5 | 17 | 51 | 151
//------------------------------------------------------------
//localAdaptiveThreshold_quadratic 9.33 1.04 0.12 0.012FPS
//////////////////////////////////////////////////////////////
可以看到隨着模板大小的增長，速度大致按模板寬度的平方下降

(文章的下篇將開始優化localAdaptiveThreshold_quadratic的速度，得到線性算法
和常數算法)

圖形圖像處理－之－一個複雜度爲常數的快速局部自適應算法上篇

“數學函數動態編譯器TCompile類”的bug跟蹤、新版源代碼下載

圖形圖像處理－之－任意角度的高質量的快速的圖像旋轉下篇補充話題

HDiffPatch和BsDiff4.3&xdelta3.1的對比測試

我的分形畫廊

YUV視頻格式到RGB32格式轉換的速度優化中篇

Mac下配置sublime實現LaTeX

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結

圖形圖像處理－之－一個複雜度爲常數的快速局部自適應算法 上篇

圖形圖像處理－之－一個複雜度爲常數的快速局部自適應算法上篇