圖像旋轉+二線性插值算法

long BilinearInterpolateD( const SwImage & pic,double fx,double fy )
{  

	#define BILINEAR4_UNITRPOC(a,b,c,d) ((a<<22) +(b-a)*rx2048 + (c-a)*ry2048 + (a-b-c+d)*rxry )

	long x = (long)fx;
	long y = (long)fy;

	unsigned char * c0 = ( unsigned char* )(pic.buffer) + ((pic.width * y + x)<<2);
	unsigned char * c2 = c0 + ( pic.width <<2 );
 
	long rx		= (long)(( fx - x ) * 2048);
	long ry		= (long)(( fy - y ) * 2048);
	long rx2048 = rx << 11;
	long ry2048 = ry << 11;
	long rxry	= rx * ry; 

	long rr = BILINEAR4_UNITRPOC( *(c0+2), *(c0+6), *(c2+2), *(c2+6) ); 
	long gg = BILINEAR4_UNITRPOC( *(c0+1), *(c0+5), *(c2+1), *(c2+5) ); 
	long bb = BILINEAR4_UNITRPOC( *(c0),   *(c0+4), *(c2),   *(c2+4) ); 

	return ((rr>>22<<16) | (gg>>22<<8) | bb>>22);
	
	#undef BILINEAR4_UNITRPOC
}

//使用SSE彙編指令集
BOOL SwImgRotateBilinearSSE( SwImage * pPicDest, const SwImage & picSrc, float angle )
{ 
	 long srcw = picSrc.width;
	 long srch = picSrc.height;
	 long halfsrcw = srcw >> 1;
	 long halfsrch = srch >> 1;
	 long neg_halfsrcw = -halfsrcw;
	 long neg_halfsrch = -halfsrch;
	 long destw = 0;
	 long desth = 0;
	 SwColor * pDest = NULL;
	 long isx = 0;
	 long isy = 0;
 
	 __declspec(align(16)) float arc  = angle * _PI_DIV_180_FS;
	 __declspec(align(16)) float sina = sin( arc );
	 __declspec(align(16)) float cosa = cos( arc );
	 __declspec(align(16)) float fhalfsrcw = (float)srcw * 0.5f;
 	 __declspec(align(16)) float fhalfsrch = (float)srch * 0.5f;

	 SwImgCalcRotExtent( srcw, srch, sina, cosa, &destw, &desth );
	 if( ( destw <= 0 ) || ( desth <= 0 ) )
		 return FALSE;

	 SwImgAlloc( pPicDest, destw, desth );
	 pDest = pPicDest->buffer;
 
	 __declspec(align(16)) float srcx0 = -(destw * 0.5f)*cosa + (-(desth * 0.5f)*sina); 
	 __declspec(align(16)) float srcy0 = -(destw * 0.5f)*sina - (-(desth * 0.5f)*cosa);
	 __declspec(align(16)) float fsx = 0;
	 __declspec(align(16)) float fsy = 0;
 
	 _asm movaps xmm0,srcx0
	 _asm movaps xmm1,srcy0
 
     for (int y = 0; y < desth; y++)
     {    
		 _asm movaps xmm2,xmm0
		 _asm movaps xmm3,xmm1
	 
         for ( int x = 0; x < destw; x++)
         {   
			 __asm
			 {   
				 cvtss2si eax,xmm2 

				 // if( isx > halfsrcw || isx < neg_halfsrcw ) goto over;
				 cmp eax,halfsrcw
				 jg LOOP2_OVER
				 cmp eax,neg_halfsrcw
				 jl LOOP2_OVER
					
				
				 cvtss2si eax,xmm3 

				 // if( isy > halfsrch || isy < neg_halfsrch ) goto over;
				 cmp eax,halfsrch
				 jg LOOP2_OVER
				 cmp eax,neg_halfsrch
				 jl LOOP2_OVER
			 		
				 movss xmm4,xmm2
				 movss xmm5,xmm3
				 addss xmm4,fhalfsrcw
				 subss xmm5,fhalfsrch
				 movss fsx,xmm4
				 movss fsy,xmm5
		  
				 // 將 fsy 絕對值,並截取整數部分到 isy
			 	 fld dword ptr[fsy]
			 	 fabs
			 	 fst dword ptr[fsy]
			 	 fisttp dword ptr[isy]
			 	 
				 // 因爲 BinlinearInterpolate4 並不進行任何的參數檢查
				 // 所以這裏進行預先判斷
				 cvtss2si eax,xmm4 // eax = (int)fsx
				 add eax,2
				 cmp eax,srcw
				 jnb LOOP2_OVER
				 mov eax,isy
				 add eax,2
				 cmp eax,srch
				 jnb LOOP2_OVER

				 // 將參數壓放棧中,調用函數 BinlinearInterpolate4
				 fld dword ptr[fsy]
				 sub esp,8
				 fstp qword ptr[esp]
				 fld dword ptr[fsx]
				 sub esp,8
				 fstp qword ptr[esp]
				 mov eax,dword ptr[picSrc]
				 push eax
				 call BilinearInterpolateD
 				 add esp,14h

				 // 現在將函數的返回值賦給 p[x]	 
				 mov ecx,dword ptr[x]
				 mov edx,dword ptr[pDest] 
				 mov dword ptr[edx+ecx*4],eax
				 
LOOP2_OVER: 			
			    addss xmm2,cosa
			    addss xmm3,sina

			} 
         }
		 pDest += destw;
		 _asm addss xmm0,sina
		 _asm subss xmm1,cosa
     }  
	 return TRUE;
}

發佈了22 篇原創文章 · 獲贊 7 · 訪問量 3萬+
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章