上一篇介紹了實際進行濾波的函數,本篇主要介紹去方塊濾波部分的函數調用關係。先看幾個定義:
//! Two function-pointer types for the deblocking kernels. Both take a pixel
//! pointer, a stride and alpha/beta values; only the first also takes a tc0 table.
//! Original author's note: the first is the one called when bS = 1..3, the
//! second the one called when bS = 4.
typedef void (*x264_deblock_inter_t)( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
typedef void (*x264_deblock_intra_t)( uint8_t *pix, int stride, int alpha, int beta );
//! Table of deblocking entry points: a v and an h variant for luma and for
//! chroma, each in a tc0-taking form and in an "_intra" form without tc0.
typedef struct
{
x264_deblock_inter_t deblock_v_luma;
x264_deblock_inter_t deblock_h_luma;
x264_deblock_inter_t deblock_v_chroma;
x264_deblock_inter_t deblock_h_chroma;
x264_deblock_intra_t deblock_v_luma_intra;
x264_deblock_intra_t deblock_h_luma_intra;
x264_deblock_intra_t deblock_v_chroma_intra;
x264_deblock_intra_t deblock_h_chroma_intra;
} x264_deblock_function_t; //!< struct holding one set of deblocking filter functions
接下來是去方塊濾波的初始化工作:
/*
 * Fill *pf with deblocking kernels.
 *
 * Every slot first receives the plain C implementation; slots are then
 * overwritten with SIMD implementations according to the bits set in `cpu`
 * and the HAVE_MMX / ARCH_X86 / ARCH_PPC build-time switches.  Later
 * assignments win, so the order below matters.
 *
 *   cpu  bitmask of X264_CPU_* capability flags
 *   pf   function table to populate
 */
void x264_deblock_init( int cpu, x264_deblock_function_t *pf )
{
#ifdef HAVE_MMX
    const int use_mmxext = ( cpu & X264_CPU_MMXEXT ) != 0;
    /* SSE2 kernels are only considered inside the MMXEXT case and are
     * skipped when the X264_CPU_STACK_MOD4 flag is set. */
    const int use_sse2 = use_mmxext
                      && ( cpu & X264_CPU_SSE2 )
                      && !( cpu & X264_CPU_STACK_MOD4 );
#endif

    /* Baseline: C kernels for the tc0-taking slots ... */
    pf->deblock_v_luma   = deblock_v_luma_c;
    pf->deblock_h_luma   = deblock_h_luma_c;
    pf->deblock_v_chroma = deblock_v_chroma_c;
    pf->deblock_h_chroma = deblock_h_chroma_c;
    /* ... and for the _intra slots. */
    pf->deblock_v_luma_intra   = deblock_v_luma_intra_c;
    pf->deblock_h_luma_intra   = deblock_h_luma_intra_c;
    pf->deblock_v_chroma_intra = deblock_v_chroma_intra_c;
    pf->deblock_h_chroma_intra = deblock_h_chroma_intra_c;

#ifdef HAVE_MMX
    if( use_mmxext )
    {
        /* Chroma always switches to the MMXEXT kernels. */
        pf->deblock_v_chroma       = x264_deblock_v_chroma_mmxext;
        pf->deblock_v_chroma_intra = x264_deblock_v_chroma_intra_mmxext;
        pf->deblock_h_chroma       = x264_deblock_h_chroma_mmxext;
        pf->deblock_h_chroma_intra = x264_deblock_h_chroma_intra_mmxext;
#ifdef ARCH_X86
        /* Luma MMXEXT kernels are only selected when ARCH_X86 is defined. */
        pf->deblock_v_luma       = x264_deblock_v_luma_mmxext;
        pf->deblock_v_luma_intra = x264_deblock_v_luma_intra_mmxext;
        pf->deblock_h_luma       = x264_deblock_h_luma_mmxext;
        pf->deblock_h_luma_intra = x264_deblock_h_luma_intra_mmxext;
#endif
    }
    if( use_sse2 )
    {
        /* Overrides whatever luma kernels were chosen above. */
        pf->deblock_v_luma       = x264_deblock_v_luma_sse2;
        pf->deblock_v_luma_intra = x264_deblock_v_luma_intra_sse2;
        pf->deblock_h_luma       = x264_deblock_h_luma_sse2;
        pf->deblock_h_luma_intra = x264_deblock_h_luma_intra_sse2;
    }
#endif /* HAVE_MMX */

#ifdef ARCH_PPC
    if( cpu & X264_CPU_ALTIVEC )
    {
        /* Only the tc0-taking luma kernels have an AltiVec replacement here. */
        pf->deblock_v_luma = x264_deblock_v_luma_altivec;
        pf->deblock_h_luma = x264_deblock_h_luma_altivec;
    }
#endif /* ARCH_PPC */
}
在代碼中,這個函數可以認爲是去方塊濾波的頂層函數了:
/*
 * Post-process the reconstructed frame (h->fdec) for the macroblock rows
 * that precede row mb_y.  In order:
 *   1. back up the pixel row(s) just above mb_y into
 *      h->mb.intra_border_backup (skipped on the final call),
 *   2. deblock via x264_frame_deblock_row(),
 *   3. if the frame is kept as a reference: expand borders and, when
 *      subpel refinement is enabled, run x264_frame_filter(),
 *   4. with more than one thread, broadcast progress on the frame,
 *   5. accumulate SSD (b_psnr) and SSIM (b_ssim) statistics.
 *
 *   h     encoder context
 *   mb_y  the macroblock row that will be encoded NEXT; the rows filtered
 *         here are the ones before it.  mb_y == h->sps->i_mb_height marks
 *         the final call for the frame.
 */
static void x264_fdec_filter_row( x264_t *h, int mb_y )
{
    const int keep_ref  = h->fdec->b_kept_as_ref;
    const int last_call = ( mb_y == h->sps->i_mb_height );
    const int row_first = mb_y - (1 << h->sh.b_mbaff);
    const int row_limit = last_call ? h->sps->i_mb_height : mb_y;
    /* Deblocking is only needed when the result is used: either the frame
     * is kept as a reference or reconstructed frames are being dumped. */
    const int do_deblock = !h->sh.i_disable_deblocking_filter_idc
                        && ( keep_ref || h->param.psz_dump_yuv );
    int px_top, px_bottom;

    /* Nothing to do for an odd row when b_mbaff is set, or before the
     * first row (pair) is complete. */
    if( (mb_y & h->sh.b_mbaff) || row_first < 0 )
        return;

    if( !last_call )
    {
        /* Bottom pixels of the previous mb row, for each of the 3 planes
         * (and two lines when b_mbaff is set). */
        int line, plane;
        for( line = 0; line <= h->sh.b_mbaff; line++ )
        {
            for( plane = 0; plane < 3; plane++ )
            {
                const int sub = !!plane; /* planes 1 and 2 are shifted by one */
                const int src_row = (mb_y*16 >> sub) + line - 1 - h->sh.b_mbaff;
                memcpy( h->mb.intra_border_backup[line][plane],
                        h->fdec->plane[plane] + src_row * h->fdec->i_stride[plane],
                        h->sps->i_mb_width*16 >> sub );
            }
        }
    }

    if( do_deblock )
    {
        /* Walk the rows in [row_first, row_limit), one row or one row pair at a time. */
        int row = row_first;
        while( row < row_limit )
        {
            x264_frame_deblock_row( h, row );
            row += 1 << h->sh.b_mbaff;
        }
    }

    if( keep_ref )
    {
        x264_frame_expand_border( h, h->fdec, row_first, last_call );
        if( h->param.analyse.i_subpel_refine )
        {
            x264_frame_filter( h, h->fdec, row_first, last_call );
            x264_frame_expand_border_filtered( h, h->fdec, row_first, last_call );
        }
    }

    /* Multi-threaded and this frame is used as a reference. */
    if( h->param.i_threads > 1 && h->fdec->b_kept_as_ref )
    {
        const int progress_offset = last_call ? 10000 : -(X264_THREAD_HEIGHT << h->sh.b_mbaff);
        x264_frame_cond_broadcast( h->fdec, mb_y*16 + progress_offset );
    }

    /* From here on the bounds are in pixel rows, not macroblock rows. */
    px_top    = X264_MAX( row_first*16-8, 0 );
    px_bottom = last_call ? h->param.i_height : mb_y*16-8;

    if( h->param.analyse.b_psnr )
    {
        int plane;
        for( plane = 0; plane < 3; plane++ )
        {
            const int sub        = !!plane;
            const int dec_stride = h->fdec->i_stride[plane];
            const int enc_stride = h->fenc->i_stride[plane];
            h->stat.frame.i_ssd[plane] +=
                x264_pixel_ssd_wxh( &h->pixf,
                                    h->fdec->plane[plane] + (px_top >> sub) * dec_stride, dec_stride,
                                    h->fenc->plane[plane] + (px_top >> sub) * enc_stride, enc_stride,
                                    h->param.i_width >> sub, (px_bottom - px_top) >> sub );
        }
    }

    if( h->param.analyse.b_ssim )
    {
        x264_emms();
        /* offset by 2 pixels to avoid alignment of ssim blocks with dct blocks,
         * and overlap by 4 */
        px_top = ( px_top == 0 ) ? 2 : px_top - 6;
        h->stat.frame.f_ssim +=
            x264_pixel_ssim_wxh( &h->pixf,
                                 h->fdec->plane[0] + 2 + px_top*h->fdec->i_stride[0], h->fdec->i_stride[0],
                                 h->fenc->plane[0] + 2 + px_top*h->fenc->i_stride[0], h->fenc->i_stride[0],
                                 h->param.i_width-2, px_bottom - px_top, h->scratch_buffer );
    }
}
上面的註釋忽略了與濾波無關的部分。x264_fdec_filter_row 會調用下面的 x264_frame_deblock_row 進行實際的濾波工作:
/*
 * Deblock macroblock row mb_y of the reconstructed frame h->fdec.
 *
 * For every macroblock in the row, DEBLOCK_DIR(0) processes its vertical
 * edges and DEBLOCK_DIR(1) its horizontal edges.  For each edge a
 * boundary strength bS[4] (one value per 4-pixel segment) is derived by
 * DEBLOCK_STRENGTH, and FILTER_DIR hands the edge to deblock_edge /
 * deblock_edge_intra together with the matching kernel from h->loopf.
 *
 * When h->sh.b_mbaff is set, the loop alternates between rows mb_y and
 * mb_y^1 for each mb_x (see the for-statement), and the doubled strides
 * stride2y / stride2uv are passed to the filters.
 *
 *   h     encoder context
 *   mb_y  macroblock row to deblock
 */
void x264_frame_deblock_row( x264_t *h, int mb_y )
{
const int s8x8 = 2 * h->mb.i_mb_stride; //!< stride of one row, counted in 8x8 blocks
const int s4x4 = 4 * h->mb.i_mb_stride; //!< stride of one row, counted in 4x4 blocks
const int b_interlaced = h->sh.b_mbaff;
/* Vertical mv-difference threshold used by DEBLOCK_STRENGTH: 4, or 2 when b_mbaff is set. */
const int mvy_limit = 4 >> b_interlaced;
/* Macroblocks whose qp is <= this threshold only get edge 0 processed (i_edge_end is forced to 1 below). */
const int qp_thresh = 15 - X264_MIN(h->sh.i_alpha_c0_offset, h->sh.i_beta_offset) - X264_MAX(0, h->param.analyse.i_chroma_qp_offset);
int mb_x;
int stridey = h->fdec->i_stride[0];
int stride2y = stridey << b_interlaced;
int strideuv = h->fdec->i_stride[1];
int stride2uv = strideuv << b_interlaced;
/* CAVLC with 8x8 transform: call munge_cavlc_nnz before filtering; the matching
 * call with restore_cavlc_nnz_row is at the end of this function.
 * NOTE(review): presumably this temporarily rewrites the non-zero counts read by
 * DEBLOCK_STRENGTH -- confirm against munge_cavlc_nnz. */
if( !h->pps->b_cabac && h->pps->b_transform_8x8_mode )
munge_cavlc_nnz( h, mb_y, h->mb.nnz_backup, munge_cavlc_nnz_row );
/* Not interlaced: mb_x advances by 1 each iteration and mb_y is unchanged.
 * Interlaced: mb_y toggles its low bit every iteration and mb_x advances only
 * after an odd mb_y, so both rows of a pair are visited for each mb_x. */
for( mb_x = 0; mb_x < h->sps->i_mb_width; mb_x += (~b_interlaced | mb_y)&1, mb_y ^= b_interlaced )
{
const int mb_xy = mb_y * h->mb.i_mb_stride + mb_x; //!< macroblock index
const int mb_8x8 = 2 * s8x8 * mb_y + 2 * mb_x; //!< index of this macroblock's first 8x8 block
const int mb_4x4 = 4 * s4x4 * mb_y + 4 * mb_x; //!< index of this macroblock's first 4x4 block
const int b_8x8_transform = h->mb.mb_transform_size[mb_xy];
const int i_qp = h->mb.qp[mb_xy];
int i_edge_end = (h->mb.type[mb_xy] == P_SKIP) ? 1 : 4; //!< exclusive upper bound on the edge index to process
/* Non-zero unless the macroblock is P_8x8 and X264_ANALYSE_PSUB8x8 is enabled. */
int no_sub8x8 = h->mb.type[mb_xy] != P_8x8 || !(h->param.analyse.inter & X264_ANALYSE_PSUB8x8);
uint8_t *pixy = h->fdec->plane[0] + 16*mb_y*stridey + 16*mb_x; //!< Y pixels of this macroblock
uint8_t *pixu = h->fdec->plane[1] + 8*mb_y*strideuv + 8*mb_x; //!< U pixels of this macroblock
uint8_t *pixv = h->fdec->plane[2] + 8*mb_y*strideuv + 8*mb_x; //!< V pixels of this macroblock
/* Interlaced, odd row: move the pointers back so they sit one line below the
 * start of the previous (even) row; the filters then step with stride2y/stride2uv. */
if( b_interlaced && (mb_y&1) )
{
pixy -= 15*stridey;
pixu -= 7*strideuv;
pixv -= 7*strideuv;
}
x264_prefetch_fenc( h, h->fdec, mb_x, mb_y );
if( i_qp <= qp_thresh )
i_edge_end = 1;
/* FILTER_DIR(intra, i_dir): filter edge i_edge of the current macroblock.
 * `intra` is pasted onto deblock_edge and onto the h->loopf member name, so it is
 * either empty or _intra.  Luma is filtered for every edge with the average of the
 * two qp values; chroma only for even i_edge, with the average of the two
 * chroma_qp_table entries.  Vertical edges (i_dir == 0) use the deblock_h_* kernels,
 * horizontal edges the deblock_v_* kernels.  Uses i_edge, bS, mbn_xy and i_qpn from
 * the enclosing DEBLOCK_DIR scope. */
#define FILTER_DIR(intra, i_dir)\
{\
/* Y plane */\
i_qpn= h->mb.qp[mbn_xy];\
if( i_dir == 0 )\
{\
/* vertical edge */\
deblock_edge##intra( h, pixy + 4*i_edge, NULL,\
stride2y, bS, (i_qp+i_qpn+1) >> 1, 0,\
h->loopf.deblock_h_luma##intra );\
if( !(i_edge & 1) )\
{\
/* U/V planes */\
int i_qpc = (h->chroma_qp_table[i_qp] + h->chroma_qp_table[i_qpn] + 1) >> 1;\
deblock_edge##intra( h, pixu + 2*i_edge, pixv + 2*i_edge,\
stride2uv, bS, i_qpc, 1,\
h->loopf.deblock_h_chroma##intra );\
}\
}\
else\
{\
/* horizontal edge */\
deblock_edge##intra( h, pixy + 4*i_edge*stride2y, NULL,\
stride2y, bS, (i_qp+i_qpn+1) >> 1, 0,\
h->loopf.deblock_v_luma##intra );\
/* U/V planes */\
if( !(i_edge & 1) )\
{\
int i_qpc = (h->chroma_qp_table[i_qp] + h->chroma_qp_table[i_qpn] + 1) >> 1;\
deblock_edge##intra( h, pixu + 2*i_edge*stride2uv, pixv + 2*i_edge*stride2uv,\
stride2uv, bS, i_qpc, 1,\
h->loopf.deblock_v_chroma##intra );\
}\
}\
}
//! i_edge is the index of the edge inside the macroblock. DEBLOCK_DIR starts it at 0,
//! or past edge 0 when mb_x == 0 (i_dir == 0) or mb_y <= b_interlaced (i_dir == 1).
//! i_dir selects the edge type: 0 -- vertical edge; 1 -- horizontal edge.
//! DEBLOCK_STRENGTH computes bS[0..3] for the current edge:
//!   3 for all four segments if either macroblock is intra; otherwise per segment
//!   2 if either side has non-zero coefficients; otherwise, when !(i_edge&no_sub8x8),
//!   either a copy of bS[i-1] (when (i&no_sub8x8) and bS[i-1] != 2) or 1 if the
//!   references differ or an mv component differs by >= 4 (x) / >= mvy_limit (y),
//!   list 1 being checked as well for B slices; 0 in all remaining cases.
#define DEBLOCK_STRENGTH(i_dir)\
{\
/* *** Get bS for each 4px for the current edge *** */\
if( IS_INTRA( h->mb.type[mb_xy] ) || IS_INTRA( h->mb.type[mbn_xy]) )\
*(uint32_t*)bS = 0x03030303;\
else\
{\
*(uint32_t*)bS = 0x00000000;\
for( i = 0; i < 4; i++ )\
{\
int x = i_dir == 0 ? i_edge : i;\
int y = i_dir == 0 ? i : i_edge;\
int xn = i_dir == 0 ? (x - 1)&0x03 : x;\
int yn = i_dir == 0 ? y : (y - 1)&0x03;\
if( h->mb.non_zero_count[mb_xy][x+y*4] != 0 ||\
h->mb.non_zero_count[mbn_xy][xn+yn*4] != 0 )\
bS[i] = 2;\
else if(!(i_edge&no_sub8x8))\
{\
if((i&no_sub8x8) && bS[i-1] != 2)\
bS[i] = bS[i-1];\
else\
{\
/* FIXME: A given frame may occupy more than one position in\
* the reference list. So we should compare the frame numbers,\
* not the indices in the ref list.\
* No harm yet, as we don't generate that case.*/\
int i8p= mb_8x8+(x>>1)+(y>>1)*s8x8;\
int i8q= mbn_8x8+(xn>>1)+(yn>>1)*s8x8;\
int i4p= mb_4x4+x+y*s4x4;\
int i4q= mbn_4x4+xn+yn*s4x4;\
if((h->mb.ref[0][i8p] != h->mb.ref[0][i8q] ||\
abs( h->mb.mv[0][i4p][0] - h->mb.mv[0][i4q][0] ) >= 4 ||\
abs( h->mb.mv[0][i4p][1] - h->mb.mv[0][i4q][1] ) >= mvy_limit ) ||\
(h->sh.i_type == SLICE_TYPE_B &&\
(h->mb.ref[1][i8p] != h->mb.ref[1][i8q] ||\
abs( h->mb.mv[1][i4p][0] - h->mb.mv[1][i4q][0] ) >= 4 ||\
abs( h->mb.mv[1][i4p][1] - h->mb.mv[1][i4q][1] ) >= mvy_limit )))\
{\
bS[i] = 1;\
}\
}\
}\
}\
}\
}
/* i_dir == 0 -> vertical edge
 * i_dir == 1 -> horizontal edge
 *
 * DEBLOCK_DIR(i_dir) processes all edges of the current macroblock in one direction.
 * Edge 0 (shared with the left or top neighbour) is handled first, unless the
 * macroblock is in the first column / first row(s), in which case it is skipped.
 * For edge 0 the neighbour indices mbn_* point at the left or top macroblock (one
 * more row up for horizontal edges when b_interlaced is set).  If either macroblock
 * is intra -- and this is not the interlaced horizontal case -- the _intra kernels
 * are used and the bS computation is bypassed via the end<i_dir> label.
 * The remaining edges lie inside the macroblock, so mbn_* are set to the current
 * macroblock; the edge index advances by 1, or by 2 when b_8x8_transform is set,
 * up to i_edge_end. An edge is filtered only if some bS entry is non-zero. */
#define DEBLOCK_DIR(i_dir)\
{\
int i_edge = (i_dir ? (mb_y <= b_interlaced) : (mb_x == 0));\
int i_qpn, i, mbn_xy, mbn_8x8, mbn_4x4;\
DECLARE_ALIGNED_4( uint8_t bS[4] ); /* filtering strength */\
if( i_edge )\
i_edge+= b_8x8_transform;\
else\
{\
mbn_xy = i_dir == 0 ? mb_xy - 1 : mb_xy - h->mb.i_mb_stride;\
mbn_8x8 = i_dir == 0 ? mb_8x8 - 2 : mb_8x8 - 2 * s8x8;\
mbn_4x4 = i_dir == 0 ? mb_4x4 - 4 : mb_4x4 - 4 * s4x4;\
if( b_interlaced && i_dir == 1 )\
{\
mbn_xy -= h->mb.i_mb_stride;\
mbn_8x8 -= 2 * s8x8;\
mbn_4x4 -= 4 * s4x4;\
}\
else if( IS_INTRA( h->mb.type[mb_xy] ) || IS_INTRA( h->mb.type[mbn_xy]) )\
{\
FILTER_DIR( _intra, i_dir );\
goto end##i_dir;\
}\
DEBLOCK_STRENGTH(i_dir);\
if( *(uint32_t*)bS )\
FILTER_DIR( , i_dir);\
end##i_dir:\
i_edge += b_8x8_transform+1;\
}\
mbn_xy = mb_xy;\
mbn_8x8 = mb_8x8;\
mbn_4x4 = mb_4x4;\
for( ; i_edge < i_edge_end; i_edge+=b_8x8_transform+1 )\
{\
DEBLOCK_STRENGTH(i_dir);\
if( *(uint32_t*)bS )\
FILTER_DIR( , i_dir);\
}\
}
DEBLOCK_DIR(0); //!< filter the vertical edges
DEBLOCK_DIR(1); //!< filter the horizontal edges
}
/* Counterpart of the munge_cavlc_nnz call at the top of the function. */
if( !h->pps->b_cabac && h->pps->b_transform_8x8_mode )
munge_cavlc_nnz( h, mb_y, h->mb.nnz_backup, restore_cavlc_nnz_row );
}
函數中間有一大段都是用宏來寫的。個人建議剛開始看時,大概根據宏名判斷它的功能就行,直接跳到最後兩個DEBLOCK_DIR的宏,通過設置斷點的方式對它們進行調試,從而先熟悉濾波的實際工作過程;等之後再回過頭來分析這幾個宏的具體實現。畢竟這幾個宏所做的工作主要是確定bS值,再選擇合適的濾波函數進行實際的濾波。