先解釋下,運動補償和帶權預測
1 運動補償:h264編碼器爲了讓宏塊的運動預測更加精確,通過插值的方法把參考像素細分到最小1/4像素的精度,生成新的預測塊,從而減少運動殘差。
2 帶權預測:h264在做宏塊預測的時候,首先以幀爲單位,分別計算當前編碼幀和被參考幀的luma(亮度)、chroma(色度)平均值,然後求兩個平均值的比值,把這個比值作爲權重,先對參考幀裏的被參考塊做一次乘法縮放,讓它更接近當前編碼宏塊。舉例來說:參考宏塊平均值255,編碼宏塊平均值130,先將參考宏塊乘以130/255,然後再和編碼宏塊做差值(殘差)運算,這樣得到的殘差更小,更容易壓縮。
(1) 運動補償代碼流程
...先是做了宏塊分析...找到了參考幀,計算好了mv
void x264_macroblock_encode( x264_t *h )
{
    /* Dispatch to the internal encoder based on the chroma subsampling of the
     * stream: 4:4:4 encodes three full-resolution planes with no separate
     * chroma path; 4:2:0/4:2:2 encode one luma plane plus the dedicated
     * chroma path; monochrome (4:0:0) encodes luma only. */
    int plane_count = CHROMA444 ? 3 : 1;
    int has_chroma  = !CHROMA444 && CHROMA_FORMAT != 0;
    macroblock_encode_internal( h, plane_count, has_chroma );
}
// Encode one macroblock. plane_count is 3 for 4:4:4 (each plane encoded like
// luma) and 1 otherwise; chroma enables the dedicated subsampled-chroma path.
// NOTE(review): this excerpt is truncated by the blog post — mvx/mvy used
// below are produced by the preceding macroblock analysis and are not visible
// here; the function continues past the end of this snippet.
static ALWAYS_INLINE void macroblock_encode_internal( x264_t *h, int plane_count, int chroma )
{
int i_qp = h->mb.i_qp;
int b_decimate = h->mb.b_dct_decimate;
int b_force_no_skip = 0;
int nz;
h->mb.i_cbp_luma = 0;
for( int p = 0; p < plane_count; p++ )
h->mb.cache.non_zero_count[x264_scan8[LUMA_DC+p]] = 0;
if( h->mb.i_type == I_PCM )// PCM encoder
{
/* if PCM is chosen, we need to store reconstructed frame data */
for( int p = 0; p < plane_count; p++ )
h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[p], FDEC_STRIDE, h->mb.pic.p_fenc[p], FENC_STRIDE, 16 );
if( chroma )
{
int height = 16 >> CHROMA_V_SHIFT;
h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fdec[1], FDEC_STRIDE, h->mb.pic.p_fenc[1], FENC_STRIDE, height );
h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fdec[2], FDEC_STRIDE, h->mb.pic.p_fenc[2], FENC_STRIDE, height );
}
// For PCM the source pixels (fenc) are copied straight into the reconstructed
// frame (fdec) — no prediction, transform or quantization is performed.
return;
}
// Non-PCM path (the PCM branch above can be ignored for MC purposes):
// run luma motion compensation for each plane.
for( int p = 0; p < plane_count; p++ )
h->mc.mc_luma( h->mb.pic.p_fdec[p], FDEC_STRIDE,
&h->mb.pic.p_fref[0][0][p*4], h->mb.pic.i_stride[p],
mvx, mvy, 16, 16, &h->sh.weight[0][p] );// luma motion compensation
// Encoding step one: copy/interpolate the reference block into the
// corresponding position of the reconstruction frame.
/* Produce the luma prediction block for a motion vector (mvx,mvy) given in
 * quarter-pel units. src[] holds the full-pel plane plus the three half-pel
 * planes interpolated earlier (during lookahead/frame setup); quarter-pel
 * positions are synthesized by averaging two of those planes. Optional
 * weighted prediction is applied on top of the interpolated block. */
static void mc_luma( pixel *dst, intptr_t i_dst_stride,
                     pixel *src[4], intptr_t i_src_stride,
                     int mvx, int mvy,
                     int i_width, int i_height, const x264_weight_t *weight )
{
    int qpel = ((mvy&3)<<2) + (mvx&3);                   /* quarter-pel phase, 0..15 */
    int fpel_offset = (mvy>>2)*i_src_stride + (mvx>>2);  /* full-pel part of the MV */
    pixel *ref0 = src[x264_hpel_ref0[qpel]] + fpel_offset + ((mvy&3) == 3) * i_src_stride;

    if( !(qpel & 5) )
    {
        /* MV lands exactly on a full-pel or half-pel plane: no further
         * interpolation needed, weight or copy directly from ref0. */
        if( weight->weightfn )
            mc_weight( dst, i_dst_stride, ref0, i_src_stride, weight, i_width, i_height );
        else
            mc_copy( ref0, i_src_stride, dst, i_dst_stride, i_width, i_height );
        return;
    }

    /* True quarter-pel position: average the two nearest half-pel planes. */
    pixel *ref1 = src[x264_hpel_ref1[qpel]] + fpel_offset + ((mvx&3) == 3);
    pixel_avg( dst, i_dst_stride, ref0, i_src_stride,
               ref1, i_src_stride, i_width, i_height );
    if( weight->weightfn )
        mc_weight( dst, i_dst_stride, dst, i_dst_stride, weight, i_width, i_height );
}
//這是亮度參考宏塊拷貝到重建宏塊的過程
2 帶權預測
mc_weight( dst, i_dst_stride, src1, i_src_stride, weight, i_width, i_height );
/* Explicit weighted prediction: scale the reference pixels by i_scale (with a
 * rounding right-shift of i_denom bits) and add i_offset, clipping to the
 * valid pixel range, writing the weighted prediction into dst. The residual
 * is later computed against this weighted block, which is closer to the
 * source and therefore cheaper to code. */
static void mc_weight( pixel *dst, intptr_t i_dst_stride, pixel *src, intptr_t i_src_stride,
                       const x264_weight_t *weight, int i_width, int i_height )
{
    /* i_offset may be negative, and left-shifting a negative value is
     * undefined behavior in C — scale it by multiplication instead of `<<`.
     * (BIT_DEPTH-8 adjusts the 8-bit-range offset to the actual bit depth.) */
    int offset = weight->i_offset * (1 << (BIT_DEPTH-8));
    int scale = weight->i_scale;   /* multiplicative weight */
    int denom = weight->i_denom;   /* log2 of the weight denominator */
    if( denom >= 1 )
    {
        /* opscale: dst = clip(((src*scale + (1<<(denom-1))) >> denom) + offset) */
        for( int y = 0; y < i_height; y++, dst += i_dst_stride, src += i_src_stride )
            for( int x = 0; x < i_width; x++ )
                opscale( x );
    }
    else
    {
        /* denom == 0: no shift needed, plain scale + offset. */
        for( int y = 0; y < i_height; y++, dst += i_dst_stride, src += i_src_stride )
            for( int x = 0; x < i_width; x++ )
                opscale_noden( x );
    }
}
x264_encoder_encode()-->
weighted_pred_init()-->
// NOTE(review): this excerpt is garbled by the blog paste — the signature
// below belongs to x264_weight_scale_plane, but the loop body shown is from
// the weight-analysis code (weighted_pred_init path) that derives the weight
// coefficients per plane/reference and then calls x264_weight_scale_plane.
void x264_weight_scale_plane( x264_t *h, pixel *dst, intptr_t i_dst_stride, pixel *src, intptr_t i_src_stride,
int i_width, int i_height, x264_weight_t *w )// derives the weight coefficients (scale/offset) for w
{
for( int i = 0; i < 3; i++ )// iterate over each plane (Y, U, V)
{
for( int j = 0; j < h->i_ref[0]; j++ )// once per reference frame in list 0
{
x264_weight_scale_plane( h, dst, stride, src, stride, width, height, &h->sh.weight[j][0] );
/* Apply the weight w to an entire plane. The plane is processed in horizontal
 * strips of 16 rows — found to be the optimal strip height for cache loads —
 * using the 16-pixel-wide weight kernel for full tiles and the 8-wide kernel
 * for the trailing partial tile of each strip. */
void x264_weight_scale_plane( x264_t *h, pixel *dst, intptr_t i_dst_stride, pixel *src, intptr_t i_src_stride,
                              int i_width, int i_height, x264_weight_t *w )
{
    for( ; i_height > 0; i_height -= 16, dst += 16 * i_dst_stride, src += 16 * i_src_stride )
    {
        int strip = X264_MIN( i_height, 16 );  /* last strip may be shorter */
        int x = 0;
        for( ; x < i_width-8; x += 16 )
            w->weightfn[16>>2]( dst+x, i_dst_stride, src+x, i_src_stride, w, strip );
        if( x < i_width )
            w->weightfn[ 8>>2]( dst+x, i_dst_stride, src+x, i_src_stride, w, strip );
    }
}
最終的weightfn用匯編實現的,具體代碼後面再詳細分析
}
}
// Weighted-prediction parameters for one reference plane.
typedef struct x264_weight_t
{
/* aligning the first member is a gcc hack to force the struct to be
* 16 byte aligned, as well as force sizeof(struct) to be a multiple of 16 */
ALIGNED_16( int16_t cachea[8] ); // scratch table — presumably precomputed values for the SIMD weight kernels; TODO confirm against the asm
int16_t cacheb[8]; // second scratch table for the asm kernels — TODO confirm
int32_t i_denom; // used as the right-shift amount after scaling (log2 of the denominator) — see opscale()
int32_t i_scale; // multiplicative weight applied to the reference pixels
int32_t i_offset; // additive offset applied after the scale/shift
weight_fn_t *weightfn; // kernel table indexed by block width/4 (e.g. weightfn[16>>2], weightfn[8>>2])
} ALIGNED_16( x264_weight_t );
帶權預測運算過程
#define opscale(x) dst[x] = x264_clip_pixel( ((src[x] * scale + (1<<(denom - 1))) >> denom) + offset ) // 定點數加權運算:src乘以scale後右移denom位(移位前加上 1<<(denom-1) 實現四捨五入),再加上offset,最後裁剪到合法像素範圍
#define opscale_noden(x) dst[x] = x264_clip_pixel( src[x] * scale + offset )
這裏大致介紹了這兩個東西的基本含義和流程,詳細公式含義後續再研究。