先解釋下,運動補償和帶權預測
1 運動補償:h264編碼器爲了讓宏塊的運動預測更加精確,通過插值的方法把參考像素細分到最小1/4像素的精度,生成新的預測塊,從而減少運動殘差。
2 帶權預測:h264在做宏塊預測的時候,首先以幀爲單位,分別計算當前編碼幀和被參考幀的luma(亮度)、chroma(色度)平均值,然後求兩個平均值的比值,把這個比值作爲權重,先對參考幀裏的被參考塊做一次乘法縮放,讓它更接近當前編碼宏塊。舉例來說:參考宏塊平均值255,編碼宏塊平均值130,先將參考宏塊乘以130/255,然後再和編碼宏塊做差值(殘差)運算,這樣得到的殘差更小,更容易壓縮。
(1) 運動補償代碼流程
...先是做了宏塊分析...找到了參考幀,計算好了mv
void x264_macroblock_encode( x264_t *h )
{
    /* Dispatch to the internal encoder based on the chroma subsampling of the
     * stream: 4:4:4 encodes three full-resolution planes with no separate
     * chroma path; 4:2:0/4:2:2 encode one luma plane plus the dedicated
     * chroma path; monochrome (4:0:0) encodes luma only. */
    int plane_count = CHROMA444 ? 3 : 1;
    int has_chroma  = !CHROMA444 && CHROMA_FORMAT != 0;
    macroblock_encode_internal( h, plane_count, has_chroma );
}
// Encode one macroblock. plane_count is 3 for 4:4:4 (each plane encoded like
// luma) and 1 otherwise; chroma enables the dedicated subsampled-chroma path.
// NOTE(review): this excerpt is truncated by the blog post — mvx/mvy used
// below are produced by the preceding macroblock analysis and are not visible
// here; the function continues past the end of this snippet.
static ALWAYS_INLINE void macroblock_encode_internal( x264_t *h, int plane_count, int chroma )
{
int i_qp = h->mb.i_qp;
int b_decimate = h->mb.b_dct_decimate;
int b_force_no_skip = 0;
int nz;
h->mb.i_cbp_luma = 0;
for( int p = 0; p < plane_count; p++ )
h->mb.cache.non_zero_count[x264_scan8[LUMA_DC+p]] = 0;
if( h->mb.i_type == I_PCM )// PCM encoder
{
/* if PCM is chosen, we need to store reconstructed frame data */
for( int p = 0; p < plane_count; p++ )
h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[p], FDEC_STRIDE, h->mb.pic.p_fenc[p], FENC_STRIDE, 16 );
if( chroma )
{
int height = 16 >> CHROMA_V_SHIFT;
h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fdec[1], FDEC_STRIDE, h->mb.pic.p_fenc[1], FENC_STRIDE, height );
h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fdec[2], FDEC_STRIDE, h->mb.pic.p_fenc[2], FENC_STRIDE, height );
}
// For PCM the source pixels (fenc) are copied straight into the reconstructed
// frame (fdec) — no prediction, transform or quantization is performed.
return;
}
// Non-PCM path (the PCM branch above can be ignored for MC purposes):
// run luma motion compensation for each plane.
for( int p = 0; p < plane_count; p++ )
h->mc.mc_luma( h->mb.pic.p_fdec[p], FDEC_STRIDE,
&h->mb.pic.p_fref[0][0][p*4], h->mb.pic.i_stride[p],
mvx, mvy, 16, 16, &h->sh.weight[0][p] );// luma motion compensation
// Encoding step one: copy/interpolate the reference block into the
// corresponding position of the reconstruction frame.
/* Produce the luma prediction block for a motion vector (mvx,mvy) given in
 * quarter-pel units. src[] holds the full-pel plane plus the three half-pel
 * planes interpolated earlier (during lookahead/frame setup); quarter-pel
 * positions are synthesized by averaging two of those planes. Optional
 * weighted prediction is applied on top of the interpolated block. */
static void mc_luma( pixel *dst, intptr_t i_dst_stride,
                     pixel *src[4], intptr_t i_src_stride,
                     int mvx, int mvy,
                     int i_width, int i_height, const x264_weight_t *weight )
{
    int qpel = ((mvy&3)<<2) + (mvx&3);                   /* quarter-pel phase, 0..15 */
    int fpel_offset = (mvy>>2)*i_src_stride + (mvx>>2);  /* full-pel part of the MV */
    pixel *ref0 = src[x264_hpel_ref0[qpel]] + fpel_offset + ((mvy&3) == 3) * i_src_stride;

    if( !(qpel & 5) )
    {
        /* MV lands exactly on a full-pel or half-pel plane: no further
         * interpolation needed, weight or copy directly from ref0. */
        if( weight->weightfn )
            mc_weight( dst, i_dst_stride, ref0, i_src_stride, weight, i_width, i_height );
        else
            mc_copy( ref0, i_src_stride, dst, i_dst_stride, i_width, i_height );
        return;
    }

    /* True quarter-pel position: average the two nearest half-pel planes. */
    pixel *ref1 = src[x264_hpel_ref1[qpel]] + fpel_offset + ((mvx&3) == 3);
    pixel_avg( dst, i_dst_stride, ref0, i_src_stride,
               ref1, i_src_stride, i_width, i_height );
    if( weight->weightfn )
        mc_weight( dst, i_dst_stride, dst, i_dst_stride, weight, i_width, i_height );
}
//這是亮度參考宏塊拷貝到重建宏塊的過程
2 帶權預測
mc_weight( dst, i_dst_stride, src1, i_src_stride, weight, i_width, i_height );
/* Explicit weighted prediction: scale the reference pixels by i_scale (with a
 * rounding right-shift of i_denom bits) and add i_offset, clipping to the
 * valid pixel range, writing the weighted prediction into dst. The residual
 * is later computed against this weighted block, which is closer to the
 * source and therefore cheaper to code. */
static void mc_weight( pixel *dst, intptr_t i_dst_stride, pixel *src, intptr_t i_src_stride,
                       const x264_weight_t *weight, int i_width, int i_height )
{
    /* i_offset may be negative, and left-shifting a negative value is
     * undefined behavior in C — scale it by multiplication instead of `<<`.
     * (BIT_DEPTH-8 adjusts the 8-bit-range offset to the actual bit depth.) */
    int offset = weight->i_offset * (1 << (BIT_DEPTH-8));
    int scale = weight->i_scale;   /* multiplicative weight */
    int denom = weight->i_denom;   /* log2 of the weight denominator */
    if( denom >= 1 )
    {
        /* opscale: dst = clip(((src*scale + (1<<(denom-1))) >> denom) + offset) */
        for( int y = 0; y < i_height; y++, dst += i_dst_stride, src += i_src_stride )
            for( int x = 0; x < i_width; x++ )
                opscale( x );
    }
    else
    {
        /* denom == 0: no shift needed, plain scale + offset. */
        for( int y = 0; y < i_height; y++, dst += i_dst_stride, src += i_src_stride )
            for( int x = 0; x < i_width; x++ )
                opscale_noden( x );
    }
}
x264_encoder_encode()-->
weighted_pred_init()-->
// NOTE(review): this excerpt is garbled by the blog paste — the signature
// below belongs to x264_weight_scale_plane, but the loop body shown is from
// the weight-analysis code (weighted_pred_init path) that derives the weight
// coefficients per plane/reference and then calls x264_weight_scale_plane.
void x264_weight_scale_plane( x264_t *h, pixel *dst, intptr_t i_dst_stride, pixel *src, intptr_t i_src_stride,
int i_width, int i_height, x264_weight_t *w )// derives the weight coefficients (scale/offset) for w
{
for( int i = 0; i < 3; i++ )// iterate over each plane (Y, U, V)
{
for( int j = 0; j < h->i_ref[0]; j++ )// once per reference frame in list 0
{
x264_weight_scale_plane( h, dst, stride, src, stride, width, height, &h->sh.weight[j][0] );
/* Apply the weight w to an entire plane. The plane is processed in horizontal
 * strips of 16 rows — found to be the optimal strip height for cache loads —
 * using the 16-pixel-wide weight kernel for full tiles and the 8-wide kernel
 * for the trailing partial tile of each strip. */
void x264_weight_scale_plane( x264_t *h, pixel *dst, intptr_t i_dst_stride, pixel *src, intptr_t i_src_stride,
                              int i_width, int i_height, x264_weight_t *w )
{
    for( ; i_height > 0; i_height -= 16, dst += 16 * i_dst_stride, src += 16 * i_src_stride )
    {
        int strip = X264_MIN( i_height, 16 );  /* last strip may be shorter */
        int x = 0;
        for( ; x < i_width-8; x += 16 )
            w->weightfn[16>>2]( dst+x, i_dst_stride, src+x, i_src_stride, w, strip );
        if( x < i_width )
            w->weightfn[ 8>>2]( dst+x, i_dst_stride, src+x, i_src_stride, w, strip );
    }
}
最終的weightfn用匯編實現的,具體代碼後面再詳細分析
}
}
// Weighted-prediction parameters for one reference plane.
typedef struct x264_weight_t
{
/* aligning the first member is a gcc hack to force the struct to be
* 16 byte aligned, as well as force sizeof(struct) to be a multiple of 16 */
ALIGNED_16( int16_t cachea[8] ); // scratch table — presumably precomputed values for the SIMD weight kernels; TODO confirm against the asm
int16_t cacheb[8]; // second scratch table for the asm kernels — TODO confirm
int32_t i_denom; // used as the right-shift amount after scaling (log2 of the denominator) — see opscale()
int32_t i_scale; // multiplicative weight applied to the reference pixels
int32_t i_offset; // additive offset applied after the scale/shift
weight_fn_t *weightfn; // kernel table indexed by block width/4 (e.g. weightfn[16>>2], weightfn[8>>2])
} ALIGNED_16( x264_weight_t );
帶權預測運算過程
#define opscale(x) dst[x] = x264_clip_pixel( ((src[x] * scale + (1<<(denom - 1))) >> denom) + offset ) // 定點數加權運算:src乘以scale後右移denom位(移位前加上 1<<(denom-1) 實現四捨五入),再加上offset,最後裁剪到合法像素範圍
#define opscale_noden(x) dst[x] = x264_clip_pixel( src[x] * scale + offset )
這裏大致介紹了這兩個東西的基本含義和流程,詳細公式含義後續再研究。