x264 Source Code Analysis -- x264_slicetype_mb_cost

static int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
                            x264_frame_t **frames, int p0/*forward reference index*/, int p1/*backward reference index*/, int b/*current frame index*/,
                            int dist_scale_factor, int do_search[2] )
{
    x264_frame_t *fref0 = frames[p0];
    x264_frame_t *fref1 = frames[p1];
    x264_frame_t *fenc  = frames[b];
    const int b_bidir = (b < p1);      // whether a backward reference exists, i.e. whether the current frame is a B-frame
    const int i_mb_x = h->mb.i_mb_x;   // X coordinate (in macroblocks) of the MB being analysed
    const int i_mb_y = h->mb.i_mb_y;   // Y coordinate (in macroblocks) of the MB being analysed
    const int i_mb_stride = h->sps->i_mb_width;         // frame width in macroblocks
    const int i_mb_xy = i_mb_x + i_mb_y * i_mb_stride;  // 2-D MB coordinates flattened to a 1-D index
    const int i_stride = fenc->i_stride_lowres;         // stride of the half-resolution (lowres) planes of the frame to encode
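    // i_pel_offset: offset of this MB inside the lowres plane; one 16x16 MB maps to an 8x8 block at half resolution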
    const int i_pel_offset = 8 * ( i_mb_x + i_mb_y * i_stride );
    const int i_bipred_weight = h->param.analyse.b_weighted_bipred ? 64 - (dist_scale_factor>>2) : 32; // implicit bi-prediction weight
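    // With weighted bipred the weight follows the temporal distances: dist_scale_factor is
    // roughly 256*(b-p0)/(p1-p0) (computed by the caller), so a B-frame midway between its
    // references gives 64 - (128>>2) = 32, i.e. an equal-weight average, while frames closer
    // to p0 weight the forward reference more heavily.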
    // lowres_mvs[0][b-p0-1][i_mb_xy]: this MB's MV towards the forward reference; lowres_mvs[1][p1-b-1][i_mb_xy]: its MV towards the backward reference
    // fenc_mvs: pointers to the cached lowres MVs of the MB at i_mb_xy, one per prediction direction
    int16_t (*fenc_mvs[2])[2] = { &frames[b]->lowres_mvs[0][b-p0-1][i_mb_xy], &frames[b]->lowres_mvs[1][p1-b-1][i_mb_xy] };
    int (*fenc_costs[2]) = { &frames[b]->lowres_mv_costs[0][b-p0-1][i_mb_xy], &frames[b]->lowres_mv_costs[1][p1-b-1][i_mb_xy] };

    ALIGNED_8( uint8_t pix1[9*FDEC_STRIDE] );
    uint8_t *pix2 = pix1+8;
    x264_me_t m[2];
    int i_bcost = COST_MAX;
    int l, i;
    int list_used = 0;

    h->mb.pic.p_fenc[0] = h->mb.pic.fenc_buf;
    h->mc.copy[PIXEL_8x8]( h->mb.pic.p_fenc[0], FENC_STRIDE, &fenc->lowres[0][i_pel_offset], i_stride, 8 ); // copy the 8x8 source block from the lowres luma plane into the encode buffer

    if( p0 == p1 ) // forward index == backward index: no references, handle as intra only
        goto lowres_intra_mb;

    // no need for h->mb.mv_min[]
    // set the horizontal full-pel MV search range
    h->mb.mv_min_fpel[0] = -8*h->mb.i_mb_x - 4;
    h->mb.mv_max_fpel[0] = 8*( h->sps->i_mb_width - h->mb.i_mb_x - 1 ) + 4;
    // set the horizontal sub-pel MV range (quarter-pel units)
    h->mb.mv_min_spel[0] = 4*( h->mb.mv_min_fpel[0] - 8 );
    h->mb.mv_max_spel[0] = 4*( h->mb.mv_max_fpel[0] + 8 );
    // [1] is the vertical MV range and depends only on i_mb_y; the caller (x264_slicetype_frame_cost)
    // walks MBs in reverse raster order, starting each row at i_mb_x = i_mb_width - 2,
    // so the vertical range only needs to be set here, once per MB row.
    if( h->mb.i_mb_x >= h->sps->i_mb_width - 2 )
    {
        h->mb.mv_min_fpel[1] = -8*h->mb.i_mb_y - 4;
        h->mb.mv_max_fpel[1] = 8*( h->sps->i_mb_height - h->mb.i_mb_y - 1 ) + 4;
        h->mb.mv_min_spel[1] = 4*( h->mb.mv_min_fpel[1] - 8 );
        h->mb.mv_max_spel[1] = 4*( h->mb.mv_max_fpel[1] + 8 );
    }

// load the four lowres planes (full-pel, horizontal, vertical, diagonal half-pel) at this MB's offset
#define LOAD_HPELS_LUMA(dst, src) \
    { \
        (dst)[0] = &(src)[0][i_pel_offset]; \
        (dst)[1] = &(src)[1][i_pel_offset]; \
        (dst)[2] = &(src)[2][i_pel_offset]; \
        (dst)[3] = &(src)[3][i_pel_offset]; \
    }

// clamp the MV to the sub-pel search range
#define CLIP_MV( mv ) \
    { \
        mv[0] = x264_clip3( mv[0], h->mb.mv_min_spel[0], h->mb.mv_max_spel[0] ); \
        mv[1] = x264_clip3( mv[1], h->mb.mv_min_spel[1], h->mb.mv_max_spel[1] ); \
    }

// NOTE: the 8x8 lowres luma block stands in for the whole macroblock's complexity
// src1/src2: 8x8 blocks fetched from the forward/backward reference at offsets mv0/mv1
// pix1: weighted average of src1 and src2
// h->pixf.mbcmp[PIXEL_8x8]: computes the SATD of an 8x8 block
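// Assuming x264's C reference for h->mc.avg, the weighted average is
// pix1[i] = ( src1[i]*i_bipred_weight + src2[i]*(64 - i_bipred_weight) + 32 ) >> 6,
// which degenerates to a plain average when the weight is 32.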
#define TRY_BIDIR( mv0, mv1, penalty ) \
    { \
        int stride1 = 16, stride2 = 16; \
        uint8_t *src1, *src2; \
        int i_cost; \
        src1 = h->mc.get_ref( pix1, &stride1, m[0].p_fref, m[0].i_stride[0], \
                              (mv0)[0], (mv0)[1], 8, 8 ); /* fetch the forward-predicted 8x8 block src1 */ \
        src2 = h->mc.get_ref( pix2, &stride2, m[1].p_fref, m[1].i_stride[0], \
                              (mv1)[0], (mv1)[1], 8, 8 ); /* fetch the backward-predicted 8x8 block src2 */ \
        h->mc.avg[PIXEL_8x8]( pix1, 16, src1, stride1, src2, stride2, i_bipred_weight ); /* weighted average of src1 and src2 into pix1 */ \
        i_cost = penalty + h->pixf.mbcmp[PIXEL_8x8]( \
                           m[0].p_fenc[0], FENC_STRIDE, pix1, 16 ); /* SATD between the source block and pix1 */\
        COPY2_IF_LT( i_bcost, i_cost, list_used, 3 ); \
    }

    m[0].i_pixel = PIXEL_8x8;
    m[0].p_cost_mv = a->p_cost_mv;
    m[0].i_stride[0] = i_stride;
    m[0].p_fenc[0] = h->mb.pic.p_fenc[0];
    LOAD_HPELS_LUMA( m[0].p_fref, fref0->lowres ); // load the lowres plane set of the forward reference frame

    if( b_bidir ) // B-frame: also set up the backward reference and try bidirectional prediction
    {
        int16_t *mvr = fref1->lowres_mvs[0][p1-p0-1][i_mb_xy]; // list-0 MV of the co-located MB in the backward reference, pointing at fref0
        int dmv[2][2];

        h->mc.memcpy_aligned( &m[1], &m[0], sizeof(x264_me_t) );
        LOAD_HPELS_LUMA( m[1].p_fref, fref1->lowres ); // load the lowres plane set of the backward reference frame

        // derive a bidirectional MV pair by temporally scaling mvr with dist_scale_factor (temporal-direct style)
        dmv[0][0] = ( mvr[0] * dist_scale_factor + 128 ) >> 8;
        dmv[0][1] = ( mvr[1] * dist_scale_factor + 128 ) >> 8;
        dmv[1][0] = dmv[0][0] - mvr[0];
        dmv[1][1] = dmv[0][1] - mvr[1];
        CLIP_MV( dmv[0] );
        CLIP_MV( dmv[1] );
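        // Worked example with assumed distances p0=0, b=1, p1=3: dist_scale_factor is roughly
        // 256*1/3 = 85, so dmv[0] is about mvr/3 (pointing towards p0) and dmv[1] = dmv[0] - mvr
        // is about -2*mvr/3 (pointing towards p1) -- the co-located MV is split in proportion
        // to the temporal distances.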

        TRY_BIDIR( dmv[0], dmv[1], 0 );                     // bidirectional cost using the temporally scaled MV pair
        if( dmv[0][0] | dmv[0][1] | dmv[1][0] | dmv[1][1] ) // if any component of the scaled MVs is non-zero, also try the zero-MV case: a weighted average of the two co-located blocks
        {
            int i_cost;
            h->mc.avg[PIXEL_8x8]( pix1, 16, m[0].p_fref[0], m[0].i_stride[0], m[1].p_fref[0], m[1].i_stride[0], i_bipred_weight );
            i_cost = h->pixf.mbcmp[PIXEL_8x8]( m[0].p_fenc[0], FENC_STRIDE, pix1, 16 );
            COPY2_IF_LT( i_bcost, i_cost, list_used, 3 );
        }
    }

    for( l = 0; l < 1 + b_bidir; l++ )
    {
        if( do_search[l] )
        {
            int i_mvc = 0;
            int16_t (*fenc_mv)[2] = fenc_mvs[l]; // pointer into this frame's lowres MV array for list l, positioned at the current MB
            ALIGNED_4( int16_t mvc[4][2] );

            /* Reverse-order MV prediction. */
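            // The lookahead visits MBs in reverse raster order, so the neighbours to the
            // right and below have already been searched; their MVs are used as predictors.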
            *(uint32_t*)mvc[0] = 0;
            *(uint32_t*)mvc[1] = 0;
            *(uint32_t*)mvc[2] = 0;
#define MVC(mv) { *(uint32_t*)mvc[i_mvc] = *(uint32_t*)mv; i_mvc++; }
            if( i_mb_x < h->sps->i_mb_width - 1 )
                MVC(fenc_mv[1]);
            if( i_mb_y < h->sps->i_mb_height - 1 )
            {
                MVC(fenc_mv[i_mb_stride]);
                if( i_mb_x > 0 )
                    MVC(fenc_mv[i_mb_stride-1]);
                if( i_mb_x < h->sps->i_mb_width - 1 )
                    MVC(fenc_mv[i_mb_stride+1]);
            }
#undef MVC
            x264_median_mv( m[l].mvp, mvc[0], mvc[1], mvc[2] ); // median MV predictor from the neighbour candidates
            x264_me_search( h, &m[l], mvc, i_mvc );             // ** only the minimum SATD cost is kept here **, presumably for rate control

            m[l].cost -= 2; // remove mvcost from skip mbs
            if( *(uint32_t*)m[l].mv )
                m[l].cost += 5;
            *(uint32_t*)fenc_mvs[l] = *(uint32_t*)m[l].mv; // cache the MV found by the search
            *fenc_costs[l] = m[l].cost;
        }
        else // this direction was already searched in an earlier lookahead pass
        {
            *(uint32_t*)m[l].mv = *(uint32_t*)fenc_mvs[l]; // reuse the cached MV
            m[l].cost = *fenc_costs[l];                    // reuse the cached MV cost
        }
        COPY2_IF_LT( i_bcost, m[l].cost, list_used, l+1 );
    }

    if( b_bidir && ( *(uint32_t*)m[0].mv || *(uint32_t*)m[1].mv ) )
        TRY_BIDIR( m[0].mv, m[1].mv, 5 );

    /* Store to width-2 bitfield. */
    frames[b]->lowres_inter_types[b-p0][p1-b][i_mb_xy>>2] &= ~(3<<((i_mb_xy&3)*2));
    frames[b]->lowres_inter_types[b-p0][p1-b][i_mb_xy>>2] |= list_used<<((i_mb_xy&3)*2);
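    // lowres_inter_types packs one 2-bit code per MB, four MBs per byte: 1 = list 0 only,
    // 2 = list 1 only, 3 = bidirectional, as selected via list_used above.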

lowres_intra_mb:
    /* forbid intra-mbs in B-frames, because it's rare and not worth checking */
    /* FIXME: Should we still forbid them now that we cache intra scores? */
    if( !b_bidir || h->param.rc.b_mb_tree ) // compute the intra SATD cost
    {
        int i_icost, b_intra;
        if( !fenc->b_intra_calculated )
        {
            ALIGNED_ARRAY_16( uint8_t, edge,[33] );
            uint8_t *pix = &pix1[8+FDEC_STRIDE - 1];
            uint8_t *src = &fenc->lowres[0][i_pel_offset - 1];
            const int intra_penalty = 5;
            int satds[4];

            memcpy( pix-FDEC_STRIDE, src-i_stride, 17 );
            for( i=0; i<8; i++ )
                pix[i*FDEC_STRIDE] = src[i*i_stride];
            pix++;
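            // pix now points at the top-left pixel of the 8x8 prediction block; the row above
            // (17 pixels, including the top-left and top-right extensions) and the column to
            // the left have just been filled in for the intra predictors.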

            if( h->pixf.intra_mbcmp_x3_8x8c )
            {
                h->pixf.intra_mbcmp_x3_8x8c( h->mb.pic.p_fenc[0], pix, satds );
                h->predict_8x8c[I_PRED_CHROMA_P]( pix );
                satds[I_PRED_CHROMA_P] =
                    h->pixf.mbcmp[PIXEL_8x8]( pix, FDEC_STRIDE, h->mb.pic.p_fenc[0], FENC_STRIDE );
            }
            else
            {
                for( i=0; i<4; i++ )
                {
                    h->predict_8x8c[i]( pix );
                    satds[i] = h->pixf.mbcmp[PIXEL_8x8]( pix, FDEC_STRIDE, h->mb.pic.p_fenc[0], FENC_STRIDE );
                }
            }
            i_icost = X264_MIN4( satds[0], satds[1], satds[2], satds[3] );

            h->predict_8x8_filter( pix, edge, ALL_NEIGHBORS, ALL_NEIGHBORS );
            for( i=3; i<9; i++ )
            {
                int satd;
                h->predict_8x8[i]( pix, edge );
                satd = h->pixf.mbcmp[PIXEL_8x8]( pix, FDEC_STRIDE, h->mb.pic.p_fenc[0], FENC_STRIDE );
                i_icost = X264_MIN( i_icost, satd );
            }
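            // The lowres intra cost is the minimum SATD over the four 8x8c modes
            // (V, H, DC, plane) and the six directional 8x8 luma modes (indices 3..8),
            // plus a small fixed penalty added below.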

            i_icost += intra_penalty;
            fenc->i_intra_cost[i_mb_xy] = i_icost;
        }
        else
            i_icost = fenc->i_intra_cost[i_mb_xy];
        if( !b_bidir )
        {
            b_intra = i_icost < i_bcost;
            if( b_intra )
                i_bcost = i_icost;
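            // Only interior MBs (or very small frames) are accumulated into the frame-level
            // intra statistics, presumably to keep frame-border effects out of the estimate.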
            if(   (i_mb_x > 0 && i_mb_x < h->sps->i_mb_width - 1
                && i_mb_y > 0 && i_mb_y < h->sps->i_mb_height - 1)
                || h->sps->i_mb_width <= 2 || h->sps->i_mb_height <= 2 )
            {
                fenc->i_intra_mbs[b-p0] += b_intra;
                fenc->i_cost_est[0][0] += i_icost;
                if( h->param.rc.i_aq_mode )
                    fenc->i_cost_est_aq[0][0] += (i_icost * fenc->i_inv_qscale_factor[i_mb_xy] + 128) >> 8;
            }
        }
    }

    fenc->lowres_costs[b-p0][p1-b][i_mb_xy] = i_bcost; // cost of the MB at i_mb_xy with forward reference p0 and backward reference p1

    return i_bcost;
}
#undef TRY_BIDIR
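
To make the temporal MV scaling and the implicit bi-prediction weight concrete, here is a small standalone sketch (not part of x264). The dist_scale_factor rounding is an assumption about what the caller x264_slicetype_frame_cost computes, and avg_weight mirrors the weighted average that h->mc.avg is assumed to perform in the C reference path; the MV scaling and the weight formula are copied from the function above.

#include <stdio.h>
#include <stdint.h>

/* Standalone sketch of the lookahead's temporal MV scaling and implicit
 * bi-prediction weighting.  Not x264 code: the dist_scale_factor rounding is
 * an assumption about the caller, and avg_weight mirrors the weighted average
 * the comments above assume for h->mc.avg. */

static int clip3( int v, int lo, int hi )
{
    return v < lo ? lo : v > hi ? hi : v;
}

/* assumed C-reference style weighted average of two pixels */
static int avg_weight( int a, int b, int w )
{
    return ( a * w + b * (64 - w) + 32 ) >> 6;
}

int main( void )
{
    int p0 = 0, b = 1, p1 = 3;      /* hypothetical frame indices */
    int16_t mvr[2] = { 24, 10 };    /* co-located MV found in fref1 */

    /* assumed to match the caller: roughly 256*(b-p0)/(p1-p0) */
    int dist_scale_factor = ( (b - p0) << 8 ) / (p1 - p0);

    /* same scaling as in x264_slicetype_mb_cost */
    int dmv0[2], dmv1[2];
    dmv0[0] = ( mvr[0] * dist_scale_factor + 128 ) >> 8;
    dmv0[1] = ( mvr[1] * dist_scale_factor + 128 ) >> 8;
    dmv1[0] = dmv0[0] - mvr[0];
    dmv1[1] = dmv0[1] - mvr[1];

    /* implicit weight applied to the forward (list 0) reference */
    int i_bipred_weight = clip3( 64 - (dist_scale_factor >> 2), 0, 64 );

    printf( "dist_scale_factor=%d dmv0=(%d,%d) dmv1=(%d,%d) weight=%d/64\n",
            dist_scale_factor, dmv0[0], dmv0[1], dmv1[0], dmv1[1], i_bipred_weight );
    printf( "weighted average of pixels 120 and 60: %d\n",
            avg_weight( 120, 60, i_bipred_weight ) );
    return 0;
}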
