slicetype_frame_cost
/*
 * slicetype_frame_cost:
 * Estimate the cost of coding frames[b] when frames[p0] is its forward
 * reference and frames[p1] is its backward reference. The frame is processed
 * slice by slice and the per-slice results are summed.
 *
 *   p0 == p1 == b : no reference at all, i.e. an I-frame
 *   p1 == b       : forward reference only, i.e. a P-frame
 *   otherwise     : B-frame
 *
 * Per the original notes on the per-macroblock routine: as an I-frame every
 * macroblock costs its intra cost, as a P-frame min( intra, inter ), and as a
 * B-frame its inter cost.
 *
 * Each frame carries a cost matrix i_cost_est[b-p0][p1-b] holding the frame
 * cost for that particular (p0, p1) pairing. A non-negative entry is treated
 * as an already computed result and returned directly (when VBV is enabled,
 * the matching row SATDs must have been filled in as well).
 *
 * Returns the frame cost; for B-frames it is scaled by
 * 100 / (120 + i_bframe_bias) before being stored and returned.
 */
static int slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a,
                                 x264_frame_t **frames, int p0, int p1, int b )
{
    x264_frame_t *fenc = frames[b];               /* the frame being evaluated */
    const x264_weight_t *weight = x264_weight_none;
    const int fwd = b - p0;                       /* distance to the forward reference */
    const int bwd = p1 - b;                       /* distance to the backward reference */
    int need_search[2];                           /* [0]: list0 search pending, [1]: list1 search pending */

    /* Check whether we already evaluated this frame.
     * If we have tried this frame as P, then we have also tried
     * the preceding frames as B. (is this still true?)
     * Also check that we already calculated the row SATDs for the current frame. */
    if( fenc->i_cost_est[fwd][bwd] >= 0 &&
        ( !h->param.rc.i_vbv_buffer_size || fenc->i_row_satds[fwd][bwd][0] != -1 ) )
        return fenc->i_cost_est[fwd][bwd];

    /* For each list, check to see whether we have lowres motion-searched this
     * reference frame before: a first MV component still equal to 0x7FFF
     * means "not yet", and it is reset to 0 here. */
    need_search[0] = fwd != 0 && fenc->lowres_mvs[0][fwd-1][0][0] == 0x7FFF;
    need_search[1] = bwd != 0 && fenc->lowres_mvs[1][bwd-1][0][0] == 0x7FFF;
    if( need_search[0] )
    {
        /* Weighted prediction is only analysed when b is evaluated without a
         * backward reference (b == p1). */
        if( h->param.analyse.i_weighted_pred && bwd == 0 )
        {
            x264_emms();
            x264_weights_analyse( h, fenc, frames[p0], 1 );
            weight = fenc->weight[0];
        }
        fenc->lowres_mvs[0][fwd-1][0][0] = 0;
    }
    if( need_search[1] )
        fenc->lowres_mvs[1][bwd-1][0][0] = 0;

    /* Position of b between p0 and p1, rounded, scaled by 256.
     * Stays at 128 when there is no reference span (p0 == p1). */
    int dist_scale_factor = 128;
    if( p1 != p0 )
    {
        const int span = p1 - p0;
        dist_scale_factor = ( (fwd << 8) + (span >> 1) ) / span;
    }

    /* Output layout inside scratch_buffer2: all inter regions first, then all
     * intra regions. Each worker region is
     *   COST_EST COST_EST_AQ INTRA_MBS NUM_ROWS [ROW_SATD1 ... ROW_SATDn]
     * followed by PAD_SIZE ints of padding. */
    const int n_threads = h->param.i_lookahead_threads;
    const int mb_rows = h->mb.i_mb_height;
    const int output_buf_size = mb_rows + (NUM_INTS + PAD_SIZE) * n_threads;
    int *output_inter[X264_LOOKAHEAD_THREAD_MAX+1];
    int *output_intra[X264_LOOKAHEAD_THREAD_MAX+1];
    output_inter[0] = h->scratch_buffer2;
    output_intra[0] = output_inter[0] + output_buf_size;

    if( n_threads > 1 )
    {
        /* Multiple lookahead threads: one job descriptor per worker. The array
         * is sized for the maximum thread count and must stay alive until
         * every worker has been waited for below. */
        x264_slicetype_slice_t jobs[X264_LOOKAHEAD_THREAD_MAX];

        for( int tid = 0; tid < n_threads; tid++ )
        {
            x264_t *t = h->lookahead_thread[tid];

            /* Rows [start, end) handled by this worker. */
            t->i_threadslice_start = (mb_rows * tid + n_threads/2) / n_threads;
            t->i_threadslice_end = (mb_rows * (tid+1) + n_threads/2) / n_threads;
            int thread_height = t->i_threadslice_end - t->i_threadslice_start;

            /* One ROW_SATD slot per row plus the NUM_INTS header slots. */
            int thread_output_size = thread_height + NUM_INTS;
            memset( output_inter[tid], 0, thread_output_size * sizeof(int) );
            memset( output_intra[tid], 0, thread_output_size * sizeof(int) );
            output_intra[tid][NUM_ROWS] = thread_height;
            output_inter[tid][NUM_ROWS] = thread_height;

            /* The next worker's region starts after this one plus padding. */
            output_inter[tid+1] = output_inter[tid] + thread_output_size + PAD_SIZE;
            output_intra[tid+1] = output_intra[tid] + thread_output_size + PAD_SIZE;

            /* FIXME move this somewhere else */
            /* Copy the motion-estimation settings from h to the worker handle. */
            t->mb.i_me_method = h->mb.i_me_method;
            t->mb.i_subpel_refine = h->mb.i_subpel_refine;
            t->mb.b_chroma_me = h->mb.b_chroma_me;

            jobs[tid] = (x264_slicetype_slice_t){ t, a, frames, p0, p1, b, dist_scale_factor, need_search, weight,
                                                  output_inter[tid], output_intra[tid] };

            /* Hand the slice to the lookahead pool; results land in
             * output_inter[tid] / output_intra[tid]. */
            x264_threadpool_run( h->lookaheadpool, (void*)slicetype_slice_cost, &jobs[tid] );
        }

        /* Wait for every worker to finish. */
        for( int tid = 0; tid < n_threads; tid++ )
            x264_threadpool_wait( h->lookaheadpool, &jobs[tid] );
    }
    else
    {
        /* Single lookahead thread: the whole frame is one slice, computed
         * right here on h. */
        h->i_threadslice_start = 0;
        h->i_threadslice_end = mb_rows;

        memset( output_inter[0], 0, (output_buf_size - PAD_SIZE) * sizeof(int) );
        memset( output_intra[0], 0, (output_buf_size - PAD_SIZE) * sizeof(int) );
        output_intra[0][NUM_ROWS] = mb_rows;
        output_inter[0][NUM_ROWS] = mb_rows;

        x264_slicetype_slice_t job = { h, a, frames, p0, p1, b, dist_scale_factor, need_search, weight,
                                       output_inter[0], output_intra[0] };
        slicetype_slice_cost( &job );
    }

    /* Sum up accumulators.
     * NOTE: when b == p0 == p1 the [fwd][bwd] cell is the same element as
     * [0][0], so totals are accumulated directly in the frame fields and in
     * this exact order rather than through local temporaries. */
    const int no_backward_ref = ( bwd == 0 );          /* evaluated as I or P */
    const int need_intra = !fenc->b_intra_calculated;  /* intra totals not stored yet */
    const int use_vbv = h->param.rc.i_vbv_buffer_size != 0;

    if( no_backward_ref )
        fenc->i_intra_mbs[fwd] = 0;
    if( need_intra )
    {
        fenc->i_cost_est[0][0] = 0;
        fenc->i_cost_est_aq[0][0] = 0;
    }
    fenc->i_cost_est[fwd][bwd] = 0;
    fenc->i_cost_est_aq[fwd][bwd] = 0;

    int *row_satd_inter = fenc->i_row_satds[fwd][bwd];
    int *row_satd_intra = fenc->i_row_satds[0][0];

    for( int tid = 0; tid < n_threads; tid++ )
    {
        const int *inter = output_inter[tid];
        const int *intra = output_intra[tid];

        /* Intra macroblock count is only tracked without a backward reference. */
        if( no_backward_ref )
            fenc->i_intra_mbs[fwd] += inter[INTRA_MBS];

        if( need_intra )
        {
            fenc->i_cost_est[0][0] += intra[COST_EST];
            fenc->i_cost_est_aq[0][0] += intra[COST_EST_AQ];
        }

        fenc->i_cost_est[fwd][bwd] += inter[COST_EST];
        fenc->i_cost_est_aq[fwd][bwd] += inter[COST_EST_AQ];

        if( use_vbv )
        {
            /* Append this slice's per-row SATDs to the frame's row arrays. */
            int row_count = inter[NUM_ROWS];
            memcpy( row_satd_inter, inter + NUM_INTS, row_count * sizeof(int) );
            if( need_intra )
                memcpy( row_satd_intra, intra + NUM_INTS, row_count * sizeof(int) );
            row_satd_inter += row_count;
            row_satd_intra += row_count;
        }
    }

    int frame_cost = fenc->i_cost_est[fwd][bwd];
    if( bwd != 0 )
        /* B-frame: scale the cost according to i_bframe_bias. */
        frame_cost = (uint64_t)frame_cost * 100 / (120 + h->param.i_bframe_bias);
    else
        /* I/P evaluation: the intra totals are now stored. */
        fenc->b_intra_calculated = 1;

    /* Write the final value back into the frame's cost matrix. */
    fenc->i_cost_est[fwd][bwd] = frame_cost;
    x264_emms();

    return frame_cost;
}
slicetype_slice_cost
/*
 * slicetype_slice_cost:
 * Compute the cost of one slice macroblock by macroblock.
 * Results are written to s->output_inter / s->output_intra.
 */
static void slicetype_slice_cost( x264_slicetype_slice_t *s )
{
    x264_t *h = s->h;

    /* The edge mbs seem to reduce the predictive quality of the
     * whole frame's score, but are needed for a spatial distribution.
     * They are visited when mb-tree or VBV is enabled, or when the frame is
     * at most 2 macroblocks wide or high; otherwise a one-macroblock border
     * is skipped on every side. */
    const int with_edges = h->param.rc.b_mb_tree || h->param.rc.i_vbv_buffer_size ||
                           h->mb.i_mb_width <= 2 || h->mb.i_mb_height <= 2;
    const int border = 1 - with_edges;

    /* Column range, identical for every row. */
    const int first_col = border;
    const int last_col = h->mb.i_mb_width - 1 - border;

    /* Row range: this slice's rows, clamped to the rows that may be visited. */
    int last_row = h->i_threadslice_end - 1;
    if( last_row > h->mb.i_mb_height - 1 - border )
        last_row = h->mb.i_mb_height - 1 - border;
    int first_row = h->i_threadslice_start;
    if( first_row < border )
        first_row = border;

    /* Lowres lookahead goes backwards because the MVs are used as predictors in the main encode.
     * This considerably improves MV prediction overall.
     * The current position is kept in h->mb.i_mb_x / h->mb.i_mb_y while iterating. */
    h->mb.i_mb_y = last_row;
    while( h->mb.i_mb_y >= first_row )
    {
        h->mb.i_mb_x = last_col;
        while( h->mb.i_mb_x >= first_col )
        {
            slicetype_mb_cost( h, s->a, s->frames, s->p0, s->p1, s->b, s->dist_scale_factor,
                               s->do_search, s->w, s->output_inter, s->output_intra );
            h->mb.i_mb_x--;
        }
        h->mb.i_mb_y--;
    }
}
slicetype_mb_cost
/*
對幀b的當前宏塊計算其intra/inter_cost,intra/inter_aq_cost根據intra/inter_cost進行修正
若intra_cost<inter_cost,則判定爲IMB,累計IMB個數到output_inter[INTRA_MBS]中,且令inter_cost=intra_cost
若當前宏塊計入全幀的score中,則
將intra/inter_cost累加進各自的output_intra/inter[COST_EST]中
將intra/inter_aq_cost累加進各自的output_intra/inter[COST_EST_AQ]中
將intra/inter_aq_cost累加進各自所在行的output_intra/inter[ROW_SATD]中
*/
static void slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
x264_frame_t **frames, int p0, int p1, int b,
int dist_scale_factor, int do_search[2], const x264_weight_t *w,
int *output_inter, int *output_intra )