/*
 * Estimate the low-resolution cost (SATD based) of one CU of frame b, using
 * frame p0 as the forward (list 0) reference and frame p1 as the backward
 * (list 1) reference, and accumulate the result into the frame/slice/row
 * statistics stored on the encoded frame.
 *
 * Parameters:
 *   tld       - per-thread lookahead data; its motion estimator (tld.me) is used
 *   cuX, cuY  - CU position in CU units
 *   p0, p1, b - indices into m_frames of the forward reference, the backward
 *               reference and the frame being analysed; b < p1 enables the
 *               backward direction and the bidirectional candidates
 *   bDoSearch - per-direction flag; when false the cost already stored for
 *               that direction is reused instead of searching again
 *   lastRow   - when true, no candidates are taken from the row below
 *   slice     - negative: accumulate into the frame totals of fenc;
 *               otherwise accumulate into m_slice[slice]
 *   hme       - when true, work on the lower-resolution plane with the 4x4
 *               grid dimensions and return right after the motion search
 *
 * Outline:
 *  1. Fetch the forward reference, backward reference and current frame.
 *  2. Get the number of CUs per row/column (widthInCU / heightInCU).
 *  3. Compute the raster index of the CU (cuXY) and its pixel offset (pelOffset).
 *  4. Load the source block into the motion estimator (setSourcePU).
 *  5. Clamp the MV search range to [mvmin, mvmax].
 *  6. For each prediction direction:
 *     a. get the stored cost slot for this CU;
 *     b. if no search is requested, reuse the stored cost and continue;
 *     c. build the candidate list mvc (up to 5 entries) from the right,
 *        below, below-left and below-right neighbours, plus (non-hme only)
 *        the doubled MV of the corresponding lower-resolution block;
 *     d. motion-compensate every candidate, measure its SATD and keep the
 *        cheapest one as mvp;
 *     e. run motion estimation around mvp inside [mvmin, mvmax];
 *     f. update the best cost and the direction that produced it.
 *  7. hme pass: stop here.
 *  8. If bidirectional: also try avg(l0-mv, l1-mv) and the co-located average,
 *     then add lowresPenalty.
 *  9. Otherwise (P): add lowresPenalty to the inter cost and compare it with
 *     the stored intra cost.
 * 10. Decide whether the CU counts towards the frame score (edge CUs do not,
 *     unless the frame is at most 2 CUs wide or high).
 * 11. Derive the AQ-weighted cost from the cost.
 * 12. For scored CUs, add cost and AQ cost to the frame or slice totals.
 * 13. Add the AQ cost to the row total.
 * 14. Store the per-CU cost together with the chosen direction.
 */
void CostEstimateGroup::estimateCUCost(LookaheadTLD& tld, int cuX, int cuY, int p0, int p1, int b, bool bDoSearch[2], bool lastRow, int slice, bool hme)
{
// Forward reference p0, backward reference p1, and the frame b being analysed
Lowres *fref0 = m_frames[p0];
Lowres *fref1 = m_frames[p1];
Lowres *fenc = m_frames[b];
// Use the weighted forward reference only when it is flagged as weighted and this is not
// the hme pass; otherwise use the plain forward reference
ReferencePlanes *wfref0 = (fenc->weightedRef[b - p0].isWeighted && !hme) ? &fenc->weightedRef[b - p0] : fref0;
// Frame dimensions in CUs: the 4x4 grid for the hme pass, the 8x8 grid otherwise
const int widthInCU = hme ? m_lookahead.m_4x4Width : m_lookahead.m_8x8Width;
const int heightInCU = hme ? m_lookahead.m_4x4Height : m_lookahead.m_8x8Height;
// The backward direction / bidirectional candidates are considered only when b < p1
const int bBidir = (b < p1);
// Raster index of this CU in the current grid
const int cuXY = cuX + cuY * widthInCU;
// Index used below to read the lowerRes arrays from the non-hme pass.
// NOTE(review): this evaluates as ((cuY / 2) * widthInCU) / 2, which differs from
// (cuY / 2) * (widthInCU / 2) when widthInCU is odd - confirm it matches the row pitch
// of the lowerRes arrays.
const int cuXY_4x4 = (cuX / 2) + (cuY / 2) * widthInCU / 2;
// CU size used by the lookahead
const int cuSize = X265_LOWRES_CU_SIZE;
// Pixel offset of the CU inside the plane; the hme pass uses half the luma stride
const intptr_t pelOffset = cuSize * cuX + cuSize * cuY * (hme ? fenc->lumaStride/2 : fenc->lumaStride);
// Load the source block into the motion estimator, but only if some inter work will be
// done. The hme pass reads lowerResPlane[0] with half stride, the normal pass lowresPlane[0].
if ((bBidir || bDoSearch[0] || bDoSearch[1]) && hme)
tld.me.setSourcePU(fenc->lowerResPlane[0], fenc->lumaStride / 2, pelOffset, cuSize, cuSize, X265_HEX_SEARCH, m_lookahead.m_param->hmeSearchMethod[0], m_lookahead.m_param->hmeSearchMethod[1], 1);
else if((bBidir || bDoSearch[0] || bDoSearch[1]) && !hme)
tld.me.setSourcePU(fenc->lowresPlane[0], fenc->lumaStride, pelOffset, cuSize, cuSize, X265_HEX_SEARCH, m_lookahead.m_param->hmeSearchMethod[0], m_lookahead.m_param->hmeSearchMethod[1], 1);
/* A small, arbitrary bias to avoid VBV problems caused by zero-residual lookahead blocks. */
int lowresPenalty = 4;
// Frame distance to the reference in each direction; used to index the per-distance arrays
int listDist[2] = { b - p0, p1 - b};
MV mvmin, mvmax;
// Best cost so far and the direction that produced it (see the legend before the last
// COPY2_IF_LT of the loop)
int bcost = tld.me.COST_MAX;
int listused = 0;
// TODO: restrict to slices boundaries
// establish search bounds that don't cross extended frame boundaries
// (the CU-grid extent plus a margin of 8 on every side)
mvmin.x = (int32_t)(-cuX * cuSize - 8);
mvmin.y = (int32_t)(-cuY * cuSize - 8);
mvmax.x = (int32_t)((widthInCU - cuX - 1) * cuSize + 8);
mvmax.y = (int32_t)((heightInCU - cuY - 1) * cuSize + 8);
// Iterate over the prediction directions: i = 0 forward, i = 1 backward (only when bBidir)
for (int i = 0; i < 1 + bBidir; i++)
{
// Stored cost slot for this CU, direction and reference distance (written below)
int& fencCost = hme ? fenc->lowerResMvCosts[i][listDist[i]][cuXY] : fenc->lowresMvCosts[i][listDist[i]][cuXY];
int skipCost = INT_MAX;
// No search requested for this direction: reuse the stored cost.
// COPY2_IF_LT is defined elsewhere; presumably it assigns both targets when the
// new cost is lower than the current one.
if (!bDoSearch[i])
{
COPY2_IF_LT(bcost, fencCost, listused, i + 1);
continue;
}
int numc = 0;
MV mvc[5], mvp;
// Pointer to this CU's stored MV; neighbouring CUs are reached through offsets from it
MV* fencMV = hme ? &fenc->lowerResMvs[i][listDist[i]][cuXY] : &fenc->lowresMvs[i][listDist[i]][cuXY];
ReferencePlanes* fref = i ? fref1 : wfref0;
/* Reverse-order MV prediction
Build the candidate list mvc from the right and below neighbours, i.e. the
mirror image of the usual left/above predictor positions. This presumes the
caller visits CUs in reverse order so those neighbours are already available
(the call order is not visible in this function). */
#define MVC(mv) mvc[numc++] = mv;
// Not the last column: add the MV of the CU to the right
if (cuX < widthInCU - 1)
MVC(fencMV[1]);
// Not the last row
if (!lastRow)
{
// Add the MV of the CU below
MVC(fencMV[widthInCU]);
// Not the first column
if (cuX > 0)
// Add the MV of the below-left CU
MVC(fencMV[widthInCU - 1]);
// Not the last column
if (cuX < widthInCU - 1)
// Add the MV of the below-right CU
MVC(fencMV[widthInCU + 1]);
}
// Non-hme pass only: when lower-resolution MVs exist and the stored lower-resolution
// cost at cuXY_4x4 is positive, add that block's MV scaled by 2 as a fifth candidate
if (fenc->lowerResMvs[0][0] && !hme && fenc->lowerResMvCosts[i][listDist[i]][cuXY_4x4] > 0)
{
MVC((fenc->lowerResMvs[i][listDist[i]][cuXY_4x4]) * 2);
}
#undef MVC
// No candidates: use the zero vector as mvp
if (!numc)
mvp = 0;
// Otherwise pick the cheapest candidate
else
{
ALIGN_VAR_32(pixel, subpelbuf[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE]);
int mvpcost = MotionEstimate::COST_MAX;
/* measure SATD cost of each neighbor MV (estimating merge analysis)
* and use the lowest cost MV as MVP (estimating AMVP). Since all
* mvc[] candidates are measured here, none are passed to motionEstimate */
// Evaluate every candidate in mvc
for (int idx = 0; idx < numc; idx++)
{
intptr_t stride = X265_LOWRES_CU_SIZE;
// Motion-compensate the reference with this candidate MV
pixel *src = fref->lowresMC(pelOffset, mvc[idx], subpelbuf, stride, hme);
// SATD of the prediction against the loaded source block
int cost = tld.me.bufSATD(src, stride);
// Keep the cheapest candidate as mvp
COPY2_IF_LT(mvpcost, cost, mvp, mvc[idx]);
/* Except for mv0 case, everything else is likely to have enough residual to not trigger the skip. */
// While the running best mvp is the zero vector (and bBidir), remember the cost as skipCost.
// NOTE(review): the test is on mvp, not on mvc[idx], so `cost` may belong to a
// different candidate than the zero MV - confirm this is intended.
if (!mvp.notZero() && bBidir)
skipCost = cost;
}
}
/* ME will never return a cost larger than the cost @MVP, so we do not
* have to check that ME cost is more than the estimated merge cost
* (the search starts from mvp, so its result is never worse than mvp itself) */
// Run motion estimation around mvp and store the returned cost in the slot referenced by
// fencCost; *fencMV is passed in and presumably receives the best MV. The hme pass
// additionally passes the reference's lowerResPlane[0].
if(!hme)
fencCost = tld.me.motionEstimate(fref, mvmin, mvmax, mvp, 0, NULL, s_merange, *fencMV, m_lookahead.m_param->maxSlices);
else
fencCost = tld.me.motionEstimate(fref, mvmin, mvmax, mvp, 0, NULL, s_merange, *fencMV, m_lookahead.m_param->maxSlices, fref->lowerResPlane[0]);
// With bBidir, if skipCost is below 64 and below the motion-estimation cost, treat the CU
// as a skip: store skipCost and a zero MV
if (skipCost < 64 && skipCost < fencCost && bBidir)
{
fencCost = skipCost;
*fencMV = 0;
}
// Update the best cost and record the direction that produced it
// listused = 0 intra
// = 1 forward
// = 2 backward
// = 3 bidirectional
COPY2_IF_LT(bcost, fencCost, listused, i + 1);
} // end of for (int i = 0; i < 1 + bBidir; i++)
// The hme pass only fills the lowerRes MVs/costs; nothing below applies to it
if (hme)
return;
// Backward reference available: also evaluate the bidirectional candidates
if (bBidir) /* B, also consider bidir */
{
/* NOTE: the wfref0 (weightp) is not used for BIDIR */
/* avg(l0-mv, l1-mv) candidate */
ALIGN_VAR_32(pixel, subpelbuf0[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE]);
ALIGN_VAR_32(pixel, subpelbuf1[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE]);
intptr_t stride0 = X265_LOWRES_CU_SIZE, stride1 = X265_LOWRES_CU_SIZE;
// Motion-compensate each reference with the MV stored for its direction
pixel *src0 = fref0->lowresMC(pelOffset, fenc->lowresMvs[0][listDist[0]][cuXY], subpelbuf0, stride0, 0);
pixel *src1 = fref1->lowresMC(pelOffset, fenc->lowresMvs[1][listDist[1]][cuXY], subpelbuf1, stride1, 0);
ALIGN_VAR_32(pixel, ref[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE]);
// Average the two predictions into ref
primitives.pu[LUMA_8x8].pixelavg_pp[NONALIGNED](ref, X265_LOWRES_CU_SIZE, src0, stride0, src1, stride1, 32);
// SATD of the averaged prediction
int bicost = tld.me.bufSATD(ref, X265_LOWRES_CU_SIZE);
// Keep it if it is the best so far (direction 3 = bidirectional)
COPY2_IF_LT(bcost, bicost, listused, 3);
/* co-located candidate */
// Co-located block in the forward reference
src0 = fref0->lowresPlane[0] + pelOffset;
// Co-located block in the backward reference
src1 = fref1->lowresPlane[0] + pelOffset;
// Average the two co-located blocks into ref
primitives.pu[LUMA_8x8].pixelavg_pp[NONALIGNED](ref, X265_LOWRES_CU_SIZE, src0, fref0->lumaStride, src1, fref1->lumaStride, 32);
// SATD of the co-located average
bicost = tld.me.bufSATD(ref, X265_LOWRES_CU_SIZE);
// Keep it if it is the best so far
COPY2_IF_LT(bcost, bicost, listused, 3);
// Inter costs carry the lowresPenalty bias
bcost += lowresPenalty;
}
// No backward reference: compare the inter cost against the stored intra cost
else /* P, also consider intra */
{
// Inter costs carry the lowresPenalty bias
bcost += lowresPenalty;
// Intra is cheaper than the penalised inter cost: take the intra cost and mark the CU as intra
if (fenc->intraCost[cuXY] < bcost)
{
bcost = fenc->intraCost[cuXY];
listused = 0; // 0 means intra
}
}
/* do not include edge blocks in the frame cost estimates, they are not very accurate */
// A CU is scored when it is not on the frame border, or when the frame is at most 2 CUs
// wide or high
const bool bFrameScoreCU = (cuX > 0 && cuX < widthInCU - 1 &&
cuY > 0 && cuY < heightInCU - 1) || widthInCU <= 2 || heightInCU <= 2;
// AQ-weighted cost: (bcost * factor + 128) >> 8 for scored CUs when the factor table exists,
// otherwise bcost unchanged. The table is invQscaleFactor8x8 when rc.qgSize == 8.
// NOTE(review): both branches test fenc->invQscaleFactor, including the 8x8 one - confirm
// the two tables are always allocated together.
int bcostAq;
if (m_lookahead.m_param->rc.qgSize == 8)
bcostAq = (bFrameScoreCU && fenc->invQscaleFactor) ? ((bcost * fenc->invQscaleFactor8x8[cuXY] + 128) >> 8) : bcost;
else
bcostAq = (bFrameScoreCU && fenc->invQscaleFactor) ? ((bcost * fenc->invQscaleFactor[cuXY] +128) >> 8) : bcost;
// Scored CUs add their cost and AQ cost to the frame totals (slice < 0) or to the slice
// totals; intra CUs found when there is no backward reference are counted as well
if (bFrameScoreCU)
{
if (slice < 0)
{
fenc->costEst[b - p0][p1 - b] += bcost;
fenc->costEstAq[b - p0][p1 - b] += bcostAq;
if (!listused && !bBidir)
fenc->intraMbs[b - p0]++;
}
else
{
m_slice[slice].costEst += bcost;
m_slice[slice].costEstAq += bcostAq;
if (!listused && !bBidir)
m_slice[slice].intraMbs++;
}
}
// Every CU (edge CUs included) adds its AQ cost to the row total
fenc->rowSatds[b - p0][p1 - b][cuY] += bcostAq;
// Store the per-CU result: cost clamped to LOWRES_COST_MASK, with listused packed in at
// LOWRES_COST_SHIFT
fenc->lowresCosts[b - p0][p1 - b][cuXY] = (uint16_t)(X265_MIN(bcost, LOWRES_COST_MASK) | (listused << LOWRES_COST_SHIFT));
}
// End of CostEstimateGroup::estimateCUCost()