/*
 * Search::codeIntraLumaQT
 *
 * Evaluates the intra luma transform unit (TU) at (tuDepth, absPartIdx) and adds the cost of
 * the cheaper of two alternatives to outCost:
 *   - "full":  the block is coded as one TU at this depth;
 *   - "split": the block is divided into four sub-TUs, each evaluated recursively.
 *
 * For the full candidate the function runs prediction, residual, transform + quantization,
 * inverse transform and reconstruction, so the distortion is an SSE between the source and
 * the actual reconstruction, and the bits are obtained by entropy-coding the syntax elements.
 * The RD cost is derived from that distortion and bit count (plus an energy term when psy-rd
 * or ssim-rd is active).
 *
 * Parameters:
 *   mode        - supplies the CU data (mode.cu), the source pixels (mode.fencYuv) and the
 *                 prediction buffer (mode.predYuv).
 *   cuGeom      - CU geometry; depth, log2CUSize, numPartitions and absPartIdx are read.
 *   tuDepth     - TU depth below the CU; log2TrSize = cuGeom.log2CUSize - tuDepth.
 *   absPartIdx  - partition index of this TU, used to address all per-partition buffers.
 *   bAllowSplit - when false, a split is only tried if the unsplit TU is not allowed.
 *   outCost     - accumulator; rdcost, distortion, bits and energy of the winner are ADDED.
 *   depthRange  - depthRange[0] / depthRange[1] are the lower / upper bounds that log2TrSize
 *                 is compared against.
 *
 * Procedure:
 *   1. Load CU data and derive depth / size values.
 *   2. Decide mightNotSplit and mightSplit.
 *   3. If mightNotSplit, evaluate the unsplit TU:
 *        1. If mightSplit as well, store the entropy-coder state in rqtRoot so the split
 *           evaluation can later start from the same context.
 *        2. Gather the neighbouring reference-sample information.
 *        3. Prepare the reference samples (initAdiPattern).
 *        4. Predict into pred using the intra direction of this partition.
 *        5. Clear the transform-skip flag and record the TU depth.
 *        6. Compute the residual from fenc and pred.
 *        7. Transform and quantize the residual; numSig is the number of non-zero coefficients.
 *        8. Build the reconstruction:
 *             - numSig != 0: inverse transform, then recon = pred + residual;
 *             - otherwise:   recon = pred.
 *        9. Set the CBF from numSig.
 *       10. Compute the SSE distortion between recon and fenc.
 *       11. Count the bits:
 *             1. Reset the bit counter.
 *             2. If absPartIdx == 0:
 *                  - in a non-intra slice, code the transquant-bypass flag (when the PPS
 *                    enables it), the skip flag and the prediction mode;
 *                  - code the partition size.
 *             3. Code the intra luma direction(s).
 *             4. If log2TrSize != depthRange[0], code the subdivision flag as 0.
 *             5. Code the luma CBF.
 *             6. If the CBF is set, code the coefficients.
 *             7. Read back the number of bits written.
 *             8. If rdPenalty is on, log2TrSize == 5 and the slice is not an I slice,
 *                multiply the bits by 4.
 *       12. Compute the RD cost (psy / ssim / plain) into fullCost.
 *      Otherwise fullCost.rdcost is set to MAX_INT64.
 *   4. If mightSplit, evaluate the split:
 *        1. If mightNotSplit, save the post-full-TU entropy state in rqtTest and reload rqtRoot.
 *        2. Decide whether the sub-TUs go through the transform-skip check.
 *        3. For each of the four sub-TUs:
 *             1. Recurse via codeIntraLumaTSkip (transform-skip check) or codeIntraLumaQT.
 *             2. OR the sub-TU's CBF into a combined value.
 *        4. Merge the combined CBF into the CU's luma CBF at this depth.
 *        5. If mightNotSplit and log2TrSize != depthRange[0]: add the bits of a subdivision
 *           flag coded as 1 and recompute splitCost.rdcost.
 *        6. Compare:
 *             - split cheaper: add splitCost to outCost and return;
 *             - otherwise: reload rqtTest and restore TU depth, CBF and transform-skip flag.
 *   5. Reaching this point means the unsplit TU won: copy its reconstruction into the
 *      reconstructed picture.
 *   6. Add fullCost to outCost.
 */
void Search::codeIntraLumaQT(Mode& mode, const CUGeom& cuGeom, uint32_t tuDepth, uint32_t absPartIdx, bool bAllowSplit, Cost& outCost, const uint32_t depthRange[2])
{
// CU data being analysed
CUData& cu = mode.cu;
// fullDepth = CU depth + TU depth
uint32_t fullDepth = cuGeom.depth + tuDepth;
// log2 of the TU size at this depth
uint32_t log2TrSize = cuGeom.log2CUSize - tuDepth;
// Both indices are log2TrSize - 2: qtLayer selects the m_rqt entry, sizeIdx the primitives.cu entry
uint32_t qtLayer = log2TrSize - 2;
uint32_t sizeIdx = log2TrSize - 2;
// The TU may stay unsplit as long as its size does not exceed the upper bound
bool mightNotSplit = log2TrSize <= depthRange[1];
// The TU may be split if it is above the lower bound and either splitting is allowed or
// coding it unsplit is not possible
bool mightSplit = (log2TrSize > depthRange[0]) && (bAllowSplit || !mightNotSplit);
bool bEnableRDOQ = !!m_param->rdoqLevel;
/* If maximum RD penalty, force splits at TU size 32x32 if SPS allows TUs of 16x16.
That is: rdPenalty == 2, not an I slice, log2TrSize == 5 and depthRange[0] <= 4. */
if (m_param->rdPenalty == 2 && m_slice->m_sliceType != I_SLICE && log2TrSize == 5 && depthRange[0] <= 4)
{
mightNotSplit = false;
mightSplit = true;
}
/* fullCost is the cost of coding this block as a single TU at the current depth;
its counterpart splitCost (declared below) is the cost of coding it as four sub-TUs. */
Cost fullCost;
// CBF value of the unsplit TU, kept so it can be restored if the split loses
uint32_t bCBF = 0;
// Reconstruction buffer of this quadtree layer
pixel* reconQt = m_rqt[qtLayer].reconQtYuv.getLumaAddr(absPartIdx);
uint32_t reconQtStride = m_rqt[qtLayer].reconQtYuv.m_size;
/*
If the TU may stay unsplit, compute the cost of not splitting (fullCost).
*/
if (mightNotSplit)
{
// If a split will also be tried, store the current entropy-coder state in rqtRoot so the
// split evaluation can start from the same context
if (mightSplit)
m_entropyCoder.store(m_rqt[fullDepth].rqtRoot);
// Source (original) luma pixels
const pixel* fenc = mode.fencYuv->getLumaAddr(absPartIdx);
// Prediction buffer
pixel* pred = mode.predYuv.getLumaAddr(absPartIdx);
// Residual buffer
int16_t* residual = m_rqt[cuGeom.depth].tmpResiYuv.getLumaAddr(absPartIdx);
// Taken from fencYuv but used below as the stride of fenc, pred and residual alike
uint32_t stride = mode.fencYuv->m_size;
// init availability pattern
uint32_t lumaPredMode = cu.m_lumaIntraDir[absPartIdx];
IntraNeighbors intraNeighbors;
// Gather the neighbouring reference information for this TU
initIntraNeighbors(cu, absPartIdx, tuDepth, true, &intraNeighbors);
// Prepare the reference samples for lumaPredMode
// (presumably padding of unavailable samples and smoothing - confirm in initAdiPattern)
initAdiPattern(cu, cuGeom, absPartIdx, intraNeighbors, lumaPredMode);
// get prediction signal: predict with the intra direction, output goes to pred
predIntraLumaAng(lumaPredMode, pred, stride, log2TrSize);
// Clear the luma transform-skip flag for this TU
cu.setTransformSkipSubParts(0, TEXT_LUMA, absPartIdx, fullDepth);
// Record tuDepth as the TU depth of this partition
cu.setTUDepthSubParts(tuDepth, absPartIdx, fullDepth);
// Coefficient offset: absPartIdx scaled by 2^(2 * LOG2_UNIT_SIZE)
uint32_t coeffOffsetY = absPartIdx << (LOG2_UNIT_SIZE * 2);
coeff_t* coeffY = m_rqt[qtLayer].coeffRQT[0] + coeffOffsetY;
// With RDOQ enabled, have the entropy coder fill m_estBitsSbac for this TU size before
// quantization (presumably the bit estimates RDOQ consumes inside transformNxN - confirm)
if (bEnableRDOQ)
m_entropyCoder.estBit(m_entropyCoder.m_estBitsSbac, log2TrSize, true);
// Compute the residual from fenc and pred; the variant is selected by (stride % 64 == 0)
primitives.cu[sizeIdx].calcresidual[stride % 64 == 0](fenc, pred, residual, stride);
// Transform and quantize the residual into coeffY; numSig is the number of non-zero coefficients
uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeffY, log2TrSize, TEXT_LUMA, absPartIdx, false);
/* Build the reconstruction */
if (numSig) // there are non-zero coefficients
{
// Inverse transform back into the residual buffer
m_quant.invtransformNxN(cu, residual, stride, coeffY, log2TrSize, TEXT_LUMA, true, false, numSig);
// Use the index-1 add_ps variant only when every stride and buffer offset is a multiple of 64
bool reconQtYuvAlign = m_rqt[qtLayer].reconQtYuv.getAddrOffset(absPartIdx, mode.predYuv.m_size) % 64 == 0;
bool predAlign = mode.predYuv.getAddrOffset(absPartIdx, mode.predYuv.m_size) % 64 == 0;
bool residualAlign = m_rqt[cuGeom.depth].tmpResiYuv.getAddrOffset(absPartIdx, mode.predYuv.m_size) % 64 == 0;
bool bufferAlignCheck = (reconQtStride % 64 == 0) && (stride % 64 == 0) && reconQtYuvAlign && predAlign && residualAlign;
// recon = pred + residual
primitives.cu[sizeIdx].add_ps[bufferAlignCheck](reconQt, reconQtStride, pred, residual, stride, stride);
}
else
// no coded residual, recon = pred: copy pred into the reconstruction buffer
primitives.cu[sizeIdx].copy_pp(reconQt, reconQtStride, pred, stride);
// CBF bit for this TU depth
bCBF = !!numSig << tuDepth;
// Store the CBF in the CU
cu.setCbfSubParts(bCBF, TEXT_LUMA, absPartIdx, fullDepth);
// SSE distortion between the reconstruction and the source
fullCost.distortion = primitives.cu[sizeIdx].sse_pp(reconQt, reconQtStride, fenc, stride);
/*
At this point the distortion is exact: it was measured between fenc and the real reconstruction.
*/
// Reset the bit counter
m_entropyCoder.resetBits();
// CU-level syntax is only coded for the first partition
if (!absPartIdx)
{
// Not an intra slice
if (!cu.m_slice->isIntra())
{
// Code the transquant-bypass flag when the PPS enables it
if (cu.m_slice->m_pps->bTransquantBypassEnabled)
m_entropyCoder.codeCUTransquantBypassFlag(cu.m_tqBypass[0]);
// Code the skip flag
m_entropyCoder.codeSkipFlag(cu, 0);
// Code the prediction mode
m_entropyCoder.codePredMode(cu.m_predMode[0]);
}
// Code the partition size
m_entropyCoder.codePartSize(cu, 0, cuGeom.depth);
}
/* Code the intra luma direction(s) */
// SIZE_2Nx2N: a single direction, coded once with the first partition
if (cu.m_partSize[0] == SIZE_2Nx2N)
{
if (!absPartIdx)
m_entropyCoder.codeIntraDirLumaAng(cu, 0, false);
}
// Any other partition size: one direction per quarter of the CU
else
{
uint32_t qNumParts = cuGeom.numPartitions >> 2;
// At tuDepth 0 this TU covers the whole CU, so code the direction of all four quarters
if (!tuDepth)
{
for (uint32_t qIdx = 0; qIdx < 4; ++qIdx)
m_entropyCoder.codeIntraDirLumaAng(cu, qIdx * qNumParts, false);
}
// Deeper TUs: code the direction only when absPartIdx is a multiple of qNumParts,
// i.e. this TU is the first one inside its quarter
else if (!(absPartIdx & (qNumParts - 1)))
m_entropyCoder.codeIntraDirLumaAng(cu, absPartIdx, false);
}
// Code the subdivision flag as 0 unless the TU is already at the lower size bound
if (log2TrSize != depthRange[0])
m_entropyCoder.codeTransformSubdivFlag(0, 5 - log2TrSize);
// Code the luma CBF
m_entropyCoder.codeQtCbfLuma(!!numSig, tuDepth);
// If the CBF is set there is a residual: code the coefficients
if (cu.getCbf(absPartIdx, TEXT_LUMA, tuDepth))
m_entropyCoder.codeCoeffNxN(cu, coeffY, absPartIdx, log2TrSize, TEXT_LUMA);
// Total bits written since resetBits()
fullCost.bits = m_entropyCoder.getNumberOfWrittenBits();
// With rdPenalty on, a TU with log2TrSize == 5 in a non-I slice has its bits multiplied by 4
if (m_param->rdPenalty && log2TrSize == 5 && m_slice->m_sliceType != I_SLICE)
fullCost.bits *= 4;
// Compute the RD cost from the distortion and bits; psy-rd and ssim-rd also compute an energy term
if (m_rdCost.m_psyRd)
{
fullCost.energy = m_rdCost.psyCost(sizeIdx, fenc, mode.fencYuv->m_size, reconQt, reconQtStride);
fullCost.rdcost = m_rdCost.calcPsyRdCost(fullCost.distortion, fullCost.bits, fullCost.energy);
}
else if(m_rdCost.m_ssimRd)
{
fullCost.energy = m_quant.ssimDistortion(cu, fenc, stride, reconQt, reconQtStride, log2TrSize, TEXT_LUMA, absPartIdx);
fullCost.rdcost = m_rdCost.calcSsimRdCost(fullCost.distortion, fullCost.bits, fullCost.energy);
}
else
fullCost.rdcost = m_rdCost.calcRdCost(fullCost.distortion, fullCost.bits);
}
// !mightNotSplit: the TU must be split, so the unsplit cost is set to the maximum
else
fullCost.rdcost = MAX_INT64;
/*
If the TU may be split, compute the cost of splitting (splitCost).
*/
if (mightSplit)
{
// If the unsplit TU was evaluated, keep its resulting entropy state and go back to the
// state from before that evaluation
if (mightNotSplit)
{
// Save the entropy-coder state in rqtTest
m_entropyCoder.store(m_rqt[fullDepth].rqtTest); // save state after full TU encode
// Reload the entropy-coder state stored in rqtRoot
m_entropyCoder.load(m_rqt[fullDepth].rqtRoot); // prep state of split encode
}
/* code split block */
// Partitions per sub-TU. '*' binds tighter than '<<', so this is 1 << (2 * (log2TrSize - 1 - LOG2_UNIT_SIZE))
uint32_t qNumParts = 1 << (log2TrSize - 1 - LOG2_UNIT_SIZE) * 2;
// Check transform skip for the sub-TUs only if the PPS enables it, the sub-TU size
// (log2TrSize - 1) is within MAX_LOG2_TS_SIZE and the CU is not transquant-bypassed
int checkTransformSkip = m_slice->m_pps->bTransformSkipEnabled && (log2TrSize - 1) <= MAX_LOG2_TS_SIZE && !cu.m_tqBypass[0];
// With bEnableTSkipFast the check is additionally limited to CUs that are not SIZE_2Nx2N
if (m_param->bEnableTSkipFast)
checkTransformSkip &= cu.m_partSize[0] != SIZE_2Nx2N;
// Accumulates the costs added by the four recursive calls below
// (presumably Cost default-constructs to zero - confirm against the Cost definition)
Cost splitCost;
uint32_t cbf = 0;
// Visit the four sub-TUs
for (uint32_t qIdx = 0, qPartIdx = absPartIdx; qIdx < 4; ++qIdx, qPartIdx += qNumParts)
{
// Recurse into the sub-TU, via the transform-skip variant when that check is enabled
if (checkTransformSkip)
codeIntraLumaTSkip(mode, cuGeom, tuDepth + 1, qPartIdx, splitCost);
else
codeIntraLumaQT(mode, cuGeom, tuDepth + 1, qPartIdx, bAllowSplit, splitCost, depthRange);
// OR together the CBFs of the four sub-TUs
cbf |= cu.getCbf(qPartIdx, TEXT_LUMA, tuDepth + 1);
}
// Merge the combined CBF into the luma CBF (plane 0) of this partition at bit tuDepth
cu.m_cbf[0][absPartIdx] |= (cbf << tuDepth);
// Only when the unsplit TU was a real alternative and the TU is above the lower size bound
if (mightNotSplit && log2TrSize != depthRange[0])
{
/* If we could have coded this TU depth, include cost of subdiv flag */
// Reset the bit counter
m_entropyCoder.resetBits();
// Code the subdivision flag as 1
m_entropyCoder.codeTransformSubdivFlag(1, 5 - log2TrSize);
// Add the bits of the subdivision flag
splitCost.bits += m_entropyCoder.getNumberOfWrittenBits();
// Recompute the RD cost from the accumulated distortion, bits and energy
if (m_rdCost.m_psyRd)
splitCost.rdcost = m_rdCost.calcPsyRdCost(splitCost.distortion, splitCost.bits, splitCost.energy);
else if(m_rdCost.m_ssimRd)
splitCost.rdcost = m_rdCost.calcSsimRdCost(splitCost.distortion, splitCost.bits, splitCost.energy);
else
splitCost.rdcost = m_rdCost.calcRdCost(splitCost.distortion, splitCost.bits);
} //end of if (mightNotSplit && log2TrSize != depthRange[0])
/*
Compare the unsplit and split costs and keep the cheaper one's
rdcost, distortion, bits, energy, transform-skip flag and CBF.
*/
// Split is cheaper: add its cost to outCost and return. This call does not copy reconQt
// into the reconstructed picture on this path.
if (splitCost.rdcost < fullCost.rdcost)
{
outCost.rdcost += splitCost.rdcost;
outCost.distortion += splitCost.distortion;
outCost.bits += splitCost.bits;
outCost.energy += splitCost.energy;
return;
}
// The unsplit TU is at least as cheap
else
{
// recover entropy state of full-size TU encode
m_entropyCoder.load(m_rqt[fullDepth].rqtTest);
// recover transform index and Cbf values (the recursive calls overwrote them)
cu.setTUDepthSubParts(tuDepth, absPartIdx, fullDepth);
cu.setCbfSubParts(bCBF, TEXT_LUMA, absPartIdx, fullDepth);
cu.setTransformSkipSubParts(0, TEXT_LUMA, absPartIdx, fullDepth);
}
} //end of if (mightSplit)
/* set reconstruction for next intra prediction blocks if full TU prediction won.
Only reached when the unsplit TU won (the split path returns above): copy this layer's
reconstruction into the frame's reconstructed picture. */
PicYuv* reconPic = m_frame->m_reconPic;
pixel* picReconY = reconPic->getLumaAddr(cu.m_cuAddr, cuGeom.absPartIdx + absPartIdx);
intptr_t picStride = reconPic->m_stride;
primitives.cu[sizeIdx].copy_pp(picReconY, picStride, reconQt, reconQtStride);
// Add the unsplit cost to the output
outCost.rdcost += fullCost.rdcost;
outCost.distortion += fullCost.distortion;
outCost.bits += fullCost.bits;
outCost.energy += fullCost.energy;
}
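// End of Search::codeIntraLumaQT()
// (Web-page residue from the article this listing was copied from: "Post a comment" /
// "All comments" / "No comments yet - want to be the first? Enter one in the comment box
// above and click publish.")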