Search::encodeResAndCalcRdSkipCU()
void Search::encodeResAndCalcRdSkipCU(Mode& interMode)
{
CUData& cu = interMode.cu;
Yuv* reconYuv = &interMode.reconYuv;
const Yuv* fencYuv = interMode.fencYuv;
Yuv* predYuv = &interMode.predYuv;
X265_CHECK(!cu.isIntra(0), "intra CU not expected\n");
uint32_t depth = cu.m_cuDepth[0];
cu.setPredModeSubParts(MODE_SKIP);
cu.clearCbf();
cu.setTUDepthSubParts(0, 0, depth);
reconYuv->copyFromYuv(interMode.predYuv);
int part = partitionFromLog2Size(cu.m_log2CUSize[0]);
interMode.lumaDistortion = primitives.cu[part].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size);
interMode.distortion = interMode.lumaDistortion;
if (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400)
{
interMode.chromaDistortion = m_rdCost.scaleChromaDist(1, primitives.chroma[m_csp].cu[part].sse_pp(fencYuv->m_buf[1], fencYuv->m_csize, reconYuv->m_buf[1], reconYuv->m_csize));
interMode.chromaDistortion += m_rdCost.scaleChromaDist(2, primitives.chroma[m_csp].cu[part].sse_pp(fencYuv->m_buf[2], fencYuv->m_csize, reconYuv->m_buf[2], reconYuv->m_csize));
interMode.distortion += interMode.chromaDistortion;
}
cu.m_distortion[0] = interMode.distortion;
m_entropyCoder.load(m_rqt[depth].cur);
m_entropyCoder.resetBits();
if (m_slice->m_pps->bTransquantBypassEnabled)
m_entropyCoder.codeCUTransquantBypassFlag(cu.m_tqBypass[0]);
m_entropyCoder.codeSkipFlag(cu, 0);
int skipFlagBits = m_entropyCoder.getNumberOfWrittenBits();
m_entropyCoder.codeMergeIndex(cu, 0);
interMode.mvBits = m_entropyCoder.getNumberOfWrittenBits() - skipFlagBits;
interMode.coeffBits = 0;
interMode.totalBits = interMode.mvBits + skipFlagBits;
if (m_rdCost.m_psyRd)
interMode.psyEnergy = m_rdCost.psyCost(part, fencYuv->m_buf[0], fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size);
else if(m_rdCost.m_ssimRd)
interMode.ssimEnergy = m_quant.ssimDistortion(cu, fencYuv->m_buf[0], fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size, cu.m_log2CUSize[0], TEXT_LUMA, 0);
interMode.resEnergy = primitives.cu[part].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size);
updateModeCost(interMode);
m_entropyCoder.store(interMode.contexts);
}
Search::encodeResAndCalcRdInterCU()
void Search::encodeResAndCalcRdInterCU(Mode& interMode, const CUGeom& cuGeom)
{
ProfileCUScope(interMode.cu, interRDOElapsedTime[cuGeom.depth], countInterRDO[cuGeom.depth]);
CUData& cu = interMode.cu;
Yuv* reconYuv = &interMode.reconYuv;
Yuv* predYuv = &interMode.predYuv;
const Yuv* fencYuv = interMode.fencYuv;
uint32_t depth = cuGeom.depth;
ShortYuv* resiYuv = &m_rqt[depth].tmpResiYuv;
X265_CHECK(!cu.isIntra(0), "intra CU not expected\n");
uint32_t log2CUSize = cuGeom.log2CUSize;
int sizeIdx = log2CUSize - 2;
resiYuv->subtract(*fencYuv, *predYuv, log2CUSize, m_frame->m_fencPic->m_picCsp);
uint32_t tuDepthRange[2];
cu.getInterTUQtDepthRange(tuDepthRange, 0);
m_entropyCoder.load(m_rqt[depth].cur);
if ((m_limitTU & X265_TU_LIMIT_DFS) && !(m_limitTU & X265_TU_LIMIT_NEIGH))
m_maxTUDepth = -1;
else if (m_limitTU & X265_TU_LIMIT_BFS)
memset(&m_cacheTU, 0, sizeof(TUInfoCache));
Cost costs;
if (m_limitTU & X265_TU_LIMIT_NEIGH)
{
int32_t tempDepth = m_maxTUDepth;
if (m_maxTUDepth != -1)
{
uint32_t splitFlag = interMode.cu.m_partSize[0] != SIZE_2Nx2N;
uint32_t minSize = tuDepthRange[0];
uint32_t maxSize = tuDepthRange[1];
maxSize = X265_MIN(maxSize, cuGeom.log2CUSize - splitFlag);
m_maxTUDepth = x265_clip3(cuGeom.log2CUSize - maxSize, cuGeom.log2CUSize - minSize, (uint32_t)m_maxTUDepth);
}
estimateResidualQT(interMode, cuGeom, 0, 0, *resiYuv, costs, tuDepthRange);
m_maxTUDepth = tempDepth;
}
else
estimateResidualQT(interMode, cuGeom, 0, 0, *resiYuv, costs, tuDepthRange);
uint32_t tqBypass = cu.m_tqBypass[0];
if (!tqBypass)
{
sse_t cbf0Dist = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size);
if (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400)
{
cbf0Dist += m_rdCost.scaleChromaDist(1, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[1], predYuv->m_csize, predYuv->m_buf[1], predYuv->m_csize));
cbf0Dist += m_rdCost.scaleChromaDist(2, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[2], predYuv->m_csize, predYuv->m_buf[2], predYuv->m_csize));
}
m_entropyCoder.load(m_rqt[depth].cur);
m_entropyCoder.resetBits();
m_entropyCoder.codeQtRootCbfZero();
uint32_t cbf0Bits = m_entropyCoder.getNumberOfWrittenBits();
uint32_t cbf0Energy; uint64_t cbf0Cost;
if (m_rdCost.m_psyRd)
{
cbf0Energy = m_rdCost.psyCost(log2CUSize - 2, fencYuv->m_buf[0], fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size);
cbf0Cost = m_rdCost.calcPsyRdCost(cbf0Dist, cbf0Bits, cbf0Energy);
}
else if(m_rdCost.m_ssimRd)
{
cbf0Energy = m_quant.ssimDistortion(cu, fencYuv->m_buf[0], fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size, log2CUSize, TEXT_LUMA, 0);
cbf0Cost = m_rdCost.calcSsimRdCost(cbf0Dist, cbf0Bits, cbf0Energy);
}
else
cbf0Cost = m_rdCost.calcRdCost(cbf0Dist, cbf0Bits);
if (cbf0Cost < costs.rdcost)
{
cu.clearCbf();
cu.setTUDepthSubParts(0, 0, depth);
}
}
if (cu.getQtRootCbf(0))
saveResidualQTData(cu, *resiYuv, 0, 0);
m_entropyCoder.load(m_rqt[depth].cur);
m_entropyCoder.resetBits();
if (m_slice->m_pps->bTransquantBypassEnabled)
m_entropyCoder.codeCUTransquantBypassFlag(tqBypass);
uint32_t coeffBits, bits, mvBits;
if (cu.m_mergeFlag[0] && cu.m_partSize[0] == SIZE_2Nx2N && !cu.getQtRootCbf(0))
{
cu.setPredModeSubParts(MODE_SKIP);
coeffBits = mvBits = 0;
m_entropyCoder.codeSkipFlag(cu, 0);
int skipFlagBits = m_entropyCoder.getNumberOfWrittenBits();
m_entropyCoder.codeMergeIndex(cu, 0);
mvBits = m_entropyCoder.getNumberOfWrittenBits() - skipFlagBits;
bits = mvBits + skipFlagBits;
}
else
{
m_entropyCoder.codeSkipFlag(cu, 0);
int skipFlagBits = m_entropyCoder.getNumberOfWrittenBits();
m_entropyCoder.codePredMode(cu.m_predMode[0]);
m_entropyCoder.codePartSize(cu, 0, cuGeom.depth);
m_entropyCoder.codePredInfo(cu, 0);
mvBits = m_entropyCoder.getNumberOfWrittenBits() - skipFlagBits;
bool bCodeDQP = m_slice->m_pps->bUseDQP;
m_entropyCoder.codeCoeff(cu, 0, bCodeDQP, tuDepthRange);
bits = m_entropyCoder.getNumberOfWrittenBits();
coeffBits = bits - mvBits - skipFlagBits;
}
m_entropyCoder.store(interMode.contexts);
if (cu.getQtRootCbf(0))
reconYuv->addClip(*predYuv, *resiYuv, log2CUSize, m_frame->m_fencPic->m_picCsp);
else
reconYuv->copyFromYuv(*predYuv);
sse_t bestLumaDist = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size);
interMode.distortion = bestLumaDist;
if (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400)
{
sse_t bestChromaDist = m_rdCost.scaleChromaDist(1, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[1], fencYuv->m_csize, reconYuv->m_buf[1], reconYuv->m_csize));
bestChromaDist += m_rdCost.scaleChromaDist(2, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[2], fencYuv->m_csize, reconYuv->m_buf[2], reconYuv->m_csize));
interMode.chromaDistortion = bestChromaDist;
interMode.distortion += bestChromaDist;
}
if (m_rdCost.m_psyRd)
interMode.psyEnergy = m_rdCost.psyCost(sizeIdx, fencYuv->m_buf[0], fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size);
else if(m_rdCost.m_ssimRd)
interMode.ssimEnergy = m_quant.ssimDistortion(cu, fencYuv->m_buf[0], fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size, cu.m_log2CUSize[0], TEXT_LUMA, 0);
interMode.resEnergy = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size);
interMode.totalBits = bits;
interMode.lumaDistortion = bestLumaDist;
interMode.coeffBits = coeffBits;
interMode.mvBits = mvBits;
cu.m_distortion[0] = interMode.distortion;
updateModeCost(interMode);
checkDQP(interMode, cuGeom);
}