// estIntraPredLumaQT 函數用於從編號 0–66 的 intra modes(共 67 種)中選出最佳模式(uiBestPUMode)
// Chooses the luma intra prediction mode for the PU(s) of `cu` and writes the
// winner to pu.intraDir[0] (tracked locally as uiBestPUMode).
//
// Flow, as visible in this function:
//  1. Look up how many modes get a full rate-distortion (RD) check
//     (numModesForFullRD).
//  2. If that is fewer than NUM_LUMA_MODE, run a cheap first pass over the
//     modes: predict, measure distortion with distParam.distFunc, add the mode
//     signalling bits scaled by sqrtLambdaForFirstPass, and keep the best
//     candidates in uiRdModeList. Optionally append the most probable modes
//     (MPMs) that did not make the list. Otherwise every mode is a candidate.
//  3. Optionally (PBINTRA fast) shrink the candidate list, or give up on intra
//     for this CU, by comparing the first-pass distortions against cs.interHad.
//  4. Run xRecurIntraCodingLumaQT for every remaining candidate and keep the
//     coding structure with the lowest cost.
//  5. Copy the best result back into `cs` and restore the CABAC contexts that
//     were saved on entry.
//
// cu          - coding unit to search; must own at least one PU (CHECKed below).
// partitioner - supplies the current area and channel type; it is split and
//               advanced here only when HEVC_USE_PART_SIZE is enabled and the CU
//               has more than one PU.
Void IntraSearch::estIntraPredLumaQT( CodingUnit &cu, Partitioner &partitioner )
{
CodingStructure &cs = *cu.cs;
const SPS &sps = *cs.sps;
//printf("pcv=%d WidthBit=%d\n", cs.pcv->rectCUs,uiWidthBit);
// (original author's debug note: cs.pcv->rectCUs was observed to be 1)
// Size indices for the current area's luma width and height, looked up in
// g_aucLog2 (presumably a log2 table - confirm). When rectCUs is off, the width
// index comes from CU::getIntraSizeIdx(cu) instead.
const UInt uiWidthBit = cs.pcv->rectCUs ? g_aucLog2[partitioner.currArea().lwidth() ] : CU::getIntraSizeIdx(cu);
const UInt uiHeightBit = g_aucLog2[partitioner.currArea().lheight()];
#if HEVC_USE_PART_SIZE
const UInt uiNumPU = CU::getNumPUs( cu );
#endif
// Lambda calculation at equivalent Qp of 4 is recommended because at that Qp, the quantization divisor is 1.
// Fetch the lambda used to weight mode bits against distortion in the first
// (fast) pass of the rate-distortion search.
const double sqrtLambdaForFirstPass = m_pcRdCost->getMotionLambda(cu.transQuantBypass) / double(1 << SCALE_BITS);
//===== loop over partitions =====
// Snapshots of the CABAC estimator contexts (all contexts, and the luma
// IPredMode sub-context only) so they can be restored before each trial.
const TempCtx ctxStart ( m_CtxCache, m_CABACEstimator->getCtx() );
const TempCtx ctxStartIntraMode ( m_CtxCache, SubCtx( Ctx::IPredMode[CHANNEL_TYPE_LUMA], m_CABACEstimator->getCtx() ) );
#if HEVC_USE_PART_SIZE
if( uiNumPU > 1 ) partitioner.splitCurrArea( TU_QUAD_SPLIT, cs );
#endif
CHECK( !cu.firstPU, "CU has no PUs" );
const bool keepResi = cs.pps->getPpsRangeExtension().getCrossComponentPredictionEnabledFlag() || KEEP_PRED_AND_RESI_SIGNALS;
// (original author's debug note, tag "lisx")
//printf("keepResi=%d\n", keepResi); always 0 (false) in the original author's runs
// NOTE(review): extraModes is never changed from 0 anywhere in this function.
UInt extraModes = 0; // add two extra modes, which would be used after uiMode <= DC_IDX is removed for cu.nsstIdx == 3
#if HEVC_USE_PART_SIZE
UInt puIndex = 0;
#endif
// These three lists record, for the modes taking part in the mode selection,
// the candidate modes ranked by distortion only (uiHadModeList), the first-pass
// costs (CandCostList) and the first-pass distortions (CandHadList).
static_vector<UInt, FAST_UDI_MAX_RDMODE_NUM> uiHadModeList;
static_vector<Double, FAST_UDI_MAX_RDMODE_NUM> CandCostList;
static_vector<Double, FAST_UDI_MAX_RDMODE_NUM> CandHadList;
// With HEVC_USE_PART_SIZE the block below is the body of a loop over all PUs;
// otherwise it is a plain scope that runs once for the CU's first PU.
#if HEVC_USE_PART_SIZE
for( auto &pu : CU::traversePUs( cu ) )
#else
auto &pu = *cu.firstPU;
#endif
{
CandHadList.clear();
CandCostList.clear();
uiHadModeList.clear();
CHECK(pu.cu != &cu, "PU is not contained in the CU");
//===== determine set of modes to be tested (using prediction signal only) =====
Int numModesAvailable = NUM_LUMA_MODE; // total number of Intra modes
static_vector< UInt, FAST_UDI_MAX_RDMODE_NUM > uiRdModeList;
Int numModesForFullRD = 3;
// (original author's debug note, tag "lisx")
//printf("pcv=%d WidthBit=%d\n", cs.pcv->rectCUs,uiWidthBit);
if( cs.pcv->rectCUs )
{
// How many modes take part in the full RD check, looked up by block
// width and height indices.
numModesForFullRD = g_aucIntraModeNumFast_UseMPM_2D[uiWidthBit - MIN_CU_LOG2][uiHeightBit - MIN_CU_LOG2];
}
else
{
// Width-only lookup; which table is used depends on the MPM setting.
numModesForFullRD = m_pcEncCfg->getFastUDIUseMPMEnabled() ? g_aucIntraModeNumFast_UseMPM[uiWidthBit] : g_aucIntraModeNumFast_NotUseMPM[uiWidthBit];
//printf("test cs.pcv->rectCUs\n");
}
#if INTRA_FULL_SEARCH
// Full search: every available mode goes through the full RD check.
numModesForFullRD = numModesAvailable;
#endif
{
// this should always be true
CHECK( !pu.Y().valid(), "PU is not valid" );
//===== init pattern for luma prediction =====
initIntraPatternChType( cu, pu.Y(), IntraPrediction::useFilteredIntraRefSamples( COMPONENT_Y, pu, false, pu ) );
if( numModesForFullRD != numModesAvailable )
{
// Fast first pass: rank the modes with a cheap cost so that only the
// best numModesForFullRD of them go through the full RD check.
CHECK( numModesForFullRD >= numModesAvailable, "Too many modes for full RD search" );
const CompArea &area = pu.Y();
PelBuf piOrg = cs.getOrgBuf(area); // original samples of the luma area
PelBuf piPred = cs.getPredBuf(area); // prediction samples of the luma area
DistParam distParam; // distortion-measurement parameters, filled in by setDistParam below
// Hadamard is requested unless the CU uses transquant bypass.
const Bool bUseHadamard = cu.transQuantBypass == 0;
// Configure the distortion measurement between piOrg and piPred.
m_pcRdCost->setDistParam(distParam, piOrg, piPred, sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, bUseHadamard);
distParam.applyWeight = false;
// NOTE(review): bSatdChecked is written in the loop below but never read
// anywhere in this function.
bool bSatdChecked[NUM_INTRA_MODE];
memset( bSatdChecked, 0, sizeof( bSatdChecked ) );
{
for( Int modeIdx = 0; modeIdx < numModesAvailable; modeIdx++ )
{
UInt uiMode = modeIdx;
Distortion uiSad = 0;
// Skip checking extended Angular modes in the first round of SATD
// i.e. skip every odd-numbered mode above DC_IDX.
if( uiMode > DC_IDX && ( uiMode & 1 ) )
{
continue;
}
bSatdChecked[uiMode] = true;
pu.intraDir[0] = modeIdx;
if( useDPCMForFirstPassIntraEstimation( pu, uiMode ) ) // DPCM path (original author's note: not studied for now)
{
encPredIntraDPCM( COMPONENT_Y, piOrg, piPred, uiMode );
}
else // regular intra prediction into piPred
{
predIntraAng( COMPONENT_Y, piPred, pu, IntraPrediction::useFilteredIntraRefSamples( COMPONENT_Y, pu, true, pu ) );
}
// use Hadamard transform here
// Distortion between original and prediction, as configured in distParam.
uiSad += distParam.distFunc(distParam);
// NB xFracModeBitsIntra will not affect the mode for chroma that may have already been pre-estimated.
m_CABACEstimator->getCtx() = SubCtx( Ctx::IPredMode[CHANNEL_TYPE_LUMA], ctxStartIntraMode );
// Bits needed to signal this mode, as returned by xFracModeBitsIntra.
UInt64 fracModeBits = xFracModeBitsIntra(pu, uiMode, CHANNEL_TYPE_LUMA);
Double cost = ( Double ) uiSad + ( Double ) fracModeBits * sqrtLambdaForFirstPass;
DTRACE( g_trace_ctx, D_INTRA_COST, "IntraHAD: %u, %llu, %f (%d)\n", uiSad, fracModeBits, cost, uiMode );
// Two rankings: by cost (feeds the full RD check) and by distortion only
// (feeds the PBINTRA test further down). Presumably updateCandList keeps
// the N best entries in ascending order - confirm against its definition.
updateCandList( uiMode, cost, uiRdModeList, CandCostList, numModesForFullRD + extraModes );
updateCandList( uiMode, uiSad, uiHadModeList, CandHadList, 3 + extraModes );
}
} // NSSTFlag
// forget the extra modes
uiRdModeList.resize( numModesForFullRD ); // uiRdModeList now holds numModesForFullRD entries
if( m_pcEncCfg->getFastUDIUseMPMEnabled() ) // append every MPM that is not in the list yet
{
unsigned numMPMs = pu.cs->pcv->numMPMs;
// Scratch array for the MPMs, taken from the stack via alloca.
unsigned *uiPreds = ( unsigned* ) alloca( numMPMs * sizeof( unsigned ) );
const Int numCand = PU::getIntraMPMs( pu, uiPreds );
for( Int j = 0; j < numCand; j++ )
{
Bool mostProbableModeIncluded = false;
Int mostProbableMode = uiPreds[j];
// Linear scan of the current candidate list for this MPM.
for( Int i = 0; i < numModesForFullRD; i++ )
{
mostProbableModeIncluded |= ( mostProbableMode == uiRdModeList[i] );
}
if( !mostProbableModeIncluded )
{
numModesForFullRD++;
uiRdModeList.push_back( mostProbableMode );
}
}
}
}
else
{
// No pre-selection: modes 0 .. numModesForFullRD-1 all become candidates.
for( Int i = 0; i < numModesForFullRD; i++ )
{
uiRdModeList.push_back( i );
//printf("testprint numModesForFullRD\n");
}
}
}
CHECK( numModesForFullRD != uiRdModeList.size(), "Inconsistent state!" );
// after this point, don't use numModesForFullRD
// PBINTRA fast
// In non-intra slices with a 2Nx2N CU and a pre-selected list: if the 3rd /
// 2nd / 1st first-pass distortion is missing or exceeds
// cs.interHad * PBINTRA_RATIO, shrink the RD list to at most 2 / 1 modes, or
// give up on intra for this CU altogether.
if( m_pcEncCfg->getUsePbIntraFast() && !cs.slice->isIntra() && cu.partSize == SIZE_2Nx2N && uiRdModeList.size() < numModesAvailable )
{
if( CandHadList.size() < 3 || CandHadList[2] > cs.interHad * PBINTRA_RATIO )
{
uiRdModeList.resize( std::min<size_t>( uiRdModeList.size(), 2 ) );
}
if( CandHadList.size() < 2 || CandHadList[1] > cs.interHad * PBINTRA_RATIO )
{
uiRdModeList.resize( std::min<size_t>( uiRdModeList.size(), 1 ) );
}
if( CandHadList.size() < 1 || CandHadList[0] > cs.interHad * PBINTRA_RATIO )
{
// Give up on intra: mark the distortion as maximal and leave early.
cs.dist = MAX_UINT;
cs.interHad = 0;
//===== reset context models =====
// NOTE(review): only the luma IPredMode sub-context is restored on this
// early exit, whereas the normal exit restores all of ctxStart - confirm
// this is intended.
m_CABACEstimator->getCtx() = SubCtx( Ctx::IPredMode [CHANNEL_TYPE_LUMA], ctxStartIntraMode );
return;
}
}
//===== check modes (using r-d costs) =====
// Full rate-distortion selection among the remaining candidates.
#if ENABLE_RQT_INTRA_SPEEDUP_MOD
UInt uiSecondBestMode = MAX_UINT;
Double dSecondBestPUCost = MAX_DOUBLE;
#endif
UInt uiBestPUMode = 0;
// Scratch coding structures selected by the CU's luma width and height:
// csTemp receives each trial, csBest holds the lowest-cost trial so far.
CodingStructure *csTemp = m_pTempCS[gp_sizeIdxInfo->idxFrom( cu.lwidth() )][gp_sizeIdxInfo->idxFrom( cu.lheight() )];
CodingStructure *csBest = m_pBestCS[gp_sizeIdxInfo->idxFrom( cu.lwidth() )][gp_sizeIdxInfo->idxFrom( cu.lheight() )];
csTemp->slice = cs.slice;
csBest->slice = cs.slice;
csTemp->initStructData();
csBest->initStructData();
// just to be sure
numModesForFullRD = ( int ) uiRdModeList.size();
for (UInt uiMode = 0; uiMode < numModesForFullRD; uiMode++)
{
// set luma prediction mode
UInt uiOrgMode = uiRdModeList[uiMode];
pu.intraDir[0] = uiOrgMode;
// set context models
m_CABACEstimator->getCtx() = ctxStart;
// determine residual for partition
cs.initSubStructure( *csTemp, partitioner.chType, cs.area, true );
#if ENABLE_RQT_INTRA_SPEEDUP
xRecurIntraCodingLumaQT( *csTemp, partitioner, true );
#else
xRecurIntraCodingLumaQT( *csTemp, partitioner );
#endif
DTRACE( g_trace_ctx, D_INTRA_COST, "IntraCost T %f (%d) \n", csTemp->cost, uiOrgMode );
// check r-d cost
if( csTemp->cost < csBest->cost )
{
// New best: after the swap csBest holds this trial and csTemp holds the
// previous best, so csTemp->cost below is the previous best's cost.
std::swap( csTemp, csBest );
#if ENABLE_RQT_INTRA_SPEEDUP_MOD
uiSecondBestMode = uiBestPUMode;
dSecondBestPUCost = csTemp->cost;
#endif
uiBestPUMode = uiOrgMode;
}
#if ENABLE_RQT_INTRA_SPEEDUP_MOD
else if( csTemp->cost < dSecondBestPUCost )
{
// Not the best, but better than the current runner-up.
uiSecondBestMode = uiOrgMode;
dSecondBestPUCost = csTemp->cost;
}
#endif
csTemp->releaseIntermediateData();
} // Mode loop
#if HEVC_USE_RQT
// don't need to run full depth search - with QTBT there is only tr depth 0
if( !cs.pcv->noRQT && pu.lwidth() > MIN_TU_SIZE )
{
#if ENABLE_RQT_INTRA_SPEEDUP
// Second pass: re-run the best mode (and, with ..._SPEEDUP_MOD, also the
// runner-up) with the last xRecurIntraCodingLumaQT argument set to false.
#if ENABLE_RQT_INTRA_SPEEDUP_MOD
for( UInt ui = 0; ui < 2; ++ui )
#endif
{
#if ENABLE_RQT_INTRA_SPEEDUP_MOD
UInt uiOrgMode = ui ? uiSecondBestMode : uiBestPUMode;
// NOTE(review): uiSecondBestDiffFilter and uiBestDiffFilter are not declared
// anywhere in the visible part of this function - confirm where they come
// from before enabling this configuration.
cu.diffFilterIdx = ui ? uiSecondBestDiffFilter : uiBestDiffFilter;
// NOTE(review): m_modeListNN is indexed with uiOrgMode before the MAX_UINT
// sentinel check just below; uiSecondBestMode starts out as MAX_UINT, so
// confirm this lookup is safe when there is no runner-up mode.
if( pu.cu->intra_NN )
{
pu.intraNN_Mode_True = m_modeListNN[uiOrgMode];
}
// No runner-up was recorded: nothing more to test.
if( uiOrgMode == MAX_UINT )
{
break;
}
#else
UInt uiOrgMode = uiBestPUMode;
#endif
pu.intraDir[0] = uiOrgMode;
// set context models
m_CABACEstimator->getCtx() = ctxStart;
// determine residual for partition
cs.initSubStructure( *csTemp, partitioner.chType, cs.area, true );
xRecurIntraCodingLumaQT( *csTemp, partitioner, false );
DTRACE( g_trace_ctx, D_INTRA_COST, "IntraCost F %f (%d) \n", csTemp->cost, uiOrgMode );
// check r-d cost
if( csTemp->cost < csBest->cost )
{
std::swap( csTemp, csBest );
uiBestPUMode = uiOrgMode;
}
csTemp->releaseIntermediateData();
} // Mode loop
#endif
}
#endif
// Copy the best trial for this partition back into cs (original author's
// note: bits, distortion and residual of this level).
cs.useSubStructure( *csBest, partitioner.chType, pu.singleChan( CHANNEL_TYPE_LUMA ), KEEP_PRED_AND_RESI_SIGNALS, true, keepResi, keepResi );
csBest->releaseIntermediateData();
//=== update PU data ====
#if HEVC_USE_PART_SIZE
if( uiNumPU > 1 ) partitioner.nextPart( cs );
puIndex = puIndex + 1;
#endif
// The mode trials above overwrote pu.intraDir[0]; store the winner.
pu.intraDir[0] = uiBestPUMode;
}
#if HEVC_USE_PART_SIZE
if (uiNumPU > 1)
{
partitioner.exitCurrSplit();
// OR together the luma CBFs read at depth 1 over all TUs and write the
// result to every TU at depth 0. Only cbf[0] is used.
Bool cbf[3] = { false, false, false };
for (const auto &ptu : cs.tus)
{
cbf[0] |= TU::getCbfAtDepth(*ptu, COMPONENT_Y, 1);
}
for (auto &ptu : cs.tus)
{
TU::setCbfAtDepth(*ptu, COMPONENT_Y, 0, cbf[0] ? 1 : 0);
}
}
#endif
//===== reset context models =====
// Restore the CABAC estimator contexts saved on entry.
m_CABACEstimator->getCtx() = ctxStart;
}