在HEVC參考代碼中,一個CTU塊通過xCompressCU()函數進行CU遞歸,得到最優的CU深度。
遞歸的過程如下圖(引自:Fast CU Splitting and Pruning for Suboptimal CU Partitioning in HEVC Intra Coding)所示。圖中每一個方框表示一個CU塊,方框內的數字表示xCompressCU()函數的執行順序。顯而易見,如果能在執行xCompressCU()函數之前就將CU的遞歸深度確定下來,就可以減小HEVC編碼器的複雜度。
針對幀內編碼器,已經有很多文獻提出了提前確定CU遞歸深度的方法。這裏介紹"Fast CU Size Decision and Mode Decision Algorithm for HEVC Intra Coding"中Section II.A部分的具體實現。在這篇文獻中,周邊CTU塊的深度(depth)被用來預測當前CTU塊的深度。具體的細節可以查看該文獻。
Void TEncCu::compressCU( TComDataCU*& rpcCU )
{
// initialize CU data
m_ppcBestCU[0]->initCU( rpcCU->getPic(), rpcCU->getAddr() );
m_ppcTempCU[0]->initCU( rpcCU->getPic(), rpcCU->getAddr() );
memset( m_preAnalyzeDepth, 0, rpcCU->getTotalNumPart() );
memset( m_preAnaDepthDetermined, 0, rpcCU->getTotalNumPart() );
memset( m_preAnaDepthRange , 0, rpcCU->getTotalNumPart() );
// Neighboring CTUs.
TComDataCU* t_pcCULeft = rpcCU->getCULeft();
TComDataCU* t_pcCUAbove = rpcCU->getCUAbove();
TComDataCU* t_pcCUAboveLeft = rpcCU->getCUAboveLeft();
TComDataCU* t_pcCUAboveRight= rpcCU->getCUAboveRight();
UInt DepthLeft = 0; // Max Depth of LeftCTU.
UInt DepthAbove = 0; // Max Depth of AboveCTU.
UInt DepthAboveLeft = 0;
UInt DepthAboveRight = 0;
UInt picWidth = rpcCU->getSlice()->getSPS()->getPicWidthInLumaSamples();
UInt picHeight = rpcCU->getSlice()->getSPS()->getPicHeightInLumaSamples();
UInt uiLPelX = rpcCU->getCUPelX();
UInt uiRPelX = uiLPelX + rpcCU->getWidth(0) - 1;
UInt uiTPelY = rpcCU->getCUPelY();
UInt uiBPelY = uiTPelY + rpcCU->getHeight(0) - 1;
UChar tDepth;
m_insidePicture= (uiRPelX<picWidth) && (uiBPelY<picHeight);
// Considering Border CTUs.
if ( t_pcCULeft!=NULL ) //獲取左邊CTU塊最大的depth信息
{
for ( Int i=0; i<256; i++ )
{
tDepth = t_pcCULeft->getDepth(i);
if ( tDepth>DepthLeft )
{
DepthLeft = (UInt)tDepth;
}
}
}
else
DepthLeft = 2; //如果是NULL,直接賦值2(16X16)
if ( t_pcCUAbove!=NULL )
{
for ( Int i=0; i<256; i++ )
{
tDepth = t_pcCUAbove->getDepth(i);
if ( tDepth>DepthAbove )
{
DepthAbove = (UInt)tDepth;
}
}
}
else
DepthAbove = 2;
if ( t_pcCUAboveLeft!=NULL )
{
DepthAboveLeft = t_pcCUAboveLeft->getDepth(g_auiRasterToZscan[16*15+15]);
}
else
DepthAboveLeft = 2;
if ( t_pcCUAboveRight!=NULL )
{
DepthAboveRight = t_pcCUAboveRight->getDepth(g_auiRasterToZscan[16*15]);
}
else
DepthAboveRight = 2;
Double DepthPre = 0.3*DepthLeft+0.3*DepthAbove+0.2*DepthAboveLeft+0.2*DepthAboveRight; // 論文中Prediction Depth Type
if ( DepthPre<=0.5 ) // 依據論文中的公式給出最小的depth level和最大的depth level
{
memset( m_preAnaDepthDetermined, 1, 256 );
memset( m_preAnaDepthRange, 2, 256 );
memset( m_preAnalyzeDepth, 0, 256 );
}
else if ( DepthPre<=1.5 )
{
memset( m_preAnaDepthDetermined, 1, 256 );
memset( m_preAnaDepthRange, 3, 256 );
memset( m_preAnalyzeDepth, 0, 256 );
}
else
{
memset( m_preAnaDepthDetermined, 1, 256 );
memset( m_preAnaDepthRange, 3, 256 );
memset( m_preAnalyzeDepth, 1, 256 );
}
DEBUG_STRING_NEW(sDebug)
xCompressCU( m_ppcBestCU[0], m_ppcTempCU[0], 0 DEBUG_STRING_PASS_INTO(sDebug) );
DEBUG_STRING_OUTPUT(std::cout, sDebug)
// Double Check.
UInt MaxDepthSize=0;
// UInt CTUPelX, CTUPelY;
if ( m_insidePicture )
{
for ( Int i=0; i<256; i++ )
{
// Decisioned.
tDepth = m_ppcBestCU[0]->getDepth(i);
UChar cuDepth = m_preAnalyzeDepth[i];
UChar cuPreDetermined = m_preAnaDepthDetermined[i];
UChar cuRange = m_preAnaDepthRange[i];
if ( tDepth<cuDepth && tDepth>=cuDepth+cuRange )
{
assert(0);
}
}
}
#if ADAPTIVE_QP_SELECTION
if( m_pcEncCfg->getUseAdaptQpSelect() )
{
if(rpcCU->getSlice()->getSliceType()!=I_SLICE) //IIII
{
xLcuCollectARLStats( rpcCU);
}
}
#endif
}
在xCompressCU函數中加入相關的條件跳轉:
// Excerpt from the body of xCompressCU(): the fast CU decision is inserted
// just before the per-QP mode search.
// If slice start or slice end is within this cu...
TComSlice * pcSlice = rpcTempCU->getPic()->getSlice(rpcTempCU->getPic()->getCurrSliceIdx());
Bool bSliceStart = pcSlice->getSliceSegmentCurStartCUAddr()>rpcTempCU->getSCUAddr()&&pcSlice->getSliceSegmentCurStartCUAddr()<rpcTempCU->getSCUAddr()+rpcTempCU->getTotalNumPart();
Bool bSliceEnd = (pcSlice->getSliceSegmentCurEndCUAddr()>rpcTempCU->getSCUAddr()&&pcSlice->getSliceSegmentCurEndCUAddr()<rpcTempCU->getSCUAddr()+rpcTempCU->getTotalNumPart());
Bool bInsidePicture = ( uiRPelX < rpcBestCU->getSlice()->getSPS()->getPicWidthInLumaSamples() ) && ( uiBPelY < rpcBestCU->getSlice()->getSPS()->getPicHeightInLumaSamples() );
// Fast CU decision process.
// When the current depth is not inside the pre-analysed depth range, the
// PU/TU decision process for this depth is skipped.
// Added by xfHuang.
Bool t_enCUSkip=false;
// m_insidePicture is the CTU-level flag set in compressCU(); the fast
// decision is only applied when the whole CTU lies inside the picture.
if ( m_insidePicture )
{
// Split Analysis For CU32X32 And CU16X16.
// If the current depth level is outside the predicted depth range, set the
// cost to a very large value and do not run the prediction below.
if ( checkCurDepthInPreAnaRange( rpcBestCU, uiDepth ) == false )
{
t_enCUSkip = true;
// Large sentinel values rather than the type maxima.
// NOTE(review): presumably scaled down so that adding several sub-CU
// costs does not overflow - confirm.
rpcBestCU->getTotalCost() = MAX_DOUBLE/16;
rpcBestCU->getTotalDistortion() = MAX_UINT>>3;
rpcBestCU->getTotalBits() = MAX_UINT>>3;
// At depth 3 give the skipped CU a partition size and prediction mode.
// NOTE(review): the original comment ("avoid assert disable") suggests this
// keeps a later assert from firing - confirm which one.
if ( uiDepth==3 )
{
rpcBestCU->setPartitionSize ( 0, SIZE_2Nx2N );
rpcBestCU->setPredictionMode( 0, MODE_INTRA );
}
}
}
// We need to split, so don't try these modes.
if(!bSliceEnd && !bSliceStart && bInsidePicture )
{
// Only run the mode search when this depth was not skipped above.
if( t_enCUSkip==false )
{
for (Int iQP=iMinQP; iQP<=iMaxQP; iQP++)
{
const Bool bIsLosslessMode = isAddLowestQP && (iQP == iMinQP);
if (bIsLosslessMode)
{
iQP = lowestQP;
}
rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
其中checkCurDepthInPreAnaRange函數如下:
/** Test whether a depth lies inside the pre-analysed depth range of a CU.
 *
 * The range is looked up at the CU's Z-order index and covers
 * [m_preAnalyzeDepth, m_preAnalyzeDepth + m_preAnaDepthRange).
 * m_preAnaDepthDetermined is not consulted.
 *
 * \param pCU     CU whose Z-order index selects the range entry
 * \param uidepth depth to test
 * \returns true when uidepth is inside the range, false otherwise
 */
Bool TEncCu::checkCurDepthInPreAnaRange( TComDataCU*& pCU, UInt uidepth )
{
  const UInt  zorderIdx  = pCU->getZorderIdxInCU();
  const UChar firstDepth = m_preAnalyzeDepth[zorderIdx];
  const UChar depthSpan  = m_preAnaDepthRange[zorderIdx];

  // The exclusive end of the range may not exceed 5.
  assert( firstDepth + depthSpan <= 5 );

  // Below the first allowed depth.
  if ( uidepth < firstDepth )
  {
    return false;
  }
  // Inside the range only if strictly below its exclusive end.
  return uidepth < static_cast<UInt>( firstDepth + depthSpan );
}
以上是一種基於周邊CTU塊信息來進行CU深度優化的方法。對於大部分CTU來說,這個方法只是不做64X64這一層depth,因此性能損失很小,平均大概在0.2%左右,編碼時間可以節省10%左右。
[轉載請註明作者和出處]