Fast CU Depth Decision Algorithm for HEVC Intra Coding

在HEVC參考代碼中，一個CTU塊通過xCompressCU()函數進行CU遞歸，從而得到最優的CU深度。

遞歸的過程如下圖（出自：Fast CU Splitting and Pruning for Suboptimal CU Partitioning in HEVC Intra Coding）所示。圖中每一個方框表示一個CU塊，方框內的數字表示xCompressCU()函數的執行順序。如果能在執行xCompressCU()函數之前將CU的遞歸深度確定下來，顯然可以降低HEVC編碼器的複雜度。

針對幀內編碼器，已經有很多文獻提出了提前確定CU遞歸深度的方法。這裏介紹"Fast CU Size Decision and Mode Decision Algorithm for HEVC Intra Coding"中Section II.A部分的具體實現。在這篇文獻中，周邊CTU塊的深度（depth）被用來預測當前CTU塊的深度範圍。具體的細節可以查看該文獻。

Void TEncCu::compressCU( TComDataCU*& rpcCU )
{
  // initialize CU data
  m_ppcBestCU[0]->initCU( rpcCU->getPic(), rpcCU->getAddr() );
  m_ppcTempCU[0]->initCU( rpcCU->getPic(), rpcCU->getAddr() );

  memset( m_preAnalyzeDepth,       0, rpcCU->getTotalNumPart() );
  memset( m_preAnaDepthDetermined, 0, rpcCU->getTotalNumPart() );
  memset( m_preAnaDepthRange     , 0, rpcCU->getTotalNumPart() );

  // Neighboring CTUs.
  TComDataCU* t_pcCULeft      = rpcCU->getCULeft();
  TComDataCU* t_pcCUAbove     = rpcCU->getCUAbove();
  TComDataCU* t_pcCUAboveLeft = rpcCU->getCUAboveLeft();
  TComDataCU* t_pcCUAboveRight= rpcCU->getCUAboveRight();
  UInt DepthLeft       = 0;       // Max Depth of LeftCTU.
  UInt DepthAbove      = 0;       // Max Depth of AboveCTU.
  UInt DepthAboveLeft  = 0;
  UInt DepthAboveRight = 0;

  UInt picWidth  = rpcCU->getSlice()->getSPS()->getPicWidthInLumaSamples();
  UInt picHeight = rpcCU->getSlice()->getSPS()->getPicHeightInLumaSamples();
  UInt uiLPelX   = rpcCU->getCUPelX();
  UInt uiRPelX   = uiLPelX + rpcCU->getWidth(0)  - 1;
  UInt uiTPelY   = rpcCU->getCUPelY();
  UInt uiBPelY   = uiTPelY + rpcCU->getHeight(0) - 1;
  
  UChar    tDepth;

  m_insidePicture= (uiRPelX<picWidth) && (uiBPelY<picHeight);
  // Considering Border CTUs.
  if ( t_pcCULeft!=NULL ) //獲取左邊CTU塊最大的depth信息
  {
	  for ( Int i=0; i<256; i++ )
	  {
		  tDepth    = t_pcCULeft->getDepth(i);
		  if ( tDepth>DepthLeft )
		  {
			  DepthLeft = (UInt)tDepth;
		  }
	  }
  }
  else
	  DepthLeft = 2; //如果是NULL，直接賦值2(16X16)

  if ( t_pcCUAbove!=NULL )
  {
	  for ( Int i=0; i<256; i++ )
	  {
		  tDepth    = t_pcCUAbove->getDepth(i);
		  if ( tDepth>DepthAbove )
		  {
			  DepthAbove = (UInt)tDepth;
		  }
	  }
  }
  else
	  DepthAbove = 2;

  if ( t_pcCUAboveLeft!=NULL )
  {
	  DepthAboveLeft = t_pcCUAboveLeft->getDepth(g_auiRasterToZscan[16*15+15]);
  }
  else
	  DepthAboveLeft = 2;

  if ( t_pcCUAboveRight!=NULL )
  {
	  DepthAboveRight = t_pcCUAboveRight->getDepth(g_auiRasterToZscan[16*15]);
  }
  else
	  DepthAboveRight = 2;
  
  Double DepthPre = 0.3*DepthLeft+0.3*DepthAbove+0.2*DepthAboveLeft+0.2*DepthAboveRight; // 論文中Prediction Depth Type
  if ( DepthPre<=0.5 ) // 依據論文中的公式給出最小的depth level和最大的depth level
  {
	  memset( m_preAnaDepthDetermined, 1, 256 );
	  memset( m_preAnaDepthRange,      2, 256 );
	  memset( m_preAnalyzeDepth,       0, 256 );
  }
  else if ( DepthPre<=1.5 )
  {
	  memset( m_preAnaDepthDetermined, 1, 256 );
	  memset( m_preAnaDepthRange,      3, 256 );
	  memset( m_preAnalyzeDepth,       0, 256 );
  }
  else
  {
	  memset( m_preAnaDepthDetermined, 1, 256 );
	  memset( m_preAnaDepthRange,      3, 256 );
	  memset( m_preAnalyzeDepth,       1, 256 );
  }

  DEBUG_STRING_NEW(sDebug)

  xCompressCU( m_ppcBestCU[0], m_ppcTempCU[0], 0 DEBUG_STRING_PASS_INTO(sDebug) );
  DEBUG_STRING_OUTPUT(std::cout, sDebug)
  // Double Check.
  UInt MaxDepthSize=0;
  // UInt CTUPelX, CTUPelY;
  if ( m_insidePicture )
  {
	  for ( Int i=0; i<256; i++ )
	  {
		  // Decisioned.
		  tDepth    = m_ppcBestCU[0]->getDepth(i);

		  UChar cuDepth         = m_preAnalyzeDepth[i];
		  UChar cuPreDetermined = m_preAnaDepthDetermined[i];
		  UChar cuRange         = m_preAnaDepthRange[i];

		  if ( tDepth<cuDepth && tDepth>=cuDepth+cuRange )
		  {
			  assert(0);
		  }
	  }
  }

#if ADAPTIVE_QP_SELECTION
  if( m_pcEncCfg->getUseAdaptQpSelect() )
  {
    if(rpcCU->getSlice()->getSliceType()!=I_SLICE) //IIII
    {
      xLcuCollectARLStats( rpcCU);
    }
  }
#endif
}

在xCompressCU()函數中加入相關的條件跳轉：

 // If slice start or slice end is within this cu...
  TComSlice * pcSlice = rpcTempCU->getPic()->getSlice(rpcTempCU->getPic()->getCurrSliceIdx());
  Bool bSliceStart = pcSlice->getSliceSegmentCurStartCUAddr()>rpcTempCU->getSCUAddr()&&pcSlice->getSliceSegmentCurStartCUAddr()<rpcTempCU->getSCUAddr()+rpcTempCU->getTotalNumPart();
  Bool bSliceEnd = (pcSlice->getSliceSegmentCurEndCUAddr()>rpcTempCU->getSCUAddr()&&pcSlice->getSliceSegmentCurEndCUAddr()<rpcTempCU->getSCUAddr()+rpcTempCU->getTotalNumPart());
  Bool bInsidePicture = ( uiRPelX < rpcBestCU->getSlice()->getSPS()->getPicWidthInLumaSamples() ) && ( uiBPelY < rpcBestCU->getSlice()->getSPS()->getPicHeightInLumaSamples() );
  // Fast CU decision Process.
  // When Current depth is not in the PreAnalyzedDepth Range, it just skips the PU/TU Decision process.
  // Added by xfHuang.
  Bool t_enCUSkip=false;
  if ( m_insidePicture )
  {
	  // Split Analysis For CU32X32 And CU16X16.

	  if ( checkCurDepthInPreAnaRange( rpcBestCU, uiDepth ) == false ) //如果當前的depth level不在預測的depth level之內，後面直接將cost賦值成最大，不進行後面的預測操作。
	  {
		  t_enCUSkip = true;
		  rpcBestCU->getTotalCost() = MAX_DOUBLE/16;
		  rpcBestCU->getTotalDistortion() = MAX_UINT>>3;
		  rpcBestCU->getTotalBits() = MAX_UINT>>3;
		  // avoid assert disable.
		  if ( uiDepth==3 )
		  {
			  rpcBestCU->setPartitionSize ( 0, SIZE_2Nx2N );      
			  rpcBestCU->setPredictionMode( 0, MODE_INTRA );	
		  }
	  }
  }
  
  // We need to split, so don't try these modes.
  if(!bSliceEnd && !bSliceStart && bInsidePicture )
  {
	if( t_enCUSkip==false )
	{
    for (Int iQP=iMinQP; iQP<=iMaxQP; iQP++)
    {
      const Bool bIsLosslessMode = isAddLowestQP && (iQP == iMinQP);

      if (bIsLosslessMode)
      {
        iQP = lowestQP;
      }

      rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );

其中checkCurDepthInPreAnaRange函數如下：

/** Tell whether a depth lies inside the pre-analysed depth window of a CU.
 *
 * The window is looked up with the CU's Z-order index: it starts at
 * m_preAnalyzeDepth[idx] and spans m_preAnaDepthRange[idx] depth levels.
 *
 * \param pCU      CU whose Z-order index selects the window entry
 * \param uidepth  depth to test
 * \returns true when firstDepth <= uidepth < firstDepth + depthCount, false otherwise
 */
Bool TEncCu::checkCurDepthInPreAnaRange( TComDataCU*& pCU, UInt uidepth )
{
	const UInt  partIdx    = pCU->getZorderIdxInCU();
	const UChar firstDepth = m_preAnalyzeDepth[partIdx];
	const UChar depthCount = m_preAnaDepthRange[partIdx];

	// The window may not extend past depth 4.
	assert( firstDepth + depthCount <= 5 );

	if ( uidepth < firstDepth )
	{
		return false;
	}
	return uidepth < firstDepth + depthCount;
}

以上是一種基於周邊CTU塊信息來進行CU深度優化的方法。在大部分情況下，這個方法只是跳過64X64這一層depth，因此性能損失很小，平均大概在0.2%左右，編碼時間可以節省10%左右。

[轉載請註明作者和出處]