int main(int argc, char* argv[])
{
TAppEncTop cTAppEncTop;
// print information
fprintf( stdout, "\n" );
fprintf( stdout, "HM software: Encoder Version [%s]", NV_VERSION );
fprintf( stdout, NVM_ONOS );
fprintf( stdout, NVM_COMPILEDBY );
fprintf( stdout, NVM_BITS );
fprintf( stdout, "\n" );
// create application encoder class
cTAppEncTop.create();
// parse configuration
try
{
if(!cTAppEncTop.parseCfg( argc, argv ))
{
cTAppEncTop.destroy();
return 1;
}
}
catch (po::ParseFailure& e)
{
cerr << "Error parsing option \""<< e.arg <<"\" with argument \""<< e.val <<"\"." << endl;
return 1;
}
// starting time
double dResult;
long lBefore = clock();
// call encoding function
cTAppEncTop.encode();
// ending time
dResult = (double)(clock()-lBefore) / CLOCKS_PER_SEC;
printf("\n Total Time: %12.3f sec.\n", dResult);
// destroy application encoder class
cTAppEncTop.destroy();
return 0;
}
可以很清楚地看到,整個main函數非常簡潔清晰,主要可以分爲以下幾個部分:輸出軟件信息、創建編碼器類的實例、解析配置文件、獲取開始時間、編碼數據、計算耗費時間和銷燬編碼器類的實例。我們主要關心的編碼過程僅通過調用編碼器實例的一個方法實現:
// call encoding function: of all the steps in main(), this single call runs the actual encoding
cTAppEncTop.encode();
該函數的實現如下:
/**
 * Application-level encoding loop.
 *
 * Opens the output bitstream, sets up the encoder library, then reads the
 * input YUV file picture by picture, hands each picture to m_cTEncTop.encode()
 * and writes whatever access units the encoder produced. The loop ends when
 * the requested number of frames has been received or the input reaches
 * end-of-file. Afterwards the summary is printed and all buffers are released.
 *
 * Terminates the process with EXIT_FAILURE if the bitstream file cannot be opened.
 */
Void TAppEncTop::encode()
{
// open the output bitstream for binary writing; nothing has been allocated yet, so exiting here leaks nothing
fstream bitstreamFile(m_pchBitstreamFile, fstream::binary | fstream::out);
if (!bitstreamFile)
{
fprintf(stderr, "\nfailed to open bitstream file `%s' for writing\n", m_pchBitstreamFile);
exit(EXIT_FAILURE);
}
// buffer for the original (input) picture; released explicitly after the loop
TComPicYuv* pcPicYuvOrg = new TComPicYuv;
// reconstruction buffer pointer, filled in by xGetBuffer() on every iteration
TComPicYuv* pcPicYuvRec = NULL;
// initialize internal class & member variables
xInitLibCfg();
xCreateLib();
xInitLib();
// main encoder loop
Int iNumEncoded = 0;
Bool bEos = false;
list<AccessUnit> outputAccessUnits; ///< list of access units to write out. is populated by the encoding process
// allocate original YUV buffer
pcPicYuvOrg->create( m_iSourceWidth, m_iSourceHeight, m_uiMaxCUWidth, m_uiMaxCUHeight, m_uiMaxCUDepth );
while ( !bEos )
{
// get buffers
xGetBuffer(pcPicYuvRec);
// read input YUV file
m_cTVideoIOYuvInputFile.read( pcPicYuvOrg, m_aiPad );
// increase number of received frames
m_iFrameRcvd++;
// stop after this picture once the configured frame count has been reached
bEos = (m_iFrameRcvd == m_framesToBeEncoded);
Bool flush = 0;
// if end of file (which is only detected on a read failure) flush the encoder of any queued pictures
if (m_cTVideoIOYuvInputFile.isEof())
{
flush = true;
bEos = true;
// the read that hit EOF delivered no picture: undo the increment and tell the encoder the real frame count
m_iFrameRcvd--;
m_cTEncTop.setFramesToBeEncoded(m_iFrameRcvd);
}
// call encoding function for one frame (a null picture is passed when flushing)
m_cTEncTop.encode( bEos, flush ? 0 : pcPicYuvOrg, m_cListPicYuvRec, outputAccessUnits, iNumEncoded );
// write bistream to file if necessary
if ( iNumEncoded > 0 )
{
xWriteOutput(bitstreamFile, iNumEncoded, outputAccessUnits);
outputAccessUnits.clear();
}
}
m_cTEncTop.printSummary();
// delete original YUV buffer
pcPicYuvOrg->destroy();
delete pcPicYuvOrg;
pcPicYuvOrg = NULL;
// delete used buffers in encoder class
m_cTEncTop.deletePicBuffer();
// delete buffers & classes
xDeleteBuffer();
xDestroyLib();
printRateSummary();
return;
}
該函數中首先調用pcPicYuvOrg->create( m_iSourceWidth, m_iSourceHeight, m_uiMaxCUWidth, m_uiMaxCUHeight, m_uiMaxCUDepth )分配YUV數據緩存,然後在while循環中逐幀讀取YUV數據、更新當前已接收的幀數、編碼當前幀、寫出碼流,隨後做其他清理工作。核心功能實現在m_cTEncTop.encode( bEos, flush ? 0 : pcPicYuvOrg, m_cListPicYuvRec, outputAccessUnits, iNumEncoded )函數中。在該函數中調用m_cGOPEncoder.compressGOP(m_iPOCLast, m_iNumPicRcvd, m_cListPic, rcListPicYuvRecOut, accessUnitsOut)進行編碼一個GOP的操作。這個函數奇長無比,用了接近1500行代碼,看來實現了很多很多很多的功能。這個碉堡了的函數究竟做了些啥事兒呢?這個函數中大部分內容就是在爲了編碼當前slice做準備,以及編碼完成之後一些輔助操作。實際編碼過程的操作由以下函數m_pcSliceEncoder->compressSlice( pcPic )實現。
這又是一個碉堡了的函數,佔了將近400行……代碼就不貼了,會死人的……簡單看下好了。
首先還是各種編碼的配置,包括配置熵編碼器、初始化CU編碼器等。在完成了一長串的設置之後,在compressCU函數中實現對一個CU的編碼:
// encode one CU; per the article this call sits inside compressSlice(), whose surrounding code is not shown here
m_pcCuEncoder->compressCU( pcCU );
在一個compressSlice()中,在compressCU函數中實現對一個CU的編碼,其中主要進行了CU的初始化,以及實際的編碼操作。Void TEncCu::compressCU( TComDataCU*& rpcCU )
{
// Purpose: prepare the depth-0 "best" and "temp" working CUs for the picture/address of rpcCU
// and start the analysis with xCompressCU() at depth 0.
// initialize CU data
m_ppcBestCU[0]->initCU( rpcCU->getPic(), rpcCU->getAddr() );
m_ppcTempCU[0]->initCU( rpcCU->getPic(), rpcCU->getAddr() );
#if RATE_CONTROL_LAMBDA_DOMAIN
// reset the SAD accumulators before analysing this CU
m_addSADDepth = 0;
m_LCUPredictionSAD = 0;
m_temporalSAD = 0;
#endif
// analysis of CU (third argument is the starting depth, 0)
xCompressCU( m_ppcBestCU[0], m_ppcTempCU[0], 0 );
#if ADAPTIVE_QP_SELECTION
// when adaptive QP selection is enabled, collect ARL statistics for every slice type except I slices
if( m_pcEncCfg->getUseAdaptQpSelect() )
{
if(rpcCU->getSlice()->getSliceType()!=I_SLICE) //IIII
{
xLcuCollectARLStats( rpcCU);
}
}
#endif
}
其中完成實際編碼一個CU操作的是xCompressCU方法。前面的綜述中已經描述過,每一個CTU按照四叉樹結構進行劃分,CompressCU中調用的xCompressCU則相當於四叉樹的根節點。另外,在每一個xCompressCU方法中間,會對每一個CU進行分析判斷是否進行下一級劃分。xCompressCU函數由於包含了Intra和InterFrame編碼的代碼,因此同樣非常長,共有600餘行。下面着重對幀內編碼的部分做一下梳理。
實現幀內編碼的部分代碼如下:
// Excerpt of TEncCu::xCompressCU showing only the intra mode decision.
// The parts replaced by "//......" (including the declarations of bEarlySkip and iQP) are not reproduced here.
Void TEncCu::xCompressCU( TComDataCU*& rpcBestCU, TComDataCU*& rpcTempCU, UInt uiDepth, PartSize eParentPartSize )
{
//......
// do normal intra modes
if ( !bEarlySkip )
{
// speedup for inter frames: intra is always tested in I slices; otherwise only when the
// current best candidate has a non-zero coded block flag for luma or either chroma component
if( rpcBestCU->getSlice()->getSliceType() == I_SLICE ||
rpcBestCU->getCbf( 0, TEXT_LUMA ) != 0 ||
rpcBestCU->getCbf( 0, TEXT_CHROMA_U ) != 0 ||
rpcBestCU->getCbf( 0, TEXT_CHROMA_V ) != 0 ) // avoid very complex intra if it is unlikely
{
// test intra with a single 2Nx2N partition, then reset the temp CU for the next candidate
xCheckRDCostIntra( rpcBestCU, rpcTempCU, SIZE_2Nx2N );
rpcTempCU->initEstData( uiDepth, iQP );
// NxN is only tried at depth g_uiMaxCUDepth - g_uiAddCUDepth (presumably the smallest CU size - confirm) ...
if( uiDepth == g_uiMaxCUDepth - g_uiAddCUDepth )
{
// ... and only when the CU is wider than the minimum transform size signalled in the SPS
if( rpcTempCU->getWidth(0) > ( 1 << rpcTempCU->getSlice()->getSPS()->getQuadtreeTULog2MinSize() ) )
{
xCheckRDCostIntra( rpcBestCU, rpcTempCU, SIZE_NxN );
rpcTempCU->initEstData( uiDepth, iQP );
}
}
}
}
//......
}
在這部分代碼中xCheckRDCostIntra( rpcBestCU, rpcTempCU, SIZE_2Nx2N )查看了各種intra預測模式下的代價:Void TEncCu::xCheckRDCostIntra( TComDataCU*& rpcBestCU, TComDataCU*& rpcTempCU, PartSize eSize )
{
// Purpose: evaluate rpcTempCU as an intra CU with partition size eSize: run the luma and chroma
// intra searches, count the bits of the resulting syntax, compute the RD cost and let
// xCheckBestMode() compare the candidate against rpcBestCU.
UInt uiDepth = rpcTempCU->getDepth( 0 );
// mark the whole CU as non-skipped, intra, with the requested partition size
rpcTempCU->setSkipFlagSubParts( false, 0, uiDepth );
rpcTempCU->setPartSizeSubParts( eSize, 0, uiDepth );
rpcTempCU->setPredModeSubParts( MODE_INTRA, 0, uiDepth );
rpcTempCU->setCUTransquantBypassSubParts( m_pcEncCfg->getCUTransquantBypassFlagValue(), 0, uiDepth );
Bool bSeparateLumaChroma = true; // choose estimation mode
UInt uiPreCalcDistC = 0;
// bSeparateLumaChroma is hard-coded to true above, so this pre-estimation branch is never taken
if( !bSeparateLumaChroma )
{
m_pcPredSearch->preestChromaPredMode( rpcTempCU, m_ppcOrigYuv[uiDepth], m_ppcPredYuvTemp[uiDepth] );
}
// luma intra search
m_pcPredSearch ->estIntraPredQT ( rpcTempCU, m_ppcOrigYuv[uiDepth], m_ppcPredYuvTemp[uiDepth], m_ppcResiYuvTemp[uiDepth], m_ppcRecoYuvTemp[uiDepth], uiPreCalcDistC, bSeparateLumaChroma );
// copy the reconstructed luma into the picture's reconstruction buffer before the chroma search runs
m_ppcRecoYuvTemp[uiDepth]->copyToPicLuma(rpcTempCU->getPic()->getPicYuvRec(), rpcTempCU->getAddr(), rpcTempCU->getZorderIdxInCU() );
// chroma intra search
m_pcPredSearch ->estIntraPredChromaQT( rpcTempCU, m_ppcOrigYuv[uiDepth], m_ppcPredYuvTemp[uiDepth], m_ppcResiYuvTemp[uiDepth], m_ppcRecoYuvTemp[uiDepth], uiPreCalcDistC );
// count the bits needed to signal this CU: reset the counter, then encode the syntax elements in order
m_pcEntropyCoder->resetBits();
if ( rpcTempCU->getSlice()->getPPS()->getTransquantBypassEnableFlag())
{
m_pcEntropyCoder->encodeCUTransquantBypassFlag( rpcTempCU, 0, true );
}
m_pcEntropyCoder->encodeSkipFlag ( rpcTempCU, 0, true );
m_pcEntropyCoder->encodePredMode( rpcTempCU, 0, true );
m_pcEntropyCoder->encodePartSize( rpcTempCU, 0, uiDepth, true );
m_pcEntropyCoder->encodePredInfo( rpcTempCU, 0, true );
m_pcEntropyCoder->encodeIPCMInfo(rpcTempCU, 0, true );
// Encode Coefficients
// encodeCoeff() receives bCodeDQP by name and the value is written back afterwards,
// so the delta-QP flag may be updated by the call
Bool bCodeDQP = getdQPFlag();
m_pcEntropyCoder->encodeCoeff( rpcTempCU, 0, uiDepth, rpcTempCU->getWidth (0), rpcTempCU->getHeight(0), bCodeDQP );
setdQPFlag( bCodeDQP );
// keep the coder state reached after this candidate in the CI_TEMP_BEST slot of this depth
if( m_bUseSBACRD ) m_pcRDGoOnSbacCoder->store(m_pppcRDSbacCoder[uiDepth][CI_TEMP_BEST]);
rpcTempCU->getTotalBits() = m_pcEntropyCoder->getNumberOfWrittenBits();
if(m_pcEncCfg->getUseSBACRD())
{
rpcTempCU->getTotalBins() = ((TEncBinCABAC *)((TEncSbac*)m_pcEntropyCoder->m_pcEntropyCoderIf)->getEncBinIf())->getBinsCoded();
}
// RD cost from the counted bits and the distortion accumulated in the temp CU
rpcTempCU->getTotalCost() = m_pcRdCost->calcRdCost( rpcTempCU->getTotalBits(), rpcTempCU->getTotalDistortion() );
xCheckDQP( rpcTempCU );
xCheckBestMode(rpcBestCU, rpcTempCU, uiDepth);
}
在這個函數中,調用了estIntraPredQT和estIntraPredChromaQT方法,這兩個函數的作用是類似的,區別只在於前者針對亮度分量後者針對色度分量。我們重點關注對亮度分量的操作,即estIntraPredQT函數。
下面是estIntraPredQT的一段代碼:
// Excerpt of TEncSearch::estIntraPredQT: only the loop over the candidate luma modes is shown.
// Every local used below (numModesAvailable, piOrg, piPred, uiStride, CandNum, ...) is declared in the elided parts.
Void
TEncSearch::estIntraPredQT( TComDataCU* pcCU,
TComYuv* pcOrgYuv,
TComYuv* pcPredYuv,
TComYuv* pcResiYuv,
TComYuv* pcRecoYuv,
UInt& ruiDistC,
Bool bLumaOnly )
{
//......
for( Int modeIdx = 0; modeIdx < numModesAvailable; modeIdx++ )
{
UInt uiMode = modeIdx;
// build the prediction block for this mode
predIntraLumaAng( pcCU->getPattern(), uiMode, piPred, uiStride, uiWidth, uiHeight, bAboveAvail, bLeftAvail );
// use hadamard transform here
UInt uiSad = m_pcRdCost->calcHAD(g_bitDepthY, piOrg, uiStride, piPred, uiStride, uiWidth, uiHeight );
// bits needed to signal the mode
UInt iModeBits = xModeBitsIntra( pcCU, uiMode, uiPU, uiPartOffset, uiDepth, uiInitTrDepth );
// approximate cost: Hadamard distortion plus mode bits weighted by sqrt(lambda)
Double cost = (Double)uiSad + (Double)iModeBits * m_pcRdCost->getSqrtLambda();
// presumably keeps the numModesForFullRD cheapest modes in uiRdModeList - confirm in xUpdateCandList
CandNum += xUpdateCandList( uiMode, cost, numModesForFullRD, uiRdModeList, CandCostList );
}
//......
}
這個for循環的意義就是遍歷多種幀內預測模式,其中numModesAvailable==35,對應整個intra的35個模式。
在predIntraLumaAng函數中,編碼器計算出當前PU的預測值:
/**
 * Generate the luma intra prediction of one square block.
 *
 * \param pcTComPattern  pattern object that supplies the reference-sample buffer
 * \param uiDirMode      intra prediction mode index
 * \param piPred         destination of the prediction samples
 * \param uiStride       stride of piPred
 * \param iWidth         block width (must equal iHeight)
 * \param iHeight        block height
 * \param bAbove         whether the above neighbour is available
 * \param bLeft          whether the left neighbour is available
 */
Void TComPrediction::predIntraLumaAng(TComPattern* pcTComPattern, UInt uiDirMode, Pel* piPred, UInt uiStride, Int iWidth, Int iHeight, Bool bAbove, Bool bLeft )
{
  assert( g_aucConvertToBit[ iWidth ] >= 0 ); // 4x 4
  assert( g_aucConvertToBit[ iWidth ] <= 5 ); // 128x128
  assert( iWidth == iHeight );

  Pel* pDst = piPred;

  // reference samples for this mode and block size, taken from m_piYuvExt
  Int* const ptrSrc = pcTComPattern->getPredictorPtr( uiDirMode, g_aucConvertToBit[ iWidth ] + 2, m_piYuvExt );

  // the reference buffer has 2*iWidth+1 samples per line; skipping one line plus one
  // sample gives the position handed to the prediction routines
  const Int refStride = 2 * iWidth + 1;
  Int* const pRefStart = ptrSrc + refStride + 1;

  // planar has its own routine
  if ( uiDirMode == PLANAR_IDX )
  {
    xPredIntraPlanar( pRefStart, refStride, pDst, uiStride, iWidth, iHeight );
    return;
  }

  // DC and angular modes share xPredIntraAng; the filter flag is only set for
  // blocks that are not larger than 16 in either dimension
  const Bool bSmallBlock = !( (iWidth > 16) || (iHeight > 16) );
  xPredIntraAng(g_bitDepthY, pRefStart, refStride, pDst, uiStride, iWidth, iHeight, uiDirMode, bAbove, bLeft, bSmallBlock );

  // extra DC filtering, only for such blocks and only when both neighbours are available
  if ( bSmallBlock && (uiDirMode == DC_IDX ) && bAbove && bLeft )
  {
    xDCPredFiltering( pRefStart, refStride, pDst, uiStride, iWidth, iHeight);
  }
}
在這個函數中主要起作用的是xPredIntraPlanar和xPredIntraAng兩個函數,另外在PU的寬和高均不超過16(即不大於16×16)、模式爲DC模式且上方和左側的參考像素均可用時,還會調用xDCPredFiltering函數。在這裏我們主要關心前面兩個。
xPredIntraPlanar的作用是以平面模式構建當前PU的幀內預測塊:
/**
 * Build the planar intra prediction of a square block.
 *
 * Each predicted sample is the rounded average of a horizontal interpolation
 * (between the left reference sample of its row and the top-right reference
 * sample) and a vertical interpolation (between the top reference sample of
 * its column and the bottom-left reference sample).
 *
 * \param pSrc       reference samples; pSrc[k - srcStride] is read as the row above
 *                   the block and pSrc[k*srcStride - 1] as the column to its left
 * \param srcStride  stride of the reference buffer
 * \param rpDst      destination of the prediction samples
 * \param dstStride  stride of rpDst
 * \param width      block width (must equal height and not exceed MAX_CU_SIZE)
 * \param height     block height
 */
Void TComPrediction::xPredIntraPlanar( Int* pSrc, Int srcStride, Pel* rpDst, Int dstStride, UInt width, UInt height )
{
  assert(width == height);
  assert(width <= MAX_CU_SIZE);

  Int k, l, bottomLeft, topRight;
  Int horPred;
  // topRow and leftColumn receive blkSize+1 samples (index blkSize holds the top-right /
  // bottom-left sample), so they need one entry more than MAX_CU_SIZE to stay in bounds
  // when width == MAX_CU_SIZE
  Int leftColumn[MAX_CU_SIZE+1], topRow[MAX_CU_SIZE+1], bottomRow[MAX_CU_SIZE], rightColumn[MAX_CU_SIZE];
  // signed copies keep the loop comparisons and index arithmetic free of signed/unsigned mixing
  const Int  blkSize  = (Int)width;
  const Int  offset2D = (Int)width;                       // rounding offset for the final shift
  const UInt shift1D  = g_aucConvertToBit[ width ] + 2;   // presumably log2(width) - confirm against g_aucConvertToBit
  const UInt shift2D  = shift1D + 1;

  // Get left and above reference column and row
  for(k=0;k<blkSize+1;k++)
  {
    topRow[k]     = pSrc[k-srcStride];
    leftColumn[k] = pSrc[k*srcStride-1];
  }

  // Prepare intermediate variables used in interpolation
  bottomLeft = leftColumn[blkSize];
  topRight   = topRow[blkSize];
  for (k=0;k<blkSize;k++)
  {
    // per-step increments of the vertical / horizontal interpolation
    bottomRow[k]   = bottomLeft - topRow[k];
    rightColumn[k] = topRight   - leftColumn[k];
    // scale the starting values so that adding one increment per step interpolates
    topRow[k]      <<= shift1D;
    leftColumn[k]  <<= shift1D;
  }

  // Generate prediction signal
  for (k=0;k<blkSize;k++)
  {
    horPred = leftColumn[k] + offset2D;
    for (l=0;l<blkSize;l++)
    {
      horPred   += rightColumn[k];   // one step further to the right
      topRow[l] += bottomRow[l];     // one step further down in column l
      rpDst[k*dstStride+l] = ( (horPred + topRow[l]) >> shift2D );
    }
  }
}
而xPredIntraAng函數則承擔了其他模式的預測塊構建,也即,不同的模式索引值代表多種不同的預測角度,從這些角度上以參考數據構建預測塊。
/**
 * Build the DC or angular intra prediction of a square block (planar is handled elsewhere).
 *
 * \param bitDepth           sample bit depth, used for clipping in the edge filter
 * \param pSrc               reference samples; pSrc[k-srcStride-1] is read as the row above
 *                           (starting at the above-left corner) and pSrc[(k-1)*srcStride-1]
 *                           as the left column (starting at the same corner)
 * \param srcStride          stride of the reference buffer
 * \param rpDst              destination of the prediction samples
 * \param dstStride          stride of rpDst
 * \param width              block width, also used as the block size
 * \param height             block height (only forwarded to the DC value computation)
 * \param dirMode            prediction mode, must be > 0
 * \param blkAboveAvailable  whether the above neighbour is available (used for DC)
 * \param blkLeftAvailable   whether the left neighbour is available (used for DC)
 * \param bFilter            enables the edge filter of the zero-angle case
 */
Void TComPrediction::xPredIntraAng(Int bitDepth, Int* pSrc, Int srcStride, Pel*& rpDst, Int dstStride, UInt width, UInt height, UInt dirMode, Bool blkAboveAvailable, Bool blkLeftAvailable, Bool bFilter )
{
Int k,l;
Int blkSize = width;
Pel* pDst = rpDst;
// Map the mode index to main prediction direction and angle
assert( dirMode > 0 ); //no planar
// mode 1 is DC; modes 2..17 are treated as horizontal-type, modes 18 and above as vertical-type
Bool modeDC = dirMode < 2;
Bool modeHor = !modeDC && (dirMode < 18);
Bool modeVer = !modeDC && !modeHor;
// signed distance of the mode index from VER_IDX (vertical-type) or, negated, from HOR_IDX (horizontal-type)
Int intraPredAngle = modeVer ? (Int)dirMode - VER_IDX : modeHor ? -((Int)dirMode - HOR_IDX) : 0;
Int absAng = abs(intraPredAngle);
Int signAng = intraPredAngle < 0 ? -1 : 1;
// Set bitshifts and scale the angle parameter to block size
// angTable maps the index distance to a per-row displacement in 1/32 sample units
// (see the ">> 5" and "& (32 - 1)" below); invAngTable holds the matching inverse angles
Int angTable[9] = {0, 2, 5, 9, 13, 17, 21, 26, 32};
Int invAngTable[9] = {0, 4096, 1638, 910, 630, 482, 390, 315, 256}; // (256 * 32) / Angle
Int invAngle = invAngTable[absAng];
absAng = angTable[absAng];
intraPredAngle = signAng * absAng;
// Do the DC prediction
if (modeDC)
{
// every sample of the block gets the same DC value
Pel dcval = predIntraGetPredValDC(pSrc, srcStride, width, height, blkAboveAvailable, blkLeftAvailable);
for (k=0;k<blkSize;k++)
{
for (l=0;l<blkSize;l++)
{
pDst[k*dstStride+l] = dcval;
}
}
}
// Do angular predictions
else
{
Pel* refMain;
Pel* refSide;
Pel refAbove[2*MAX_CU_SIZE+1];
Pel refLeft[2*MAX_CU_SIZE+1];
// Initialise the Main and Left reference array.
if (intraPredAngle < 0)
{
// negative angle: copy blkSize+1 samples of each side, stored at offset blkSize-1 so that
// negative indices of refMain stay inside the array
for (k=0;k<blkSize+1;k++)
{
refAbove[k+blkSize-1] = pSrc[k-srcStride-1];
}
for (k=0;k<blkSize+1;k++)
{
refLeft[k+blkSize-1] = pSrc[(k-1)*srcStride-1];
}
refMain = (modeVer ? refAbove : refLeft) + (blkSize-1);
refSide = (modeVer ? refLeft : refAbove) + (blkSize-1);
// Extend the Main reference to the left.
// the negative part of refMain is filled from refSide, stepping by invAngle in 8-bit fixed point
Int invAngleSum = 128; // rounding for (shift by 8)
for (k=-1; k>blkSize*intraPredAngle>>5; k--)
{
invAngleSum += invAngle;
refMain[k] = refSide[invAngleSum>>8];
}
}
else
{
// non-negative angle: copy 2*blkSize+1 samples of each side, no extension needed
for (k=0;k<2*blkSize+1;k++)
{
refAbove[k] = pSrc[k-srcStride-1];
}
for (k=0;k<2*blkSize+1;k++)
{
refLeft[k] = pSrc[(k-1)*srcStride-1];
}
refMain = modeVer ? refAbove : refLeft;
refSide = modeVer ? refLeft : refAbove;
}
if (intraPredAngle == 0)
{
// zero angle: every row is a straight copy of the main reference
for (k=0;k<blkSize;k++)
{
for (l=0;l<blkSize;l++)
{
pDst[k*dstStride+l] = refMain[l+1];
}
}
if ( bFilter )
{
// edge filter: add half the change along the side reference to the first sample of each row
// (rows/columns are as stored before the optional transpose below), clipped to the sample range
for (k=0;k<blkSize;k++)
{
pDst[k*dstStride] = Clip3(0, (1<<bitDepth)-1, pDst[k*dstStride] + (( refSide[k+1] - refSide[0] ) >> 1) );
}
}
}
else
{
Int deltaPos=0;
Int deltaInt;
Int deltaFract;
Int refMainIndex;
for (k=0;k<blkSize;k++)
{
// displacement of row k along the main reference: integer part and 1/32 fractional part
deltaPos += intraPredAngle;
deltaInt = deltaPos >> 5;
deltaFract = deltaPos & (32 - 1);
if (deltaFract)
{
// Do linear filtering
for (l=0;l<blkSize;l++)
{
refMainIndex = l+deltaInt+1;
pDst[k*dstStride+l] = (Pel) ( ((32-deltaFract)*refMain[refMainIndex]+deltaFract*refMain[refMainIndex+1]+16) >> 5 );
}
}
else
{
// Just copy the integer samples
for (l=0;l<blkSize;l++)
{
pDst[k*dstStride+l] = refMain[l+deltaInt+1];
}
}
}
}
// Flip the block if this is the horizontal mode
// (in-place transpose: horizontal-type modes were predicted as if vertical, with the roles of the two references swapped)
if (modeHor)
{
Pel tmp;
for (k=0;k<blkSize-1;k++)
{
for (l=k+1;l<blkSize;l++)
{
tmp = pDst[k*dstStride+l];
pDst[k*dstStride+l] = pDst[l*dstStride+k];
pDst[l*dstStride+k] = tmp;
}
}
}
}
}
具體的預測塊構建的原理:
HEVC中一共定義了35種幀內編碼預測模式,編號分別以0-34定義。其中模式0定義爲平面模式(INTRA_PLANAR),模式1定義爲均值模式(INTRA_DC),模式2~34定義爲角度預測模式(INTRA_ANGULAR2~INTRA_ANGULAR34),分別代表了不同的角度。具體的示意圖如標準文檔的圖8-1所示:
這三大類的預測方法均有實現的代碼。首先看最簡單的Intra_DC模式,該模式同角度預測模式實現在同一個函數Void TComPrediction::xPredIntraAng(...)中:
// Excerpt of TComPrediction::xPredIntraAng showing only the DC branch.
// modeDC, blkSize, pDst, k and l are set up in the elided part at the top of the function.
Void TComPrediction::xPredIntraAng(Int bitDepth, Int* pSrc, Int srcStride, Pel*& rpDst, Int dstStride, UInt width, UInt height, UInt dirMode, Bool blkAboveAvailable, Bool blkLeftAvailable, Bool bFilter )
{
//......
// Do the DC prediction
if (modeDC)
{
// a single DC value, computed from the available neighbours, fills the whole block
Pel dcval = predIntraGetPredValDC(pSrc, srcStride, width, height, blkAboveAvailable, blkLeftAvailable);
for (k=0;k<blkSize;k++)
{
for (l=0;l<blkSize;l++)
{
pDst[k*dstStride+l] = dcval;
}
}
}
//......
}
在這個函數中可以看到,Intra_DC模式中所有預測塊的像素值都是同一個值dcval,這個值是由一個函數predIntraGetPredValDC計算得到:
/**
 * Compute the DC prediction value of a block from its neighbouring reference samples.
 *
 * \param pSrc        reference samples; pSrc[x - iSrcStride] is read as the row above
 *                    the block and pSrc[y*iSrcStride - 1] as the column to its left
 * \param iSrcStride  stride of the reference buffer
 * \param iWidth      block width
 * \param iHeight     block height
 * \param bAbove      whether the above row may be used
 * \param bLeft       whether the left column may be used
 * \return rounded mean of the usable reference samples, or pSrc[-1] when neither side is usable
 */
Pel TComPrediction::predIntraGetPredValDC( Int* pSrc, Int iSrcStride, UInt iWidth, UInt iHeight, Bool bAbove, Bool bLeft )
{
  // No neighbour at all: the default DC value has already been placed in the
  // prediction array, so the sample in front of the block is returned as-is.
  if ( !bAbove && !bLeft )
  {
    return pSrc[-1];
  }

  // accumulate whichever sides are available
  Int iSum = 0;
  if ( bAbove )
  {
    for ( UInt x = 0; x < iWidth; x++ )
    {
      iSum += pSrc[(Int)x - iSrcStride];
    }
  }
  if ( bLeft )
  {
    for ( UInt y = 0; y < iHeight; y++ )
    {
      iSum += pSrc[(Int)y * iSrcStride - 1];
    }
  }

  // both sides: divide by the total sample count (the rounding offset iWidth is half
  // of that count when the block is square)
  if ( bAbove && bLeft )
  {
    return Pel( (iSum + iWidth) / (iWidth + iHeight) );
  }

  // exactly one side: rounded mean over that side only
  if ( bAbove )
  {
    return Pel( (iSum + iWidth/2) / iWidth );
  }
  return Pel( (iSum + iHeight/2) / iHeight );
}
在該函數中,編碼器通過判斷上方和左方參考像素是否有效而選擇將相應的數據(指針pSrc指向的數據)累加到iSum中,並對這些參考數據取平均返回。所以,在DC模式下,所有預測像素值都是同一個值,也即參考數據的均值,這也是DC模式命名的由來。
第二種預測模式是平面模式,該模式定義在xPredIntraPlanar函數中。
/**
 * Build the planar intra prediction of a square block.
 *
 * Each predicted sample is the rounded average of a horizontal interpolation
 * (between the left reference sample of its row and the top-right reference
 * sample) and a vertical interpolation (between the top reference sample of
 * its column and the bottom-left reference sample).
 *
 * \param pSrc       reference samples; pSrc[k - srcStride] is read as the row above
 *                   the block and pSrc[k*srcStride - 1] as the column to its left
 * \param srcStride  stride of the reference buffer
 * \param rpDst      destination of the prediction samples
 * \param dstStride  stride of rpDst
 * \param width      block width (must equal height and not exceed MAX_CU_SIZE)
 * \param height     block height
 */
Void TComPrediction::xPredIntraPlanar( Int* pSrc, Int srcStride, Pel* rpDst, Int dstStride, UInt width, UInt height )
{
  assert(width == height);
  assert(width <= MAX_CU_SIZE);

  Int k, l, bottomLeft, topRight;
  Int horPred;
  // topRow and leftColumn receive blkSize+1 samples (index blkSize holds the top-right /
  // bottom-left sample), so they need one entry more than MAX_CU_SIZE to stay in bounds
  // when width == MAX_CU_SIZE
  Int leftColumn[MAX_CU_SIZE+1], topRow[MAX_CU_SIZE+1], bottomRow[MAX_CU_SIZE], rightColumn[MAX_CU_SIZE];
  // signed copies keep the loop comparisons and index arithmetic free of signed/unsigned mixing
  const Int  blkSize  = (Int)width;
  const Int  offset2D = (Int)width;                       // rounding offset for the final shift
  const UInt shift1D  = g_aucConvertToBit[ width ] + 2;   // presumably log2(width) - confirm against g_aucConvertToBit
  const UInt shift2D  = shift1D + 1;

  // Get left and above reference column and row
  for(k=0;k<blkSize+1;k++)
  {
    topRow[k]     = pSrc[k-srcStride];
    leftColumn[k] = pSrc[k*srcStride-1];
  }

  // Prepare intermediate variables used in interpolation
  bottomLeft = leftColumn[blkSize];
  topRight   = topRow[blkSize];
  for (k=0;k<blkSize;k++)
  {
    // per-step increments of the vertical / horizontal interpolation
    bottomRow[k]   = bottomLeft - topRow[k];
    rightColumn[k] = topRight   - leftColumn[k];
    // scale the starting values so that adding one increment per step interpolates
    topRow[k]      <<= shift1D;
    leftColumn[k]  <<= shift1D;
  }

  // Generate prediction signal
  for (k=0;k<blkSize;k++)
  {
    horPred = leftColumn[k] + offset2D;
    for (l=0;l<blkSize;l++)
    {
      horPred   += rightColumn[k];   // one step further to the right
      topRow[l] += bottomRow[l];     // one step further down in column l
      rpDst[k*dstStride+l] = ( (horPred + topRow[l]) >> shift2D );
    }
  }
}
首先從參考數據中獲取的是頂行和左列的數據,並記錄一下左下角和右上角的兩個像素值。然後計算bottomRow和rightColumn兩組數據,方法是用左下角的像素減去頂行相應位置的像素得到bottomRow,右上角的像素減去左列相應位置的像素得到rightColumn;這兩組差值實際上是垂直方向和水平方向上逐行、逐列累加的增量。預測塊中每個像素的值,是水平方向(左列像素與右上角像素之間)和垂直方向(頂行像素與左下角像素之間)兩個線性插值結果的平均。
第三種預測模式,即mode=2~34時採用角度預測模式。實現的方式在xPredIntraAng中:
// Excerpt of TComPrediction::xPredIntraAng showing the angular branch.
// The DC branch (the "if" belonging to the "else" below) is replaced by "// ......".
Void TComPrediction::xPredIntraAng(Int bitDepth, Int* pSrc, Int srcStride, Pel*& rpDst, Int dstStride, UInt width, UInt height, UInt dirMode, Bool blkAboveAvailable, Bool blkLeftAvailable, Bool bFilter )
{
Int k,l;
Int blkSize = width;
Pel* pDst = rpDst;
// Map the mode index to main prediction direction and angle
assert( dirMode > 0 ); //no planar
// mode 1 is DC; modes 2..17 are treated as horizontal-type, modes 18 and above as vertical-type
Bool modeDC = dirMode < 2;
Bool modeHor = !modeDC && (dirMode < 18);
Bool modeVer = !modeDC && !modeHor;
Int intraPredAngle = modeVer ? (Int)dirMode - VER_IDX : modeHor ? -((Int)dirMode - HOR_IDX) : 0;// signed index distance between the current mode and the pure vertical / horizontal mode
Int absAng = abs(intraPredAngle);
Int signAng = intraPredAngle < 0 ? -1 : 1;
// Set bitshifts and scale the angle parameter to block size
// angTable maps the index distance to a per-row displacement in 1/32 sample units
// (see the ">> 5" and "& (32 - 1)" below); invAngTable holds the matching inverse angles
Int angTable[9] = {0, 2, 5, 9, 13, 17, 21, 26, 32};
Int invAngTable[9] = {0, 4096, 1638, 910, 630, 482, 390, 315, 256}; // (256 * 32) / Angle
Int invAngle = invAngTable[absAng];
absAng = angTable[absAng];
intraPredAngle = signAng * absAng;
// ......
// Do angular predictions
else
{
Pel* refMain;
Pel* refSide;
Pel refAbove[2*MAX_CU_SIZE+1];
Pel refLeft[2*MAX_CU_SIZE+1];
// Initialise the Main and Left reference array.
if (intraPredAngle < 0)
{
// negative angle: copy blkSize+1 samples of each side, stored at offset blkSize-1 so that
// negative indices of refMain stay inside the array
for (k=0;k<blkSize+1;k++)
{
refAbove[k+blkSize-1] = pSrc[k-srcStride-1];
}
for (k=0;k<blkSize+1;k++)
{
refLeft[k+blkSize-1] = pSrc[(k-1)*srcStride-1];
}
refMain = (modeVer ? refAbove : refLeft) + (blkSize-1);
refSide = (modeVer ? refLeft : refAbove) + (blkSize-1);
// Extend the Main reference to the left.
// the negative part of refMain is filled from refSide, stepping by invAngle in 8-bit fixed point
Int invAngleSum = 128; // rounding for (shift by 8)
for (k=-1; k>blkSize*intraPredAngle>>5; k--)
{
invAngleSum += invAngle;
refMain[k] = refSide[invAngleSum>>8];
}
}
else
{
// non-negative angle: copy 2*blkSize+1 samples of each side, no extension needed
for (k=0;k<2*blkSize+1;k++)
{
refAbove[k] = pSrc[k-srcStride-1];
}
for (k=0;k<2*blkSize+1;k++)
{
refLeft[k] = pSrc[(k-1)*srcStride-1];
}
refMain = modeVer ? refAbove : refLeft;
refSide = modeVer ? refLeft : refAbove;
}
if (intraPredAngle == 0)
{
// zero angle: every row is a straight copy of the main reference
for (k=0;k<blkSize;k++)
{
for (l=0;l<blkSize;l++)
{
pDst[k*dstStride+l] = refMain[l+1];
}
}
if ( bFilter )
{
// edge filter: add half the change along the side reference to the first sample of each row
// (rows/columns are as stored before the optional transpose below), clipped to the sample range
for (k=0;k<blkSize;k++)
{
pDst[k*dstStride] = Clip3(0, (1<<bitDepth)-1, pDst[k*dstStride] + (( refSide[k+1] - refSide[0] ) >> 1) );
}
}
}
else
{
Int deltaPos=0;
Int deltaInt;
Int deltaFract;
Int refMainIndex;
for (k=0;k<blkSize;k++)
{
// displacement of row k along the main reference: integer part and 1/32 fractional part
deltaPos += intraPredAngle;
deltaInt = deltaPos >> 5;
deltaFract = deltaPos & (32 - 1);
if (deltaFract)
{
// Do linear filtering
for (l=0;l<blkSize;l++)
{
refMainIndex = l+deltaInt+1;
pDst[k*dstStride+l] = (Pel) ( ((32-deltaFract)*refMain[refMainIndex]+deltaFract*refMain[refMainIndex+1]+16) >> 5 );
}
}
else
{
// Just copy the integer samples
for (l=0;l<blkSize;l++)
{
pDst[k*dstStride+l] = refMain[l+deltaInt+1];
}
}
}
}
// Flip the block if this is the horizontal mode
// (in-place transpose: horizontal-type modes were predicted as if vertical, with the roles of the two references swapped)
if (modeHor)
{
Pel tmp;
for (k=0;k<blkSize-1;k++)
{
for (l=k+1;l<blkSize;l++)
{
tmp = pDst[k*dstStride+l];
pDst[k*dstStride+l] = pDst[l*dstStride+k];
pDst[l*dstStride+k] = tmp;
}
}
}
}
}
除此之外,當bFilter爲真(調用方只對寬和高均不超過16的塊傳入true)且預測方向爲純垂直或純水平(intraPredAngle==0)時,這個函數還會對預測塊每一行的第一個像素做邊界濾波;對於水平類模式(modeHor),最後還會將預測矩陣進行轉置操作。
大致上Intra預測塊的生成方法就這樣了,下一個問題在於,參考像素是如何來的?pSrc指針指向的數據又是如何獲取的?
HEVC參考軟件HM中Intra預測參考像素的獲取與管理
繼續上一個section所討論的問題。在section 33中討論了HEVC幀內預測的幾種不同模式,代表這幾種模式的函數xPredIntraPlanar、xPredIntraAng和xDCPredFiltering調用的位置位於Void TComPrediction::predIntraLumaAng()中,所以也可以說,在一個PU內,函數Void TComPrediction::predIntraLumaAng實現了亮度分量的幀內預測。該函數的實現方法如下:
/**
 * Generate the luma intra prediction of one square block.
 *
 * \param pcTComPattern  pattern object that supplies the reference-sample buffer
 * \param uiDirMode      intra prediction mode index
 * \param piPred         destination of the prediction samples
 * \param uiStride       stride of piPred
 * \param iWidth         block width (must equal iHeight)
 * \param iHeight        block height
 * \param bAbove         whether the above neighbour is available
 * \param bLeft          whether the left neighbour is available
 */
Void TComPrediction::predIntraLumaAng(TComPattern* pcTComPattern, UInt uiDirMode, Pel* piPred, UInt uiStride, Int iWidth, Int iHeight, Bool bAbove, Bool bLeft )
{
  assert( g_aucConvertToBit[ iWidth ] >= 0 ); // 4x 4
  assert( g_aucConvertToBit[ iWidth ] <= 5 ); // 128x128
  assert( iWidth == iHeight );

  Pel* pDst = piPred;

  // pointer to the reference samples for this mode and block size, taken from m_piYuvExt
  Int* const ptrSrc = pcTComPattern->getPredictorPtr( uiDirMode, g_aucConvertToBit[ iWidth ] + 2, m_piYuvExt );

  // the reference buffer has 2*iWidth+1 samples per line; skipping one line plus one
  // sample gives the position handed to the prediction routines
  const Int refStride = 2 * iWidth + 1;
  Int* const pRefStart = ptrSrc + refStride + 1;

  // intra planar mode has its own routine
  if ( uiDirMode == PLANAR_IDX )
  {
    xPredIntraPlanar( pRefStart, refStride, pDst, uiStride, iWidth, iHeight );
    return;
  }

  // DC and angular modes share xPredIntraAng; the filter flag is only set for
  // blocks that are not larger than 16 in either dimension
  const Bool bSmallBlock = !( (iWidth > 16) || (iHeight > 16) );
  xPredIntraAng(g_bitDepthY, pRefStart, refStride, pDst, uiStride, iWidth, iHeight, uiDirMode, bAbove, bLeft, bSmallBlock );

  // special handling of blocks up to 16x16: extra DC filtering when both neighbours are available
  if ( bSmallBlock && (uiDirMode == DC_IDX ) && bAbove && bLeft )
  {
    xDCPredFiltering( pRefStart, refStride, pDst, uiStride, iWidth, iHeight);
  }
}
該函數中存在一個非常關鍵的指針變量ptrSrc,指向的是當前塊的參考數據。這個指針通過m_piYuvExt計算得來,方法是pcTComPattern->getPredictorPtr:
/**
 * Select the reference-sample buffer to use for a given intra mode and block size.
 *
 * \param uiDirMode    intra prediction mode index
 * \param log2BlkSize  log2 of the block size, in the range [2, 6]
 * \param piAdiBuf     base of the reference-sample storage
 * \return pointer to the unfiltered reference samples, or to the set that follows
 *         them in the same storage when the mode/size combination asks for filtering
 */
Int* TComPattern::getPredictorPtr( UInt uiDirMode, UInt log2BlkSize, Int* piAdiBuf )
{
  assert(log2BlkSize >= 2 && log2BlkSize < 7);

  // distance of the mode index from the nearer of the pure horizontal / vertical modes
  const Int distToHor = abs((Int) uiDirMode - HOR_IDX);
  const Int distToVer = abs((Int)uiDirMode - VER_IDX);
  const Int diff      = min<Int>(distToHor, distToVer);

  // filtered samples are used when that distance exceeds the per-size threshold;
  // no smoothing for DC or LM chroma
  UChar ucFiltIdx = 0;
  if ( uiDirMode != DC_IDX && diff > m_aucIntraFilter[log2BlkSize - 2] )
  {
    ucFiltIdx = 1;
  }

  Int width  = 1 << log2BlkSize;
  Int height = 1 << log2BlkSize;

  // according to the article's author this helper simply returns piAdiBuf - confirm in getAdiOrgBuf
  Int* piSrc = getAdiOrgBuf( width, height, piAdiBuf );

  // the filtered set starts one full (2*width+1) x (2*height+1) block after the unfiltered one
  if ( ucFiltIdx )
  {
    piSrc += (2 * width + 1) * (2 * height + 1);
  }
  return piSrc;
}
該函數首先判斷當前的幀內預測方向同HOR_IDX、VER_IDX兩個預設模式之絕對差的較小值,與某一個預定義的Filter指示標識(m_aucIntraFilter)進行比較。m_aucIntraFilter定義爲:
// Per-block-size thresholds used by getPredictorPtr(), indexed by log2BlkSize - 2.
// The filtered reference samples are selected when the mode's index distance to the
// nearer of HOR_IDX / VER_IDX is strictly greater than the entry for the block size.
const UChar TComPattern::m_aucIntraFilter[5] =
{
10, //4x4
7, //8x8
1, //16x16
0, //32x32
10, //64x64
};
我們已經知道,HOR_IDX = 10,VER_IDX = 26,uiDirMode共有0~34這35個取值。所以diff的取值範圍只有[0, 10],結合m_aucIntraFilter的定義來看,可以認爲對於4×4和64×64的尺寸,ucFiltIdx始終爲0;對於其他尺寸,塊越大越需要濾波,對於32×32的塊,除DC模式以及純水平、純垂直模式(diff爲0)之外都需要濾波操作(至於如何進行濾波將在後面研究),而取濾波後的數據就是將指針piSrc向後移動一段距離,這段距離剛好是一組Intra參考數據的長度。
回到上一級函數之後,發現getPredictorPtr所操作的數據地址指針,其實就是m_piYuvExt。看來文章就在這個指針變量中了。m_piYuvExt定義在TComPrediction類中,在其構造函數中初始化,在析構函數中釋放內存。分配相應的內存空間在函數Void TComPrediction::initTempBuff()中實現,這個函數在編碼開始之前就會被調用。
實際的參考數據呢?實際上,在對當前PU的每一種模式進行遍歷(TEncSearch::estIntraPredQT函數)之前,會有專門操作對m_piYuvExt進行數據填充操作,具體的操作在TComPattern::initAdiPattern中實現。該函數比較長就不貼在這裏了,裏面的核心部分是調用了fillReferenceSamples函數填充參考數據,隨後生成Intra預測的濾波參考數據。下篇研究fillReferenceSamples的實現以及Intra參考數據濾波的原理。
幀內預測參考數據的獲取和濾波處理
幀內預測的參考像素值的獲取在標準文檔的8.4.4.2.2中指明。
舉例說明,當前demo中,我們用來單步調試的第一個CU爲64×64像素大小,那麼參考像素由兩部分組成,一部分包含2×64+1=129個,另一部分包含2×64=128個像素。這兩部分分別作爲垂直和水平方向上的預測數據。在編碼的過程中,根據預測數據是否可得,共分爲兩種情況:
第一種:所有的預測數據都不可得。最直觀的情況就是一幀數據中的第一個CU,該CU左側和上方的數據都不存在,如下圖所示。此時所有的預測數據都會指定一個默認值,計算方法爲:1 << (bitDepth - 1);(圖中的格子數只是示意圖,不代表CU的像素大小和參考像素的個數)。
第二種:至少有一個像素點是可獲得的,如下圖所示。如果參考數據中的第一個點是不可獲得的,那麼將沿着當前CU的邊緣,先從下到上,後從左到右查找第一個可獲得的參考點並賦給第一個點;對於其他的點,如果不可得,那麼就直接複製它前面一個參考點的值。如果所有點都是可獲得的,那麼參考數據直接使用該值就可以了。
基本算法已經明瞭,接下來研究一下HM中的實現。代碼如下:
/**
 * Fill the intra reference-sample buffer piAdiTemp for one block.
 *
 * piAdiTemp is written as a 2-D array with row stride uiWidth: row 0 receives the
 * above-left corner followed by the above and above-right samples, column 0 of the
 * following rows receives the left and below-left samples.
 *
 * Three cases are distinguished by iNumIntraNeighbor:
 *  - 0: no neighbour is available, everything is set to 1 << (bitDepth - 1);
 *  - iTotalUnits: every neighbour is available, reconstructed samples are copied directly;
 *  - otherwise: available samples are gathered into a 1-D line, unavailable units are
 *    padded from their neighbours, and the line is then copied to piAdiTemp.
 *
 * \param bitDepth            sample bit depth, used for the default value
 * \param piRoiOrigin         top-left sample of the current block in the reconstructed picture
 * \param piAdiTemp           output buffer
 * \param bNeighborFlags      per-unit availability flags, in the same order as the 1-D line
 *                            (below-left first, above-right last)
 * \param iNumIntraNeighbor   number of available units
 * \param iUnitSize           number of samples per availability unit
 * \param iNumUnitsInCu       number of units along one side of the block
 * \param iTotalUnits         total number of units described by bNeighborFlags
 * \param uiCuWidth           block width
 * \param uiCuHeight          block height
 * \param uiWidth             width (row stride) of the output buffer
 * \param uiHeight            height of the output buffer
 * \param iPicStride          stride of the reconstructed picture
 * \param bLMmode             when set, the left samples are taken from the second column to the left
 */
Void TComPattern::fillReferenceSamples(Int bitDepth, Pel* piRoiOrigin, Int* piAdiTemp, Bool* bNeighborFlags, Int iNumIntraNeighbor, Int iUnitSize, Int iNumUnitsInCu, Int iTotalUnits, UInt uiCuWidth, UInt uiCuHeight, UInt uiWidth, UInt uiHeight, Int iPicStride, Bool bLMmode )
{
Pel* piRoiTemp;
Int i, j;
Int iDCValue = 1 << (bitDepth - 1);
if (iNumIntraNeighbor == 0)// no reference sample is available: fill everything with the default value
{
// Fill border with DC value
for (i=0; i<uiWidth; i++)
{
piAdiTemp[i] = iDCValue;// piAdiTemp is the destination buffer that receives the reference samples
}
for (i=1; i<uiHeight; i++)
{
piAdiTemp[i*uiWidth] = iDCValue;
}
}
else if (iNumIntraNeighbor == iTotalUnits)// all reference samples are available: copy the reconstructed samples directly
{
// Fill top-left border with rec. samples
piRoiTemp = piRoiOrigin - iPicStride - 1;// the single sample diagonally above-left of the block
piAdiTemp[0] = piRoiTemp[0];
// Fill left border with rec. samples
piRoiTemp = piRoiOrigin - 1;// the sample immediately left of the block's top-left sample
if (bLMmode)
{
piRoiTemp --; // move to the second left column
}
for (i=0; i<uiCuHeight; i++)// copy the left column
{
piAdiTemp[(1+i)*uiWidth] = piRoiTemp[0];
piRoiTemp += iPicStride;
}
// Fill below left border with rec. samples
for (i=0; i<uiCuHeight; i++)// continue down the same column for the below-left samples
{
piAdiTemp[(1+uiCuHeight+i)*uiWidth] = piRoiTemp[0];
piRoiTemp += iPicStride;
}
// Fill top border with rec. samples
piRoiTemp = piRoiOrigin - iPicStride;// the sample directly above the block's top-left sample
for (i=0; i<uiCuWidth; i++)
{
piAdiTemp[1+i] = piRoiTemp[i];
}
// Fill top right border with rec. samples
piRoiTemp = piRoiOrigin - iPicStride + uiCuWidth;// first sample of the above-right neighbour
for (i=0; i<uiCuWidth; i++)
{
piAdiTemp[1+uiCuWidth+i] = piRoiTemp[i];
}
}
else // reference samples are partially available
{
// piAdiLine is a 1-D line of samples: below-left and left samples first (bottom to top),
// then the above-left unit, then the above and above-right samples (left to right)
Int iNumUnits2 = iNumUnitsInCu<<1;
Int iTotalSamples = iTotalUnits*iUnitSize;
Pel piAdiLine[5 * MAX_CU_SIZE];
Pel *piAdiLineTemp;
Bool *pbNeighborFlags;
Int iNext, iCurr;
Pel piRef = 0;
// Initialize
for (i=0; i<iTotalSamples; i++)// start with the default value everywhere
{
piAdiLine[i] = iDCValue;
}
// Fill top-left sample
piRoiTemp = piRoiOrigin - iPicStride - 1;// the reconstructed sample above-left of the block
piAdiLineTemp = piAdiLine + (iNumUnits2*iUnitSize);
pbNeighborFlags = bNeighborFlags + iNumUnits2;
if (*pbNeighborFlags)// the above-left neighbour is available
{
// the single corner sample is replicated over the whole unit
piAdiLineTemp[0] = piRoiTemp[0];
for (i=1; i<iUnitSize; i++)
{
piAdiLineTemp[i] = piAdiLineTemp[0];
}
}
// Fill left & below-left samples
piRoiTemp += iPicStride;// move from the above-left sample down to the first left sample
if (bLMmode)
{
piRoiTemp --; // move the second left column
}
piAdiLineTemp--;// step the line pointer back by one sample
pbNeighborFlags--;// step the availability pointer back by one unit
for (j=0; j<iNumUnits2; j++)
{
if (*pbNeighborFlags)
{
for (i=0; i<iUnitSize; i++)// samples are copied one unit at a time; the article's example: for a 32x32 CU the 64 left and below-left samples take 16 units of 4 assignments (assuming iUnitSize is 4)
{
piAdiLineTemp[-i] = piRoiTemp[i*iPicStride];
}
}
piRoiTemp += iUnitSize*iPicStride;
piAdiLineTemp -= iUnitSize;
pbNeighborFlags--;
}
// Fill above & above-right samples
piRoiTemp = piRoiOrigin - iPicStride;// the row above is handled in the same way as the left column
piAdiLineTemp = piAdiLine + ((iNumUnits2+1)*iUnitSize);
pbNeighborFlags = bNeighborFlags + iNumUnits2 + 1;
for (j=0; j<iNumUnits2; j++)
{
if (*pbNeighborFlags)
{
for (i=0; i<iUnitSize; i++)
{
piAdiLineTemp[i] = piRoiTemp[i];
}
}
piRoiTemp += iUnitSize;
piAdiLineTemp += iUnitSize;
pbNeighborFlags++;
}
// Pad reference samples when necessary
iCurr = 0;
iNext = 1;
piAdiLineTemp = piAdiLine;// rewind to the start of the line
while (iCurr < iTotalUnits)// walk over all units
{
if (!bNeighborFlags[iCurr])// this unit is unavailable
{
if(iCurr == 0)// the very first unit is unavailable
{
while (iNext < iTotalUnits && !bNeighborFlags[iNext])// search forward for the first available unit
{
iNext++;
}
piRef = piAdiLine[iNext*iUnitSize];// remember the first sample of that unit
// Pad unavailable samples with new value
while (iCurr < iNext)// fill every unit before it with that value, iUnitSize samples at a time
{
for (i=0; i<iUnitSize; i++)
{
piAdiLineTemp[i] = piRef;
}
piAdiLineTemp += iUnitSize;
iCurr++;
}
}
else
{
piRef = piAdiLine[iCurr*iUnitSize-1];// not the first unit: replicate the last sample of the preceding unit
for (i=0; i<iUnitSize; i++)
{
piAdiLineTemp[i] = piRef;
}
piAdiLineTemp += iUnitSize;
iCurr++;
}
}
else// this unit is available: nothing to pad
{
piAdiLineTemp += iUnitSize;
iCurr++;
}
}
// Copy processed samples to the output buffer
// first the top row (corner, above, above-right) ...
piAdiLineTemp = piAdiLine + uiHeight + iUnitSize - 2;
for (i=0; i<uiWidth; i++)
{
piAdiTemp[i] = piAdiLineTemp[i];
}
// ... then the left column, read backwards from the line because it was stored bottom-to-top
piAdiLineTemp = piAdiLine + uiHeight - 1;
for (i=1; i<uiHeight; i++)
{
piAdiTemp[i*uiWidth] = piAdiLineTemp[-i];
}
}
}
對幀內預測參考數據進行濾波處理
在幀內預測的過程中,獲取臨近的Prediction Unit的邊緣數據作爲當前PU的參考數據。數據獲取完成後,並不一定會直接使用這些數據進行預測,而可能會先將這些預測數據進行一次濾波操作。幀內參考像素的濾波在標準文檔的8.4.4.2.3節詳述。
幀內參考像素的濾波使能標記由一個標誌位filterFlag標識。該標誌位的判定方法爲:
1、如果當前預測模式爲DC模式,或者幀內預測的PU爲4×4大小時,filterFlag一律爲0;
2、計算當前的Intra預測模式同“水平”和“垂直”預測模式的index之間的差值;將這個差值同針對不同大小PU所分別設定的閾值(對於8×8PU爲7,對於16×16PU爲1,對於32×32PU爲0)進行比較,如果大於閾值則filterFlag爲1,否則爲0。
思想:對於角度預測而言,該算法的目的是對不同的PU大小和預測方向進行區分,越小的PU越不需要濾波,越接近於“水平”和“垂直”的預測模式越不需要濾波。也就是說,4×4PU全不需要濾波,8×8PU只有接近於對角線的部分模式需要濾波,16×16PU除了水平、垂直以及與它們index相差不超過1的模式之外都需要濾波,而32×32PU除了純水平和純垂直模式之外都需要濾波(DC模式始終不濾波)。
當設定爲需要濾波時,濾波操作根據一個開關變量bInitFlag又有所區分。bInitFlag的判定方法如下:
1、SPS中指定的一個設置位strong_intra_smoothing_enabled_flag設置爲1,並且PU大小爲32×32,並且指定參考點數據之間的差值不是很大(具體的判定方法見標準文檔)的時候,該標誌位設爲1;
2、其他情況下,該標誌位設爲0。
設定bInitFlag完成後,根據該標識取值,濾波過程分爲兩種不同情況:
1、當bInitFlag取1時,緩存區中兩個端點和中心點不進行濾波,其他值根據距離這三個點的距離不同進行加權平均濾波;
2、當bInitFlag取0時,緩存區中的相鄰數據進行[1,2,1]平滑濾波。
代碼中的實現方法如下,很容易看出代碼的實現和標準文檔是匹配的:
// Excerpt of TComPattern::initAdiPattern showing only the reference-sample smoothing.
// piFilterBuf (input line), piFilterBufN (filtered output), iBufSize, uiCuWidth/uiCuHeight,
// uiCuWidth2/uiCuHeight2 and i are set up in the elided part; judging by the shift comment
// below, uiCuHeight2 is twice uiCuHeight (uiCuWidth2 presumably likewise - confirm).
Void TComPattern::initAdiPattern( TComDataCU* pcCU, UInt uiZorderIdxInPart, UInt uiPartDepth, Int* piAdiBuf, Int iOrgBufStride, Int iOrgBufHeight, Bool& bAbove, Bool& bLeft, Bool bLMmode )
{
//......
if (pcCU->getSlice()->getSPS()->getUseStrongIntraSmoothing())
{
Int blkSize = 32;
// the three anchor samples of the line: its first, middle (index uiCuHeight2) and last entries
Int bottomLeft = piFilterBuf[0];
Int topLeft = piFilterBuf[uiCuHeight2];
Int topRight = piFilterBuf[iBufSize-1];
Int threshold = 1 << (g_bitDepthY - 5);
// each half of the line counts as "flat enough" when its midpoint deviates from the
// average of its two anchors by less than the threshold
Bool bilinearLeft = abs(bottomLeft+topLeft-2*piFilterBuf[uiCuHeight]) < threshold;
Bool bilinearAbove = abs(topLeft+topRight-2*piFilterBuf[uiCuHeight2+uiCuHeight]) < threshold;
if (uiCuWidth>=blkSize && (bilinearLeft && bilinearAbove))
{
// strong smoothing: keep the three anchors and replace everything in between by
// linear interpolation between neighbouring anchors
Int shift = g_aucConvertToBit[uiCuWidth] + 3; // log2(uiCuHeight2)
piFilterBufN[0] = piFilterBuf[0];
piFilterBufN[uiCuHeight2] = piFilterBuf[uiCuHeight2];
piFilterBufN[iBufSize - 1] = piFilterBuf[iBufSize - 1];
for (i = 1; i < uiCuHeight2; i++)
{
piFilterBufN[i] = ((uiCuHeight2-i)*bottomLeft + i*topLeft + uiCuHeight) >> shift;
}
for (i = 1; i < uiCuWidth2; i++)
{
piFilterBufN[uiCuHeight2 + i] = ((uiCuWidth2-i)*topLeft + i*topRight + uiCuWidth) >> shift;
}
}
else
{
// 1. filtering with [1 2 1]
// the two end samples are copied unfiltered
piFilterBufN[0] = piFilterBuf[0];
piFilterBufN[iBufSize - 1] = piFilterBuf[iBufSize - 1];
for (i = 1; i < iBufSize - 1; i++)
{
piFilterBufN[i] = (piFilterBuf[i - 1] + 2 * piFilterBuf[i]+piFilterBuf[i + 1] + 2) >> 2;
}
}
}
else
{
// strong smoothing disabled in the SPS: always use the [1 2 1] filter
// 1. filtering with [1 2 1]
piFilterBufN[0] = piFilterBuf[0];
piFilterBufN[iBufSize - 1] = piFilterBuf[iBufSize - 1];
for (i = 1; i < iBufSize - 1; i++)
{
piFilterBufN[i] = (piFilterBuf[i - 1] + 2 * piFilterBuf[i]+piFilterBuf[i + 1] + 2) >> 2;
}
}
//......
}