int main(int argc, char* argv[])  
{  
    TAppEncTop  cTAppEncTop;  
  
    // print information  
    fprintf( stdout, "\n" );  
    fprintf( stdout, "HM software: Encoder Version [%s]", NV_VERSION );  
    fprintf( stdout, NVM_ONOS );  
    fprintf( stdout, NVM_COMPILEDBY );  
    fprintf( stdout, NVM_BITS );  
    fprintf( stdout, "\n" );  
  
    // create application encoder class  
    cTAppEncTop.create();  
  
    // parse configuration  
    try  
    {  
        if(!cTAppEncTop.parseCfg( argc, argv ))  
        {  
            cTAppEncTop.destroy();  
            return 1;  
        }  
    }  
    catch (po::ParseFailure& e)  
    {  
        cerr << "Error parsing option \""<< e.arg <<"\" with argument \""<< e.val <<"\"." << endl;  
        return 1;  
    }  
  
    // starting time  
    double dResult;  
    long lBefore = clock();  
  
    // call encoding function  
    cTAppEncTop.encode();  
  
    // ending time  
    dResult = (double)(clock()-lBefore) / CLOCKS_PER_SEC;  
    printf("\n Total Time: %12.3f sec.\n", dResult);  
  
    // destroy application encoder class  
    cTAppEncTop.destroy();  
  
    return 0;  
}

可以很清楚地看到，整个main函数非常简洁清晰，主要可以分为以下几个部分：输出软件信息、创建编码器类的实例、解析配置文件、获取开始时间、编码数据、计算耗费时间和销毁编码器类的实例。我们主要关心的编码过程仅通过调用编码器实例的一个方法实现：

// call encoding function  
cTAppEncTop.encode();

该函数的实现如下：

/**
 * Run one complete encoding session.
 *
 * Opens the output bitstream file, initialises the encoder library, then
 * repeatedly reads one input picture, passes it to m_cTEncTop.encode() and
 * writes any access units that were produced. When the loop ends it prints
 * the summary and releases the buffers and library objects.
 *
 * Calls exit(EXIT_FAILURE) if the bitstream file cannot be opened for writing.
 */
Void TAppEncTop::encode()  
{  
    fstream bitstreamFile(m_pchBitstreamFile, fstream::binary | fstream::out);  
    if (!bitstreamFile)  
    {  
        fprintf(stderr, "\nfailed to open bitstream file `%s' for writing\n", m_pchBitstreamFile);  
        exit(EXIT_FAILURE);  
    }  
  
    // Original (input) picture buffer is owned here; the reconstruction
    // buffer pointer is filled in by xGetBuffer() on every loop iteration.
    TComPicYuv*       pcPicYuvOrg = new TComPicYuv;  
    TComPicYuv*       pcPicYuvRec = NULL;  
  
    // initialize internal class & member variables  
    xInitLibCfg();  
    xCreateLib();  
    xInitLib();  
  
    // main encoder loop  
    Int   iNumEncoded = 0;  
    Bool  bEos = false;  
  
    list<AccessUnit> outputAccessUnits; ///< list of access units to write out.  is populated by the encoding process  
  
    // allocate original YUV buffer  
    pcPicYuvOrg->create( m_iSourceWidth, m_iSourceHeight, m_uiMaxCUWidth, m_uiMaxCUHeight, m_uiMaxCUDepth );  
  
    while ( !bEos )  
    {  
        // get buffers  
        xGetBuffer(pcPicYuvRec);  
  
        // read input YUV file  
        m_cTVideoIOYuvInputFile.read( pcPicYuvOrg, m_aiPad );  
  
        // increase number of received frames  
        m_iFrameRcvd++;  
  
        // stop once the requested number of frames has been received
        bEos = (m_iFrameRcvd == m_framesToBeEncoded);  
  
        Bool flush = 0;  
        // if end of file (which is only detected on a read failure) flush the encoder of any queued pictures  
        if (m_cTVideoIOYuvInputFile.isEof())  
        {  
            flush = true;  
            bEos = true;  
            // the read above did not deliver a picture, so undo the increment
            // and tell the encoder how many frames were actually received
            m_iFrameRcvd--;  
            m_cTEncTop.setFramesToBeEncoded(m_iFrameRcvd);  
        }  
  
        // call encoding function for one frame  
        // (a null picture pointer is passed when flushing)
        m_cTEncTop.encode( bEos, flush ? 0 : pcPicYuvOrg, m_cListPicYuvRec, outputAccessUnits, iNumEncoded );  
  
        // write bitstream to file if necessary  
        if ( iNumEncoded > 0 )  
        {  
            xWriteOutput(bitstreamFile, iNumEncoded, outputAccessUnits);  
            outputAccessUnits.clear();  
        }  
    }  
  
    m_cTEncTop.printSummary();  
  
    // delete original YUV buffer  
    pcPicYuvOrg->destroy();  
    delete pcPicYuvOrg;  
    pcPicYuvOrg = NULL;  
  
    // delete used buffers in encoder class  
    m_cTEncTop.deletePicBuffer();  
  
    // delete buffers & classes  
    xDeleteBuffer();  
    xDestroyLib();  
  
    printRateSummary();  
  
    return;  
}

该函数中首先调用pcPicYuvOrg->create( m_iSourceWidth, m_iSourceHeight, m_uiMaxCUWidth, m_uiMaxCUHeight, m_uiMaxCUDepth )分配YUV数据缓存，然后在while循环中逐帧读取YUV数据、设置当前已接收的帧数、编码当前帧、写出码流，随后做其他清理工作。核心功能实现在m_cTEncTop.encode( bEos, flush ? 0 : pcPicYuvOrg, m_cListPicYuvRec, outputAccessUnits, iNumEncoded )函数中。在该函数中调用m_cGOPEncoder.compressGOP(m_iPOCLast, m_iNumPicRcvd, m_cListPic, rcListPicYuvRecOut, accessUnitsOut)进行编码一个GOP的操作。这个函数奇长无比，用了接近1500行代码，看来实现了很多很多很多的功能。这个碉堡了的函数究竟做了些啥事儿呢？这个函数中大部分内容就是在为编码当前slice做准备，以及编码完成之后的一些辅助操作。实际编码过程的操作由函数m_pcSliceEncoder->compressSlice( pcPic )实现。

这又是一个碉堡了的函数，占了将近400行……代码就不贴了，会死人的……简单看下好了。

首先还是各种编码的配置，包括配置熵编码器、初始化CU编码器等。在完成了一长串的设置之后，在compressCU函数中实现对一个CU的编码：

m_pcCuEncoder->compressCU( pcCU );

在一个compressSlice()中，在compressCU函数中实现对一个CU的编码，其中主要进行了CU的初始化，以及实际的编码操作。

/**
 * Analyse and compress one CU, starting at depth 0.
 *
 * Initialises the depth-0 "best" and "temp" working CUs from the picture and
 * address of rpcCU, then hands them to xCompressCU() for the actual analysis.
 *
 * \param rpcCU  CU to be compressed
 */
Void TEncCu::compressCU( TComDataCU*& rpcCU )  
{  
  // Both depth-0 candidates start from the same picture / CU address.
  m_ppcBestCU[0]->initCU( rpcCU->getPic(), rpcCU->getAddr() );  
  m_ppcTempCU[0]->initCU( rpcCU->getPic(), rpcCU->getAddr() );  
  
#if RATE_CONTROL_LAMBDA_DOMAIN  
  // Reset the SAD accumulators before analysing this CU.
  m_addSADDepth      = 0;  
  m_LCUPredictionSAD = 0;  
  m_temporalSAD      = 0;  
#endif  
  
  // Analysis starts at depth 0.
  xCompressCU( m_ppcBestCU[0], m_ppcTempCU[0], 0 );  
  
#if ADAPTIVE_QP_SELECTION  
  // ARL statistics are gathered only when adaptive QP selection is enabled
  // and the slice is not an I slice.
  if( m_pcEncCfg->getUseAdaptQpSelect() && rpcCU->getSlice()->getSliceType() != I_SLICE )  
  {  
    xLcuCollectARLStats( rpcCU );  
  }  
#endif  
}

其中完成实际编码一个CU操作的是xCompressCU方法。前面的综述中已经描述过，每一个CTU按照四叉树结构进行划分，CompressCU中调用的xCompressCU则相当于四叉树的根节点。另外，在每一个xCompressCU方法中间，会对每一个CU进行分析判断是否进行下一级划分。

xCompressCU函数由于包含了Intra和InterFrame编码的代码，因此同样非常长，共有600余行。下面着重对帧内编码的部分做一下梳理。

实现帧内编码的部分代码如下：

/**
 * Excerpt of xCompressCU(): only the part that evaluates the intra modes is
 * shown; the code elided by "//......" declares bEarlySkip and iQP.
 *
 * \param rpcBestCU        best CU candidate found so far at this depth
 * \param rpcTempCU        scratch CU used to evaluate the next candidate
 * \param uiDepth          current depth
 * \param eParentPartSize  partition size of the parent (not used in this excerpt)
 */
Void TEncCu::xCompressCU( TComDataCU*& rpcBestCU, TComDataCU*& rpcTempCU, UInt uiDepth, PartSize eParentPartSize )  
{  
//......  
// do normal intra modes  
        if ( !bEarlySkip )  
        {  
          // speedup for inter frames  
          // Intra is tested for I slices, or when the current best candidate
          // has a non-zero CBF in luma or either chroma component.
          if( rpcBestCU->getSlice()->getSliceType() == I_SLICE ||   
            rpcBestCU->getCbf( 0, TEXT_LUMA     ) != 0   ||  
            rpcBestCU->getCbf( 0, TEXT_CHROMA_U ) != 0   ||  
            rpcBestCU->getCbf( 0, TEXT_CHROMA_V ) != 0     ) // avoid very complex intra if it is unlikely  
          {  
            // 2Nx2N intra candidate, then re-initialise the scratch CU.
            xCheckRDCostIntra( rpcBestCU, rpcTempCU, SIZE_2Nx2N );  
            rpcTempCU->initEstData( uiDepth, iQP );  
            // NxN is tried only at depth g_uiMaxCUDepth - g_uiAddCUDepth ...
            if( uiDepth == g_uiMaxCUDepth - g_uiAddCUDepth )  
            {  
              // ... and only if the CU is wider than the minimum TU size.
              if( rpcTempCU->getWidth(0) > ( 1 << rpcTempCU->getSlice()->getSPS()->getQuadtreeTULog2MinSize() ) )  
              {  
                xCheckRDCostIntra( rpcBestCU, rpcTempCU, SIZE_NxN   );  
                rpcTempCU->initEstData( uiDepth, iQP );  
              }  
            }  
          }  
        }  
//......  
}

在这部分代码中xCheckRDCostIntra( rpcBestCU, rpcTempCU, SIZE_2Nx2N )查看了各种intra预测模式下的代价：

/**
 * Evaluate rpcTempCU as an intra CU with the given partition size and let
 * xCheckBestMode() compare it with rpcBestCU.
 *
 * The function marks the CU as non-skipped intra, runs the luma and chroma
 * intra searches, entropy-codes the CU syntax to count bits, and computes the
 * RD cost from the bit count and the CU's total distortion.
 *
 * \param rpcBestCU  best candidate so far
 * \param rpcTempCU  scratch CU that receives the intra candidate
 * \param eSize      partition size to test (callers in this file pass SIZE_2Nx2N or SIZE_NxN)
 */
Void TEncCu::xCheckRDCostIntra( TComDataCU*& rpcBestCU, TComDataCU*& rpcTempCU, PartSize eSize )  
{  
  UInt uiDepth = rpcTempCU->getDepth( 0 );  
    
  rpcTempCU->setSkipFlagSubParts( false, 0, uiDepth );  
  
  rpcTempCU->setPartSizeSubParts( eSize, 0, uiDepth );  
  rpcTempCU->setPredModeSubParts( MODE_INTRA, 0, uiDepth );  
  rpcTempCU->setCUTransquantBypassSubParts( m_pcEncCfg->getCUTransquantBypassFlagValue(), 0, uiDepth );  
    
  // bSeparateLumaChroma is hard-coded to true, so the chroma pre-estimation
  // branch below is never taken in this version.
  Bool bSeparateLumaChroma = true; // choose estimation mode  
  UInt uiPreCalcDistC      = 0;  
  if( !bSeparateLumaChroma )  
  {  
    m_pcPredSearch->preestChromaPredMode( rpcTempCU, m_ppcOrigYuv[uiDepth], m_ppcPredYuvTemp[uiDepth] );  
  }  
  // Luma intra search.
  m_pcPredSearch  ->estIntraPredQT      ( rpcTempCU, m_ppcOrigYuv[uiDepth], m_ppcPredYuvTemp[uiDepth], m_ppcResiYuvTemp[uiDepth], m_ppcRecoYuvTemp[uiDepth], uiPreCalcDistC, bSeparateLumaChroma );  
  
  // Copy the luma of the temporary reconstruction into the picture's
  // reconstruction buffer before the chroma search runs.
  m_ppcRecoYuvTemp[uiDepth]->copyToPicLuma(rpcTempCU->getPic()->getPicYuvRec(), rpcTempCU->getAddr(), rpcTempCU->getZorderIdxInCU() );  
    
  // Chroma intra search.
  m_pcPredSearch  ->estIntraPredChromaQT( rpcTempCU, m_ppcOrigYuv[uiDepth], m_ppcPredYuvTemp[uiDepth], m_ppcResiYuvTemp[uiDepth], m_ppcRecoYuvTemp[uiDepth], uiPreCalcDistC );  
    
  // Code the CU syntax elements so the number of written bits can be read back.
  m_pcEntropyCoder->resetBits();  
  if ( rpcTempCU->getSlice()->getPPS()->getTransquantBypassEnableFlag())  
  {  
    m_pcEntropyCoder->encodeCUTransquantBypassFlag( rpcTempCU, 0,          true );  
  }  
  m_pcEntropyCoder->encodeSkipFlag ( rpcTempCU, 0,          true );  
  m_pcEntropyCoder->encodePredMode( rpcTempCU, 0,          true );  
  m_pcEntropyCoder->encodePartSize( rpcTempCU, 0, uiDepth, true );  
  m_pcEntropyCoder->encodePredInfo( rpcTempCU, 0,          true );  
  m_pcEntropyCoder->encodeIPCMInfo(rpcTempCU, 0, true );  
  
  // Encode Coefficients  
  // encodeCoeff() may update bCodeDQP; the result is written back to the dQP flag.
  Bool bCodeDQP = getdQPFlag();  
  m_pcEntropyCoder->encodeCoeff( rpcTempCU, 0, uiDepth, rpcTempCU->getWidth (0), rpcTempCU->getHeight(0), bCodeDQP );  
  setdQPFlag( bCodeDQP );  
    
  // Store the coder state reached after this candidate in the CI_TEMP_BEST slot.
  if( m_bUseSBACRD ) m_pcRDGoOnSbacCoder->store(m_pppcRDSbacCoder[uiDepth][CI_TEMP_BEST]);  
    
  // Collect bits (and bins) and derive the RD cost of this candidate.
  rpcTempCU->getTotalBits() = m_pcEntropyCoder->getNumberOfWrittenBits();  
  if(m_pcEncCfg->getUseSBACRD())  
  {  
    rpcTempCU->getTotalBins() = ((TEncBinCABAC *)((TEncSbac*)m_pcEntropyCoder->m_pcEntropyCoderIf)->getEncBinIf())->getBinsCoded();  
  }  
  rpcTempCU->getTotalCost() = m_pcRdCost->calcRdCost( rpcTempCU->getTotalBits(), rpcTempCU->getTotalDistortion() );  
    
  xCheckDQP( rpcTempCU );  
  xCheckBestMode(rpcBestCU, rpcTempCU, uiDepth);  
}

在这个函数中，调用了estIntraPredQT和estIntraPredChromaQT方法，这两个函数的作用是类似的，区别只在于前者针对亮度分量后者针对色度分量。我们重点关注对亮度分量的操作，即estIntraPredQT函数。

下面是estIntraPredQT的一段代码：

/**
 * Excerpt of estIntraPredQT(): only the rough mode-decision loop is shown.
 * The elided code declares numModesAvailable, piOrg, piPred, uiStride,
 * uiWidth, uiHeight, bAboveAvail, bLeftAvail, uiPU, uiPartOffset, uiDepth,
 * uiInitTrDepth, CandNum, numModesForFullRD, uiRdModeList and CandCostList.
 *
 * \param pcCU       CU being searched
 * \param pcOrgYuv   original samples
 * \param pcPredYuv  prediction buffer
 * \param pcResiYuv  residual buffer
 * \param pcRecoYuv  reconstruction buffer
 * \param ruiDistC   distortion value passed by reference (not touched in this excerpt)
 * \param bLumaOnly  flag supplied by the caller (not used in this excerpt)
 */
Void   
TEncSearch::estIntraPredQT( TComDataCU* pcCU,   
                           TComYuv*    pcOrgYuv,   
                           TComYuv*    pcPredYuv,   
                           TComYuv*    pcResiYuv,   
                           TComYuv*    pcRecoYuv,  
                           UInt&       ruiDistC,  
                           Bool        bLumaOnly )  
{  
//......  
      // Try every available intra mode; the mode index is used directly as the mode number.
      for( Int modeIdx = 0; modeIdx < numModesAvailable; modeIdx++ )  
      {  
        UInt uiMode = modeIdx;  
  
        // Build the prediction block for this mode.
        predIntraLumaAng( pcCU->getPattern(), uiMode, piPred, uiStride, uiWidth, uiHeight, bAboveAvail, bLeftAvail );  
          
        // use hadamard transform here  
        UInt uiSad = m_pcRdCost->calcHAD(g_bitDepthY, piOrg, uiStride, piPred, uiStride, uiWidth, uiHeight );  
          
        // Rough cost = Hadamard distortion + mode bits weighted by sqrt(lambda).
        UInt   iModeBits = xModeBitsIntra( pcCU, uiMode, uiPU, uiPartOffset, uiDepth, uiInitTrDepth );  
        Double cost      = (Double)uiSad + (Double)iModeBits * m_pcRdCost->getSqrtLambda();  
          
        // Offer the mode to the candidate list kept for the full RD check.
        CandNum += xUpdateCandList( uiMode, cost, numModesForFullRD, uiRdModeList, CandCostList );  
      }  
//......  
}

这个for循环的意义就是遍历多种帧内预测模式，其中numModesAvailable==35，对应整个intra的35个模式。

在predIntraLumaAng函数中，编码器计算出当前PU的预测值：

/**
 * Generate the luma intra prediction of a square block for one mode.
 *
 * \param pcTComPattern  pattern object that supplies the reference-sample pointer
 * \param uiDirMode      intra mode (PLANAR_IDX, DC_IDX or an angular mode)
 * \param piPred         destination prediction buffer
 * \param uiStride       stride of piPred
 * \param iWidth         block width (must equal iHeight)
 * \param iHeight        block height
 * \param bAbove         whether the above neighbour is available
 * \param bLeft          whether the left neighbour is available
 */
Void TComPrediction::predIntraLumaAng(TComPattern* pcTComPattern, UInt uiDirMode, Pel* piPred, UInt uiStride, Int iWidth, Int iHeight, Bool bAbove, Bool bLeft )  
{  
    // Only square blocks from 4x4 up to 128x128 are accepted.
    assert( g_aucConvertToBit[ iWidth ] >= 0 );  
    assert( g_aucConvertToBit[ iWidth ] <= 5 );  
    assert( iWidth == iHeight  );  

    // The reference buffer has 2*iWidth+1 samples per row; skipping one row
    // plus one sample gives the position passed to the predictors as pSrc.
    const Int  iRefStride  = 2 * iWidth + 1;  
    Int* const piRefBase   = pcTComPattern->getPredictorPtr( uiDirMode, g_aucConvertToBit[ iWidth ] + 2, m_piYuvExt );  
    Int* const piRefOrigin = piRefBase + iRefStride + 1;  
    Pel*       piDst       = piPred;  

    if ( uiDirMode == PLANAR_IDX )  
    {  
        xPredIntraPlanar( piRefOrigin, iRefStride, piDst, uiStride, iWidth, iHeight );  
        return;  
    }  

    // Blocks larger than 16 in either dimension are predicted without the
    // extra filtering; smaller ones enable it.
    const Bool bSmallBlock = !( (iWidth > 16) || (iHeight > 16) );  
    xPredIntraAng(g_bitDepthY, piRefOrigin, iRefStride, piDst, uiStride, iWidth, iHeight, uiDirMode, bAbove, bLeft, bSmallBlock );  

    // DC blocks with both neighbours available additionally get DC filtering.
    if ( bSmallBlock && (uiDirMode == DC_IDX) && bAbove && bLeft )  
    {  
        xDCPredFiltering( piRefOrigin, iRefStride, piDst, uiStride, iWidth, iHeight);  
    }  
}

在这个函数中主要起作用的是xPredIntraPlanar和xPredIntraAng两个函数，另外在PU的宽和高都不超过16、模式为DC模式且上方和左方参考像素均可用时还会调用xDCPredFiltering函数。在这里我们主要关心前面两个。

xPredIntraPlanar的作用是以平面模式构建当前PU的帧内预测块：

/**
 * Build the planar intra prediction of a square block.
 *
 * \param pSrc       pointer into the reference buffer; pSrc[k-srcStride] is
 *                   read as the row above and pSrc[k*srcStride-1] as the left column
 * \param srcStride  stride of the reference buffer
 * \param rpDst      destination prediction block
 * \param dstStride  stride of rpDst
 * \param width      block width (must equal height and not exceed MAX_CU_SIZE)
 * \param height     block height
 */
Void TComPrediction::xPredIntraPlanar( Int* pSrc, Int srcStride, Pel* rpDst, Int dstStride, UInt width, UInt height )  
{  
    assert(width == height);  
    assert(width <= MAX_CU_SIZE);  
    Int k, l, bottomLeft, topRight;  
    Int horPred;  
    // leftColumn/topRow hold blkSize+1 samples (index blkSize is the
    // bottom-left / top-right reference), so they need one extra element;
    // with only MAX_CU_SIZE entries a MAX_CU_SIZE block would overrun them.
    Int leftColumn[MAX_CU_SIZE+1], topRow[MAX_CU_SIZE+1], bottomRow[MAX_CU_SIZE], rightColumn[MAX_CU_SIZE];  
    UInt blkSize = width;  
    UInt offset2D = width;  
    UInt shift1D = g_aucConvertToBit[ width ] + 2;  
    UInt shift2D = shift1D + 1;  
  
    // Get left and above reference column and row  
    for(k=0;k<blkSize+1;k++)  
    {  
        topRow[k] = pSrc[k-srcStride];  
        leftColumn[k] = pSrc[k*srcStride-1];  
    }  
  
    // Prepare intermediate variables used in interpolation  
    bottomLeft = leftColumn[blkSize];  
    topRight   = topRow[blkSize];  
    for (k=0;k<blkSize;k++)  
    {  
        // per-step increments towards the bottom-left / top-right sample
        bottomRow[k]   = bottomLeft - topRow[k];  
        rightColumn[k] = topRight   - leftColumn[k];  
        // scale the starting values by the block size (1 << shift1D)
        topRow[k]      <<= shift1D;  
        leftColumn[k]  <<= shift1D;  
    }  
  
    // Generate prediction signal  
    for (k=0;k<blkSize;k++)  
    {  
        // offset2D is the rounding term for the final shift by shift2D
        horPred = leftColumn[k] + offset2D;  
        for (l=0;l<blkSize;l++)  
        {  
            // horizontal term advances along the row; the vertical term in
            // topRow[l] advances by one step for every row processed
            horPred += rightColumn[k];  
            topRow[l] += bottomRow[l];  
            rpDst[k*dstStride+l] = ( (horPred + topRow[l]) >> shift2D );  
        }  
    }  
}

而xPredIntraAng函数则承担了其他模式的预测块构建，也即，不同的模式索引值代表多种不同的预测角度，从这些角度上以参考数据构建预测块。

/**
 * Build the DC or angular intra prediction of a square block.
 *
 * \param bitDepth           sample bit depth, used for clipping in the edge filter
 * \param pSrc               pointer into the reference buffer; samples are read at
 *                           pSrc[k-srcStride-1] (above) and pSrc[(k-1)*srcStride-1] (left)
 * \param srcStride          stride of the reference buffer
 * \param rpDst              destination prediction block
 * \param dstStride          stride of rpDst
 * \param width              block width (used as the block size)
 * \param height             block height (only forwarded to the DC helper)
 * \param dirMode            intra mode, must be > 0 (planar is handled elsewhere)
 * \param blkAboveAvailable  whether above neighbours are available (DC only)
 * \param blkLeftAvailable   whether left neighbours are available (DC only)
 * \param bFilter            enables the first-column edge filter when the angle is 0
 */
Void TComPrediction::xPredIntraAng(Int bitDepth, Int* pSrc, Int srcStride, Pel*& rpDst, Int dstStride, UInt width, UInt height, UInt dirMode, Bool blkAboveAvailable, Bool blkLeftAvailable, Bool bFilter )  
{  
    Int k,l;  
    Int blkSize        = width;  
    Pel* pDst          = rpDst;  
  
    // Map the mode index to main prediction direction and angle  
    // Mode 1 is DC, modes 2..17 are "horizontal", modes >= 18 are "vertical".
    assert( dirMode > 0 ); //no planar  
    Bool modeDC        = dirMode < 2;  
    Bool modeHor       = !modeDC && (dirMode < 18);  
    Bool modeVer       = !modeDC && !modeHor;  
    // Signed offset of the mode from VER_IDX, or the negated offset from HOR_IDX.
    Int intraPredAngle = modeVer ? (Int)dirMode - VER_IDX : modeHor ? -((Int)dirMode - HOR_IDX) : 0;  
    Int absAng         = abs(intraPredAngle);  
    Int signAng        = intraPredAngle < 0 ? -1 : 1;  
  
    // Set bitshifts and scale the angle parameter to block size  
    // angTable maps the mode offset to a displacement in 1/32 sample units
    // (see the >>5 and &31 below); invAngTable holds the matching inverse.
    Int angTable[9]    = {0,    2,    5,   9,  13,  17,  21,  26,  32};  
    Int invAngTable[9] = {0, 4096, 1638, 910, 630, 482, 390, 315, 256}; // (256 * 32) / Angle  
    Int invAngle       = invAngTable[absAng];  
    absAng             = angTable[absAng];  
    intraPredAngle     = signAng * absAng;  
  
    // Do the DC prediction  
    if (modeDC)  
    {  
        Pel dcval = predIntraGetPredValDC(pSrc, srcStride, width, height, blkAboveAvailable, blkLeftAvailable);  
  
        // Every sample of the block gets the same DC value.
        for (k=0;k<blkSize;k++)  
        {  
            for (l=0;l<blkSize;l++)  
            {  
                pDst[k*dstStride+l] = dcval;  
            }  
        }  
    }  
  
    // Do angular predictions  
    else  
    {  
        Pel* refMain;  
        Pel* refSide;  
        Pel  refAbove[2*MAX_CU_SIZE+1];  
        Pel  refLeft[2*MAX_CU_SIZE+1];  
  
        // Initialise the Main and Left reference array.  
        if (intraPredAngle < 0)  
        {  
            // Negative angle: copy blkSize+1 samples of each side, stored at
            // offset blkSize-1 so that negative indices of refMain stay in bounds.
            for (k=0;k<blkSize+1;k++)  
            {  
                refAbove[k+blkSize-1] = pSrc[k-srcStride-1];  
            }  
            for (k=0;k<blkSize+1;k++)  
            {  
                refLeft[k+blkSize-1] = pSrc[(k-1)*srcStride-1];  
            }  
            refMain = (modeVer ? refAbove : refLeft) + (blkSize-1);  
            refSide = (modeVer ? refLeft : refAbove) + (blkSize-1);  
  
            // Extend the Main reference to the left.  
            // Entries refMain[-1], refMain[-2], ... are taken from the side
            // reference, stepping through it with the inverse angle.
            Int invAngleSum    = 128;       // rounding for (shift by 8)  
            for (k=-1; k>blkSize*intraPredAngle>>5; k--)  
            {  
                invAngleSum += invAngle;  
                refMain[k] = refSide[invAngleSum>>8];  
            }  
        }  
        else  
        {  
            // Non-negative angle: copy 2*blkSize+1 samples of each side.
            for (k=0;k<2*blkSize+1;k++)  
            {  
                refAbove[k] = pSrc[k-srcStride-1];  
            }  
            for (k=0;k<2*blkSize+1;k++)  
            {  
                refLeft[k] = pSrc[(k-1)*srcStride-1];  
            }  
            refMain = modeVer ? refAbove : refLeft;  
            refSide = modeVer ? refLeft  : refAbove;  
        }  
  
        if (intraPredAngle == 0)  
        {  
            // Angle 0: every row is a copy of the main reference.
            for (k=0;k<blkSize;k++)  
            {  
                for (l=0;l<blkSize;l++)  
                {  
                    pDst[k*dstStride+l] = refMain[l+1];  
                }  
            }  
  
            if ( bFilter )  
            {  
                // Edge filter: adjust the first column by half the difference
                // between the side reference and refSide[0], clipped to the sample range.
                for (k=0;k<blkSize;k++)  
                {  
                    pDst[k*dstStride] = Clip3(0, (1<<bitDepth)-1, pDst[k*dstStride] + (( refSide[k+1] - refSide[0] ) >> 1) );  
                }  
            }  
        }  
        else  
        {  
            Int deltaPos=0;  
            Int deltaInt;  
            Int deltaFract;  
            Int refMainIndex;  
  
            for (k=0;k<blkSize;k++)  
            {  
                // Displacement of row k in 1/32 units, split into integer and fractional parts.
                deltaPos += intraPredAngle;  
                deltaInt   = deltaPos >> 5;  
                deltaFract = deltaPos & (32 - 1);  
  
                if (deltaFract)  
                {  
                    // Do linear filtering  
                    for (l=0;l<blkSize;l++)  
                    {  
                        refMainIndex        = l+deltaInt+1;  
                        pDst[k*dstStride+l] = (Pel) ( ((32-deltaFract)*refMain[refMainIndex]+deltaFract*refMain[refMainIndex+1]+16) >> 5 );  
                    }  
                }  
                else  
                {  
                    // Just copy the integer samples  
                    for (l=0;l<blkSize;l++)  
                    {  
                        pDst[k*dstStride+l] = refMain[l+deltaInt+1];  
                    }  
                }  
            }  
        }  
  
        // Flip the block if this is the horizontal mode  
        // (in-place transpose: element (k,l) is swapped with (l,k))
        if (modeHor)  
        {  
            Pel  tmp;  
            for (k=0;k<blkSize-1;k++)  
            {  
                for (l=k+1;l<blkSize;l++)  
                {  
                    tmp                 = pDst[k*dstStride+l];  
                    pDst[k*dstStride+l] = pDst[l*dstStride+k];  
                    pDst[l*dstStride+k] = tmp;  
                }  
            }  
        }  
    }  
}

具体的预测块构建的原理：

HEVC中一共定义了35种帧内编码预测模式，编号分别以0-34定义。其中模式0定义为平面模式（INTRA_PLANAR），模式1定义为均值模式（INTRA_DC），模式2~34定义为角度预测模式（INTRA_ANGULAR2~INTRA_ANGULAR34），分别代表了不同的角度。具体的示意图如标准文档的图8-1所示：

这三大类的预测方法均有实现的代码。首先看最简单的Intra_DC模式，该模式同角度预测模式实现在同一个函数Void TComPrediction::xPredIntraAng(...)中：

// Excerpt of xPredIntraAng() showing only the DC branch; the elided code
// declares k, l, blkSize, pDst and modeDC.
Void TComPrediction::xPredIntraAng(Int bitDepth, Int* pSrc, Int srcStride, Pel*& rpDst, Int dstStride, UInt width, UInt height, UInt dirMode, Bool blkAboveAvailable, Bool blkLeftAvailable, Bool bFilter )  
{  
        //......  
        // Do the DC prediction  
    if (modeDC)  
    {  
        // One DC value is computed from the available neighbours ...
        Pel dcval = predIntraGetPredValDC(pSrc, srcStride, width, height, blkAboveAvailable, blkLeftAvailable);  
  
        // ... and written to every sample of the block.
        for (k=0;k<blkSize;k++)  
        {  
            for (l=0;l<blkSize;l++)  
            {  
                pDst[k*dstStride+l] = dcval;  
            }  
        }  
    }  
        //......  
}

在这个函数中可以看到，Intra_DC模式中所有预测块的像素值都是同一个值dcval，这个值是由一个函数predIntraGetPredValDC计算得到：

/**
 * Compute the DC prediction value from the available neighbouring samples.
 *
 * \param pSrc        pointer into the reference buffer; the row above is read at
 *                    pSrc[x - iSrcStride], the left column at pSrc[y*iSrcStride - 1]
 * \param iSrcStride  stride of the reference buffer
 * \param iWidth      block width
 * \param iHeight     block height
 * \param bAbove      whether the above samples are available
 * \param bLeft       whether the left samples are available
 * \return rounded mean of the available neighbours, or pSrc[-1] when none is available
 */
Pel TComPrediction::predIntraGetPredValDC( Int* pSrc, Int iSrcStride, UInt iWidth, UInt iHeight, Bool bAbove, Bool bLeft )  
{  
    // No neighbours at all: the default DC value has already been placed
    // in the prediction array, so just hand it back.
    if (!bAbove && !bLeft)  
    {  
        return pSrc[-1];  
    }  

    Int iSum = 0;  

    if (bAbove)  
    {  
        const Int* pAboveRow = pSrc - iSrcStride;  
        for (Int x = 0; x < iWidth; x++)  
        {  
            iSum += pAboveRow[x];  
        }  
    }  

    if (bLeft)  
    {  
        const Int* pLeftCol = pSrc - 1;  
        for (Int y = 0; y < iHeight; y++)  
        {  
            iSum += pLeftCol[y * iSrcStride];  
        }  
    }  

    // Both sides available: average over width + height samples.
    if (bAbove && bLeft)  
    {  
        return (iSum + iWidth) / (iWidth + iHeight);  
    }  

    // Exactly one side available: rounded average over that side only.
    if (bAbove)  
    {  
        return (iSum + iWidth/2) / iWidth;  
    }  
    return (iSum + iHeight/2) / iHeight;  
}

在该函数中，编码器通过判断上方和左方参考像素是否有效而选择将相应的数据（指针pSrc指向的数据）累加到iSum中，并对这些参考数据取平均返回。所以，在DC模式下，所有预测像素值都是同一个值，也即参考数据的均值，这也是DC模式命名的由来。

第二种预测模式是平面模式，该模式定义在xPredIntraPlanar函数中。

/**
 * Build the planar intra prediction of a square block.
 *
 * \param pSrc       pointer into the reference buffer; pSrc[k-srcStride] is
 *                   read as the row above and pSrc[k*srcStride-1] as the left column
 * \param srcStride  stride of the reference buffer
 * \param rpDst      destination prediction block
 * \param dstStride  stride of rpDst
 * \param width      block width (must equal height and not exceed MAX_CU_SIZE)
 * \param height     block height
 */
Void TComPrediction::xPredIntraPlanar( Int* pSrc, Int srcStride, Pel* rpDst, Int dstStride, UInt width, UInt height )  
{  
    assert(width == height);  
    assert(width <= MAX_CU_SIZE);  
  
    Int k, l, bottomLeft, topRight;  
    Int horPred;  
    // leftColumn/topRow hold blkSize+1 samples (index blkSize is the
    // bottom-left / top-right reference), so they need one extra element;
    // with only MAX_CU_SIZE entries a MAX_CU_SIZE block would overrun them.
    Int leftColumn[MAX_CU_SIZE+1], topRow[MAX_CU_SIZE+1], bottomRow[MAX_CU_SIZE], rightColumn[MAX_CU_SIZE];  
    UInt blkSize = width;  
    UInt offset2D = width;  
    UInt shift1D = g_aucConvertToBit[ width ] + 2;  
    UInt shift2D = shift1D + 1;  
  
    // Get left and above reference column and row  
    for(k=0;k<blkSize+1;k++)  
    {  
        topRow[k] = pSrc[k-srcStride];  
        leftColumn[k] = pSrc[k*srcStride-1];  
    }  
  
    // Prepare intermediate variables used in interpolation  
    bottomLeft = leftColumn[blkSize];  
    topRight   = topRow[blkSize];  
    for (k=0;k<blkSize;k++)  
    {  
        // per-step increments towards the bottom-left / top-right sample
        bottomRow[k]   = bottomLeft - topRow[k];  
        rightColumn[k] = topRight   - leftColumn[k];  
        // scale the starting values by the block size (1 << shift1D)
        topRow[k]      <<= shift1D;  
        leftColumn[k]  <<= shift1D;  
    }  
  
    // Generate prediction signal  
    for (k=0;k<blkSize;k++)  
    {  
        // offset2D is the rounding term for the final shift by shift2D
        horPred = leftColumn[k] + offset2D;  
        for (l=0;l<blkSize;l++)  
        {  
            // horizontal term advances along the row; the vertical term in
            // topRow[l] advances by one step for every row processed
            horPred += rightColumn[k];  
            topRow[l] += bottomRow[l];  
            rpDst[k*dstStride+l] = ( (horPred + topRow[l]) >> shift2D );  
        }  
    }  
}

首先从参考数据中获取的是顶行和左列的数据，并记录一下左下角和右上角的两个像素值。然后计算底行和右列的数据，方法是用左下角的像素减去顶行相应位置的像素得到底行，右上角的像素减去左列相应位置的像素得到右列。预测块中每个像素的数据，就是对应的四个边的像素值的平均。

第三种预测模式，即mode=2~34时采用角度预测模式。实现的方式在xPredIntraAng中:

// Excerpt of xPredIntraAng() focusing on the angular branch. The DC branch
// ("if (modeDC) { ... }") that precedes the "else" below is elided by "// ......".
Void TComPrediction::xPredIntraAng(Int bitDepth, Int* pSrc, Int srcStride, Pel*& rpDst, Int dstStride, UInt width, UInt height, UInt dirMode, Bool blkAboveAvailable, Bool blkLeftAvailable, Bool bFilter )  
{  
    Int k,l;  
    Int blkSize        = width;  
    Pel* pDst          = rpDst;  
  
    // Map the mode index to main prediction direction and angle  
    // Mode 1 is DC, modes 2..17 are "horizontal", modes >= 18 are "vertical".
    assert( dirMode > 0 ); //no planar  
    Bool modeDC        = dirMode < 2;  
    Bool modeHor       = !modeDC && (dirMode < 18);  
    Bool modeVer       = !modeDC && !modeHor;  
    Int intraPredAngle = modeVer ? (Int)dirMode - VER_IDX : modeHor ? -((Int)dirMode - HOR_IDX) : 0;// angle offset of the current mode from the horizontal/vertical mode  
    Int absAng         = abs(intraPredAngle);  
    Int signAng        = intraPredAngle < 0 ? -1 : 1;  
  
    // Set bitshifts and scale the angle parameter to block size  
    // angTable maps the mode offset to a displacement in 1/32 sample units
    // (see the >>5 and &31 below); invAngTable holds the matching inverse.
    Int angTable[9]    = {0,    2,    5,   9,  13,  17,  21,  26,  32};  
    Int invAngTable[9] = {0, 4096, 1638, 910, 630, 482, 390, 315, 256}; // (256 * 32) / Angle  
    Int invAngle       = invAngTable[absAng];  
    absAng             = angTable[absAng];  
    intraPredAngle     = signAng * absAng;  
    // ......  
        // Do angular predictions  
    else  
    {  
        Pel* refMain;  
        Pel* refSide;  
        Pel  refAbove[2*MAX_CU_SIZE+1];  
        Pel  refLeft[2*MAX_CU_SIZE+1];  
  
        // Initialise the Main and Left reference array.  
        if (intraPredAngle < 0)  
        {  
            // Negative angle: copy blkSize+1 samples of each side, stored at
            // offset blkSize-1 so that negative indices of refMain stay in bounds.
            for (k=0;k<blkSize+1;k++)  
            {  
                refAbove[k+blkSize-1] = pSrc[k-srcStride-1];  
            }  
            for (k=0;k<blkSize+1;k++)  
            {  
                refLeft[k+blkSize-1] = pSrc[(k-1)*srcStride-1];  
            }  
            refMain = (modeVer ? refAbove : refLeft) + (blkSize-1);  
            refSide = (modeVer ? refLeft : refAbove) + (blkSize-1);  
  
            // Extend the Main reference to the left.  
            // Entries refMain[-1], refMain[-2], ... are taken from the side
            // reference, stepping through it with the inverse angle.
            Int invAngleSum    = 128;       // rounding for (shift by 8)  
            for (k=-1; k>blkSize*intraPredAngle>>5; k--)  
            {  
                invAngleSum += invAngle;  
                refMain[k] = refSide[invAngleSum>>8];  
            }  
        }  
        else  
        {  
            // Non-negative angle: copy 2*blkSize+1 samples of each side.
            for (k=0;k<2*blkSize+1;k++)  
            {  
                refAbove[k] = pSrc[k-srcStride-1];  
            }  
            for (k=0;k<2*blkSize+1;k++)  
            {  
                refLeft[k] = pSrc[(k-1)*srcStride-1];  
            }  
            refMain = modeVer ? refAbove : refLeft;  
            refSide = modeVer ? refLeft  : refAbove;  
        }  
  
        if (intraPredAngle == 0)  
        {  
            // Angle 0: every row is a copy of the main reference.
            for (k=0;k<blkSize;k++)  
            {  
                for (l=0;l<blkSize;l++)  
                {  
                    pDst[k*dstStride+l] = refMain[l+1];  
                }  
            }  
  
            if ( bFilter )  
            {  
                // Edge filter: adjust the first column by half the difference
                // between the side reference and refSide[0], clipped to the sample range.
                for (k=0;k<blkSize;k++)  
                {  
                    pDst[k*dstStride] = Clip3(0, (1<<bitDepth)-1, pDst[k*dstStride] + (( refSide[k+1] - refSide[0] ) >> 1) );  
                }  
            }  
        }  
        else  
        {  
            Int deltaPos=0;  
            Int deltaInt;  
            Int deltaFract;  
            Int refMainIndex;  
  
            for (k=0;k<blkSize;k++)  
            {  
                // Displacement of row k in 1/32 units, split into integer and fractional parts.
                deltaPos += intraPredAngle;  
                deltaInt   = deltaPos >> 5;  
                deltaFract = deltaPos & (32 - 1);  
  
                if (deltaFract)  
                {  
                    // Do linear filtering  
                    for (l=0;l<blkSize;l++)  
                    {  
                        refMainIndex        = l+deltaInt+1;  
                        pDst[k*dstStride+l] = (Pel) ( ((32-deltaFract)*refMain[refMainIndex]+deltaFract*refMain[refMainIndex+1]+16) >> 5 );  
                    }  
                }  
                else  
                {  
                    // Just copy the integer samples  
                    for (l=0;l<blkSize;l++)  
                    {  
                        pDst[k*dstStride+l] = refMain[l+deltaInt+1];  
                    }  
                }  
            }  
        }  
  
        // Flip the block if this is the horizontal mode  
        // (in-place transpose: element (k,l) is swapped with (l,k))
        if (modeHor)  
        {  
            Pel  tmp;  
            for (k=0;k<blkSize-1;k++)  
            {  
                for (l=k+1;l<blkSize;l++)  
                {  
                    tmp                 = pDst[k*dstStride+l];  
                    pDst[k*dstStride+l] = pDst[l*dstStride+k];  
                    pDst[l*dstStride+k] = tmp;  
                }  
            }  
        }  
    }  
}

在图8-1中可以看出，模式18的预测方向相当于对角线预测。所以以模式18为分界线，2~17分为水平模式(modeHor)，18~34分为垂直模式(modeVer)，这样区分有利于减少代码的冗余。另外，从该图中也可以看出，模式10和26即相当于水平模式和垂直模式，在代码中也定义了两个宏HOR_IDX和VER_IDX表示，然后计算当前模式同水平/垂直模式之间的角度差，用intraPredAngle表示。intraPredAngle不同的取值对应的预测方向可以参考图8-2：

图中可见，intraPredAngle的取值可能出现正值或负值。当intraPredAngle取非负值时，垂直模式下只参考上方的参考点，水平模式下只参考左方的参考点；当intraPredAngle取负值的时候，refMain会依照refSide中的数据进行部分扩充，因此会同时参考左方和上方两部分的参考点。当intraPredAngle为0的时候，表示预测模式为10或者26，这时也就是水平或者垂直模式，直接复制参考像素的值就OK了；否则，会对角度做一个判断，如果对应的是参考像素中的整像素点那么就不需要进行计算，直接获取该点数据；如果对应的不是整像素点，那么会对相邻两点按照“距离”进行加权平均作为参考像素点的值。

除此之外，这个函数还实现了对宽和高都不超过16的块在纯水平/垂直模式（intraPredAngle为0）下的边界滤波操作（由bFilter参数控制），以及水平模式时将预测矩阵进行转置操作。

大致上Intra预测块的生成方法就这样了，下一个问题在于，参考像素是如何来的？pSrc指针指向的数据又是如何获取的？

HEVC参考软件HM中Intra预测参考像素的获取与管理

继续上一个section所讨论的问题。在section 33中讨论了HEVC帧内预测的几种不同模式，代表这几种模式的函数xPredIntraPlanar、xPredIntraAng和xDCPredFiltering调用的位置位于Void TComPrediction::predIntraLumaAng()中，所以也可以说，在一个PU内，函数Void TComPrediction::predIntraLumaAng实现了亮度分量的帧内预测。该函数的实现方法如下：

/**
 * Generate the luma intra prediction of a square block for one mode.
 *
 * \param pcTComPattern  pattern object that supplies the reference-sample pointer
 * \param uiDirMode      intra mode (PLANAR_IDX, DC_IDX or an angular mode)
 * \param piPred         destination prediction buffer
 * \param uiStride       stride of piPred
 * \param iWidth         block width (must equal iHeight)
 * \param iHeight        block height
 * \param bAbove         whether the above neighbour is available
 * \param bLeft          whether the left neighbour is available
 */
Void TComPrediction::predIntraLumaAng(TComPattern* pcTComPattern, UInt uiDirMode, Pel* piPred, UInt uiStride, Int iWidth, Int iHeight, Bool bAbove, Bool bLeft )  
{  
  // Only square blocks from 4x4 up to 128x128 are accepted.
  assert( g_aucConvertToBit[ iWidth ] >= 0 );  
  assert( g_aucConvertToBit[ iWidth ] <= 5 );  
  assert( iWidth == iHeight  );  

  // Fetch the reference-sample pointer for this mode and block size. The
  // buffer has 2*iWidth+1 samples per row; skipping one row plus one sample
  // gives the position passed to the predictors as pSrc.
  const Int  iRefStride  = 2 * iWidth + 1;  
  Int* const piRefBase   = pcTComPattern->getPredictorPtr( uiDirMode, g_aucConvertToBit[ iWidth ] + 2, m_piYuvExt );  
  Int* const piRefOrigin = piRefBase + iRefStride + 1;  
  Pel*       piDst       = piPred;  

  // Planar mode has its own predictor.
  if ( uiDirMode == PLANAR_IDX )  
  {  
    xPredIntraPlanar( piRefOrigin, iRefStride, piDst, uiStride, iWidth, iHeight );  
    return;  
  }  

  // DC and angular modes share xPredIntraAng(); the extra filtering is
  // enabled only when neither dimension exceeds 16.
  const Bool bSmallBlock = !( (iWidth > 16) || (iHeight > 16) );  
  xPredIntraAng(g_bitDepthY, piRefOrigin, iRefStride, piDst, uiStride, iWidth, iHeight, uiDirMode, bAbove, bLeft, bSmallBlock );  

  // DC blocks with both neighbours available additionally get DC filtering.
  if ( bSmallBlock && (uiDirMode == DC_IDX) && bAbove && bLeft )  
  {  
    xDCPredFiltering( piRefOrigin, iRefStride, piDst, uiStride, iWidth, iHeight);  
  }  
}

该函数中存在一个非常关键的指针变量ptrSrc，指向的是当前块的参考数据。这个指针通过m_piYuvExt计算得来，方法是pcTComPattern->getPredictorPtr：

/**
 * Select the reference-sample buffer to use for a given mode and block size.
 *
 * \param uiDirMode    intra prediction mode
 * \param log2BlkSize  log2 of the block size, in [2, 6]
 * \param piAdiBuf     base of the reference-sample storage
 * \return the pointer obtained from getAdiOrgBuf(), advanced by one
 *         (2*width+1)*(2*height+1) buffer when the filtered set is selected
 */
Int* TComPattern::getPredictorPtr( UInt uiDirMode, UInt log2BlkSize, Int* piAdiBuf )  
{  
  assert(log2BlkSize >= 2 && log2BlkSize < 7);  

  // Distance (in mode indices) to the nearer of the horizontal / vertical modes.
  const Int distToHor = abs((Int) uiDirMode - HOR_IDX);  
  const Int distToVer = abs((Int)uiDirMode - VER_IDX);  
  const Int diff      = min<Int>(distToHor, distToVer);  

  // The filtered set is used when that distance exceeds the per-size
  // threshold; DC never uses it.
  UChar ucFiltIdx = 0;  
  if ( uiDirMode != DC_IDX && diff > m_aucIntraFilter[log2BlkSize - 2] )  
  {  
    ucFiltIdx = 1;  
  }  
  assert( ucFiltIdx <= 1 );  

  const Int width  = 1 << log2BlkSize;  
  const Int height = 1 << log2BlkSize;  

  Int* piSrc = getAdiOrgBuf( width, height, piAdiBuf );  

  // The filtered samples are stored right after the unfiltered ones.
  if ( ucFiltIdx )  
  {  
    piSrc += (2 * width + 1) * (2 * height + 1);  
  }  
  return piSrc;  
}

该函数首先判断当前的帧内预测方向同HOR_IDX、VER_IDX两个预设模式之绝对差的较小值，与某一个预定义的Filter指示标识（m_aucIntraFilter）进行比较。m_aucIntraFilter定义为：

// Per-block-size thresholds used by getPredictorPtr to choose between unfiltered and
// filtered intra reference samples. The table is indexed by (log2BlkSize - 2); the
// filtered samples are selected when min(|mode - HOR_IDX|, |mode - VER_IDX|) is strictly
// greater than the entry for that size.
const UChar TComPattern::m_aucIntraFilter[5] =
{
  10, //4x4
  7, //8x8
  1, //16x16
  0, //32x32
  10, //64x64
};

我们已经知道，HOR_IDX = 10，VER_IDX = 26，uiDirMode共有0~34这35种取值。所以diff的取值范围只有[0, 10]这11个值，结合m_aucIntraFilter的定义来看，可以认为对于4×4和64×64的尺寸，ucFiltIdx始终为0；对于其他尺寸，块越大越需要滤波，对于32×32的块，除了DC模式以及纯水平、纯垂直模式（diff为0）之外都需要滤波操作（至于如何进行滤波将在后面研究），而取滤波后的数据就是将指针piSrc向后移动一段距离，这段距离刚好是一组Intra参考数据的长度。

回到上一级函数之后，发现getPredictorPtr所操作的数据地址指针，其实就是m_piYuvExt。看来文章就在这个指针变量中了。m_piYuvExt定义在TComPrediction类中，在其构造函数中初始化，在析构函数中释放内存。分配相应的内存空间在函数Void TComPrediction::initTempBuff()中实现，这个函数在编码开始之前就会被调用。

实际的参考数据呢？实际上，在对当前PU的每一种模式进行遍历（TEncSearch::estIntraPredQT函数）之前，会有专门操作对m_piYuvExt进行数据填充操作，具体的操作在TComPattern::initAdiPattern中实现。该函数比较长就不贴在这里了，里面的核心部分是调用了fillReferenceSamples函数填充参考数据，随后生成Intra预测的滤波参考数据。下篇研究fillReferenceSamples的实现以及Intra参考数据滤波的原理。

帧内预测参考数据的获取和滤波处理

帧内预测的参考像素值的获取在标准文档的8.4.4.2.2中指明。

举例说明，当前demo中，我们用来单步调试的第一个CU为64×64像素大小，那么参考像素由两部分组成，一部分包含2×64+1=129个，另一部分包含2×64=128个像素。这两部分分别作为垂直和水平方向上的预测数据。在编码的过程中，根据预测数据是否可得，共分为两种情况：

第一种：所有的预测数据都不可得。最直观的情况就是一帧数据中的第一个CU，该CU左侧和上方的数据都不存在，如下图所示。此时所有的预测数据都会指定一个默认值，计算方法为：1 << (bitDepth - 1)；（图中的格子数只是示意图，不代表CU的像素大小和参考像素的个数）。

第二种：至少有一个像素点是可获得的，如下图所示。如果参考数据中的第一个点是不可获得的，那么将沿着当前CU的边缘，先从下到上，后从左到右查找第一个可获得的参考点并赋给第一个点；对于其他的点，如果不可得，那么就直接复制它前面一个参考点的值。如果所有点都是可获得的，那么参考数据直接使用该值就可以了。

基本算法已经明了，接下来研究一下HM中的实现。代码如下：

// Fill the intra reference-sample buffer piAdiTemp for the current block.
//
// Output layout (as written by every branch below): the top row occupies piAdiTemp[0 .. uiWidth-1]
// (index 0 is the top-left corner sample) and the left column occupies piAdiTemp[i*uiWidth] for i >= 1.
//
// Three cases are handled:
//   1. iNumIntraNeighbor == 0          : every sample is set to 1 << (bitDepth - 1).
//   2. iNumIntraNeighbor == iTotalUnits: samples are copied straight from the picture around piRoiOrigin.
//   3. otherwise                       : available units are copied into a linear scratch line,
//                                        unavailable units are padded from their neighbours, and the
//                                        result is copied to piAdiTemp.
//
// bitDepth           bit depth used to derive the default sample value
// piRoiOrigin        pointer into the picture; neighbours are addressed relative to it using iPicStride
//                    (presumably the top-left sample of the current block - confirm against caller)
// piAdiTemp          output buffer receiving the reference samples
// bNeighborFlags     per-unit availability flags; entry iNumUnitsInCu*2 is the top-left unit, lower
//                    indices run through the left / below-left units, higher indices through the
//                    above / above-right units
// iNumIntraNeighbor  compared against 0 and iTotalUnits to pick the case above
//                    (presumably the number of available units - confirm against caller)
// iUnitSize          number of samples covered by one availability flag
// iNumUnitsInCu      number of units along one block edge
// iTotalUnits        total number of units (flags) in the reference line
// uiCuWidth/Height   number of samples copied per side in case 2
// uiWidth/uiHeight   length of the output top row / left column; uiWidth is also the output row stride
// iPicStride         stride of the picture buffer behind piRoiOrigin
// bLMmode            when set, the left column is read one sample further to the left
Void TComPattern::fillReferenceSamples(Int bitDepth, Pel* piRoiOrigin, Int* piAdiTemp, Bool* bNeighborFlags, Int iNumIntraNeighbor, Int iUnitSize, Int iNumUnitsInCu, Int iTotalUnits, UInt uiCuWidth, UInt uiCuHeight, UInt uiWidth, UInt uiHeight, Int iPicStride, Bool bLMmode )
{
  Pel* piRoiTemp;
  Int  i, j;
  Int  iDCValue = 1 << (bitDepth - 1);

  if (iNumIntraNeighbor == 0)// no reference unit is available: fill everything with the default (mid-range) value
  {
    // Fill border with DC value
    for (i=0; i<uiWidth; i++)
    {
      piAdiTemp[i] = iDCValue;// piAdiTemp is the output buffer that receives the reference samples
    }
    for (i=1; i<uiHeight; i++)
    {
      piAdiTemp[i*uiWidth] = iDCValue;
    }
  }
  else if (iNumIntraNeighbor == iTotalUnits)// every reference unit is available: copy the picture samples directly
  {
    // Fill top-left border with rec. samples
    piRoiTemp = piRoiOrigin - iPicStride - 1;// the single sample diagonally above-left of piRoiOrigin
    piAdiTemp[0] = piRoiTemp[0];

    // Fill left border with rec. samples
    piRoiTemp = piRoiOrigin - 1;// the sample immediately to the left of piRoiOrigin

    if (bLMmode)
    {
      piRoiTemp --; // move to the second left column
    }

    for (i=0; i<uiCuHeight; i++)// copy the left column into the output's first column
    {
      piAdiTemp[(1+i)*uiWidth] = piRoiTemp[0];
      piRoiTemp += iPicStride;
    }

    // Fill below left border with rec. samples
    for (i=0; i<uiCuHeight; i++)// continue down the same picture column for the below-left samples
    {
      piAdiTemp[(1+uiCuHeight+i)*uiWidth] = piRoiTemp[0];
      piRoiTemp += iPicStride;
    }

    // Fill top border with rec. samples
    piRoiTemp = piRoiOrigin - iPicStride;// the sample directly above piRoiOrigin
    for (i=0; i<uiCuWidth; i++)
    {
      piAdiTemp[1+i] = piRoiTemp[i];
    }

    // Fill top right border with rec. samples
    piRoiTemp = piRoiOrigin - iPicStride + uiCuWidth;// first sample of the above-right row segment
    for (i=0; i<uiCuWidth; i++)
    {
      piAdiTemp[1+uiCuWidth+i] = piRoiTemp[i];
    }
  }
  else // reference samples are partially available
  {
    Int  iNumUnits2 = iNumUnitsInCu<<1;
    Int  iTotalSamples = iTotalUnits*iUnitSize;
    // Linear scratch line: left/below-left units first (stored in reverse), then the top-left unit,
    // then the above/above-right units.
    Pel  piAdiLine[5 * MAX_CU_SIZE];
    Pel  *piAdiLineTemp;
    Bool *pbNeighborFlags;
    Int  iNext, iCurr;
    Pel  piRef = 0;

    // Initialize
    for (i=0; i<iTotalSamples; i++)// start with the default value everywhere
    {
      piAdiLine[i] = iDCValue;
    }

    // Fill top-left sample
    piRoiTemp = piRoiOrigin - iPicStride - 1;// the picture sample diagonally above-left of piRoiOrigin
    piAdiLineTemp = piAdiLine + (iNumUnits2*iUnitSize);
    pbNeighborFlags = bNeighborFlags + iNumUnits2;
    if (*pbNeighborFlags)// the top-left unit is available
    {
      // replicate the single top-left sample across its whole unit
      piAdiLineTemp[0] = piRoiTemp[0];
      for (i=1; i<iUnitSize; i++)
      {
        piAdiLineTemp[i] = piAdiLineTemp[0];
      }
    }

    // Fill left & below-left samples
    piRoiTemp += iPicStride;// step down one row: from the above-left sample to the left sample
    if (bLMmode)
    {
      piRoiTemp --; // move the second left column
    }
    piAdiLineTemp--;// step the scratch pointer back by one sample
    pbNeighborFlags--;// step the availability-flag pointer back by one unit
    for (j=0; j<iNumUnits2; j++)
    {
      if (*pbNeighborFlags)
      {
        for (i=0; i<iUnitSize; i++)// samples are handled one unit (iUnitSize samples) per flag; the picture column is walked downwards while the scratch line is written backwards
        {
          piAdiLineTemp[-i] = piRoiTemp[i*iPicStride];
        }
      }
      piRoiTemp += iUnitSize*iPicStride;
      piAdiLineTemp -= iUnitSize;
      pbNeighborFlags--;
    }

    // Fill above & above-right samples
    piRoiTemp = piRoiOrigin - iPicStride;// the horizontal direction is handled like the vertical one, but written forwards
    piAdiLineTemp = piAdiLine + ((iNumUnits2+1)*iUnitSize);
    pbNeighborFlags = bNeighborFlags + iNumUnits2 + 1;
    for (j=0; j<iNumUnits2; j++)
    {
      if (*pbNeighborFlags)
      {
        for (i=0; i<iUnitSize; i++)
        {
          piAdiLineTemp[i] = piRoiTemp[i];
        }
      }
      piRoiTemp += iUnitSize;
      piAdiLineTemp += iUnitSize;
      pbNeighborFlags++;
    }

    // Pad reference samples when necessary
    iCurr = 0;
    iNext = 1;
    piAdiLineTemp = piAdiLine;// start at the beginning of the scratch line
    while (iCurr < iTotalUnits)// walk over every reference unit
    {
      if (!bNeighborFlags[iCurr])// this unit is unavailable
      {
        if(iCurr == 0)// the very first unit is unavailable
        {
          while (iNext < iTotalUnits && !bNeighborFlags[iNext])// search forward for the first available unit
          {
            iNext++;
          }
          piRef = piAdiLine[iNext*iUnitSize];// remember the first sample of that unit
          // Pad unavailable samples with new value
          while (iCurr < iNext)// propagate it to all leading unavailable units, one unit (iUnitSize samples) at a time
          {
            for (i=0; i<iUnitSize; i++)
            {
              piAdiLineTemp[i] = piRef;
            }
            piAdiLineTemp += iUnitSize;
            iCurr++;
          }
        }
        else
        {
          piRef = piAdiLine[iCurr*iUnitSize-1];// not the first unit: reuse the last sample of the preceding unit (already copied or padded)
          for (i=0; i<iUnitSize; i++)
          {
            piAdiLineTemp[i] = piRef;
          }
          piAdiLineTemp += iUnitSize;
          iCurr++;
        }
      }
      else// this unit is available: nothing to pad, move on
      {
        piAdiLineTemp += iUnitSize;
        iCurr++;
      }
    }

    // Copy processed samples: write the prepared scratch line to the output buffer
    // NOTE(review): these offsets appear to assume uiHeight == 2*iNumUnitsInCu*iUnitSize + 1 - confirm against caller
    piAdiLineTemp = piAdiLine + uiHeight + iUnitSize - 2;
    for (i=0; i<uiWidth; i++)
    {
      piAdiTemp[i] = piAdiLineTemp[i];
    }
    piAdiLineTemp = piAdiLine + uiHeight - 1;
    for (i=1; i<uiHeight; i++)
    {
      piAdiTemp[i*uiWidth] = piAdiLineTemp[-i];
    }
  }
}

对帧内预测参考数据进行滤波处理

在帧内预测的过程中，获取临近的Prediction Unit的边缘数据作为当前PU的参考数据。数据获取完成后，并不一定会直接使用这些数据进行预测，而可能会先将这些预测数据进行一次滤波操作。帧内参考像素的滤波在标准文档的8.4.4.2.3节详述。

帧内参考像素的滤波使能标记由一个标志位filterFlag标识。该标志位的判定方法为：

1、如果当前预测模式为DC模式，或者帧内预测的PU为4×4大小时，filterFlag一律为0；

2、计算当前的Intra预测模式同“水平”和“垂直”预测模式的index之间的差值；将这个差值同针对不同大小PU所分别设定的阈值（对于8×8PU为7，对于16×16PU为1，对于32×32PU为0）进行比较，如果大于阈值则filterFlag为1，否则为0。

思想：对于角度预测而言，该算法的目的是对不同的PU大小和预测方向进行区分，越小的PU越不需要滤波，越接近于“水平”和“垂直”的预测模式越不需要滤波。也就是说，4×4PU全不需要滤波，8×8PU只有接近于对角线的部分模式需要滤波，16×16PU除了水平、垂直及与其紧邻（index相差1）的模式外都需要滤波，而32×32PU除了纯水平和纯垂直模式外都需要滤波。

当设定为需要滤波时，滤波操作根据一个开关变量bInitFlag又有所区分。bInitFlag的判定方法如下：

1、SPS中指定的一个设置位strong_intra_smoothing_enabled_flag设置为1，并且PU大小为32×32，并且指定参考点数据之间的差值不是很大（具体的判定方法见标准文档）的时候，该标志位设为1；

2、其他情况下，该标志位设为0。

设定bInitFlag完成后，根据该标识取值，滤波过程分为两种不同情况：

1、当bInitFlag取1时，缓存区中两个端点和中心点不进行滤波，其他值根据距离这三个点的距离不同进行加权平均滤波；

2、当bInitFlag取0时，缓存区中的相邻数据进行[1,2,1]平滑滤波。

代码中的实现方法如下，很容易看出代码的实现和标准文档是匹配的：

// Excerpt of TComPattern::initAdiPattern showing only the reference-sample smoothing step.
// The locals used here (piFilterBuf, piFilterBufN, iBufSize, uiCuWidth, uiCuHeight, uiCuWidth2,
// uiCuHeight2, i) are declared in the elided parts of the function.
// piFilterBuf holds the unfiltered samples as one linear line and piFilterBufN receives the
// filtered result; the first and last samples are always copied through unchanged.
Void TComPattern::initAdiPattern( TComDataCU* pcCU, UInt uiZorderIdxInPart, UInt uiPartDepth, Int* piAdiBuf, Int iOrgBufStride, Int iOrgBufHeight, Bool& bAbove, Bool& bLeft, Bool bLMmode )
{
// ... (elided in this excerpt)
// Strong intra smoothing is only considered when the SPS enables it.
if (pcCU->getSlice()->getSPS()->getUseStrongIntraSmoothing())
  {
    Int blkSize = 32;
    // The three samples that stay fixed: both ends of the line and the entry at index uiCuHeight2.
    Int bottomLeft = piFilterBuf[0];
    Int topLeft = piFilterBuf[uiCuHeight2];
    Int topRight = piFilterBuf[iBufSize-1];
    Int threshold = 1 << (g_bitDepthY - 5);
    // Each half of the line counts as "flat" when its middle sample deviates from the average
    // of its two end samples by less than the threshold.
    Bool bilinearLeft = abs(bottomLeft+topLeft-2*piFilterBuf[uiCuHeight]) < threshold;
    Bool bilinearAbove  = abs(topLeft+topRight-2*piFilterBuf[uiCuHeight2+uiCuHeight]) < threshold;

    if (uiCuWidth>=blkSize && (bilinearLeft && bilinearAbove))
    {
      // Block is at least 32 wide and both halves are flat: replace the samples by a linear
      // interpolation between the fixed samples.
      Int shift = g_aucConvertToBit[uiCuWidth] + 3;  // log2(uiCuHeight2)
      piFilterBufN[0] = piFilterBuf[0];
      piFilterBufN[uiCuHeight2] = piFilterBuf[uiCuHeight2];
      piFilterBufN[iBufSize - 1] = piFilterBuf[iBufSize - 1];
      for (i = 1; i < uiCuHeight2; i++)
      {
        // weighted average of bottomLeft and topLeft; uiCuHeight is added before the shift
        // (presumably the rounding offset, i.e. half of uiCuHeight2 - confirm)
        piFilterBufN[i] = ((uiCuHeight2-i)*bottomLeft + i*topLeft + uiCuHeight) >> shift;
      }

      for (i = 1; i < uiCuWidth2; i++)
      {
        // weighted average of topLeft and topRight, same scheme as above
        piFilterBufN[uiCuHeight2 + i] = ((uiCuWidth2-i)*topLeft + i*topRight + uiCuWidth) >> shift;
      }
    }
    else
    {
      // 1. filtering with [1 2 1]
      piFilterBufN[0] = piFilterBuf[0];
      piFilterBufN[iBufSize - 1] = piFilterBuf[iBufSize - 1];
      for (i = 1; i < iBufSize - 1; i++)
      {
        // (prev + 2*cur + next + 2) >> 2: three-tap smoothing with rounding
        piFilterBufN[i] = (piFilterBuf[i - 1] + 2 * piFilterBuf[i]+piFilterBuf[i + 1] + 2) >> 2;
      }
    }
  }
  else
  {
    // Strong smoothing disabled in the SPS: always use the [1 2 1] filter.
    // 1. filtering with [1 2 1]
    piFilterBufN[0] = piFilterBuf[0];
    piFilterBufN[iBufSize - 1] = piFilterBuf[iBufSize - 1];
    for (i = 1; i < iBufSize - 1; i++)
    {
      piFilterBufN[i] = (piFilterBuf[i - 1] + 2 * piFilterBuf[i]+piFilterBuf[i + 1] + 2) >> 2;
    }
  }
// ... (elided in this excerpt)
}

HM编码器的基本结构

HEVC参考软件HM中Intra预测参考像素的获取与管理

帧内预测参考数据的获取和滤波处理

对帧内预测参考数据进行滤波处理

SAD，SAE，SATD，SSD，SSE，MAD，MAE，MSD，MSE理解

CU 級別的分析：TComDataCU

變換編碼（二）

量化（一）

量化（三）

Mac下配置sublime實現LaTeX

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結