Search::codeIntraLumaQT()

/*
 * Search::codeIntraLumaQT
 *
 * Recursively evaluates the luma transform tree of an intra-coded block,
 * starting at the TU identified by (tuDepth, absPartIdx). For this TU it can
 * measure two alternatives -- coding the TU whole ("full") and splitting it
 * into four sub-TUs ("split") -- and adds the cost of the cheaper one to
 * outCost.
 *
 * The "full" alternative is measured exactly rather than estimated: the block
 * is predicted, the residual is transformed/quantized, reconstructed, the SSE
 * against the source is computed and the syntax elements are run through the
 * entropy coder to count bits.
 *
 * Parameters:
 *   mode        - supplies the CU data (mode.cu), the source pixels
 *                 (mode.fencYuv) and the prediction buffer (mode.predYuv,
 *                 written here).
 *   cuGeom      - geometry of the CU (depth, log2CUSize, numPartitions,
 *                 absPartIdx are read).
 *   tuDepth     - depth of this TU relative to the CU; the TU size is
 *                 log2TrSize = cuGeom.log2CUSize - tuDepth.
 *   absPartIdx  - partition index of this TU inside the CU.
 *   bAllowSplit - when false, a TU that may be coded whole is not split.
 *   outCost     - accumulator; rdcost, distortion, bits and energy of the
 *                 winning alternative are ADDED to it (never overwritten).
 *   depthRange  - depthRange[0] / depthRange[1] are the smallest / largest
 *                 log2 TU sizes that may be coded.
 *
 * Outline:
 *   1. Derive depths and sizes, then decide mightNotSplit / mightSplit.
 *   2. If mightNotSplit, measure the full TU:
 *        a. if mightSplit, snapshot the entropy state into rqtRoot so the
 *           split evaluation can start from the same context;
 *        b. build the intra neighbours and reference samples, then predict
 *           with the stored luma intra direction;
 *        c. clear transform-skip and record tuDepth for the covered parts;
 *        d. residual = fenc - pred, forward transform + quantization,
 *           yielding the number of non-zero coefficients (numSig);
 *        e. recon = pred + inverse-transformed residual if numSig != 0,
 *           otherwise recon = pred;
 *        f. set the CBF, compute the SSE distortion of recon vs. fenc;
 *        g. count bits: CU-level syntax (only when absPartIdx == 0), intra
 *           direction(s), subdiv flag = 0 (unless at minimum size), CBF and
 *           coefficients; multiply by 4 under rdPenalty for 32x32 TUs in
 *           non-I slices;
 *        h. derive rdcost (psy-rd, ssim-rd or plain) into fullCost.
 *      Otherwise fullCost.rdcost = MAX_INT64.
 *   3. If mightSplit, measure the split:
 *        a. if the full TU was measured, save its entropy state in rqtTest
 *           and reload rqtRoot;
 *        b. recurse into the four sub-TUs (through codeIntraLumaTSkip when
 *           transform-skip is to be checked), accumulating into splitCost,
 *           and merge their CBFs;
 *        c. if the full TU was a legal choice and is not at minimum size,
 *           add the bits of subdiv flag = 1 and recompute splitCost.rdcost;
 *        d. if the split is cheaper, add splitCost to outCost and return;
 *           otherwise restore the full-TU entropy state, TU depth, CBF and
 *           transform-skip flags.
 *   4. The full TU won: copy its reconstruction into the frame's recon
 *      picture and add fullCost to outCost.
 */
void Search::codeIntraLumaQT(Mode& mode, const CUGeom& cuGeom, uint32_t tuDepth, uint32_t absPartIdx, bool bAllowSplit, Cost& outCost, const uint32_t depthRange[2])
{
    // CU data being coded
    CUData& cu = mode.cu;
    // absolute depth of this TU: CU depth + TU depth inside the CU
    uint32_t fullDepth  = cuGeom.depth + tuDepth;
    // log2 of the TU size at this depth
    uint32_t log2TrSize = cuGeom.log2CUSize - tuDepth;
    // index of the per-size RQT scratch layer, and of the per-size primitives
    uint32_t qtLayer    = log2TrSize - 2;
    uint32_t sizeIdx    = log2TrSize - 2;
    // the TU may be coded whole as long as it does not exceed the largest allowed size
    bool mightNotSplit  = log2TrSize <= depthRange[1];
    // the TU may be split if it is above the smallest allowed size and either
    // splitting is allowed by the caller or coding it whole is not possible
	bool mightSplit     = (log2TrSize > depthRange[0]) && (bAllowSplit || !mightNotSplit);
    
    // RDOQ is considered enabled for any non-zero rdoqLevel
	bool bEnableRDOQ  = !!m_param->rdoqLevel;

    /* If maximum RD penalty (rdPenalty == 2), force splits at TU size 32x32 in
     * non-I slices, provided TUs of 16x16 (or smaller) are allowed */
    if (m_param->rdPenalty == 2 && m_slice->m_sliceType != I_SLICE && log2TrSize == 5 && depthRange[0] <= 4)
    {
        mightNotSplit = false;
        mightSplit = true;
    }

    /* fullCost is the cost of coding this TU whole (no further split); its
     * counterpart, splitCost below, is the cost of the four sub-TUs */
    Cost fullCost;
	
    // CBF value of the full (unsplit) TU, kept so it can be restored after the split trial
    uint32_t bCBF = 0;

    // reconstruction scratch buffer of this TU-size layer
    pixel*   reconQt = m_rqt[qtLayer].reconQtYuv.getLumaAddr(absPartIdx);
    uint32_t reconQtStride = m_rqt[qtLayer].reconQtYuv.m_size;

    /* Evaluate the cost of coding the TU without splitting (fullCost) */
    if (mightNotSplit)
    {
        // if a split will also be tried, snapshot the entropy state into rqtRoot
        // so the split evaluation starts from the same context
        if (mightSplit)
            m_entropyCoder.store(m_rqt[fullDepth].rqtRoot);

        // source (original) luma pixels
        const pixel* fenc = mode.fencYuv->getLumaAddr(absPartIdx);
        // prediction buffer (filled by predIntraLumaAng below)
		pixel*   pred     = mode.predYuv.getLumaAddr(absPartIdx);
        // residual scratch buffer of the CU-depth layer
		int16_t* residual = m_rqt[cuGeom.depth].tmpResiYuv.getLumaAddr(absPartIdx);
        // fenc stride; also passed as the pred and residual stride below
        uint32_t stride   = mode.fencYuv->m_size;

        // init availability pattern
        uint32_t lumaPredMode = cu.m_lumaIntraDir[absPartIdx];
        IntraNeighbors intraNeighbors;
        // gather the availability of the neighbouring blocks for this TU
        initIntraNeighbors(cu, absPartIdx, tuDepth, true, &intraNeighbors);
        // build the reference samples for lumaPredMode
        // (presumably padding of unavailable samples plus smoothing filter; TODO confirm)
		initAdiPattern(cu, cuGeom, absPartIdx, intraNeighbors, lumaPredMode);
        // get prediction signal: predict with the luma intra direction into pred
        predIntraLumaAng(lumaPredMode, pred, stride, log2TrSize);

        // the full-TU trial never uses transform skip
        cu.setTransformSkipSubParts(0, TEXT_LUMA, absPartIdx, fullDepth);
        // record tuDepth as the TU depth of the partitions covered by this TU
        cu.setTUDepthSubParts(tuDepth, absPartIdx, fullDepth);

        // coefficient storage of this TU inside the layer's coefficient buffer
        uint32_t coeffOffsetY = absPartIdx << (LOG2_UNIT_SIZE * 2);
        coeff_t* coeffY       = m_rqt[qtLayer].coeffRQT[0] + coeffOffsetY;

        // when RDOQ is enabled, refresh the entropy coder's bit estimates for this
        // luma TU size (presumably consumed by RDOQ during quantization; TODO confirm)
        if (bEnableRDOQ)
            m_entropyCoder.estBit(m_entropyCoder.m_estBitsSbac, log2TrSize, true);

        // residual = fenc - pred; the primitive variant is chosen by whether stride is a multiple of 64
        primitives.cu[sizeIdx].calcresidual[stride % 64 == 0](fenc, pred, residual, stride);
        // forward transform + quantization into coeffY; returns the number of non-zero coefficients
        uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeffY, log2TrSize, TEXT_LUMA, absPartIdx, false);
        
        /* build the reconstruction */
		if (numSig)	// at least one non-zero coefficient
        {
            // dequantize + inverse transform back into residual
            m_quant.invtransformNxN(cu, residual, stride, coeffY, log2TrSize, TEXT_LUMA, true, false, numSig);
            // the add_ps variant is selected only if all strides and all three buffer offsets are multiples of 64
            bool reconQtYuvAlign = m_rqt[qtLayer].reconQtYuv.getAddrOffset(absPartIdx, mode.predYuv.m_size) % 64 == 0;
            bool predAlign = mode.predYuv.getAddrOffset(absPartIdx, mode.predYuv.m_size) % 64 == 0;
            bool residualAlign = m_rqt[cuGeom.depth].tmpResiYuv.getAddrOffset(absPartIdx, mode.predYuv.m_size) % 64 == 0;
            bool bufferAlignCheck = (reconQtStride % 64 == 0) && (stride % 64 == 0) && reconQtYuvAlign && predAlign && residualAlign;
            // recon = pred + residual
			primitives.cu[sizeIdx].add_ps[bufferAlignCheck](reconQt, reconQtStride, pred, residual, stride, stride);
        }
        else
            // no coded residual, recon = pred
            primitives.cu[sizeIdx].copy_pp(reconQt, reconQtStride, pred, stride);

        // CBF of this TU, stored in the bit that corresponds to tuDepth
        bCBF = !!numSig << tuDepth;
        // write the CBF to the covered partitions
        cu.setCbfSubParts(bCBF, TEXT_LUMA, absPartIdx, fullDepth);
        // SSE distortion between the reconstruction and the source
        fullCost.distortion = primitives.cu[sizeIdx].sse_pp(reconQt, reconQtStride, fenc, stride);
        /* at this point the distortion is exact: it was measured on the real
         * reconstruction, not on an estimate */

        // start counting bits from zero
        m_entropyCoder.resetBits();
        // CU-level syntax is only counted for the first partition of the CU
        if (!absPartIdx)
        {
            // non-intra slice: the CU header carries extra flags
            if (!cu.m_slice->isIntra())
            {
                // transquant-bypass flag, if the PPS enables it
                if (cu.m_slice->m_pps->bTransquantBypassEnabled)
                    m_entropyCoder.codeCUTransquantBypassFlag(cu.m_tqBypass[0]);
                // skip flag
                m_entropyCoder.codeSkipFlag(cu, 0);
                // prediction mode of the CU
                m_entropyCoder.codePredMode(cu.m_predMode[0]);
            }
            // partition size
            m_entropyCoder.codePartSize(cu, 0, cuGeom.depth);
        }

        /* luma intra direction(s) */
        // 2Nx2N: a single direction, counted once with the first partition
        if (cu.m_partSize[0] == SIZE_2Nx2N)
        {
            if (!absPartIdx)
                m_entropyCoder.codeIntraDirLumaAng(cu, 0, false);
        }
        // otherwise one direction per quarter of the CU: a TU at depth 0 covers
        // all four quarters and counts all four directions; a deeper TU counts
        // the direction of its quarter only when it starts that quarter
        else
        {
            // number of partitions in one quarter of the CU
            uint32_t qNumParts = cuGeom.numPartitions >> 2;
            // TU spans the whole CU: count all four directions
            if (!tuDepth)
            {
                for (uint32_t qIdx = 0; qIdx < 4; ++qIdx)
                    m_entropyCoder.codeIntraDirLumaAng(cu, qIdx * qNumParts, false);
            }
            // absPartIdx is the first partition of a quarter
            else if (!(absPartIdx & (qNumParts - 1)))
                m_entropyCoder.codeIntraDirLumaAng(cu, absPartIdx, false);
        }

        // unless the TU is already at the smallest allowed size, count subdiv flag = 0
        if (log2TrSize != depthRange[0])
            m_entropyCoder.codeTransformSubdivFlag(0, 5 - log2TrSize);

        // luma CBF
        m_entropyCoder.codeQtCbfLuma(!!numSig, tuDepth);

        // coefficients, only if the CBF is set
        if (cu.getCbf(absPartIdx, TEXT_LUMA, tuDepth))
            m_entropyCoder.codeCoeffNxN(cu, coeffY, absPartIdx, log2TrSize, TEXT_LUMA);

        // total bits counted since resetBits()
        fullCost.bits = m_entropyCoder.getNumberOfWrittenBits();

        // rdPenalty: 32x32 TUs in non-I slices are charged four times their bits
        if (m_param->rdPenalty && log2TrSize == 5 && m_slice->m_sliceType != I_SLICE)
            fullCost.bits *= 4;

        // rdcost from distortion and bits; psy-rd and ssim-rd additionally compute an energy term
        if (m_rdCost.m_psyRd)
        {
            fullCost.energy = m_rdCost.psyCost(sizeIdx, fenc, mode.fencYuv->m_size, reconQt, reconQtStride);
            fullCost.rdcost = m_rdCost.calcPsyRdCost(fullCost.distortion, fullCost.bits, fullCost.energy);
        }
        else if(m_rdCost.m_ssimRd)
        {
            fullCost.energy = m_quant.ssimDistortion(cu, fenc, stride, reconQt, reconQtStride, log2TrSize, TEXT_LUMA, absPartIdx);
            fullCost.rdcost = m_rdCost.calcSsimRdCost(fullCost.distortion, fullCost.bits, fullCost.energy);
        }
        else
            fullCost.rdcost = m_rdCost.calcRdCost(fullCost.distortion, fullCost.bits);
    }
    // the TU must be split: give the full alternative the maximum cost
    else
        fullCost.rdcost = MAX_INT64;
		
    /* Evaluate the cost of splitting the TU into four sub-TUs (splitCost) */
    if (mightSplit)
    {
        // if the full TU was measured, keep its entropy state and rewind to the
        // state captured before it, so both alternatives start from the same context
        if (mightNotSplit)
        {
            m_entropyCoder.store(m_rqt[fullDepth].rqtTest);  // save state after full TU encode
			m_entropyCoder.load(m_rqt[fullDepth].rqtRoot);   // prep state of split encode
        }

        /* code split block */
        // number of partitions covered by each sub-TU
        uint32_t qNumParts = 1 << (log2TrSize - 1 - LOG2_UNIT_SIZE) * 2;

        // transform skip is checked for the sub-TUs only if the PPS enables it,
        // the sub-TU size does not exceed MAX_LOG2_TS_SIZE and the CU is not
        // transquant-bypassed; with bEnableTSkipFast also only for non-2Nx2N CUs
        int checkTransformSkip = m_slice->m_pps->bTransformSkipEnabled && (log2TrSize - 1) <= MAX_LOG2_TS_SIZE && !cu.m_tqBypass[0];
        if (m_param->bEnableTSkipFast)
            checkTransformSkip &= cu.m_partSize[0] != SIZE_2Nx2N;

        // accumulates the costs of the four sub-TUs
        Cost splitCost;
        uint32_t cbf = 0;
        // visit the four sub-TUs
        for (uint32_t qIdx = 0, qPartIdx = absPartIdx; qIdx < 4; ++qIdx, qPartIdx += qNumParts)
        {
            // evaluate the sub-TU; its cost is added into splitCost (the recursive
            // call accumulates into its outCost; codeIntraLumaTSkip presumably does
            // the same -- TODO confirm)
            if (checkTransformSkip)
                codeIntraLumaTSkip(mode, cuGeom, tuDepth + 1, qPartIdx, splitCost);
            else
                codeIntraLumaQT(mode, cuGeom, tuDepth + 1, qPartIdx, bAllowSplit, splitCost, depthRange);
            // merge the CBFs of the four sub-TUs
            cbf |= cu.getCbf(qPartIdx, TEXT_LUMA, tuDepth + 1);
        }
        // fold the merged child CBF into this TU's luma CBF at the bit for tuDepth
        cu.m_cbf[0][absPartIdx] |= (cbf << tuDepth);

        // the subdiv flag is only signalled when not splitting was a legal choice
        // and the TU is not at the smallest allowed size
        if (mightNotSplit && log2TrSize != depthRange[0])
        {
            /* If we could have coded this TU depth, include cost of subdiv flag */
            // start counting bits from zero
            m_entropyCoder.resetBits();
            // subdiv flag = 1
            m_entropyCoder.codeTransformSubdivFlag(1, 5 - log2TrSize);
            // add the flag's bits to the split total
            splitCost.bits += m_entropyCoder.getNumberOfWrittenBits();

            // recompute the split rdcost from the accumulated totals
            if (m_rdCost.m_psyRd)
                splitCost.rdcost = m_rdCost.calcPsyRdCost(splitCost.distortion, splitCost.bits, splitCost.energy);
            else if(m_rdCost.m_ssimRd)
                splitCost.rdcost = m_rdCost.calcSsimRdCost(splitCost.distortion, splitCost.bits, splitCost.energy);
            else
                splitCost.rdcost = m_rdCost.calcRdCost(splitCost.distortion, splitCost.bits);
        }	// end of if (mightNotSplit && log2TrSize != depthRange[0])

        /* Compare the two alternatives and keep the cheaper one */
        // split is cheaper: add its cost to outCost and return without touching
        // the full-TU state or copying reconQt to the recon picture
        if (splitCost.rdcost < fullCost.rdcost)
        {
            outCost.rdcost     += splitCost.rdcost;
            outCost.distortion += splitCost.distortion;
            outCost.bits       += splitCost.bits;
            outCost.energy     += splitCost.energy;
            return;
        }
        // the full TU is not more expensive than the split: undo the split trial
        else
        {
            // recover entropy state of full-size TU encode
            m_entropyCoder.load(m_rqt[fullDepth].rqtTest);

            // recover transform index and Cbf values, and clear transform skip
            cu.setTUDepthSubParts(tuDepth, absPartIdx, fullDepth);
            cu.setCbfSubParts(bCBF, TEXT_LUMA, absPartIdx, fullDepth);
            cu.setTransformSkipSubParts(0, TEXT_LUMA, absPartIdx, fullDepth);
        }
    }	// end of if (mightSplit)

    /* set reconstruction for next intra prediction blocks if full TU prediction won
     * (this point is not reached when the split won): copy the full-TU
     * reconstruction into the frame's reconstructed picture */
    PicYuv*  reconPic = m_frame->m_reconPic;
    pixel*   picReconY = reconPic->getLumaAddr(cu.m_cuAddr, cuGeom.absPartIdx + absPartIdx);
    intptr_t picStride = reconPic->m_stride;
    primitives.cu[sizeIdx].copy_pp(picReconY, picStride, reconQt, reconQtStride);

    // add the full-TU cost to the caller's accumulator
    outCost.rdcost     += fullCost.rdcost;
    outCost.distortion += fullCost.distortion;
    outCost.bits       += fullCost.bits;
    outCost.energy     += fullCost.energy;
}
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章