LookaheadTLD::calcAdaptiveQuantFrame()

/*
    Computes the per-block adaptive-quantization (AQ) data of one frame.

    aqMode takes one of five values:
        X265_AQ_NONE
        X265_AQ_VARIANCE
        X265_AQ_AUTO_VARIANCE
        X265_AQ_AUTO_VARIANCE_BIASED
        X265_AQ_EDGE
    Depending on aqMode, the function derives an adaptive quantization offset
    qp_adj for every block and stores it in the frame's lowres data
    (qpAqOffset[], qpCuTreeOffset[], invQscaleFactor[]).

    Steps:
    1. From the AQ quantization-group size (rc.qgSize) pick blockCount, modeOneConst,
       modeTwoConst and the block step loopIncr.
    2. Fetch the frame's quantization offset array quantOffsets.
    3. Zero the lowres wp_ssd[] and wp_sum[] accumulators.
    4. Unless (bStatRead && cuTree && the frame is referenced):
        - If aqMode is NONE or aqStrength is 0:
            1. If aqMode is set but aqStrength is 0, initialise qpCuTreeOffset[],
               qpAqOffset[] and invQscaleFactor[]:
                - from quantOffsets[] when it is present,
                - otherwise offsets are 0 and invQscaleFactor[] is 256.
            2. If weighted (bi-)prediction is enabled, call acEnergyCu() on every block
               (its return value is discarded; presumably it feeds wp_ssd/wp_sum).
        - Otherwise (an AQ mode is set and aqStrength is non-zero):
            - If hevcAq is enabled, run xPreanalyze().
            - Otherwise:
                1. EDGE mode only: allocate and zero the edge, gaussian and theta pictures
                   and run edgeFilter(). If an allocation fails, the edge analysis is
                   skipped and the variance-based path is used for every block.
                2. Compute the AQ strength:
                    - AUTO_VARIANCE, AUTO_VARIANCE_BIASED, EDGE:
                        1. For every block compute energy = acEnergyCu() and an initial qp_adj:
                            - EDGE: compute edgeDensity (and avgAngle) with edgeDensityCu();
                                - non-zero edgeDensity: qp_adj comes from edgeDensity, and
                                  edgeInclined[] is 1 when avgAngle is in [30, 60] or [120, 150];
                                - zero edgeDensity: qp_adj comes from energy, edgeInclined[] is 0.
                            - otherwise qp_adj comes from energy.
                           The initial qp_adj is parked in qpCuTreeOffset[].
                        2. Average qp_adj (avg_adj) and qp_adj^2 (avg_adj_pow2) over the frame.
                        3. strength = aqStrength * avg_adj; avg_adj is then re-centred
                           using avg_adj_pow2 and modeTwoConst.
                        4. bias_strength = aqStrength.
                    - VARIANCE: strength = aqStrength * 1.0397.
                3. For every block:
                    1. Recompute qp_adj:
                        - AUTO_VARIANCE_BIASED: from strength, avg_adj and bias_strength;
                        - AUTO_VARIANCE: from strength and avg_adj;
                        - EDGE: as AUTO_VARIANCE, with AQ_EDGE_BIAS added to the strength
                          for inclined-edge blocks above the average;
                        - VARIANCE: from the block energy and strength.
                    2. If bHDROpt is set, shift qp_adj according to the block's average luma.
                    3. Add quantOffsets[] when present.
                    4. Store qp_adj in qpAqOffset[] and qpCuTreeOffset[], and
                       x265_exp2fix8(qp_adj) in invQscaleFactor[].
        - If qgSize is 8, derive invQscaleFactor8x8[] by averaging four invQscaleFactor[] entries.
    5. If weighted prediction or weighted bi-prediction is enabled:
        1. If (bStatRead && cuTree && the frame is referenced), call acEnergyCu() on every
           block (step 4 was skipped in that case).
        2. For each of the three planes rewrite wp_ssd from wp_ssd and wp_sum.
    6. If bDynamicRefine or bEnableFades is set:
        1. Store acEnergyCu() of every block in blockVariance[].
        2. Accumulate those values into frameVariance.
*/
void LookaheadTLD::calcAdaptiveQuantFrame(Frame *curFrame, x265_param* param)
{
    /* Actual adaptive quantization */
    int maxCol = curFrame->m_fencPic->m_picWidth;
    int maxRow = curFrame->m_fencPic->m_picHeight;
    int blockCount, loopIncr;
    float modeOneConst, modeTwoConst;

    // Pick the block count, the two mode constants and the block step from the AQ group size
    if (param->rc.qgSize == 8)
    {
        blockCount = curFrame->m_lowres.maxBlocksInRowFullRes * curFrame->m_lowres.maxBlocksInColFullRes;
        modeOneConst = 11.427f;
        modeTwoConst = 8.f;
        loopIncr = 8;
    }
    else
    {
        blockCount = widthInCU * heightInCU;
        modeOneConst = 14.427f;
        modeTwoConst = 11.f;
        loopIncr = 16;
    }

    // Per-block quantization offsets attached to the frame (may be NULL)
    float* quantOffsets = curFrame->m_quantOffsets;

    // Reset the lowres weighted-prediction ssd/sum accumulators of all three planes
    for (int y = 0; y < 3; y++)
    {
        curFrame->m_lowres.wp_ssd[y] = 0;
        curFrame->m_lowres.wp_sum[y] = 0;
    }

    // Skipped only when stats are being read AND cuTree is on AND the frame is referenced
    if (!(param->rc.bStatRead && param->rc.cuTree && IS_REFERENCED(curFrame)))
    {
        /* Calculate Qp offset for each 16x16 or 8x8 block in the frame */
        // No AQ mode, or AQ strength is zero
        if (param->rc.aqMode == X265_AQ_NONE || param->rc.aqStrength == 0)
        {
            // An AQ mode is set but its strength is zero: initialise the offset tables
            if (param->rc.aqMode && param->rc.aqStrength == 0)
            {
                if (quantOffsets)
                {
                    // Seed both offset tables from quantOffsets and derive invQscaleFactor from them
                    for (int cuxy = 0; cuxy < blockCount; cuxy++)
                    {
                        curFrame->m_lowres.qpCuTreeOffset[cuxy] = curFrame->m_lowres.qpAqOffset[cuxy] = quantOffsets[cuxy];
                        curFrame->m_lowres.invQscaleFactor[cuxy] = x265_exp2fix8(curFrame->m_lowres.qpCuTreeOffset[cuxy]);
                    }
                }
                else
                {
                    // No quantOffsets: offsets are zero and invQscaleFactor is 256
                    memset(curFrame->m_lowres.qpCuTreeOffset, 0, blockCount * sizeof(double));
                    memset(curFrame->m_lowres.qpAqOffset, 0, blockCount * sizeof(double));
                    for (int cuxy = 0; cuxy < blockCount; cuxy++)
                        curFrame->m_lowres.invQscaleFactor[cuxy] = 256;
                }
            }

            /* Need variance data for weighted prediction and dynamic refinement*/
            if (param->bEnableWeightedPred || param->bEnableWeightedBiPred)
            {
                // Return value is discarded; acEnergyCu presumably accumulates wp_sum/wp_ssd as a side effect
                for (int blockY = 0; blockY < maxRow; blockY += loopIncr)
                    for (int blockX = 0; blockX < maxCol; blockX += loopIncr)
                        acEnergyCu(curFrame, blockX, blockY, param->internalCsp, param->rc.qgSize);
            }
        }
        // An AQ mode is set and the AQ strength is non-zero
        else
        {
            if (param->rc.hevcAq)
            {
                // New method for calculating variance and qp offset
                xPreanalyze(curFrame);
            }
            else
            {
#define AQ_EDGE_BIAS 0.5
#define EDGE_INCLINATION 45
                const bool bEdgeMode = (param->rc.aqMode == X265_AQ_EDGE);

                // The edge/gaussian/theta pictures are only read in EDGE mode, so they are
                // only allocated there. edgeImage/edgeTheta stay NULL when they are unavailable.
                pixel *edgePic = NULL;
                pixel *gaussianPic = NULL;
                pixel *thetaPic = NULL;
                pixel *edgeImage = NULL;
                pixel *edgeTheta = NULL;
                if (bEdgeMode)
                {
                    uint32_t numCuInHeight = (maxRow + param->maxCUSize - 1) / param->maxCUSize;
                    int maxHeight = numCuInHeight * param->maxCUSize;
                    intptr_t stride = curFrame->m_fencPic->m_stride;
                    const size_t picSamples = (size_t)(stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));

                    edgePic = X265_MALLOC(pixel, picSamples);
                    gaussianPic = X265_MALLOC(pixel, picSamples);
                    thetaPic = X265_MALLOC(pixel, picSamples);

                    // On allocation failure skip the edge analysis instead of writing through NULL;
                    // every block then takes the variance-based path used for edgeDensity == 0.
                    if (edgePic && gaussianPic && thetaPic)
                    {
                        memset(edgePic, 0, picSamples * sizeof(pixel));
                        memset(gaussianPic, 0, picSamples * sizeof(pixel));
                        memset(thetaPic, 0, picSamples * sizeof(pixel));
                        edgeFilter(curFrame, edgePic, gaussianPic, thetaPic, stride, maxRow, maxCol);

                        // Skip the top/left luma margins; these pointers are the same for every block
                        edgeImage = edgePic + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;
                        edgeTheta = thetaPic + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;
                    }
                }

                int blockXY = 0, inclinedEdge = 0;
                double avg_adj_pow2 = 0, avg_adj = 0, qp_adj = 0;
                double bias_strength = 0.f;
                double strength = 0.f;
                // The three frame-adaptive modes need a first pass over all blocks
                if (param->rc.aqMode == X265_AQ_AUTO_VARIANCE || param->rc.aqMode == X265_AQ_AUTO_VARIANCE_BIASED || param->rc.aqMode == X265_AQ_EDGE)
                {
                    double bit_depth_correction = 1.f / (1 << (2 * (X265_DEPTH - 8)));
                    // First pass: one initial qp_adj per block
                    for (int blockY = 0; blockY < maxRow; blockY += loopIncr)
                    {
                        for (int blockX = 0; blockX < maxCol; blockX += loopIncr)
                        {
                            uint32_t energy, edgeDensity = 0, avgAngle = 0;
                            energy = acEnergyCu(curFrame, blockX, blockY, param->internalCsp, param->rc.qgSize);
                            if (bEdgeMode)
                            {
                                // edgeDensity stays 0 when the edge pictures could not be allocated
                                if (edgeImage)
                                    edgeDensity = edgeDensityCu(curFrame, edgeImage, edgeTheta, avgAngle, blockX, blockY, param->rc.qgSize);
                                if (edgeDensity)
                                {
                                    qp_adj = pow(edgeDensity * bit_depth_correction + 1, 0.1);
                                    //Increasing the QP of a block if its edge orientation lies around the multiples of 45 degree
                                    // i.e. avgAngle within [30, 60] or [120, 150]
                                    if ((avgAngle >= EDGE_INCLINATION - 15 && avgAngle <= EDGE_INCLINATION + 15) || (avgAngle >= EDGE_INCLINATION + 75 && avgAngle <= EDGE_INCLINATION + 105))
                                        curFrame->m_lowres.edgeInclined[blockXY] = 1;
                                    else
                                        curFrame->m_lowres.edgeInclined[blockXY] = 0;
                                }
                                // No edge density: fall back to the block energy and clear the inclined flag
                                else
                                {
                                    qp_adj = pow(energy * bit_depth_correction + 1, 0.1);
                                    curFrame->m_lowres.edgeInclined[blockXY] = 0;
                                }
                            }
                            // Non-EDGE modes: qp_adj = (energy * bit_depth_correction + 1)^0.1
                            else
                                qp_adj = pow(energy * bit_depth_correction + 1, 0.1);

                            // Park the initial qp_adj in qpCuTreeOffset; the second pass reads it back
                            curFrame->m_lowres.qpCuTreeOffset[blockXY] = qp_adj;
                            avg_adj += qp_adj;
                            avg_adj_pow2 += qp_adj * qp_adj;
                            blockXY++;
                        }
                    }

                    // Frame averages of qp_adj and qp_adj^2
                    avg_adj /= blockCount;
                    avg_adj_pow2 /= blockCount;
                    // strength is scaled by the (not yet re-centred) average
                    strength = param->rc.aqStrength * avg_adj;
                    avg_adj = avg_adj - 0.5f * (avg_adj_pow2 - modeTwoConst) / avg_adj;
                    bias_strength = param->rc.aqStrength;
                }
                // X265_AQ_VARIANCE: strength depends only on the configured AQ strength
                else
                    strength = param->rc.aqStrength * 1.0397f;

                // The edge pictures are not needed by the second pass (pointers are NULL outside EDGE mode)
                X265_FREE(edgePic);
                X265_FREE(gaussianPic);
                X265_FREE(thetaPic);

                /* Second pass: final qp_adj per block */
                blockXY = 0;
                for (int blockY = 0; blockY < maxRow; blockY += loopIncr)
                {
                    for (int blockX = 0; blockX < maxCol; blockX += loopIncr)
                    {
                        // AUTO_VARIANCE_BIASED: AUTO_VARIANCE plus a bias term
                        if (param->rc.aqMode == X265_AQ_AUTO_VARIANCE_BIASED)
                        {
                            qp_adj = curFrame->m_lowres.qpCuTreeOffset[blockXY];
                            qp_adj = strength * (qp_adj - avg_adj) + bias_strength * (1.f - modeTwoConst / (qp_adj * qp_adj));
                        }
                        else if (param->rc.aqMode == X265_AQ_AUTO_VARIANCE)
                        {
                            qp_adj = curFrame->m_lowres.qpCuTreeOffset[blockXY];
                            qp_adj = strength * (qp_adj - avg_adj);
                        }
                        // EDGE: same as AUTO_VARIANCE, except inclined-edge blocks above the
                        // average get AQ_EDGE_BIAS added to the strength
                        else if (param->rc.aqMode == X265_AQ_EDGE)
                        {
                            inclinedEdge = curFrame->m_lowres.edgeInclined[blockXY];
                            qp_adj = curFrame->m_lowres.qpCuTreeOffset[blockXY];
                            if(inclinedEdge && (qp_adj - avg_adj > 0))
                                qp_adj = ((strength + AQ_EDGE_BIAS) * (qp_adj - avg_adj));
                            else
                                qp_adj = strength * (qp_adj - avg_adj);
                        }
                        // VARIANCE: uses only this block's energy, not the frame average
                        else
                        {
                            uint32_t energy = acEnergyCu(curFrame, blockX, blockY, param->internalCsp, param->rc.qgSize);
                            qp_adj = strength * (X265_LOG2(X265_MAX(energy, 1)) - (modeOneConst + 2 * (X265_DEPTH - 8)));
                        }

                        if (param->bHDROpt)
                        {
                            // Shift qp_adj by a step chosen from the block's average luma
                            uint32_t sum = lumaSumCu(curFrame, blockX, blockY, param->rc.qgSize);
                            uint32_t lumaAvg = sum / (loopIncr * loopIncr);
                            // NOTE(review): lumaAvg in [434, 501) matches no branch and leaves qp_adj unchanged — confirm intended
                            if (lumaAvg < 301)
                                qp_adj += 3;
                            else if (lumaAvg >= 301 && lumaAvg < 367)
                                qp_adj += 2;
                            else if (lumaAvg >= 367 && lumaAvg < 434)
                                qp_adj += 1;
                            else if (lumaAvg >= 501 && lumaAvg < 567)
                                qp_adj -= 1;
                            else if (lumaAvg >= 567 && lumaAvg < 634)
                                qp_adj -= 2;
                            else if (lumaAvg >= 634 && lumaAvg < 701)
                                qp_adj -= 3;
                            else if (lumaAvg >= 701 && lumaAvg < 767)
                                qp_adj -= 4;
                            else if (lumaAvg >= 767 && lumaAvg < 834)
                                qp_adj -= 5;
                            else if (lumaAvg >= 834)
                                qp_adj -= 6;
                        }

                        // Add the frame-supplied per-block offset, if any
                        if (quantOffsets != NULL)
                            qp_adj += quantOffsets[blockXY];

                        // Publish the final offset and its fixed-point scale factor
                        curFrame->m_lowres.qpAqOffset[blockXY] = qp_adj;
                        curFrame->m_lowres.qpCuTreeOffset[blockXY] = qp_adj;
                        curFrame->m_lowres.invQscaleFactor[blockXY] = x265_exp2fix8(qp_adj);
                        blockXY++;
                    }
                }
            }
        }

        // With 8x8 AQ groups, also build a per-CU table by averaging four invQscaleFactor entries
        if (param->rc.qgSize == 8)
        {
            for (int cuY = 0; cuY < heightInCU; cuY++)
            {
                for (int cuX = 0; cuX < widthInCU; cuX++)
                {
                    const int cuXY = cuX + cuY * widthInCU;
                    curFrame->m_lowres.invQscaleFactor8x8[cuXY] = 
                        (curFrame->m_lowres.invQscaleFactor[cuX * 2 + cuY * widthInCU * 4] +
                        curFrame->m_lowres.invQscaleFactor[cuX * 2 + cuY * widthInCU * 4 + 1] +
                        curFrame->m_lowres.invQscaleFactor[cuX * 2 + cuY * widthInCU * 4 + curFrame->m_lowres.maxBlocksInRowFullRes] +
                        curFrame->m_lowres.invQscaleFactor[cuX * 2 + cuY * widthInCU * 4 + curFrame->m_lowres.maxBlocksInRowFullRes + 1]) / 4;
                }
            }
        }
    }

    if (param->bEnableWeightedPred || param->bEnableWeightedBiPred)
    {
        // The big branch above was skipped in exactly this case, so run acEnergyCu over the frame here
        if (param->rc.bStatRead && param->rc.cuTree && IS_REFERENCED(curFrame))
        {
            for (int blockY = 0; blockY < maxRow; blockY += loopIncr)
                for (int blockX = 0; blockX < maxCol; blockX += loopIncr)
                    acEnergyCu(curFrame, blockX, blockY, param->internalCsp, param->rc.qgSize);
        }

        int hShift = CHROMA_H_SHIFT(param->internalCsp);
        int vShift = CHROMA_V_SHIFT(param->internalCsp);
        // Round the picture size to the nearest multiple of 16 (this overwrites maxCol/maxRow)
        maxCol = ((maxCol + 8) >> 4) << 4;
        maxRow = ((maxRow + 8) >> 4) << 4;
        int width[3]  = { maxCol, maxCol >> hShift, maxCol >> hShift };
        int height[3] = { maxRow, maxRow >> vShift, maxRow >> vShift };

        // For each plane: wp_ssd = ssd - round(sum^2 / (width * height))
        for (int i = 0; i < 3; i++)
        {
            uint64_t sum, ssd;
            sum = curFrame->m_lowres.wp_sum[i];
            ssd = curFrame->m_lowres.wp_ssd[i];
            curFrame->m_lowres.wp_ssd[i] = ssd - (sum * sum + (width[i] * height[i]) / 2) / (width[i] * height[i]);
        }
    }

    if (param->bDynamicRefine || param->bEnableFades)
    {
        // NOTE(review): maxCol/maxRow may have been rounded to multiples of 16 by the
        // weighted-prediction block above, and rowVariance is never reset between rows —
        // confirm both are intended before relying on frameVariance as a plain mean.
        uint64_t blockXY = 0, rowVariance = 0;
        curFrame->m_lowres.frameVariance = 0;
        for (int blockY = 0; blockY < maxRow; blockY += loopIncr)
        {
            for (int blockX = 0; blockX < maxCol; blockX += loopIncr)
            {
                // Record the block's energy and add it to the running total
                curFrame->m_lowres.blockVariance[blockXY] = acEnergyCu(curFrame, blockX, blockY, param->internalCsp, param->rc.qgSize);
                rowVariance += curFrame->m_lowres.blockVariance[blockXY];
                blockXY++;
            }
            curFrame->m_lowres.frameVariance += (rowVariance / maxCol);
        }
        curFrame->m_lowres.frameVariance /= maxRow;
    }
}
LookaheadTLD::calcAdaptiveQuantFrame()

LookaheadTLD::calcAdaptiveQuantFrame()

LookaheadTLD::lowresIntraEstimate()

CostEstimateGroup::estimateCUCost()

x265多線程-線程/線程池

x265多線程-event

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結