FAAC源碼分析之faacEncEncode

FAAC編碼代碼流程圖


通用的AAC編碼系統框圖(偷來的圖)


對比可以發現,FAAC的編碼流程和一般的AAC編碼大致一致,主要包括心理聲學模型處理和量化處理兩部分,另外還包括一些優化處理的過程。以下是帶註釋的實現源碼:

/*
 * Encode one frame of audio.
 *
 * Each call pushes up to FRAME_LEN new samples per channel into the
 * encoder's sample buffer chain, runs the psychoacoustic model, filterbank,
 * optional TNS / LTP / prediction / mid-side tools and the quantizer, and
 * writes the resulting bitstream into outputBuffer.
 *
 * Parameters:
 *   hEncoder     - encoder state; its configuration (input format, channel
 *                  map, object type, ...) selects the processing path.
 *   inputBuffer  - interleaved input samples. It is reinterpreted as short,
 *                  int32_t or float according to config.inputFormat;
 *                  config.channel_map[channel] selects which interleaved
 *                  slot feeds each encoder channel.
 *   samplesInput - total number of samples over all channels (it is divided
 *                  by numChannels to get the per-channel count). 0 means
 *                  "no more input": the encoder feeds silence to flush.
 *   outputBuffer - destination handed to OpenBitStream().
 *   bufferSize   - size of outputBuffer, handed to OpenBitStream().
 *
 * Returns:
 *   the byte count reported by CloseBitStream() for the written frame;
 *   0 while the first three calls are still filling the buffers, and 0
 *     once more than four flush calls have been made;
 *   -1 if config.inputFormat is not a supported format, or if samplesInput
 *     holds more than FRAME_LEN samples per channel.
 */
int FAACAPI faacEncEncode(faacEncHandle hEncoder,
	int32_t *inputBuffer,
	unsigned int samplesInput,
	unsigned char *outputBuffer,
	unsigned int bufferSize
	)
{
	unsigned int channel, i;
	int sb, frameBytes;
	unsigned int offset;
	BitStream *bitStream; /* bitstream used for writing the frame to */
	TnsInfo *tnsInfo_for_LTP;
	TnsInfo *tnsDecInfo;
#ifdef DRM
	int desbits, diff;
	double fix;
#endif

	/* local copies of parameters */
	ChannelInfo *channelInfo = hEncoder->channelInfo;
	CoderInfo *coderInfo = hEncoder->coderInfo;
	unsigned int numChannels = hEncoder->numChannels;
	unsigned int sampleRate = hEncoder->sampleRate;
	unsigned int aacObjectType = hEncoder->config.aacObjectType;
	unsigned int mpegVersion = hEncoder->config.mpegVersion;
	unsigned int useLfe = hEncoder->config.useLfe;
	unsigned int useTns = hEncoder->config.useTns;
	unsigned int allowMidside = hEncoder->config.allowMidside;
	unsigned int bandWidth = hEncoder->config.bandWidth;
	unsigned int shortctl = hEncoder->config.shortctl;

	/* Reject oversized input before touching any encoder state: the
	   per-channel sample buffers hold FRAME_LEN samples, so a larger
	   per-channel count would be copied past the end of the buffer in
	   the input conversion loops below. */
	if (numChannels != 0 && samplesInput / numChannels > (unsigned int)FRAME_LEN)
		return -1;

	/* Increase frame number */
	hEncoder->frameNum++;

	if (samplesInput == 0)
		hEncoder->flushFrame++;

	/* After 4 flush frames all samples have been encoded,
	return 0 bytes written */
	if (hEncoder->flushFrame > 4)
		return 0;

	/* Determine the channel configuration */
	GetChannelInfo(channelInfo, numChannels, useLfe);

	/* Update current sample buffers */
	for (channel = 0; channel < numChannels; channel++)
	{
		double *tmp;

		/* Snapshot the current and next frame into the LTP time buffer
		   (first and second half respectively) before rotating. */
		if (hEncoder->sampleBuff[channel]) {
			for(i = 0; i < FRAME_LEN; i++) {
				hEncoder->ltpTimeBuff[channel][i] = hEncoder->sampleBuff[channel][i];
			}
		}
		if (hEncoder->nextSampleBuff[channel]) {
			for(i = 0; i < FRAME_LEN; i++) {
				hEncoder->ltpTimeBuff[channel][FRAME_LEN + i] =
					hEncoder->nextSampleBuff[channel][i];
			}
		}

		if (!hEncoder->sampleBuff[channel])
			hEncoder->sampleBuff[channel] = (double*)AllocMemory(FRAME_LEN*sizeof(double));

		/* Rotate the four-stage buffer chain; the oldest buffer is
		   recycled as the new input buffer (next3SampleBuff). */
		tmp = hEncoder->sampleBuff[channel];

		hEncoder->sampleBuff[channel]		= hEncoder->nextSampleBuff[channel];
		hEncoder->nextSampleBuff[channel]	= hEncoder->next2SampleBuff[channel];
		hEncoder->next2SampleBuff[channel]	= hEncoder->next3SampleBuff[channel];
		hEncoder->next3SampleBuff[channel]	= tmp;

		if (samplesInput == 0)
		{
			/* start flushing */
			for (i = 0; i < FRAME_LEN; i++)
				hEncoder->next3SampleBuff[channel][i] = 0.0;
		}
		else
		{
			/* Guaranteed <= FRAME_LEN by the check at function entry. */
			unsigned int samples_per_channel = samplesInput/numChannels;

			/* handle the various input formats and channel remapping */
			switch( hEncoder->config.inputFormat )
			{

			case FAAC_INPUT_16BIT:
				{
					short *input_channel = (short*)inputBuffer + hEncoder->config.channel_map[channel];

					for (i = 0; i < samples_per_channel; i++)
					{
						hEncoder->next3SampleBuff[channel][i] = (double)*input_channel;
						input_channel += numChannels;
					}
				}
				break;

			case FAAC_INPUT_32BIT:
				{
					int32_t *input_channel = (int32_t*)inputBuffer + hEncoder->config.channel_map[channel];

					/* 32-bit samples are scaled down by 256 */
					for (i = 0; i < samples_per_channel; i++)
					{
						hEncoder->next3SampleBuff[channel][i] = (1.0/256) * (double)*input_channel;
						input_channel += numChannels;
					}
				}
				break;

			case FAAC_INPUT_FLOAT:
				{
					float *input_channel = (float*)inputBuffer + hEncoder->config.channel_map[channel];

					for (i = 0; i < samples_per_channel; i++)
					{
						hEncoder->next3SampleBuff[channel][i] = (double)*input_channel;
						input_channel += numChannels;
					}
				}
				break;

			default:
				return -1; /* invalid input format */
			}

			/* Zero-pad a short (partial) frame up to FRAME_LEN */
			for (i = samples_per_channel; i < FRAME_LEN; i++)
				hEncoder->next3SampleBuff[channel][i] = 0.0;
		}

		/* Psychoacoustics */
		/* Update buffers and run FFT on new samples */
		/* LFE psychoacoustic can run without it */
		if (!channelInfo[channel].lfe || channelInfo[channel].cpe)
		{
			/* Update the psychoacoustic model's buffers with the new
			   samples (original note: computes the current frame's
			   energy values). */
			hEncoder->psymodel->PsyBufferUpdate(
				&hEncoder->fft_tables,
				&hEncoder->gpsyInfo,
				&hEncoder->psyInfo[channel],
				hEncoder->next3SampleBuff[channel],
				bandWidth,
				hEncoder->srInfo->cb_width_short,
				hEncoder->srInfo->num_cb_short);
		}
	}

	if (hEncoder->frameNum <= 3) /* Still filling up the buffers */
		return 0;

	/* Psychoacoustics */
	/* Original note: internally detects transient signals in order to
	   decide between long and short blocks. */
	hEncoder->psymodel->PsyCalculate(channelInfo, &hEncoder->gpsyInfo, hEncoder->psyInfo,
		hEncoder->srInfo->cb_width_long, hEncoder->srInfo->num_cb_long,
		hEncoder->srInfo->cb_width_short,
		hEncoder->srInfo->num_cb_short, numChannels);

	/* Long/short block switching */
	hEncoder->psymodel->BlockSwitch(coderInfo, hEncoder->psyInfo, numChannels);

	/* force block type */
	if (shortctl == SHORTCTL_NOSHORT)
	{
		for (channel = 0; channel < numChannels; channel++)
		{
			coderInfo[channel].block_type = ONLY_LONG_WINDOW;
		}
	}
	if (shortctl == SHORTCTL_NOLONG)
	{
		for (channel = 0; channel < numChannels; channel++)
		{
			coderInfo[channel].block_type = ONLY_SHORT_WINDOW;
		}
	}

	/* AAC Filterbank, MDCT with overlap and add */
	for (channel = 0; channel < numChannels; channel++) {
		int k;

		FilterBank(hEncoder,
			&coderInfo[channel],
			hEncoder->sampleBuff[channel],
			hEncoder->freqBuff[channel],
			hEncoder->overlapBuff[channel],
			MOVERLAPPED);

		/* Apply the spectrum filter per short window (8 of them) or once
		   over the whole long block. */
		if (coderInfo[channel].block_type == ONLY_SHORT_WINDOW) {
			for (k = 0; k < 8; k++) {
				specFilter(hEncoder->freqBuff[channel]+k*BLOCK_LEN_SHORT,
					sampleRate, bandWidth, BLOCK_LEN_SHORT);
			}
		} else {
			specFilter(hEncoder->freqBuff[channel], sampleRate,
				bandWidth, BLOCK_LEN_LONG);
		}
	}

	/* TMP: Build sfb offset table and other stuff */
	for (channel = 0; channel < numChannels; channel++) {
		channelInfo[channel].msInfo.is_present = 0;

		if (coderInfo[channel].block_type == ONLY_SHORT_WINDOW) {
			coderInfo[channel].max_sfb = hEncoder->srInfo->num_cb_short;
			coderInfo[channel].nr_of_sfb = hEncoder->srInfo->num_cb_short;

			/* Start with a single group containing all 8 short windows */
			coderInfo[channel].num_window_groups = 1;
			coderInfo[channel].window_group_length[0] = 8;
			coderInfo[channel].window_group_length[1] = 0;
			coderInfo[channel].window_group_length[2] = 0;
			coderInfo[channel].window_group_length[3] = 0;
			coderInfo[channel].window_group_length[4] = 0;
			coderInfo[channel].window_group_length[5] = 0;
			coderInfo[channel].window_group_length[6] = 0;
			coderInfo[channel].window_group_length[7] = 0;

			/* sfb_offset[] is the running sum of the band widths, with
			   one extra trailing entry holding the total. */
			offset = 0;
			for (sb = 0; sb < coderInfo[channel].nr_of_sfb; sb++) {
				coderInfo[channel].sfb_offset[sb] = offset;
				offset += hEncoder->srInfo->cb_width_short[sb];
			}
			coderInfo[channel].sfb_offset[coderInfo[channel].nr_of_sfb] = offset;
		} else {
			coderInfo[channel].max_sfb = hEncoder->srInfo->num_cb_long;
			coderInfo[channel].nr_of_sfb = hEncoder->srInfo->num_cb_long;

			coderInfo[channel].num_window_groups = 1;
			coderInfo[channel].window_group_length[0] = 1;

			offset = 0;
			for (sb = 0; sb < coderInfo[channel].nr_of_sfb; sb++) {
				coderInfo[channel].sfb_offset[sb] = offset;
				offset += hEncoder->srInfo->cb_width_long[sb];
			}
			coderInfo[channel].sfb_offset[coderInfo[channel].nr_of_sfb] = offset;
		}
	}

	/* Perform TNS analysis and filtering */
	for (channel = 0; channel < numChannels; channel++) {
		if ((!channelInfo[channel].lfe) && (useTns)) {
			TnsEncode(&(coderInfo[channel].tnsInfo),
				coderInfo[channel].max_sfb,
				coderInfo[channel].max_sfb,
				(WINDOW_TYPE)coderInfo[channel].block_type,
				coderInfo[channel].sfb_offset,
				hEncoder->freqBuff[channel]);
		} else {
			coderInfo[channel].tnsInfo.tnsDataPresent = 0;      /* TNS not used for LFE */
		}
	}

	/* Long-term prediction: only for present, non-LFE channels that are
	   not coded with short windows, and only for MPEG4 with the LTP
	   object type. */
	for(channel = 0; channel < numChannels; channel++)
	{
		if((coderInfo[channel].tnsInfo.tnsDataPresent != 0) && (useTns))
			tnsInfo_for_LTP = &(coderInfo[channel].tnsInfo);
		else
			tnsInfo_for_LTP = NULL;

		if(channelInfo[channel].present && (!channelInfo[channel].lfe) &&
			(coderInfo[channel].block_type != ONLY_SHORT_WINDOW) &&
			(mpegVersion == MPEG4) && (aacObjectType == LTP))
		{
			LtpEncode(hEncoder,
				&coderInfo[channel],
				&(coderInfo[channel].ltpInfo),
				tnsInfo_for_LTP,
				hEncoder->freqBuff[channel],
				hEncoder->ltpTimeBuff[channel]);
		} else {
			coderInfo[channel].ltpInfo.global_pred_flag = 0;
		}
	}

	/* Prediction: only for the MAIN object type on non-LFE channels */
	for(channel = 0; channel < numChannels; channel++)
	{
		if ((aacObjectType == MAIN) && (!channelInfo[channel].lfe)) {
			int numPredBands = min(coderInfo[channel].max_pred_sfb, coderInfo[channel].nr_of_sfb);
			PredCalcPrediction(hEncoder->freqBuff[channel],
				coderInfo[channel].requantFreq,
				coderInfo[channel].block_type,
				numPredBands,
				(coderInfo[channel].block_type==ONLY_SHORT_WINDOW)?
				hEncoder->srInfo->cb_width_short:hEncoder->srInfo->cb_width_long,
				coderInfo,
				channelInfo,
				channel);
		} else {
			coderInfo[channel].pred_global_flag = 0;
		}
	}

	/* Group short windows, then compute the average energy per channel */
	for (channel = 0; channel < numChannels; channel++) {
		if (coderInfo[channel].block_type == ONLY_SHORT_WINDOW) {
			SortForGrouping(&coderInfo[channel],
				&hEncoder->psyInfo[channel],
				&channelInfo[channel],
				hEncoder->srInfo->cb_width_short,
				hEncoder->freqBuff[channel]);
		}
		CalcAvgEnrg(&coderInfo[channel], hEncoder->freqBuff[channel]);

		/* reduce LFE bandwidth */
		if (!channelInfo[channel].cpe && channelInfo[channel].lfe)
		{
			coderInfo[channel].nr_of_sfb = coderInfo[channel].max_sfb = 3;
		}
	}

	MSEncode(coderInfo, channelInfo, hEncoder->freqBuff, numChannels, allowMidside);

	/* Recompute the average energy after MSEncode has processed freqBuff */
	for (channel = 0; channel < numChannels; channel++)
	{
		CalcAvgEnrg(&coderInfo[channel], hEncoder->freqBuff[channel]);
	}

#ifdef DRM
	/* loop the quantization until the desired bit-rate is reached */
	diff = 1; /* to enter while loop */
	hEncoder->aacquantCfg.quality = 120; /* init quality setting */
	while (diff > 0) { /* if too many bits, do it again */
#endif
		/* Quantize and code the signal */
		for (channel = 0; channel < numChannels; channel++) {
			if (coderInfo[channel].block_type == ONLY_SHORT_WINDOW) {
				AACQuantize(&coderInfo[channel], &hEncoder->psyInfo[channel],
					&channelInfo[channel], hEncoder->srInfo->cb_width_short,
					hEncoder->srInfo->num_cb_short, hEncoder->freqBuff[channel],
					&(hEncoder->aacquantCfg));
			} else {
				AACQuantize(&coderInfo[channel], &hEncoder->psyInfo[channel],
					&channelInfo[channel], hEncoder->srInfo->cb_width_long,
					hEncoder->srInfo->num_cb_long, hEncoder->freqBuff[channel],
					&(hEncoder->aacquantCfg));
			}
		}

#ifdef DRM
		/* Write the AAC bitstream */
		bitStream = OpenBitStream(bufferSize, outputBuffer);
		WriteBitstream(hEncoder, coderInfo, channelInfo, bitStream, numChannels);

		/* Close the bitstream and return the number of bytes written */
		frameBytes = CloseBitStream(bitStream);

		/* now calculate desired bits and compare with actual encoded bits */
		desbits = (int) ((double) numChannels * (hEncoder->config.bitRate * FRAME_LEN)
			/ hEncoder->sampleRate);

		diff = ((frameBytes - 1 /* CRC */) * 8) - desbits;

		/* do linear correction according to relative difference */
		fix = (double) desbits / ((frameBytes - 1 /* CRC */) * 8);

		/* speed up convergence. A value of 0.92 gives approx up to 10 iterations */
		if (fix > 0.92)
			fix = 0.92;

		hEncoder->aacquantCfg.quality *= fix;

		/* quality should not go lower than 1, set diff to exit loop */
		if (hEncoder->aacquantCfg.quality <= 1)
			diff = -1;
	}
#endif

	/* fix max_sfb in CPE mode: both channels of a pair use the larger value */
	for (channel = 0; channel < numChannels; channel++)
	{
		if (channelInfo[channel].present
			&& (channelInfo[channel].cpe)
			&& (channelInfo[channel].ch_is_left))
		{
			CoderInfo *cil, *cir;

			cil = &coderInfo[channel];
			cir = &coderInfo[channelInfo[channel].paired_ch];

			cil->max_sfb = cir->max_sfb = max(cil->max_sfb, cir->max_sfb);
			cil->nr_of_sfb = cir->nr_of_sfb = cil->max_sfb;
		}
	}

	MSReconstruct(coderInfo, channelInfo, numChannels);

	for (channel = 0; channel < numChannels; channel++)
	{
		/* If short window, reconstruction not needed for prediction */
		if (coderInfo[channel].block_type == ONLY_SHORT_WINDOW) {
			int sind;
			for (sind = 0; sind < BLOCK_LEN_LONG; sind++) {
				coderInfo[channel].requantFreq[sind] = 0.0;
			}
		} else {

			if((coderInfo[channel].tnsInfo.tnsDataPresent != 0) && (useTns))
				tnsDecInfo = &(coderInfo[channel].tnsInfo);
			else
				tnsDecInfo = NULL;

			if ((!channelInfo[channel].lfe) && (aacObjectType == LTP)) {  /* no reconstruction needed for LFE channel*/

				LtpReconstruct(&coderInfo[channel], &(coderInfo[channel].ltpInfo),
					coderInfo[channel].requantFreq);

				if(tnsDecInfo != NULL)
					TnsDecodeFilterOnly(&(coderInfo[channel].tnsInfo), coderInfo[channel].nr_of_sfb,
					coderInfo[channel].max_sfb, (WINDOW_TYPE)coderInfo[channel].block_type,
					coderInfo[channel].sfb_offset, coderInfo[channel].requantFreq);

				/* Transform the requantized spectrum back into the LTP
				   time/overlap buffers and update the LTP state. */
				IFilterBank(hEncoder, &coderInfo[channel],
					coderInfo[channel].requantFreq,
					coderInfo[channel].ltpInfo.time_buffer,
					coderInfo[channel].ltpInfo.ltp_overlap_buffer,
					MOVERLAPPED);

				LtpUpdate(&(coderInfo[channel].ltpInfo),
					coderInfo[channel].ltpInfo.time_buffer,
					coderInfo[channel].ltpInfo.ltp_overlap_buffer,
					BLOCK_LEN_LONG);
			}
		}
	}

#ifndef DRM
	/* Write the AAC bitstream */
	bitStream = OpenBitStream(bufferSize, outputBuffer);

	WriteBitstream(hEncoder, coderInfo, channelInfo, bitStream, numChannels);

	/* Close the bitstream and return the number of bytes written */
	frameBytes = CloseBitStream(bitStream);

	/* Adjust quality to get correct average bitrate */
	if (hEncoder->config.bitRate)
	{
		double fix;
		int desbits = numChannels * (hEncoder->config.bitRate * FRAME_LEN)
			/ hEncoder->sampleRate;
		int diff = (frameBytes * 8) - desbits;

		/* bitDiff accumulates the over/undershoot across frames; the
		   correction is 1% of the relative accumulated error, limited
		   to +/-0.2. */
		hEncoder->bitDiff += diff;
		fix = (double)hEncoder->bitDiff / desbits;
		fix *= 0.01;
		fix = max(fix, -0.2);
		fix = min(fix, 0.2);

		/* Only correct when this frame's error has the same sign as the
		   accumulated error; keep quality within [50, 300]. */
		if (((diff > 0) && (fix > 0.0)) || ((diff < 0) && (fix < 0.0)))
		{
			hEncoder->aacquantCfg.quality *= (1.0 - fix);
			if (hEncoder->aacquantCfg.quality > 300)
				hEncoder->aacquantCfg.quality = 300;
			if (hEncoder->aacquantCfg.quality < 50)
				hEncoder->aacquantCfg.quality = 50;
		}
	}
#endif

	return frameBytes;
}

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章