FAAC編碼代碼流程圖
通用的AAC編碼系統框圖(偷來的圖)
對比上面兩張圖可以發現,FAAC的編碼流程與通用的AAC編碼流程大致一致,主要包括心理聲學模型處理和量化處理兩部分,另外還包括一些優化處理的過程。以下是實現的源碼及註釋:
/*
 * Encode one frame of audio and write the resulting AAC frame to outputBuffer.
 *
 * Parameters:
 *   hEncoder     - encoder state; its sample buffers, psychoacoustic state,
 *                  coder info and quality setting are read and updated.
 *   inputBuffer  - channel-interleaved input samples. Reinterpreted as short,
 *                  int32_t or float according to hEncoder->config.inputFormat;
 *                  32-bit integer samples are scaled by 1/256. Not read when
 *                  samplesInput is 0.
 *   samplesInput - total number of samples in inputBuffer (all channels).
 *                  0 requests a flush frame (zeros are fed to the encoder).
 *                  samplesInput / numChannels must not exceed FRAME_LEN.
 *   outputBuffer - destination handed to OpenBitStream for the encoded frame.
 *   bufferSize   - size of outputBuffer, handed to OpenBitStream.
 *
 * Returns:
 *   -1  if the per-channel sample count exceeds FRAME_LEN, if inputBuffer is
 *       NULL while samples are supplied, if a sample buffer cannot be
 *       allocated, or if config.inputFormat is not one of the handled formats;
 *    0  while the first three frames are still filling the look-ahead buffers,
 *       and once more than four flush frames have been requested;
 *   otherwise the byte count returned by CloseBitStream.
 *
 * With DRM defined, quantization and bitstream writing are repeated with a
 * progressively lower quality until the frame no longer exceeds the desired
 * bit count (or quality drops to 1 or below). Without DRM, the frame is
 * written once and the quality is nudged afterwards to track config.bitRate.
 */
int FAACAPI faacEncEncode(faacEncHandle hEncoder,
                          int32_t *inputBuffer,
                          unsigned int samplesInput,
                          unsigned char *outputBuffer,
                          unsigned int bufferSize
                          )
{
    unsigned int channel, i;
    int sb, frameBytes;
    unsigned int offset;
    BitStream *bitStream; /* bitstream used for writing the frame to */
    TnsInfo *tnsInfo_for_LTP;
    TnsInfo *tnsDecInfo;
#ifdef DRM
    int desbits, diff;
    double fix;
#endif

    /* local copies of parameters */
    ChannelInfo *channelInfo = hEncoder->channelInfo;
    CoderInfo *coderInfo = hEncoder->coderInfo;
    unsigned int numChannels = hEncoder->numChannels;
    unsigned int sampleRate = hEncoder->sampleRate;
    unsigned int aacObjectType = hEncoder->config.aacObjectType;
    unsigned int mpegVersion = hEncoder->config.mpegVersion;
    unsigned int useLfe = hEncoder->config.useLfe;
    unsigned int useTns = hEncoder->config.useTns;
    unsigned int allowMidside = hEncoder->config.allowMidside;
    unsigned int bandWidth = hEncoder->config.bandWidth;
    unsigned int shortctl = hEncoder->config.shortctl;

    /* Samples supplied for each channel; guarded so that a zero channel
       count cannot cause a division by zero. */
    unsigned int samplesPerChannel =
        (numChannels != 0) ? samplesInput / numChannels : 0;

    /* Reject input that would overrun the FRAME_LEN-sized sample buffers.
       Done before any encoder state is modified. */
    if (samplesPerChannel > FRAME_LEN)
        return -1;

    /* Samples were announced but there is nothing to read them from. */
    if (samplesPerChannel != 0 && inputBuffer == NULL)
        return -1;

    /* Increase frame number */
    hEncoder->frameNum++;

    if (samplesInput == 0)
        hEncoder->flushFrame++;

    /* After 4 flush frames all samples have been encoded,
       return 0 bytes written */
    if (hEncoder->flushFrame > 4)
        return 0;

    /* Determine the channel configuration */
    GetChannelInfo(channelInfo, numChannels, useLfe);

    /* Update current sample buffers */
    for (channel = 0; channel < numChannels; channel++)
    {
        double *tmp;

        /* Keep the current and next frame side by side in the LTP time
           buffer before the buffers are rotated below. */
        if (hEncoder->sampleBuff[channel]) {
            for (i = 0; i < FRAME_LEN; i++) {
                hEncoder->ltpTimeBuff[channel][i] = hEncoder->sampleBuff[channel][i];
            }
        }
        if (hEncoder->nextSampleBuff[channel]) {
            for (i = 0; i < FRAME_LEN; i++) {
                hEncoder->ltpTimeBuff[channel][FRAME_LEN + i] =
                    hEncoder->nextSampleBuff[channel][i];
            }
        }

        if (!hEncoder->sampleBuff[channel]) {
            hEncoder->sampleBuff[channel] = (double*)AllocMemory(FRAME_LEN*sizeof(double));
            /* This buffer is rotated into next3SampleBuff and written to
               below, so a failed allocation must not be carried forward. */
            if (!hEncoder->sampleBuff[channel])
                return -1;
        }

        /* Rotate the four frame buffers: the oldest one is recycled to
           receive the new input. */
        tmp = hEncoder->sampleBuff[channel];
        hEncoder->sampleBuff[channel] = hEncoder->nextSampleBuff[channel];
        hEncoder->nextSampleBuff[channel] = hEncoder->next2SampleBuff[channel];
        hEncoder->next2SampleBuff[channel] = hEncoder->next3SampleBuff[channel];
        hEncoder->next3SampleBuff[channel] = tmp;

        if (samplesInput == 0)
        {
            /* start flushing */
            for (i = 0; i < FRAME_LEN; i++)
                hEncoder->next3SampleBuff[channel][i] = 0.0;
        }
        else
        {
            /* handle the various input formats and channel remapping */
            switch( hEncoder->config.inputFormat )
            {
                case FAAC_INPUT_16BIT:
                    {
                        short *input_channel = (short*)inputBuffer + hEncoder->config.channel_map[channel];

                        for (i = 0; i < samplesPerChannel; i++)
                        {
                            hEncoder->next3SampleBuff[channel][i] = (double)*input_channel;
                            input_channel += numChannels;
                        }
                    }
                    break;

                case FAAC_INPUT_32BIT:
                    {
                        int32_t *input_channel = (int32_t*)inputBuffer + hEncoder->config.channel_map[channel];

                        for (i = 0; i < samplesPerChannel; i++)
                        {
                            /* scale 32-bit samples down by 1/256 */
                            hEncoder->next3SampleBuff[channel][i] = (1.0/256) * (double)*input_channel;
                            input_channel += numChannels;
                        }
                    }
                    break;

                case FAAC_INPUT_FLOAT:
                    {
                        float *input_channel = (float*)inputBuffer + hEncoder->config.channel_map[channel];

                        for (i = 0; i < samplesPerChannel; i++)
                        {
                            hEncoder->next3SampleBuff[channel][i] = (double)*input_channel;
                            input_channel += numChannels;
                        }
                    }
                    break;

                default:
                    return -1; /* invalid input format */
            }

            /* zero-pad a short frame up to FRAME_LEN */
            for (i = samplesPerChannel; i < FRAME_LEN; i++)
                hEncoder->next3SampleBuff[channel][i] = 0.0;
        }

        /* Psychoacoustics */
        /* Update buffers and run FFT on new samples */
        /* LFE psychoacoustic can run without it */
        if (!channelInfo[channel].lfe || channelInfo[channel].cpe)
        {
            /* Update the psychoacoustic model buffers and compute the
               energy of the current frame */
            hEncoder->psymodel->PsyBufferUpdate(
                    &hEncoder->fft_tables,
                    &hEncoder->gpsyInfo,
                    &hEncoder->psyInfo[channel],
                    hEncoder->next3SampleBuff[channel],
                    bandWidth,
                    hEncoder->srInfo->cb_width_short,
                    hEncoder->srInfo->num_cb_short);
        }
    }

    if (hEncoder->frameNum <= 3) /* Still filling up the buffers */
        return 0;

    /* Psychoacoustics */
    /* Internally detects transient signals to decide between long and
       short blocks */
    hEncoder->psymodel->PsyCalculate(channelInfo, &hEncoder->gpsyInfo, hEncoder->psyInfo,
            hEncoder->srInfo->cb_width_long, hEncoder->srInfo->num_cb_long,
            hEncoder->srInfo->cb_width_short,
            hEncoder->srInfo->num_cb_short, numChannels);

    /* Long/short block switching */
    hEncoder->psymodel->BlockSwitch(coderInfo, hEncoder->psyInfo, numChannels);

    /* force block type */
    if (shortctl == SHORTCTL_NOSHORT)
    {
        for (channel = 0; channel < numChannels; channel++)
        {
            coderInfo[channel].block_type = ONLY_LONG_WINDOW;
        }
    }
    if (shortctl == SHORTCTL_NOLONG)
    {
        for (channel = 0; channel < numChannels; channel++)
        {
            coderInfo[channel].block_type = ONLY_SHORT_WINDOW;
        }
    }

    /* AAC Filterbank, MDCT with overlap and add */
    for (channel = 0; channel < numChannels; channel++) {
        int k;

        FilterBank(hEncoder,
                &coderInfo[channel],
                hEncoder->sampleBuff[channel],
                hEncoder->freqBuff[channel],
                hEncoder->overlapBuff[channel],
                MOVERLAPPED);

        /* Short blocks: filter each of the 8 short spectra separately */
        if (coderInfo[channel].block_type == ONLY_SHORT_WINDOW) {
            for (k = 0; k < 8; k++) {
                specFilter(hEncoder->freqBuff[channel]+k*BLOCK_LEN_SHORT,
                        sampleRate, bandWidth, BLOCK_LEN_SHORT);
            }
        } else {
            specFilter(hEncoder->freqBuff[channel], sampleRate,
                    bandWidth, BLOCK_LEN_LONG);
        }
    }

    /* TMP: Build sfb offset table and other stuff */
    for (channel = 0; channel < numChannels; channel++) {
        channelInfo[channel].msInfo.is_present = 0;

        if (coderInfo[channel].block_type == ONLY_SHORT_WINDOW) {
            coderInfo[channel].max_sfb = hEncoder->srInfo->num_cb_short;
            coderInfo[channel].nr_of_sfb = hEncoder->srInfo->num_cb_short;

            /* a single group holding all 8 short windows */
            coderInfo[channel].num_window_groups = 1;
            coderInfo[channel].window_group_length[0] = 8;
            coderInfo[channel].window_group_length[1] = 0;
            coderInfo[channel].window_group_length[2] = 0;
            coderInfo[channel].window_group_length[3] = 0;
            coderInfo[channel].window_group_length[4] = 0;
            coderInfo[channel].window_group_length[5] = 0;
            coderInfo[channel].window_group_length[6] = 0;
            coderInfo[channel].window_group_length[7] = 0;

            /* sfb_offset[] is the running sum of the band widths */
            offset = 0;
            for (sb = 0; sb < coderInfo[channel].nr_of_sfb; sb++) {
                coderInfo[channel].sfb_offset[sb] = offset;
                offset += hEncoder->srInfo->cb_width_short[sb];
            }
            coderInfo[channel].sfb_offset[coderInfo[channel].nr_of_sfb] = offset;
        } else {
            coderInfo[channel].max_sfb = hEncoder->srInfo->num_cb_long;
            coderInfo[channel].nr_of_sfb = hEncoder->srInfo->num_cb_long;

            coderInfo[channel].num_window_groups = 1;
            coderInfo[channel].window_group_length[0] = 1;

            offset = 0;
            for (sb = 0; sb < coderInfo[channel].nr_of_sfb; sb++) {
                coderInfo[channel].sfb_offset[sb] = offset;
                offset += hEncoder->srInfo->cb_width_long[sb];
            }
            coderInfo[channel].sfb_offset[coderInfo[channel].nr_of_sfb] = offset;
        }
    }

    /* Perform TNS analysis and filtering */
    for (channel = 0; channel < numChannels; channel++) {
        if ((!channelInfo[channel].lfe) && (useTns)) {
            TnsEncode(&(coderInfo[channel].tnsInfo),
                    coderInfo[channel].max_sfb,
                    coderInfo[channel].max_sfb,
                    (WINDOW_TYPE)coderInfo[channel].block_type,
                    coderInfo[channel].sfb_offset,
                    hEncoder->freqBuff[channel]);
        } else {
            coderInfo[channel].tnsInfo.tnsDataPresent = 0; /* TNS not used for LFE */
        }
    }

    /* Long-term prediction: only for present, non-LFE, non-short-block
       channels when encoding MPEG-4 with the LTP object type */
    for (channel = 0; channel < numChannels; channel++)
    {
        if ((coderInfo[channel].tnsInfo.tnsDataPresent != 0) && (useTns))
            tnsInfo_for_LTP = &(coderInfo[channel].tnsInfo);
        else
            tnsInfo_for_LTP = NULL;

        if (channelInfo[channel].present && (!channelInfo[channel].lfe) &&
                (coderInfo[channel].block_type != ONLY_SHORT_WINDOW) &&
                (mpegVersion == MPEG4) && (aacObjectType == LTP))
        {
            LtpEncode(hEncoder,
                    &coderInfo[channel],
                    &(coderInfo[channel].ltpInfo),
                    tnsInfo_for_LTP,
                    hEncoder->freqBuff[channel],
                    hEncoder->ltpTimeBuff[channel]);
        } else {
            coderInfo[channel].ltpInfo.global_pred_flag = 0;
        }
    }

    /* Prediction for the MAIN object type (skipped for LFE channels) */
    for (channel = 0; channel < numChannels; channel++)
    {
        if ((aacObjectType == MAIN) && (!channelInfo[channel].lfe)) {
            int numPredBands = min(coderInfo[channel].max_pred_sfb, coderInfo[channel].nr_of_sfb);
            PredCalcPrediction(hEncoder->freqBuff[channel],
                    coderInfo[channel].requantFreq,
                    coderInfo[channel].block_type,
                    numPredBands,
                    (coderInfo[channel].block_type==ONLY_SHORT_WINDOW)?
                    hEncoder->srInfo->cb_width_short:hEncoder->srInfo->cb_width_long,
                    coderInfo,
                    channelInfo,
                    channel);
        } else {
            coderInfo[channel].pred_global_flag = 0;
        }
    }

    for (channel = 0; channel < numChannels; channel++) {
        if (coderInfo[channel].block_type == ONLY_SHORT_WINDOW) {
            SortForGrouping(&coderInfo[channel],
                    &hEncoder->psyInfo[channel],
                    &channelInfo[channel],
                    hEncoder->srInfo->cb_width_short,
                    hEncoder->freqBuff[channel]);
        }
        CalcAvgEnrg(&coderInfo[channel], hEncoder->freqBuff[channel]);

        /* reduce LFE bandwidth */
        if (!channelInfo[channel].cpe && channelInfo[channel].lfe)
        {
            coderInfo[channel].nr_of_sfb = coderInfo[channel].max_sfb = 3;
        }
    }

    MSEncode(coderInfo, channelInfo, hEncoder->freqBuff, numChannels, allowMidside);

    /* Recompute the average energy after MSEncode */
    for (channel = 0; channel < numChannels; channel++)
    {
        CalcAvgEnrg(&coderInfo[channel], hEncoder->freqBuff[channel]);
    }

#ifdef DRM
    /* loop the quantization until the desired bit-rate is reached */
    diff = 1; /* to enter while loop */
    hEncoder->aacquantCfg.quality = 120; /* init quality setting */
    while (diff > 0) { /* if too many bits, do it again */
#endif
    /* Quantize and code the signal */
    for (channel = 0; channel < numChannels; channel++) {
        if (coderInfo[channel].block_type == ONLY_SHORT_WINDOW) {
            AACQuantize(&coderInfo[channel], &hEncoder->psyInfo[channel],
                    &channelInfo[channel], hEncoder->srInfo->cb_width_short,
                    hEncoder->srInfo->num_cb_short, hEncoder->freqBuff[channel],
                    &(hEncoder->aacquantCfg));
        } else {
            AACQuantize(&coderInfo[channel], &hEncoder->psyInfo[channel],
                    &channelInfo[channel], hEncoder->srInfo->cb_width_long,
                    hEncoder->srInfo->num_cb_long, hEncoder->freqBuff[channel],
                    &(hEncoder->aacquantCfg));
        }
    }
#ifdef DRM
    /* Write the AAC bitstream */
    bitStream = OpenBitStream(bufferSize, outputBuffer);
    WriteBitstream(hEncoder, coderInfo, channelInfo, bitStream, numChannels);

    /* Close the bitstream and return the number of bytes written */
    frameBytes = CloseBitStream(bitStream);

    /* now calculate desired bits and compare with actual encoded bits */
    desbits = (int) ((double) numChannels * (hEncoder->config.bitRate * FRAME_LEN)
            / hEncoder->sampleRate);
    diff = ((frameBytes - 1 /* CRC */) * 8) - desbits;

    /* do linear correction according to relative difference */
    fix = (double) desbits / ((frameBytes - 1 /* CRC */) * 8);

    /* speed up convergence. A value of 0.92 gives approx up to 10 iterations */
    if (fix > 0.92)
        fix = 0.92;
    hEncoder->aacquantCfg.quality *= fix;

    /* quality should not go lower than 1, set diff to exit loop */
    if (hEncoder->aacquantCfg.quality <= 1)
        diff = -1;
    }
#endif

    /* fix max_sfb in CPE mode: both channels of a pair use the larger value */
    for (channel = 0; channel < numChannels; channel++)
    {
        if (channelInfo[channel].present
                && (channelInfo[channel].cpe)
                && (channelInfo[channel].ch_is_left))
        {
            CoderInfo *cil, *cir;

            cil = &coderInfo[channel];
            cir = &coderInfo[channelInfo[channel].paired_ch];

            cil->max_sfb = cir->max_sfb = max(cil->max_sfb, cir->max_sfb);
            cil->nr_of_sfb = cir->nr_of_sfb = cil->max_sfb;
        }
    }

    MSReconstruct(coderInfo, channelInfo, numChannels);

    for (channel = 0; channel < numChannels; channel++)
    {
        /* If short window, reconstruction not needed for prediction */
        if (coderInfo[channel].block_type == ONLY_SHORT_WINDOW) {
            int sind;
            for (sind = 0; sind < BLOCK_LEN_LONG; sind++) {
                coderInfo[channel].requantFreq[sind] = 0.0;
            }
        } else {
            if ((coderInfo[channel].tnsInfo.tnsDataPresent != 0) && (useTns))
                tnsDecInfo = &(coderInfo[channel].tnsInfo);
            else
                tnsDecInfo = NULL;

            if ((!channelInfo[channel].lfe) && (aacObjectType == LTP)) { /* no reconstruction needed for LFE channel */
                LtpReconstruct(&coderInfo[channel], &(coderInfo[channel].ltpInfo),
                        coderInfo[channel].requantFreq);

                if (tnsDecInfo != NULL)
                    TnsDecodeFilterOnly(&(coderInfo[channel].tnsInfo), coderInfo[channel].nr_of_sfb,
                            coderInfo[channel].max_sfb, (WINDOW_TYPE)coderInfo[channel].block_type,
                            coderInfo[channel].sfb_offset, coderInfo[channel].requantFreq);

                IFilterBank(hEncoder, &coderInfo[channel],
                        coderInfo[channel].requantFreq,
                        coderInfo[channel].ltpInfo.time_buffer,
                        coderInfo[channel].ltpInfo.ltp_overlap_buffer,
                        MOVERLAPPED);

                LtpUpdate(&(coderInfo[channel].ltpInfo),
                        coderInfo[channel].ltpInfo.time_buffer,
                        coderInfo[channel].ltpInfo.ltp_overlap_buffer,
                        BLOCK_LEN_LONG);
            }
        }
    }

#ifndef DRM
    /* Write the AAC bitstream */
    bitStream = OpenBitStream(bufferSize, outputBuffer);
    WriteBitstream(hEncoder, coderInfo, channelInfo, bitStream, numChannels);

    /* Close the bitstream and return the number of bytes written */
    frameBytes = CloseBitStream(bitStream);

    /* Adjust quality to get correct average bitrate */
    if (hEncoder->config.bitRate)
    {
        double fix;
        int desbits = numChannels * (hEncoder->config.bitRate * FRAME_LEN)
            / hEncoder->sampleRate;
        int diff = (frameBytes * 8) - desbits;

        /* accumulate the deviation over frames */
        hEncoder->bitDiff += diff;

        /* 1% of the accumulated relative error, limited to +/-0.2 */
        fix = (double)hEncoder->bitDiff / desbits;
        fix *= 0.01;
        fix = max(fix, -0.2);
        fix = min(fix, 0.2);

        /* only correct when this frame deviates in the same direction as
           the accumulated error */
        if (((diff > 0) && (fix > 0.0)) || ((diff < 0) && (fix < 0.0)))
        {
            hEncoder->aacquantCfg.quality *= (1.0 - fix);
            if (hEncoder->aacquantCfg.quality > 300)
                hEncoder->aacquantCfg.quality = 300;
            if (hEncoder->aacquantCfg.quality < 50)
                hEncoder->aacquantCfg.quality = 50;
        }
    }
#endif

    return frameBytes;
}