因爲業務需要,直播推流過程中,pcm的音頻需要從48k採樣率,重採樣到44.1k,然後再編碼成aac,封裝成rtmp推送出去。這裏只描述如何做pcm數據的重採樣。
整個過程中,音頻輸入:PCM/S16/48000/2 重採樣輸出: PCM/S16/44100/2,每一幀輸入是10ms的samples。
重採樣的基本思路是,先用input和output audio格式,來初始化ffmpeg resample, 然後準備好input數據,和output buffer後調用swr_convert函數來做重採樣,具體處理函數如下:
步驟1:定義基本的處理函數和相關結構體
#include <libswresample/swresample.h>
// Resampler entry points: set up, tear down, and convert one input frame.
bool initAudioResampler(const Frame&);
void uninitAudioResampler();
bool audioResample(const Frame&);
// State shared by the three functions above.
struct SwrContext *m_audioSwrCtx; // audio resampling context
uint8_t **m_audioSwrSamplesData; // output buffer that receives the resampled audio
int m_audioSwrSamplesLinesize; // line size filled in by av_samples_alloc_array_and_samples() for the output buffer
int m_audioSwrSamplesCount; // number of samples per frame after resampling
步驟2: 相關函數定義:
bool initAudioResampler(const Frame& frame)
{
int ret;
m_audioSwrCtx = swr_alloc();
if (!m_audioSwrCtx) {
ELOG_ERROR_T("Could not allocate resampler context");
goto failed;
}
/* set options */
av_opt_set_int (m_audioSwrCtx, "in_channel_count", frame.additionalInfo.audio.channels, 0);
av_opt_set_int (m_audioSwrCtx, "in_sample_rate", frame.additionalInfo.audio.sampleRate, 0);
av_opt_set_sample_fmt(m_audioSwrCtx, "in_sample_fmt", AV_SAMPLE_FMT_S16, 0);
av_opt_set_int (m_audioSwrCtx, "out_channel_count", 2, 0);
av_opt_set_int (m_audioSwrCtx, "out_sample_rate", 44100, 0);
av_opt_set_sample_fmt(m_audioSwrCtx, "out_sample_fmt", AV_SAMPLE_FMT_S16, 0);
ret = swr_init(m_audioSwrCtx);
if (ret < 0) {
ELOG_ERROR_T("Fail to initialize the resampling context, %s", ff_err2str(ret));
goto failed;
}
m_audioSwrSamplesCount = 44100 / 100; // 44.1k frame_size for 10ms frame
ret = av_samples_alloc_array_and_samples(&m_audioSwrSamplesData,
&m_audioSwrSamplesLinesize,
2,
m_audioSwrSamplesCount,
AV_SAMPLE_FMT_S16,
0);
if (ret < 0) {
ELOG_ERROR_T("Could not allocate swr samples data, %s", ff_err2str(ret));
goto failed;
}
ELOG_INFO("init audio resampler: input ch:%d, rate:%d, nbSamples:%d",
frame.additionalInfo.audio.channels,
frame.additionalInfo.audio.sampleRate,
frame.additionalInfo.audio.nbSamples);
return true;
failed:
if (m_audioSwrCtx) {
swr_free(&m_audioSwrCtx);
m_audioSwrCtx = NULL;
}
if (m_audioSwrSamplesData) {
av_freep(&m_audioSwrSamplesData[0]);
av_freep(&m_audioSwrSamplesData);
m_audioSwrSamplesData = NULL;
m_audioSwrSamplesLinesize = 0;
}
m_audioSwrSamplesCount = 0;
return false;
}
/**
 * Release the resampler context and its output buffer, and reset the
 * associated bookkeeping so a later initialisation starts from a clean state.
 * Safe to call when nothing has been allocated.
 */
void uninitAudioResampler()
{
    m_audioSwrSamplesCount = 0;

    // Output buffer: free the sample storage first, then the pointer array.
    if (m_audioSwrSamplesData != NULL) {
        av_freep(&m_audioSwrSamplesData[0]);
        av_freep(&m_audioSwrSamplesData);
        m_audioSwrSamplesLinesize = 0;
        m_audioSwrSamplesData = NULL;
    }

    // Resampling context.
    if (m_audioSwrCtx != NULL) {
        swr_free(&m_audioSwrCtx);
        m_audioSwrCtx = NULL;
    }
}
/**
 * Resample one input frame into m_audioSwrSamplesData.
 *
 * Only 48000 Hz / 2-channel input is converted. Input that is already
 * 44100 Hz / 2 channels needs no conversion, and any other format is
 * rejected. The resampler is created lazily on the first accepted frame.
 *
 * @param frame  Input frame; frame.payload points at the sample data and
 *               frame.additionalInfo.audio describes it.
 * @return true when the frame was passed through swr_convert successfully;
 *         false when no conversion was done (already 44100/2, unsupported
 *         format, initialisation failure, or conversion error).
 *
 * NOTE(review): the sample count returned by swr_convert is discarded here;
 * confirm that callers may assume m_audioSwrSamplesCount samples per call.
 */
bool audioResample(const Frame &frame)
{
    const bool isStereo = (frame.additionalInfo.audio.channels == 2);

    if (isStereo && frame.additionalInfo.audio.sampleRate == 44100) {
        ELOG_DEBUG_T("audioResample: input is already 44100/2/pcm, no need to resample");
        return false;
    }

    // Reject anything that is not exactly 48000/2: either property being
    // wrong is enough to make the input unsupported.
    if (!isStereo || frame.additionalInfo.audio.sampleRate != 48000) {
        ELOG_DEBUG_T("audioResample: input is not 48000/2/pcm, not supported, skip");
        return false;
    }

    if (m_audioSwrCtx == NULL && !initAudioResampler(frame)) {
        ELOG_ERROR_T("initAudioResampler failed!");
        return false;
    }

    // swr_convert takes an array of plane pointers, so wrap the payload
    // pointer in one. Slot 0 carries the samples; the remaining slots are
    // zero-initialised instead of being left indeterminate.
    uint8_t* data[64] = { frame.payload };
    int ret = swr_convert(m_audioSwrCtx, m_audioSwrSamplesData, m_audioSwrSamplesCount,
                          (const uint8_t**) data, frame.additionalInfo.audio.nbSamples);
    if (ret < 0) {
        ELOG_ERROR_T("Error while converting, %s", ff_err2str(ret));
        return false;
    }
    return true;
}