一.初始化解碼器

/**
 * Creates and opens the FFmpeg audio decoder described by @p format and
 * allocates the two work frames (decoded frame and resampled frame).
 *
 * The function has no return value; every failure is reported on stdout.
 *
 * @param format Stream description. The block reads codecID, extra and
 *               extraSize; for AAC, extra/extraSize are copied into the codec
 *               context as extradata.
 */
void QHAudioDecoder::openDecoder(Format *format)
{
    if (!format)
    {
        std::cout <<"audio format is null"<< std::endl;
        return;
    }

    const AVCodec *codec = avcodec_find_decoder(static_cast<AVCodecID>(format->codecID));
    if (!codec)
    {
        std::cout <<"audio codec not found"<< std::endl;
    }

    // avcodec_alloc_context3 accepts a null codec, so the context is allocated
    // even when the lookup failed; m_codecContext then still refers to a valid
    // (unopened) context, exactly as in the previous flow.
    m_codecContext = avcodec_alloc_context3(codec);
    if (!m_codecContext)
    {
        std::cout <<"could not create audio codec"<< std::endl;
        // Nothing below can work without a context; continuing would
        // dereference a null pointer.
        return;
    }

    if (codec)
    {
        if (format->codecID==AV_CODEC_ID_AAC && format->extra && format->extraSize)
        {
            // libavcodec requires extradata to be followed by zeroed padding.
            uint8_t *extradata = static_cast<uint8_t*>(av_malloc(format->extraSize + AV_INPUT_BUFFER_PADDING_SIZE));
            if (extradata)
            {
                memcpy(extradata, format->extra, format->extraSize);
                memset(extradata + format->extraSize, 0, AV_INPUT_BUFFER_PADDING_SIZE);

                // Publish pointer and size together, and only on success, so
                // the context never advertises a size for a null buffer.
                m_codecContext->extradata = extradata;
                m_codecContext->extradata_size = format->extraSize;
            }
            else
            {
                std::cout <<"could not allocate audio extradata"<< std::endl;
            }
        }

        if (format->codecID==AV_CODEC_ID_PCM_ALAW||
            format->codecID==AV_CODEC_ID_PCM_MULAW||
            format->codecID==AV_CODEC_ID_ADPCM_G726)
        {
            // Intentionally empty (as in the original).
            // TODO(review): these raw codecs presumably need sample rate /
            // channel count configured on the context before opening - confirm.
        }

        if (avcodec_open2(m_codecContext,codec,nullptr) < 0)
        {
            std::cout <<"could not open audio codec"<< std::endl;
        }
    }

    m_frame = av_frame_alloc();
    m_dstFrame = av_frame_alloc();

    if (!m_frame||!m_dstFrame)
    {
        std::cout <<"could not create audio frame"<< std::endl;
    }
}

format->extra和format->extraSize就是之前live555接收數據時，根據MediaSubsession參數獲取的編解碼器配置信息，在這裏作爲AAC解碼器的extradata使用。

二.解碼

/**
 * Decodes one compressed audio block and, while SDL audio is playing, exposes
 * the decoded (and, when a resampler is active, converted) samples through
 * @p frame.
 *
 * On the first decoded frame the resampler is opened and the destination
 * frame buffer is allocated.
 *
 * @param block Compressed input; buffer, bufferSize, dts and pts are read.
 * @param frame Pointer to a caller-provided Frame that is filled in place.
 *              Its buffer points into a decoder-owned AVFrame, so it is only
 *              valid until the next call. If one packet yields several
 *              frames, only the last one is reported.
 * @return true if at least one frame was written to @p frame.
 */
bool QHAudioDecoder::doDecode(Block *block,Frame **frame)
{
    if(!block || !frame || !*frame || !m_codecContext || !m_frame || !m_dstFrame)
    {
        return false;
    }

    bool result=false;

    AVPacket pkt;
    av_init_packet(&pkt);

    pkt.data = block->buffer;
    pkt.size = block->bufferSize;
    pkt.dts = block->dts;
    pkt.pts = block->pts;

    int ret=avcodec_send_packet(m_codecContext, &pkt);
    while(ret==0)
    {
        ret=avcodec_receive_frame(m_codecContext, m_frame);
        if(ret!=0)
        {
            break;
        }

        if(m_firstFrame)
        {
            // A negative value means no resampler was opened; 0 means the
            // output size is unknown and the maximum buffer size is used.
            const int maxDstSamples=m_audioResample.openResample(m_codecContext, &m_dstAudioParams);
            if(maxDstSamples>=0)
            {
                m_dstFrame->format = m_dstAudioParams.sampleFmt;
                m_dstFrame->channel_layout = m_dstAudioParams.channelLayout;
                m_dstFrame->sample_rate = m_dstAudioParams.sampleRate;
                m_dstFrame->nb_samples = (maxDstSamples == 0) ? MAX_NUMBER_OF_AUDIO_SAMPLES : maxDstSamples;

                if(av_frame_get_buffer(m_dstFrame, 1) < 0)
                {
                    // Without a destination buffer the resampler would write
                    // through null pointers. Leave m_firstFrame set so the
                    // setup is attempted again on the next call.
                    std::cout <<"could not allocate resampled audio buffer"<< std::endl;
                    break;
                }
            }

            m_firstFrame=false;
        }

        if(SDL_GetAudioStatus()!=SDL_AUDIO_PLAYING)
        {
            // Decoded frames are discarded while playback is not running.
            continue;
        }

        const int converted = m_audioResample.doResample(m_frame, m_dstFrame);
        if(converted < 0)
        {
            // Conversion failed: do not publish the unconverted frame as if
            // it were in the destination format.
            std::cout <<"audio resample failed"<< std::endl;
            continue;
        }

        // converted == 0 is what doResample reports when no resampler is
        // open; the decoder frame is then passed through unchanged.
        AVFrame *outFrame = (converted > 0) ? m_dstFrame : m_frame;
        const int outSamples = (converted > 0) ? converted : m_frame->nb_samples;

        const int bytePerSample = av_get_bytes_per_sample((AVSampleFormat)outFrame->format);
        const int dataSize = outSamples * bytePerSample * outFrame->channels;

        (*frame)->buffer=outFrame->data[0];
        (*frame)->bufferSize=dataSize;
        (*frame)->sampleRate=m_dstAudioParams.sampleRate;
        (*frame)->channels=m_dstAudioParams.channels;
        (*frame)->pts=m_frame->pts;
        (*frame)->dts=m_frame->pkt_dts;

        result=true;
    }

    av_packet_unref(&pkt);

    return result;
}

解碼沒什麼特別的，FFmpeg的demo和網上有大量代碼可以參考，但是需要注意的是，解碼後需要進行重採樣doResample。因爲FFmpeg的AAC解碼器解碼出的AVSampleFormat是AV_SAMPLE_FMT_FLTP（float, planar），該格式無法直接使用SDL進行播放，需要轉換成SDL支持的AV_SAMPLE_FMT_S16（signed 16 bits）格式。

三.重採樣

音頻重採樣FFmpeg源碼中也有demo：ffmpeg\doc\examples\resampling_audio.c，這裏還是貼一下代碼吧。

/**
 * Opens a libswresample context that converts the decoder's output format
 * into the format described by @p dstParams.
 *
 * @param codecContext Opened decoder context; sample format, sample rate,
 *                     channel layout/count and frame size are read from it.
 * @param dstParams    Desired output sample format, rate and channel layout.
 * @return The estimated maximum number of output samples per decoded frame
 *         (0 if the decoder reports no frame size), or -1 when an argument is
 *         null, when source and destination already match (no resampling
 *         needed), or when the resampler could not be created.
 */
int QHAudioResample::openResample(AVCodecContext *codecContext, AudioResampleParams *dstParams)
{
    if(!codecContext || !dstParams)
    {
        return -1;
    }

    const bool alreadyInDstFormat =
            codecContext->sample_fmt == dstParams->sampleFmt &&
            codecContext->sample_rate == dstParams->sampleRate &&
            codecContext->channel_layout == dstParams->channelLayout;
    if(alreadyInDstFormat)
    {
        return -1;
    }

    AudioResampleParams srcParams{};

    // Trust the decoder's channel layout only when it agrees with its channel
    // count; otherwise fall back to the default layout for that count.
    if (codecContext->channel_layout && (codecContext->channels == av_get_channel_layout_nb_channels(codecContext->channel_layout)))
    {
        srcParams.channelLayout = codecContext->channel_layout;
    }
    else
    {
        srcParams.channelLayout = av_get_default_channel_layout(codecContext->channels);
    }

    srcParams.sampleFmt = codecContext->sample_fmt;
    srcParams.sampleRate = codecContext->sample_rate;
    srcParams.channels = codecContext->channels;

    m_srcParams = srcParams;
    m_dstParams = *dstParams;

    // Release a context left over from an earlier call instead of leaking it
    // (swr_free is a no-op on a null pointer).
    swr_free(&m_swrContext);

    // swr_alloc_set_opts configures both the input and the output side, so no
    // separate av_opt_set_* calls are needed. (The previous extra calls wrote
    // the destination layout into "in_channel_layout", clobbering the input
    // layout.)
    m_swrContext = swr_alloc_set_opts(nullptr, m_dstParams.channelLayout, m_dstParams.sampleFmt, m_dstParams.sampleRate,
        m_srcParams.channelLayout, m_srcParams.sampleFmt, m_srcParams.sampleRate, 0, nullptr);

    if(!m_swrContext)
    {
        return -1;
    }

    /* initialize the resampling context */
    if (swr_init(m_swrContext) < 0)
    {
        swr_free(&m_swrContext);   // also resets m_swrContext to nullptr
        return -1;
    }

    // Codecs with variable frame size do not report frame_size; use a large
    // fixed estimate for them.
    const bool variableFrameSize = codecContext->codec &&
            (codecContext->codec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE);
    const int src_nb_samples = variableFrameSize ? 10000 : codecContext->frame_size;
    const int dst_nb_samples = static_cast<int>(
            av_rescale_rnd(src_nb_samples, m_dstParams.sampleRate, m_srcParams.sampleRate, AV_ROUND_UP));

    return dst_nb_samples;
}

/**
 * Converts the samples of @p srcFrame into @p dstFrame using the resampler
 * opened by openResample().
 *
 * @param srcFrame Decoded input frame.
 * @param dstFrame Output frame with allocated buffers; its nb_samples is
 *                 treated as the buffer capacity in samples per channel.
 * @return Number of samples per channel written to @p dstFrame, 0 if no
 *         resampler is open, or a negative value on error.
 */
int QHAudioResample::doResample(AVFrame *srcFrame, AVFrame *dstFrame)
{
    if(!m_swrContext)
    {
        return 0;
    }

    if(!srcFrame || !dstFrame)
    {
        return -1;
    }

    // Samples still buffered inside the resampler plus the new input,
    // expressed at the output rate, bound how much output can be produced.
    const int64_t pendingSamples = swr_get_delay(m_swrContext, m_srcParams.sampleRate) + srcFrame->nb_samples;
    int64_t maxOutSamples = av_rescale_rnd(pendingSamples, m_dstParams.sampleRate, m_srcParams.sampleRate, AV_ROUND_UP);

    // Never ask for more than the destination buffer can hold; any surplus
    // stays buffered in the resampler and is delivered by a later call.
    if(maxOutSamples > dstFrame->nb_samples)
    {
        maxOutSamples = dstFrame->nb_samples;
    }

    // extended_data covers every plane, including planar audio with more
    // channels than the fixed-size data[] array holds.
    return swr_convert(m_swrContext, dstFrame->extended_data, static_cast<int>(maxOutSamples),
        const_cast<const uint8_t **>(srcFrame->extended_data), srcFrame->nb_samples);
}

原創不易，轉載請標明出處：https://blog.csdn.net/caoshangpa/article/details/112151865

發表評論

所有評論

還沒有人評論，想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.

基於live555的rtsp播放器：音頻解碼

一.初始化解碼器

二.解碼

三.重採樣

讀論文：ISIA Food-500 一個大型食品識別數據集通過堆疊的全球本地網絡

[系統安全] 七.逆向分析之PE病毒原理、C++實現文件加解密及OllyDbg逆向

基於live555的rtsp播放器：音頻解碼

python練習- for in range()

OpenFaaS-無服務器應用

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結