iOS 音頻學習邊錄音邊轉碼AVCaptureSession

這種方法用來錄音相對比較少, 主要是用來錄視頻的多一些, 不過也是一種方法, 所以就說一下。主要的難點在於AudioToolbox這個框架, 用起來不是很熟。

1.錄製聲音

利用AVCaptureSession來錄音, 這部分是屬於AVFoundation的內容, 相對比較基礎, 所以直接看代碼就好

    // Create the capture session and keep a strong reference to it.
    AVCaptureSession *captureSession = [[AVCaptureSession alloc] init];
    self.captureSession = captureSession;

    // Attach the audio input. The device or the input can be nil (no microphone,
    // permission denied), so surface the error instead of discarding it and never
    // hand a nil input to the session.
    AVCaptureDevice *audioDevice = [AVCaptureDevice devicesWithMediaType:AVMediaTypeAudio].lastObject;
    NSError *inputError = nil;
    AVCaptureDeviceInput *captureAudioInput = nil;
    if (audioDevice) {
        captureAudioInput = [AVCaptureDeviceInput deviceInputWithDevice:audioDevice error:&inputError];
    }
    if (captureAudioInput && [captureSession canAddInput:captureAudioInput]) {
        [captureSession addInput:captureAudioInput];
    } else {
        NSLog(@"unable to add audio capture input: %@", inputError);
    }

    // Attach the audio data output.
    AVCaptureAudioDataOutput *captureAudioOutput = [[AVCaptureAudioDataOutput alloc] init];
    self.captureAudioOutput = captureAudioOutput;
    if ([captureSession canAddOutput:captureAudioOutput]) {
        [captureSession addOutput:captureAudioOutput];
    } else {
        NSLog(@"unable to add audio capture output");
    }

    // Sample buffers must be delivered on a SERIAL queue; a global (concurrent)
    // queue does not guarantee that buffers arrive in capture order.
    dispatch_queue_t captureAudioOutputQueue = dispatch_queue_create("audio.capture.output", DISPATCH_QUEUE_SERIAL);
    [captureAudioOutput setSampleBufferDelegate:self queue:captureAudioOutputQueue];

    // Destination file: start from an empty file on every recording.
    NSString *audioFilePath = [NSSearchPathForDirectoriesInDomains(NSDocumentDirectory, NSUserDomainMask, YES).lastObject stringByAppendingPathComponent:@"abc.aac"];
    self.audioFilePath = audioFilePath;
    NSFileManager *fileManager = [NSFileManager defaultManager];
    // Removal fails harmlessly when the file does not exist yet, so its result is ignored on purpose.
    [fileManager removeItemAtPath:audioFilePath error:nil];
    if (![fileManager createFileAtPath:audioFilePath contents:nil attributes:nil]) {
        NSLog(@"unable to create audio file at %@", audioFilePath);
    }
    NSFileHandle *audioFileHandle = [NSFileHandle fileHandleForWritingAtPath:audioFilePath];
    if (!audioFileHandle) {
        NSLog(@"unable to open audio file for writing at %@", audioFilePath);
    }
    self.audioFileHandle = audioFileHandle;

    [captureSession startRunning];

然後根據代理獲取錄製聲音的原始數據回調, 然後根據自己自定義的編碼器對原始數據進行編碼, 我這裏給出的是AAC數據格式的編碼樣例

#pragma mark - AVCaptureAudioDataOutputSampleBufferDelegate
/**
 *  Receives each captured PCM sample buffer, hands it to the AAC encoder and
 *  appends the encoded frame to the output file.
 *
 *  @param captureOutput The output that produced the buffer; buffers from any
 *                       output other than self.captureAudioOutput are ignored.
 *  @param sampleBuffer  The captured audio sample buffer.
 *  @param connection    The connection the buffer arrived on (unused).
 */
-(void)captureOutput:(AVCaptureOutput *)captureOutput didOutputSampleBuffer:(CMSampleBufferRef)sampleBuffer fromConnection:(AVCaptureConnection *)connection {
    if (captureOutput != self.captureAudioOutput) {
        return;
    }
    NSLog(@"%@--%@", sampleBuffer, [NSDate date]);
    [self.aacEncoder encodeSampleBuffer:sampleBuffer completionBlock:^(NSData *encodedData, NSError *error) {
        // The encoder reports failures with nil data; never pass that to the file handle.
        if (error || encodedData.length == 0) {
            NSLog(@"AAC encode failed: %@", error);
            return;
        }
        [self.audioFileHandle writeData:encodedData];
    }];
}

2.轉碼器

外部屬性和API:


@property (nonatomic) dispatch_queue_t encoderQueue;// Queue on which the encoding work is dispatched
@property (nonatomic) dispatch_queue_t callbackQueue;// Queue on which the completion block is invoked



// Takes a sample buffer of PCM audio, encodes it, and reports the outcome through completionBlock:
// encodedData holds the encoded bytes on success, error describes a failure.
- (void) encodeSampleBuffer:(CMSampleBufferRef)sampleBuffer completionBlock:(void (^)(NSData *encodedData, NSError* error))completionBlock;


內部屬性:

@property (nonatomic) AudioConverterRef audioConverter;// Format converter used for the encoding
@property (nonatomic) uint8_t *aacBuffer;// Buffer that receives the encoded AAC data
@property (nonatomic) NSUInteger aacBufferSize;// Size in bytes of the AAC buffer
@property (nonatomic) char *pcmBuffer;// PCM data waiting to be encoded
@property (nonatomic) size_t pcmBufferSize;// Size in bytes of the pending PCM data


初始化:

/**
 *  Creates the encoder with its two serial queues and a zero-filled
 *  1024-byte AAC output buffer. The audio converter itself is created lazily
 *  from the first sample buffer that is encoded.
 *
 *  @return The initialised encoder, or nil when the output buffer cannot be allocated.
 */
- (instancetype) init {
    self = [super init];
    if (self) {
        _encoderQueue = dispatch_queue_create("AAC Encoder Queue", DISPATCH_QUEUE_SERIAL);
        _callbackQueue = dispatch_queue_create("AAC Encoder Callback Queue", DISPATCH_QUEUE_SERIAL);
        // _audioConverter, _pcmBuffer and _pcmBufferSize are already zero after alloc.
        _aacBufferSize = 1024;
        // calloc returns zeroed memory, so no separate memset is needed.
        _aacBuffer = calloc(_aacBufferSize, sizeof(uint8_t));
        if (!_aacBuffer) {
            return nil;
        }
    }
    return self;
}

開始編碼:

/**
 *  Encodes one PCM sample buffer to an ADTS-framed AAC packet.
 *
 *  The work runs asynchronously on the encoder queue; the converter is created
 *  from the first buffer seen. The result is delivered on the callback queue.
 *
 *  @param sampleBuffer    PCM audio to encode. Retained for the duration of the encode.
 *  @param completionBlock Called with the ADTS header + AAC payload on success, or
 *                         with nil data and an NSOSStatusErrorDomain error on failure.
 */
- (void) encodeSampleBuffer:(CMSampleBufferRef)sampleBuffer completionBlock:(void (^)(NSData * encodedData, NSError* error))completionBlock {
    if (!sampleBuffer) {
        // CFRetain(NULL) would crash; report the bad argument instead.
        if (completionBlock) {
            NSError *argumentError = [NSError errorWithDomain:NSOSStatusErrorDomain code:kAudio_ParamError userInfo:nil];
            dispatch_async(_callbackQueue, ^{
                completionBlock(nil, argumentError);
            });
        }
        return;
    }
    CFRetain(sampleBuffer);
    dispatch_async(_encoderQueue, ^{
        NSData *data = nil;
        NSError *error = nil;
        CMBlockBufferRef blockBuffer = NULL;

        if (!self->_audioConverter) {
            [self setupEncoderFromSampleBuffer:sampleBuffer];
        }
        if (!self->_audioConverter) {
            // Converter creation failed; there is nothing to encode with.
            error = [NSError errorWithDomain:NSOSStatusErrorDomain code:kAudioConverterErr_UnspecifiedError userInfo:nil];
        }

        if (!error) {
            blockBuffer = CMSampleBufferGetDataBuffer(sampleBuffer);
            if (blockBuffer) {
                CFRetain(blockBuffer);
                // Expose the PCM bytes to the converter's input callback via _pcmBuffer/_pcmBufferSize.
                OSStatus pointerStatus = CMBlockBufferGetDataPointer(blockBuffer, 0, NULL, &self->_pcmBufferSize, &self->_pcmBuffer);
                if (pointerStatus != kCMBlockBufferNoErr) {
                    error = [NSError errorWithDomain:NSOSStatusErrorDomain code:pointerStatus userInfo:nil];
                }
            } else {
                error = [NSError errorWithDomain:NSOSStatusErrorDomain code:kAudio_ParamError userInfo:nil];
            }
        }

        if (!error) {
            memset(self->_aacBuffer, 0, self->_aacBufferSize);

            AudioBufferList outAudioBufferList = {0};
            outAudioBufferList.mNumberBuffers = 1;
            outAudioBufferList.mBuffers[0].mNumberChannels = 1;
            outAudioBufferList.mBuffers[0].mDataByteSize = (UInt32)self->_aacBufferSize;
            outAudioBufferList.mBuffers[0].mData = self->_aacBuffer;
            // Ask for exactly one AAC packet; the converter pulls PCM through inInputDataProc as needed.
            UInt32 ioOutputDataPacketSize = 1;
            OSStatus convertStatus = AudioConverterFillComplexBuffer(self->_audioConverter, inInputDataProc, (__bridge void *)(self), &ioOutputDataPacketSize, &outAudioBufferList, NULL);
            if (convertStatus == noErr) {
                NSData *rawAAC = [NSData dataWithBytes:outAudioBufferList.mBuffers[0].mData length:outAudioBufferList.mBuffers[0].mDataByteSize];
                NSMutableData *fullData = [NSMutableData dataWithData:[self adtsDataForPacketLength:rawAAC.length]];
                [fullData appendData:rawAAC];
                data = fullData;
            } else {
                error = [NSError errorWithDomain:NSOSStatusErrorDomain code:convertStatus userInfo:nil];
            }
        }

        // _pcmBuffer points into blockBuffer's memory; drop it before that memory is released
        // so a later converter callback can never read a dangling pointer.
        self->_pcmBuffer = NULL;
        self->_pcmBufferSize = 0;

        if (completionBlock) {
            dispatch_async(self->_callbackQueue, ^{
                completionBlock(data, error);
            });
        }
        if (blockBuffer) {
            CFRelease(blockBuffer);
        }
        CFRelease(sampleBuffer);
    });
}

輸出參數的配置和根據輸出參數創建轉換器:

/**
 *  Creates the PCM -> AAC audio converter, taking the input format from the
 *  given sample buffer. On failure _audioConverter is left NULL and the
 *  status is logged.
 *
 *  @param sampleBuffer A captured audio sample buffer whose format description
 *                      describes the PCM input.
 */
- (void) setupEncoderFromSampleBuffer:(CMSampleBufferRef)sampleBuffer {
    CMAudioFormatDescriptionRef formatDescription = (CMAudioFormatDescriptionRef)CMSampleBufferGetFormatDescription(sampleBuffer);
    const AudioStreamBasicDescription *inputDescriptionPointer = formatDescription ? CMAudioFormatDescriptionGetStreamBasicDescription(formatDescription) : NULL;
    if (!inputDescriptionPointer) {
        // Not an audio buffer (or no format attached); dereferencing NULL would crash.
        NSLog(@"setup converter: sample buffer has no audio stream description");
        return;
    }
    AudioStreamBasicDescription inAudioStreamBasicDescription = *inputDescriptionPointer;

    AudioStreamBasicDescription outAudioStreamBasicDescription = {0}; // Zero-initialise the output description; unset fields must be 0.
    outAudioStreamBasicDescription.mSampleRate = inAudioStreamBasicDescription.mSampleRate; // Frame rate of the decoded stream; same as the input, must not be 0.
    outAudioStreamBasicDescription.mFormatID = kAudioFormatMPEG4AAC; // Encoded format: MPEG-4 AAC.
    outAudioStreamBasicDescription.mFormatFlags = kMPEG4Object_AAC_LC; // AAC Low Complexity object type (a lossy profile).
    outAudioStreamBasicDescription.mBytesPerPacket = 0; // 0 = variable packet size; sizes come from AudioStreamPacketDescription.
    outAudioStreamBasicDescription.mFramesPerPacket = 1024; // AAC carries a fixed 1024 frames per packet.
    outAudioStreamBasicDescription.mBytesPerFrame = 0; // 0 for compressed formats.
    outAudioStreamBasicDescription.mChannelsPerFrame = 1; // Mono output.
    outAudioStreamBasicDescription.mBitsPerChannel = 0; // 0 for compressed formats.
    outAudioStreamBasicDescription.mReserved = 0; // Padding for 8-byte alignment; must be 0.

    // Prefer Apple's software AAC encoder.
    AudioClassDescription *description = [self
                                          getAudioClassDescriptionWithType:kAudioFormatMPEG4AAC
                                          fromManufacturer:kAppleSoftwareAudioCodecManufacturer];

    OSStatus status;
    if (description) {
        status = AudioConverterNewSpecific(&inAudioStreamBasicDescription, &outAudioStreamBasicDescription, 1, description, &_audioConverter);
    } else {
        // The lookup found no matching codec; passing NULL with a count of 1 is invalid,
        // so let the system choose whichever AAC encoder is available.
        status = AudioConverterNew(&inAudioStreamBasicDescription, &outAudioStreamBasicDescription, &_audioConverter);
    }

    if (status != 0) {
        NSLog(@"setup converter: %d", (int)status);
        _audioConverter = NULL;
    }
}


/**
 *  Looks up the installed encoder matching a format and manufacturer.
 *
 *  A codec is a device or program that transforms a signal or data stream:
 *  encoding it (for transmission, storage or encryption) or decoding it back
 *  into a form suitable for viewing or processing.
 *
 *  @param type         Encoded format to look for (compared against mSubType).
 *  @param manufacturer Codec manufacturer, i.e. software or hardware encoder.
 *
 *  @return Pointer to a function-static description of the matching encoder,
 *          or NULL when the query fails or nothing matches. The storage is
 *          shared, so the contents are overwritten by the next successful call.
 */
- (AudioClassDescription *)getAudioClassDescriptionWithType:(UInt32)type
                                           fromManufacturer:(UInt32)manufacturer
{
    static AudioClassDescription desc;

    UInt32 encoderSpecifier = type;
    OSStatus st;

    // First ask how many bytes of encoder descriptions exist for this format.
    UInt32 size;
    st = AudioFormatGetPropertyInfo(kAudioFormatProperty_Encoders,
                                    sizeof(encoderSpecifier),
                                    &encoderSpecifier,
                                    &size);

    if (st) {
        NSLog(@"error getting audio format propery info: %d", (int)(st));
        return NULL;
    }

    unsigned int count = size / sizeof(AudioClassDescription);
    if (count == 0) {
        // No encoders reported; a zero-length array below would be undefined behaviour.
        return NULL;
    }

    AudioClassDescription descriptions[count];
    st = AudioFormatGetProperty(kAudioFormatProperty_Encoders,
                                sizeof(encoderSpecifier),
                                &encoderSpecifier,
                                &size,
                                descriptions);
    if (st) {
        NSLog(@"error getting audio format propery: %d", (int)(st));
        return NULL;
    }

    for (unsigned int i = 0; i < count; i++) {
        if ((type == descriptions[i].mSubType) &&
            (manufacturer == descriptions[i].mManufacturer)) {
            // Copy out of the stack array so the result outlives this call.
            memcpy(&desc, &(descriptions[i]), sizeof(desc));
            return &desc;
        }
    }

    return NULL;
}

轉換的回調函數:

/**
 *  Input callback for the audio converter: hands the encoder's pending PCM
 *  data to the converter each time it asks for more input.
 *
 *  Returns noErr and reports one packet when data was supplied; otherwise
 *  reports zero packets and returns -1.
 *
 *  NOTE(review): the byte count returned by copyPCMSamplesIntoBuffer: is compared
 *  against a packet count, and the packet count reported back is always 1 -
 *  confirm this matches what the converter expects.
 */
OSStatus inInputDataProc(AudioConverterRef inAudioConverter, UInt32 *ioNumberDataPackets, AudioBufferList *ioData, AudioStreamPacketDescription **outDataPacketDescription, void *inUserData)
{
    AACEncoder *owner = (__bridge AACEncoder *)(inUserData);
    const UInt32 packetsWanted = *ioNumberDataPackets;
    const size_t bytesSupplied = [owner copyPCMSamplesIntoBuffer:ioData];

    // Not enough PCM is available yet when fewer units were copied than requested.
    const BOOL inputAvailable = (bytesSupplied >= packetsWanted);

    *ioNumberDataPackets = inputAvailable ? 1 : 0;
    return inputAvailable ? noErr : -1;
}

/**
 *  Hands the pending PCM data to the converter by pointing the first buffer of
 *  ioData at it, then clears the pending state so the same data is never
 *  supplied twice.
 *
 *  @param ioData Buffer list supplied by the converter's input callback.
 *  @return Number of PCM bytes handed over, or 0 when nothing was pending.
 */
- (size_t) copyPCMSamplesIntoBuffer:(AudioBufferList*)ioData {
    const size_t pendingByteCount = _pcmBufferSize;
    if (pendingByteCount == 0) {
        return 0;
    }

    // No bytes are copied; the converter reads straight from the pending buffer.
    AudioBuffer *target = &ioData->mBuffers[0];
    target->mData = _pcmBuffer;
    target->mDataByteSize = (int)pendingByteCount;

    _pcmBufferSize = 0;
    _pcmBuffer = NULL;
    return pendingByteCount;
}

轉換後的數據要拼上AAC的格式頭:

/**
 *  Builds the 7-byte ADTS header that must precede every raw AAC packet
 *  produced by the converter so that the stream is playable as an .aac file.
 *
 *  The header is hard-coded for AAC-LC, 44.1 kHz, one channel.
 *  NOTE(review): setupEncoderFromSampleBuffer: copies the sample rate from the
 *  captured input, so the frequency index here is only correct when the capture
 *  rate is 44.1 kHz - confirm or derive it from the converter's format.
 *
 *  See: http://wiki.multimedia.cx/index.php?title=ADTS
 *  Also: http://wiki.multimedia.cx/index.php?title=MPEG-4_Audio#Channel_Configurations
 *
 *  @param packetLength Length in bytes of the raw AAC packet, excluding the header.
 *  @return The 7-byte header. Its frame-length field holds packetLength + 7,
 *          truncated to the 13 bits the format provides.
 **/
- (NSData*) adtsDataForPacketLength:(NSUInteger)packetLength {
    enum { kADTSHeaderLength = 7 };
    const int profile = 2;  // AAC LC
    const int freqIdx = 4;  // 44.1KHz
    const int chanCfg = 1;  // MPEG-4 Audio Channel Configuration. 1 Channel front-center
    // The frame length stored in the header counts the header itself.
    const NSUInteger fullLength = kADTSHeaderLength + packetLength;

    // A stack buffer avoids an unchecked malloc; NSData copies the bytes.
    uint8_t packet[kADTSHeaderLength];
    packet[0] = 0xFF; // 11111111     = syncword
    packet[1] = 0xF9; // 1111 1 00 1  = syncword MPEG-2 Layer CRC
    packet[2] = (uint8_t)(((profile - 1) << 6) + (freqIdx << 2) + (chanCfg >> 2));
    // Only the top 2 bits of the 13-bit length belong in this byte; masking keeps an
    // oversized length from spilling into the channel-configuration bits.
    packet[3] = (uint8_t)(((chanCfg & 3) << 6) + ((fullLength >> 11) & 0x3));
    packet[4] = (uint8_t)((fullLength & 0x7FF) >> 3);
    packet[5] = (uint8_t)(((fullLength & 7) << 5) + 0x1F);
    packet[6] = 0xFC;
    return [NSData dataWithBytes:packet length:kADTSHeaderLength];
}

注意點:

錄音的數據是可以根據幀爲單位進行切割來播放的。

  1. 未壓縮的PCM數據, 看你是封裝成什麼文件格式(而不是數據格式), 根據文件格式拼上格式頭就可以播放了。
    例如: pcm數據格式的wav文件的格式頭:
// Stores the low 32 bits of `value` at `destination` in little-endian byte order.
static void WavStoreLittleEndian32(Byte *destination, long value)
{
    for (int byteIndex = 0; byteIndex < 4; byteIndex++) {
        destination[byteIndex] = (Byte) ((value >> (8 * byteIndex)) & 0xff);
    }
}

// Builds the 44-byte RIFF/WAVE header for 16-bit linear PCM audio; prepending it
// to raw PCM data makes the data playable as a .wav file.
//
// totalAudioLen  - byte count written to the "data" chunk size field (PCM bytes only).
// totalDataLen   - byte count written to the RIFF chunk size field (file size - 8).
// longSampleRate - sample rate in Hz.
// channels       - channel count (only the low byte is stored).
// byteRate       - bytes per second (sample rate * channels * bits per sample / 8).
NSData* WriteWavFileHeader(long totalAudioLen, long totalDataLen, long longSampleRate,int channels, long byteRate)
{
    // Constant fields are laid out up front; the zeroed slots are filled in below.
    Byte header[44] = {
        'R', 'I', 'F', 'F',   //  0-3   RIFF chunk id
        0, 0, 0, 0,           //  4-7   RIFF chunk size
        'W', 'A', 'V', 'E',   //  8-11  form type
        'f', 'm', 't', ' ',   // 12-15  format chunk id (trailing space is part of the id)
        16, 0, 0, 0,          // 16-19  format chunk size, always 16
        1, 0,                 // 20-21  format tag 1 = linear PCM
        0, 0,                 // 22-23  channel count
        0, 0, 0, 0,           // 24-27  sample rate
        0, 0, 0, 0,           // 28-31  byte rate
        0, 0,                 // 32-33  block align
        16, 0,                // 34-35  bits per sample
        'd', 'a', 't', 'a',   // 36-39  data chunk id
        0, 0, 0, 0            // 40-43  data chunk size
    };

    WavStoreLittleEndian32(header + 4, totalDataLen);
    header[22] = (Byte) channels;
    WavStoreLittleEndian32(header + 24, longSampleRate);
    WavStoreLittleEndian32(header + 28, byteRate);
    // Block align = channels * bits per sample / 8.
    header[32] = (Byte) (channels * 16 / 8);
    WavStoreLittleEndian32(header + 40, totalAudioLen);

    return [[NSData alloc] initWithBytes:header length:44];
}

2.壓縮的數據, 壓縮(編碼)後的數據, 一般都是每一幀都有獨立的格式頭的, 所以, 根據幀切割後,直接就可以播放了, 我自己測的時候用AVAudioPlayer是播放不了AAC數據格式文件的, 要用 AudioServicesPlaySystemSound(),或者AudioQueueStart()來播放

最後附上AAC錄音編碼, 和AAC解碼播放的DEMO

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章