缓冲区或缓冲区大小不正确

时间:2020-05-19 09:25:14

标签: c++ objective-c audio fft alsa

我不明白为什么这种做法行不通:我想采集声音样本,并把它们传给指定的函数。

我的代码是参照一个用 Objective-C 编写的版本改写的。

下面这段用 C++ 编写的代码,本应把浮点缓冲区传递给 OXY_DecodeAudioBuffer 函数,再由该函数在缓冲区中查找数据。

问题:我传给函数的缓冲区大小和缓冲区内容是否正确?函数总是报告在缓冲区中找不到任何数据。有人能看出问题出在哪里吗?

我正在使用的硬件是带有USB麦克风的Raspberry Pi 2。

下面附上该函数的说明:

// OXY_DecodeAudioBuffer: receives an audio buffer of the specified size and reports whether encoded data was found.
  // Parameters:
  //   audioBuffer:  float array of `size` elements holding the audio data to be decoded
  //   size:         size of audioBuffer
  //   oxyingObject: OXY object instance, created in OXY_Create()
  // Returns: -1 if no decoded data is found, -2 if the start token is found,
  //          -3 if a complete word has been decoded, or a positive number if a
  //          character is decoded (the number is the token index).

  OXY_DLLEXPORT int32_t OXY_DecodeAudioBuffer(float *audioBuffer, int size, void *oxyingObject);

以下代码中的float_buffer输出:

 1. -0.00354004    -0.00369263    -0.00338745    -0.00354004    -0.00341797    -0.00402832

程序代码:

#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>

#include <iostream>
#include <vector>

#include <alsa/asoundlib.h>

#include "Globals.h"
#include "OxyCoreLib_api.h"

void* mCore;
using namespace std;

// Debug helper: writes the decode mode reported by the global OXY core
// (mCore) to stderr, followed by a blank line.
void GetDecodedMode()
{
    std::cerr << "DECODE_MODE ---> ";
    std::cerr << OXY_GetDecodedMode(mCore);
    std::cerr << std::endl << std::endl;
}

int main(void)
{
    int i,j;
    int err;
    int mode = 3;
    int16_t *buffer;
    float* float_buffer;

    // Allocate our own buffers (1 channel, 16 bits per sample, thus 16 bits per frame, thus 2 bytes per frame).
    // Practice learns the buffers used contain 512 frames, if this changes it will be fixed in processAudio.
    int buffer_frames = 512;   //Not sure this correct but reason above

    unsigned int rate = 44100;
    float sampleRate = 44100.f;  //to configure
    snd_pcm_t *capture_handle;
    snd_pcm_hw_params_t *hw_params;
    snd_pcm_format_t format = SND_PCM_FORMAT_S16_LE;

    if ((err = snd_pcm_open(&capture_handle, "hw:1,0", SND_PCM_STREAM_CAPTURE, 0)) < 0) {
        fprintf(stderr, "cannot open audio device %s (%s)\n","device",snd_strerror(err));
        exit(1);
    } else {fprintf(stdout, "audio interface opened\n");}

    if ((err = snd_pcm_hw_params_malloc(&hw_params)) < 0) {
        fprintf(stderr, "cannot allocate hardware parameter structure (%s)\n",
            snd_strerror(err));
        exit(1);
    }   else { fprintf(stdout, "hw_params allocated\n"); }

    if ((err = snd_pcm_hw_params_any(capture_handle, hw_params)) < 0) {
        fprintf(stderr, "cannot initialize hardware parameter structure (%s)\n",
            snd_strerror(err));
        exit(1);
    }   else { fprintf(stdout, "hw_params initialized\n"); }

    if ((err = snd_pcm_hw_params_set_access(capture_handle, hw_params, SND_PCM_ACCESS_RW_INTERLEAVED)) < 0) {
        fprintf(stderr, "cannot set access type (%s)\n",
            snd_strerror(err));
        exit(1);
    }   else { fprintf(stdout, "hw_params access set\n"); }

    if ((err = snd_pcm_hw_params_set_format(capture_handle, hw_params, format)) < 0) {
        fprintf(stderr, "cannot set sample format (%s)\n",
            snd_strerror(err));
        exit(1);
    }   else { fprintf(stdout, "hw_params format set\n"); }

    if ((err = snd_pcm_hw_params_set_rate_near(capture_handle, hw_params, &rate, 0)) < 0) {
        fprintf(stderr, "cannot set sample rate (%s)\n",
            snd_strerror(err));
        exit(1);
    }   else { fprintf(stdout, "hw_params rate set\n"); }

    if ((err = snd_pcm_hw_params_set_channels(capture_handle, hw_params, 1)) < 0) {
        fprintf(stderr, "cannot set channel count (%s)\n",
            snd_strerror(err));
        exit(1);
    }   else { fprintf(stdout, "hw_params channels set\n"); }

    if ((err = snd_pcm_hw_params(capture_handle, hw_params)) < 0) {
        fprintf(stderr, "cannot set parameters (%s)\n",
            snd_strerror(err));
        exit(1);
    }   else { fprintf(stdout, "hw_params set\n"); }

    snd_pcm_hw_params_free(hw_params);

    fprintf(stdout, "hw_params freed\n");

    if ((err = snd_pcm_prepare(capture_handle)) < 0) {
        fprintf(stderr, "cannot prepare audio interface for use (%s)\n",
            snd_strerror(err));
        exit(1);
    }   else { fprintf(stdout, "audio interface prepared\n"); }

    //allocate buffer of 16bit ints, as specified in PCM_FORMAT

    //initialise
    mCore = OXY_Create();
    //Configure - Mode 3 inaudible, 44100, bufferSize
    OXY_Configure(mode, sampleRate, buffer_frames, mCore);

    //Debug to make sure
    GetDecodedMode();

    buffer = static_cast<int16_t*>(malloc(buffer_frames * snd_pcm_format_width(format) / 8 * 2));
    //buffer = malloc(buffer_frames * snd_pcm_format_width(format) / 8 * 2);

    float_buffer = static_cast<float*>(malloc(buffer_frames*sizeof(float)));
    //float_buffer = malloc(buffer_frames*sizeof(float));

    fprintf(stdout, "buffer allocated\n");

    //where did 10000 come from doubt its correct
    for (i = 0; i < 10000; ++i) {
        //read from audio device into buffer
        if ((err = snd_pcm_readi(capture_handle, buffer, buffer_frames)) != buffer_frames) {
            fprintf(stderr, "read from audio interface failed (%s)\n",
                err, snd_strerror(err));
            exit(1);
        }
        //try to change buffer from short ints to floats for transformation
        for (i = 0; i < buffer_frames; i++){
            //norm
            float_buffer[i] = (float)buffer[i]/32768.0;

            //Example output of float_buffer
            /*
             -0.00354004
             -0.00369263
             -0.00338745
             -0.00354004
             -0.00341797
             -0.00402832
             -0.00341797
             -0.00427246
             -0.00375366
             -0.00378418
             -0.00408936
             -0.00332642
             -0.00369263
             -0.00350952
             -0.00369263
             -0.00369263
             -0.00344849
             -0.00354004
             */
        }

        //send to float_to be tested
        int ret = OXY_DecodeAudioBuffer(float_buffer, buffer_frames, mCore);

        if (ret == -2)
        {
        std::cerr << "FOUND_TOKEN ---> -2 " << std::endl << std::endl;
        }
            else if(ret>=0)
        {

        std::cerr << "Decode started ---> -2 " << ret << std::endl << std::endl;
        }
        else if (ret == -3)
        {
        //int sizeStringDecoded = OXY_GetDecodedData(mStringDecoded, mCore);

         std::cerr << "STRING DECODED ---> -2 " << std::endl << std::endl;
         // ...
        }
        else
        {
          std::cerr << "No data found in this buffer" << std::endl << std::endl;
            //no data found in this buffer
        }
    }
    free(buffer);
    snd_pcm_close(capture_handle);

    std::cerr << "memory freed\n" << std::endl << std::endl;

    //snd_pcm_close(capture_handle);
    return(0);
    //exit(0);
}

使用相同 API、可以正常工作的 Objective-C 版本:

//
//  IosAudioController.m
//

#import "IosAudioController.h"
#import <AudioToolbox/AudioToolbox.h>

#import "OxyCoreLib_api.h"


#define kOutputBus 0
#define kInputBus 1

IosAudioController* iosAudio;

// Treats any non-zero status as fatal: prints it and exits with code 1.
// A zero status returns to the caller without output.
void checkStatus(int status){
    if (status == 0) {
        return;
    }

    printf("Status not 0! %d\n", status);
    exit(1);
}

// Input callback registered on the audio unit (see -init).
// When decoding is enabled it renders inNumberFrames recorded frames into a
// temporary 16-bit mono buffer, hands them to -processAudio:, converts them to
// floats and passes them to OXY_DecodeAudioBuffer, then notifies the listener
// (mObject/mSelector) according to the decoder's return code.
// Always returns noErr; a non-zero AudioUnitRender status terminates the
// process inside checkStatus().
static OSStatus recordingCallback(void *inRefCon, 
                                  AudioUnitRenderActionFlags *ioActionFlags, 
                                  const AudioTimeStamp *inTimeStamp, 
                                  UInt32 inBusNumber, 
                                  UInt32 inNumberFrames, 
                                  AudioBufferList *ioData) {

  // Nothing to do while decoding is switched off.
  if (iosAudio->mOxyObject->mDecoding == 0)
    return noErr;

    // Because of the way our audio format (setup below) is chosen:
    // we only need 1 buffer, since it is mono
    // Samples are 16 bits = 2 bytes.
    // 1 frame includes only 1 sample

    AudioBuffer buffer;

    buffer.mNumberChannels = 1;
    buffer.mDataByteSize = inNumberFrames * 2;
    buffer.mData = malloc( inNumberFrames * 2 );

    // Put buffer in a AudioBufferList
    AudioBufferList bufferList;
    bufferList.mNumberBuffers = 1;
    bufferList.mBuffers[0] = buffer;

  // Then:
  // Obtain recorded samples

  OSStatus status;

  status = AudioUnitRender([iosAudio audioUnit],
                           ioActionFlags,
                           inTimeStamp,
                           inBusNumber,
                           inNumberFrames,
                           &bufferList);
    checkStatus(status);

    // Now, we have the samples we just read sitting in buffers in bufferList
    // Process the new data
    [iosAudio processAudio:&bufferList];

    //Now Decode Audio *******************

    //convert from AudioBuffer format to *float buffer
    // (a fresh float buffer is allocated per callback and freed at the end)
    iosAudio->floatBuffer = (float *)malloc(inNumberFrames * sizeof(float));

    //UInt16 *frameBuffer = bufferList.mBuffers[0].mData;
    // Samples are read as signed 16-bit and scaled by 1/32768.
    SInt16 *frameBuffer = bufferList.mBuffers[0].mData;
    for(int j=0;j<inNumberFrames;j++)
    {
        iosAudio->floatBuffer[j] = frameBuffer[j]/32768.0;
    }

    // The size passed to the decoder is the number of frames in this callback.
    int ret = OXY_DecodeAudioBuffer(iosAudio->floatBuffer, inNumberFrames, (void*)iosAudio->mOxyObject->mOxyCore);

    if (ret == -2)
    {
    //  NSLog(@"BEGIN TOKEN FOUND!");

      // Listener is called with 0 when the start token is found.
      [iosAudio->mObject performSelector:iosAudio->mSelector withObject:[NSNumber numberWithInt:0]];
    }
    else if (ret >= 0)
    {
      NSLog(@"Decode started %@",@(ret).stringValue);
    }
    else if (ret == -3)
    {
      // A complete word was decoded: fetch the decoded string from the core.
      int sizeStringDecoded = OXY_GetDecodedData(iosAudio->mStringDecoded, (void*)iosAudio->mOxyObject->mOxyCore);

      NSString *tmpString = [NSString stringWithUTF8String:iosAudio->mStringDecoded];

      iosAudio->mOxyObject->mDecodedString = [NSString stringWithUTF8String:iosAudio->mStringDecoded];

      if (sizeStringDecoded > 0)
      {
        // Listener is called with 1 on a successful decode.
        iosAudio->mOxyObject->mDecodedOK = 1;
        NSLog(@"Decoded OK! %@ ", tmpString);
        [iosAudio->mObject performSelector:iosAudio->mSelector withObject:[NSNumber numberWithInt:1]];
      }
      else
      {
        // Listener is called with 2 when decoding ended without data.
        iosAudio->mOxyObject->mDecodedOK = -1;
        NSLog(@"END DECODING BAD! %@ ", tmpString);
        [iosAudio->mObject performSelector:iosAudio->mSelector withObject:[NSNumber numberWithInt:2]];
      }
    }
    else
    {
        //no data found in this buffer
    }

    // release the malloc'ed data in the buffer we created earlier
    free(bufferList.mBuffers[0].mData);
    free(iosAudio->floatBuffer);

    return noErr;
}

// Render callback registered on the audio unit (see -init).
// For every output buffer it first copies the most recently recorded data
// (tempBuffer), then overwrites the buffer either with encoded audio obtained
// from OXY_GetEncodedAudioBuffer (while mEncoding > 0) or with silence.
// Always returns noErr.
static OSStatus playbackCallback(void *inRefCon, 
                                 AudioUnitRenderActionFlags *ioActionFlags, 
                                 const AudioTimeStamp *inTimeStamp, 
                                 UInt32 inBusNumber, 
                                 UInt32 inNumberFrames, 
                                 AudioBufferList *ioData) {    
    // Notes: ioData contains buffers (may be more than one!)
    // Fill them up as much as you can. Remember to set the size value in each buffer to match how
    // much data is in the buffer.

    for (int i=0; i < ioData->mNumberBuffers; i++)
    { // in practice we will only ever have 1 buffer, since audio format is mono
        // NOTE(review): `buffer` is declared by value, so the mDataByteSize
        // assignment below appears to update only this local copy, not
        // ioData->mBuffers[i] - confirm whether that is intended.
        AudioBuffer buffer = ioData->mBuffers[i];

//      NSLog(@"  Buffer %d has %d channels and wants %d bytes of data.", i, buffer.mNumberChannels, buffer.mDataByteSize);

        // copy temporary buffer data to output buffer
        UInt32 size = min(buffer.mDataByteSize, [iosAudio tempBuffer].mDataByteSize); // dont copy more data than we have, or than fits
        memcpy(buffer.mData, [iosAudio tempBuffer].mData, size);
        buffer.mDataByteSize = size; // indicate how much data we wrote in the buffer

        // uncomment to hear random noise
        /*UInt16 *frameBuffer = buffer.mData;
        for (int j = 0; j < inNumberFrames; j++)
          frameBuffer[j] = rand();*/

        // Play encoded buffer
        if (iosAudio->mOxyObject->mEncoding > 0)
        {
            int sizeSamplesRead;
            float audioBuffer[2048];
            sizeSamplesRead = OXY_GetEncodedAudioBuffer(audioBuffer, (void*)iosAudio->mOxyObject->mOxyCore);
            // Zero samples returned switches encoding off.
            if (sizeSamplesRead == 0)
                iosAudio->mOxyObject->mEncoding = 0;

            // Convert the float samples to signed 16-bit by scaling with 32768.
            // NOTE(review): sizeSamplesRead is not checked against the output
            // buffer's capacity or against the 2048-element audioBuffer - verify
            // what OXY_GetEncodedAudioBuffer can return.
            SInt16 *frameBuffer = buffer.mData;
            for(int j=0;j<sizeSamplesRead;j++)
            {
                frameBuffer[j] = audioBuffer[j]*32768.0;
            }
        }
        else
        {
            // Not encoding: output silence for all requested frames.
            SInt16 *frameBuffer = buffer.mData;
            for (int j = 0; j < inNumberFrames; j++)
                frameBuffer[j] = 0;
        }    
    }

    return noErr;
}

// Controller that owns the audio unit used for recording and playback.
// It wires recordingCallback / playbackCallback into the unit and keeps a
// temporary buffer (tempBuffer) holding the most recently recorded samples.
@implementation IosAudioController

@synthesize audioUnit, tempBuffer;

// Creates and initialises the RemoteIO audio unit: enables input and output,
// applies a 44.1 kHz, 16-bit, mono, packed signed-integer linear PCM format to
// both buses, installs the two callbacks and allocates tempBuffer.
// Any failing step (except the ShouldAllocateBuffer property, whose status is
// not checked) terminates the process via checkStatus().
- (id) init {
    self = [super init];

    OSStatus status;

    // Describe audio component
    AudioComponentDescription desc;
    desc.componentType = kAudioUnitType_Output;
    desc.componentSubType = kAudioUnitSubType_RemoteIO;
    desc.componentFlags = 0;
    desc.componentFlagsMask = 0;
    desc.componentManufacturer = kAudioUnitManufacturer_Apple;

    // Get component
    AudioComponent inputComponent = AudioComponentFindNext(NULL, &desc);

    // Get audio units
    status = AudioComponentInstanceNew(inputComponent, &audioUnit);
    checkStatus(status);

    // Enable IO for recording
    UInt32 flag = 1;
    status = AudioUnitSetProperty(audioUnit, 
                                  kAudioOutputUnitProperty_EnableIO, 
                                  kAudioUnitScope_Input, 
                                  kInputBus,
                                  &flag, 
                                  sizeof(flag));
    checkStatus(status);

    // Enable IO for playback
    status = AudioUnitSetProperty(audioUnit, 
                                  kAudioOutputUnitProperty_EnableIO, 
                                  kAudioUnitScope_Output, 
                                  kOutputBus,
                                  &flag, 
                                  sizeof(flag));
    checkStatus(status);

    // Describe format: 44.1 kHz, mono, 16-bit signed packed PCM (2 bytes per frame)
    AudioStreamBasicDescription audioFormat;
    audioFormat.mSampleRate         = 44100.0;
    audioFormat.mFormatID           = kAudioFormatLinearPCM;
    audioFormat.mFormatFlags        = kAudioFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked;
    audioFormat.mFramesPerPacket    = 1;
    audioFormat.mChannelsPerFrame   = 1;
    audioFormat.mBitsPerChannel     = 16;
    audioFormat.mBytesPerPacket     = 2;
    audioFormat.mBytesPerFrame      = 2;

    // Apply format (output scope of the input bus, input scope of the output bus)
    status = AudioUnitSetProperty(audioUnit, 
                                  kAudioUnitProperty_StreamFormat, 
                                  kAudioUnitScope_Output, 
                                  kInputBus, 
                                  &audioFormat, 
                                  sizeof(audioFormat));
    checkStatus(status);
    status = AudioUnitSetProperty(audioUnit, 
                                  kAudioUnitProperty_StreamFormat, 
                                  kAudioUnitScope_Input, 
                                  kOutputBus, 
                                  &audioFormat, 
                                  sizeof(audioFormat));
    checkStatus(status);

    // Set input callback
    AURenderCallbackStruct callbackStruct;
    callbackStruct.inputProc = recordingCallback;
    callbackStruct.inputProcRefCon = (__bridge void *)self;
    status = AudioUnitSetProperty(audioUnit, 
                                  kAudioOutputUnitProperty_SetInputCallback, 
                                  kAudioUnitScope_Global, 
                                  kInputBus, 
                                  &callbackStruct, 
                                  sizeof(callbackStruct));
    checkStatus(status);

    // Set output callback
    callbackStruct.inputProc = playbackCallback;
    callbackStruct.inputProcRefCon = (__bridge void *)self;
    status = AudioUnitSetProperty(audioUnit, 
                                  kAudioUnitProperty_SetRenderCallback, 
                                  kAudioUnitScope_Global, 
                                  kOutputBus,
                                  &callbackStruct, 
                                  sizeof(callbackStruct));
    checkStatus(status);

    // Disable buffer allocation for the recorder (optional - do this if we want to pass in our own)
    // Note: the status of this call is not passed to checkStatus().
    flag = 0;
    status = AudioUnitSetProperty(audioUnit, 
                                  kAudioUnitProperty_ShouldAllocateBuffer,
                                  kAudioUnitScope_Output, 
                                  kInputBus,
                                  &flag, 
                                  sizeof(flag));

    // Allocate our own buffers (1 channel, 16 bits per sample, thus 16 bits per frame, thus 2 bytes per frame).
    // Practice learns the buffers used contain 512 frames, if this changes it will be fixed in processAudio.
    tempBuffer.mNumberChannels = 1;
  int size = 512;
#if (TARGET_OS_SIMULATOR)
  size = 256; //TODO check this value!! depends on play/record callback buffer size
#else
  size = 512; //TODO check this value!! depends on play/record callback buffer size
#endif

  // `size` is in frames; each frame is 2 bytes.
  tempBuffer.mDataByteSize = size * 2;
    tempBuffer.mData = malloc( size * 2);

    // Initialise
    status = AudioUnitInitialize(audioUnit);
    checkStatus(status);

    return self;
}

// Starts the audio unit; a non-zero status terminates the process via checkStatus().
- (void) start {
    OSStatus status = AudioOutputUnitStart(audioUnit);
    checkStatus(status);
}

// Stops the audio unit; a non-zero status terminates the process via checkStatus().
- (void) stop {
    OSStatus status = AudioOutputUnitStop(audioUnit);
    checkStatus(status);
}

// Copies the first buffer of bufferList into tempBuffer, reallocating
// tempBuffer first when its byte size differs from the incoming buffer's.
- (void) processAudio: (AudioBufferList*) bufferList{
    AudioBuffer sourceBuffer = bufferList->mBuffers[0];

    // fix tempBuffer size if it's the wrong size
    if (tempBuffer.mDataByteSize != sourceBuffer.mDataByteSize) {
        free(tempBuffer.mData);
        tempBuffer.mDataByteSize = sourceBuffer.mDataByteSize;
        tempBuffer.mData = malloc(sourceBuffer.mDataByteSize);
    }

    // copy incoming audio data to temporary buffer
    memcpy(tempBuffer.mData, bufferList->mBuffers[0].mData, bufferList->mBuffers[0].mDataByteSize);
}

// Uninitialises the audio unit and releases tempBuffer's storage.
- (void) dealloc {

    AudioUnitUninitialize(audioUnit);
    free(tempBuffer.mData);
}

// Stores the OxyCore object whose state the audio callbacks read and update.
- (void) setOxyObject: (OxyCore*) oxyObject
{
    mOxyObject = oxyObject;
}


// Registers the target object and selector that recordingCallback invokes
// with an NSNumber describing the decode event.
- (void) setListenCallback:(id)object withSelector:(SEL)selector
{
    mObject = object;
    mSelector = selector;
}

@end

1 个答案:

答案 0 :(得分:0)

我能看到的一个问题是:两个嵌套的循环使用了同一个迭代变量 i。外层循环是 for (i = 0; i < 10000; ++i),内层循环是 for (i = 0; i < buffer_frames; i++)。内层循环结束时 i 等于 buffer_frames,因此如果 buffer_frames >= 10000 - 1,外层循环只会执行一次就退出;否则 i 每次都会被内层循环重置,外层循环将变成无限循环。

关于以下行,我还有两句话:

buffer = static_cast<int16_t*>(malloc(buffer_frames * snd_pcm_format_width(format) / 8 * 2));

根据 API 参考文档,snd_pcm_format_width(format) 返回每个样本的位数。由于每个样本为 16 位,并且每帧只包含一个样本,因此您应该分配 buffer_frames * snd_pcm_format_width(format) / 8 字节的内存(原代码中乘以的 2 代表通道数,而在您的情况下通道数为 1)。另外,我建议将缓冲区类型改为 char*,因为它是唯一不会违反严格别名规则(strict aliasing rule)的类型。因此,该行变为:

static_cast<char*>(malloc(buffer_frames * (snd_pcm_format_width(format) / 8)));

这样,在把短整型样本转换为浮点数时,第二个 for 循环就变为:

// View the raw capture bytes as signed 16-bit samples. The explicit cast is
// required in C++ once `buffer` is declared as char*; a plain assignment from
// char* to int16_t* does not compile.
int16_t* sint_buffer = reinterpret_cast<int16_t*>(buffer);
// Use a dedicated index (j) so the outer loop's counter is left untouched.
for (j = 0; j < buffer_frames; ++j){
    float_buffer[j] = static_cast<float>(sint_buffer[j]) / 32768.0f;
    // everything else goes here
}