Sending audio data to the IBM Watson speech recognizer with OpenSL ES

Date: 2017-08-24 02:09:54

Tags: java android c++ speech-recognition opensl

I am currently using OpenSL ES to capture audio data from the device's microphone. I use a PipedInputStream/PipedOutputStream pair to send the audio data over a WebSocket to the IBM Watson speech recognizer, but I am not getting any results back, and I am not entirely sure why. I need help getting recognition results from the microphone.

Here is my MainActivity.java

PipedInputStream pipedInputStream;
PipedOutputStream pipedOutputStream;

MyDataHandler handler;

SpeechToText speechToTextService;
String result;
Boolean isFinal;

AudioFormat audioFormat;

@Override
protected void onCreate(Bundle savedInstanceState) {
    super.onCreate(savedInstanceState);
    setContentView(R.layout.activity_main);
    try {
        pipedOutputStream = new PipedOutputStream();
        pipedInputStream = new PipedInputStream(pipedOutputStream);
        handler = new MyDataHandler(pipedOutputStream);

        new Thread(new Runnable() {
            @Override
            public void run() {
                start_process();
            }
        }).start();

        new Thread(new Runnable() {
            @Override
            public void run() {
                while (true) {
                    doCallToNativeFunction(pipedOutputStream);
                    try {
                        Thread.sleep(100);
                    } catch (InterruptedException e) {
                        Log.e("MainActivity", e.getMessage(), e);
                    }
                }
            }
        }).start();

        speechToTextService = new SpeechToText();
        speechToTextService.setUsernameAndPassword("f1361112-2e3e-4787-b180-4df70aee83a1",
                "BCouSNYeaqtc");

        new Thread(new Runnable() {
            @Override
            public void run() {
                RecognizeOptions options = new RecognizeOptions.Builder()
                        .continuous(true)
                        .interimResults(true)
                        .contentType(HttpMediaType.AUDIO_PCM + "; rate=" + 44100)
                        .inactivityTimeout(-1)
                        .wordConfidence(true)
                        .build();
                speechToTextService.recognizeUsingWebSocket(pipedInputStream, options, new BaseRecognizeCallback() {
                    @Override
                    public void onTranscription(SpeechResults speechResults) {
                        try {
                            result = speechResults.getResults().get(0).getAlternatives()
                                    .get(0).getTranscript();
                            isFinal = speechResults.getResults().get(0).isFinal();

                            System.out.println(speechResults);
                        } catch (Exception e) {
                            Log.e("MainActivity", e.getMessage(), e);
                        }
                    }
                });
            }
        }).start();
    } catch (IOException e) {
        Log.e("MainActivity", e.getMessage(), e);
    }
}

public void doCallToNativeFunction(OutputStream outputStream) {
    // the outputStream parameter is unused here; the native layer delivers
    // its data through the MyDataHandler instance instead
    getAudioDataWithHandler(handler);
}
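
One way to isolate the Watson half of this pipeline is to temporarily feed the same pipe from Android's AudioRecord, which hands back exactly the signed 16-bit little-endian PCM that a raw "rate=44100" content type implies. A minimal sketch (hypothetical replacement for the native capture thread, not part of the original code; assumes the RECORD_AUDIO permission is granted):

// Hypothetical sanity check: capture 16-bit 44.1 kHz mono PCM with
// AudioRecord and write it straight into the pipe that
// recognizeUsingWebSocket() reads from. If Watson returns transcripts with
// this source, the problem is isolated to the OpenSL ES path.
new Thread(new Runnable() {
    @Override
    public void run() {
        int bufSize = AudioRecord.getMinBufferSize(44100,
                AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT);
        AudioRecord recorder = new AudioRecord(MediaRecorder.AudioSource.MIC,
                44100, AudioFormat.CHANNEL_IN_MONO,
                AudioFormat.ENCODING_PCM_16BIT, bufSize);
        recorder.startRecording();
        byte[] chunk = new byte[bufSize];
        try {
            while (!Thread.interrupted()) {
                int n = recorder.read(chunk, 0, chunk.length);
                if (n > 0) {
                    pipedOutputStream.write(chunk, 0, n); // already 16-bit LE PCM
                }
            }
        } catch (IOException e) {
            Log.e("MainActivity", e.getMessage(), e);
        } finally {
            recorder.stop();
            recorder.release();
        }
    }
}).start();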

MyDataHandler.java

OutputStream dataStream;

public MyDataHandler(OutputStream writeTo) {
    dataStream = writeTo;
}
public void handleData(byte[] incomingData) {
    try {
        dataStream.write(incomingData, 0, incomingData.length);
        dataStream.flush();
    } catch (Exception e) {
        Log.e("MyDataHandler", e.getMessage(), e);
    }
}
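
Note that android_AudioIn() in the native code below fills a float buffer scaled into [-1.0, 1.0] by CONVMYFLT, while a raw 16-bit PCM content type expects signed 16-bit integers. If raw float bytes ever reach this handler, they would need converting first; a hypothetical variant of handleData (assuming the JNI layer were changed to hand over every byte of the float buffer; requires java.nio.ByteBuffer and java.nio.ByteOrder):

// Hypothetical alternative handleData() (not in the original post): converts
// raw 32-bit float samples in [-1.0, 1.0] into the signed 16-bit
// little-endian PCM that a raw "rate=44100" content type implies.
public void handleData(byte[] incomingData) {
    ByteBuffer in = ByteBuffer.wrap(incomingData).order(ByteOrder.LITTLE_ENDIAN);
    ByteBuffer out = ByteBuffer.allocate(incomingData.length / 2).order(ByteOrder.LITTLE_ENDIAN);
    while (in.remaining() >= 4) {
        float sample = in.getFloat();
        // clamp to [-1, 1], then scale to the 16-bit range
        float clamped = Math.max(-1f, Math.min(1f, sample));
        out.putShort((short) (clamped * 32767f));
    }
    try {
        dataStream.write(out.array(), 0, out.position());
        dataStream.flush();
    } catch (Exception e) {
        Log.e("MyDataHandler", e.getMessage(), e);
    }
}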

MainSystem.cpp

OPENSL_STREAM *p;
int samps, i, j;
float inbuffer[VECSAMPS_MONO], outbuffer[VECSAMPS_STEREO];

static int on;
JNIEXPORT void JNICALL
start_process() {
    p = android_OpenAudioDevice(SR, 1, 1, BUFFERFRAMES);
    if (p == NULL) return;
}

JNIEXPORT void JNICALL
stop_process() {
    on = 0;
}

JNIEXPORT void JNICALL
getAudioDataWithHandler(JNIEnv *env, jclass clazz, jobject obj) {

    if (p != NULL) {
        samps = android_AudioIn(p, inbuffer, VECSAMPS_MONO);

        // NOTE: samps counts float samples, while NewByteArray(samps)
        // allocates samps *bytes*, so only the first samps/4 floats are
        // copied here, reinterpreted as raw bytes rather than 16-bit PCM
        jbyteArray audioDataCopy = env->NewByteArray(samps);
        env->SetByteArrayRegion(audioDataCopy, 0, samps, (jbyte *) inbuffer);

        jmethodID aMethodId = env->GetMethodID(env->GetObjectClass(obj), "handleData", "([B)V");
        assert(0 != aMethodId);
        env->CallVoidMethod(obj, aMethodId, audioDataCopy);
    } else {
        __android_log_print(ANDROID_LOG_ERROR, "s", "NULL");
    }
}
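
The Java-side native declarations are not shown in the post, and the JNI names above lack the usual Java_... prefix, so they are presumably registered through RegisterNatives. Hypothetical declarations matching the signatures above:

// Hypothetical Java-side declarations (not shown in the original post);
// the jobject parameter corresponds to the MyDataHandler instance.
public static native void start_process();
public static native void stop_process();
public static native void getAudioDataWithHandler(MyDataHandler handler);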

OpenSLSystem.cpp

static void *createThreadLock(void);
static void  waitThreadLock(void *lock);
static void  notifyThreadLock(void *lock);
static void  destroyThreadLock(void *lock);
static void  bqRecorderCallback(SLAndroidSimpleBufferQueueItf bq, void *context);

static void openSLCreateEngine(OPENSL_STREAM *p) {
    SLresult result;

    result = slCreateEngine(&(p->engineObject), 0, NULL, 0, NULL, NULL);
    assert(result == SL_RESULT_SUCCESS);

    result = (*p->engineObject)->Realize(p->engineObject, SL_BOOLEAN_FALSE);
    assert(result == SL_RESULT_SUCCESS);

    result = (*p->engineObject)->GetInterface(p->engineObject, SL_IID_ENGINE, &(p->engineEngine));
    assert(result == SL_RESULT_SUCCESS);
}

static void openSLRecOpen(OPENSL_STREAM *p) {

    SLresult result;
    SLuint32 sr = p->sr;
    SLuint32 channels = p->inchannels;

    if (channels) {
        switch (sr) {
            case 8000:
                sr = SL_SAMPLINGRATE_8;
                break;
            case 11025:
                sr = SL_SAMPLINGRATE_11_025;
                break;
            case 16000:
                sr = SL_SAMPLINGRATE_16;
                break;
            case 22050:
                sr = SL_SAMPLINGRATE_22_05;
                break;
            case 24000:
                sr = SL_SAMPLINGRATE_24;
                break;
            case 32000:
                sr = SL_SAMPLINGRATE_32;
                break;
            case 44100:
                sr = SL_SAMPLINGRATE_44_1;
                break;
            case 64000:
                sr = SL_SAMPLINGRATE_64;
                break;
            case 88200:
                sr = SL_SAMPLINGRATE_88_2;
                break;
            case 96000:
                sr = SL_SAMPLINGRATE_96;
                break;
            case 192000:
                sr = SL_SAMPLINGRATE_192;
                break;
            default:
                return;
        }

        SLDataLocator_IODevice loc_dev = {SL_DATALOCATOR_IODEVICE, SL_IODEVICE_AUDIOINPUT,
                                          SL_DEFAULTDEVICEID_AUDIOINPUT, NULL};
        SLDataSource audioSrc = {&loc_dev, NULL};

        SLuint32 speakers;
        if (channels > 1)
            speakers = SL_SPEAKER_FRONT_LEFT | SL_SPEAKER_FRONT_RIGHT;
        else
            speakers = SL_SPEAKER_FRONT_CENTER;
        SLDataLocator_AndroidSimpleBufferQueue loc_bq = {SL_DATALOCATOR_ANDROIDSIMPLEBUFFERQUEUE, 2};
        // NOTE: both sample-depth fields request 8-bit samples, although the
        // capture buffers (and the Enqueue size in android_AudioIn) are based
        // on 16-bit shorts
        SLDataFormat_PCM format_pcm = {SL_DATAFORMAT_PCM, channels, sr,
                                       SL_PCMSAMPLEFORMAT_FIXED_8, SL_PCMSAMPLEFORMAT_FIXED_8,
                                       speakers, SL_BYTEORDER_LITTLEENDIAN};
        SLDataSink audioSnk = {&loc_bq, &format_pcm};

        const SLInterfaceID id[1] = {SL_IID_ANDROIDSIMPLEBUFFERQUEUE};
        const SLboolean req[1] = {SL_BOOLEAN_FALSE};
        result = (*p->engineEngine)->CreateAudioRecorder(p->engineEngine, &(p->recorderObject), &audioSrc,
                                                         &audioSnk, 1, id, req);
        assert(result == SL_RESULT_SUCCESS);

        result = (*p->recorderObject)->Realize(p->recorderObject, SL_BOOLEAN_FALSE);
        assert(result == SL_RESULT_SUCCESS);

        result = (*p->recorderObject)->GetInterface(p->recorderObject, SL_IID_RECORD, &(p->recorderRecord));
        assert(result == SL_RESULT_SUCCESS);

        result = (*p->recorderObject)->GetInterface(p->recorderObject, SL_IID_ANDROIDSIMPLEBUFFERQUEUE,
                                                    &(p->recorderBufferQueue));
        assert(result == SL_RESULT_SUCCESS);

        result = (*p->recorderBufferQueue)->RegisterCallback(p->recorderBufferQueue, bqRecorderCallback, p);
        assert(result == SL_RESULT_SUCCESS);

        result = (*p->recorderRecord)->SetRecordState(p->recorderRecord, SL_RECORDSTATE_RECORDING);
        assert(result == SL_RESULT_SUCCESS);
    }
}

static void openSLDestroyEngine(OPENSL_STREAM *p) {
    if (p->recorderObject != NULL) {
        (*p->recorderObject)->Destroy(p->recorderObject);
        p->recorderObject = NULL;
        p->recorderRecord = NULL;
        p->recorderBufferQueue = NULL;
    }

    if (p->engineObject != NULL) {
        (*p->engineObject)->Destroy(p->engineObject);
        p->engineObject = NULL;
        p->engineEngine = NULL;
    }
}
OPENSL_STREAM *android_OpenAudioDevice(int sr, int inchannels, int outchannels, int bufferframes) {
    OPENSL_STREAM *p;
    p = (OPENSL_STREAM *) calloc(sizeof(OPENSL_STREAM), 1);

    p->inchannels = inchannels;
    p->outchannels = outchannels;
    p->sr = sr;
    p->inlock = createThreadLock();
    p->outlock = createThreadLock();

    if ((p->outBufSamples = bufferframes * outchannels) != 0) {
        if ((p->outputBuffer[0] = (short *) calloc(p->outBufSamples, sizeof(short))) == NULL ||
                (p->outputBuffer[1] = (short *) calloc(p->outBufSamples, sizeof(short))) == NULL) {
            android_CloseAudioDevice(p);
            return NULL;
        }
    }

    if ((p->inBufSamples = bufferframes * inchannels) != 0) {
        if ((p->inputBuffer[0] = (short *) calloc(p->inBufSamples, sizeof(short))) == NULL ||
                (p->inputBuffer[1] = (short *) calloc(p->inBufSamples, sizeof(short))) == NULL) {
            android_CloseAudioDevice(p);
            return NULL;
        }
    }

    p->currentInputIndex = 0;
    p->currentOutputBuffer = 0;
    p->currentInputIndex = p->inBufSamples;
    p->currentInputBuffer = 0;

    openSLCreateEngine(p);
    openSLRecOpen(p);

    notifyThreadLock(p->outlock);
    notifyThreadLock(p->inlock);

    p->time = 0.;
    return p;
}

void android_CloseAudioDevice(OPENSL_STREAM *p) {
    if (p == NULL)
        return;

    openSLDestroyEngine(p);

    if (p->inlock != NULL) {
        notifyThreadLock(p->inlock);
        destroyThreadLock(p->inlock);
        p->inlock = NULL;
    }

    if (p->outlock != NULL) {
        notifyThreadLock(p->outlock);
        destroyThreadLock(p->outlock);
        p->outlock = NULL;
    }

    if (p->outputBuffer[0] != NULL) {
        free(p->outputBuffer[0]);
        p->outputBuffer[0] = NULL;
    }

    if (p->outputBuffer[1] != NULL) {
        free(p->outputBuffer[1]);
        p->outputBuffer[1] = NULL;
    }

    if (p->inputBuffer[0] != NULL) {
        free(p->inputBuffer[0]);
        p->inputBuffer[0] = NULL;
    }

    if (p->inputBuffer[1] != NULL) {
        free(p->inputBuffer[1]);
        p->inputBuffer[1] = NULL;
    }

    free(p);
}

void bqRecorderCallback(SLAndroidSimpleBufferQueueItf bq, void *context) {
    OPENSL_STREAM *p = (OPENSL_STREAM *) context;
    notifyThreadLock(p->inlock);
}

int android_AudioIn(OPENSL_STREAM *p, float *buffer, int size) {
    short *inBuffer;
    int i, bufsamps, index;
    // check p for NULL before reading its fields
    if (p == NULL || p->inBufSamples == 0) return 0;
    bufsamps = p->inBufSamples;
    index = p->currentInputIndex;

    inBuffer = p->inputBuffer[p->currentInputBuffer];
    for (i = 0; i < size; i++) {
        if (index >= bufsamps) {
            // current buffer fully consumed: block until the recorder
            // callback signals a filled buffer, re-enqueue this one, and swap
            waitThreadLock(p->inlock);
            (*p->recorderBufferQueue)->Enqueue(p->recorderBufferQueue,
                                               inBuffer, bufsamps * sizeof(short));
            p->currentInputBuffer = (p->currentInputBuffer ? 0 : 1);
            index = 0;
            inBuffer = p->inputBuffer[p->currentInputBuffer];
        }
        buffer[i] = (float) inBuffer[index++] * CONVMYFLT; // scale shorts into [-1, 1]
    }
    p->currentInputIndex = index;
    if (p->outchannels == 0) p->time += (double) size / (p->sr * p->inchannels);
    return i;
}
void *createThreadLock(void) {
    threadLock *p;
    p = (threadLock *) malloc(sizeof(threadLock));
    if (p == NULL)
        return NULL;
    memset(p, 0, sizeof(threadLock));
    if (pthread_mutex_init(&(p->m), (pthread_mutexattr_t *) NULL) != 0) {
        free((void *) p);
        return NULL;
    }
    if (pthread_cond_init(&(p->c), (pthread_condattr_t *) NULL) != 0) {
        pthread_mutex_destroy(&(p->m));
        free((void *) p);
        return NULL;
    }
    p->s = (unsigned char) 1;

    return p;
}


void waitThreadLock(void *lock) {
    threadLock *p;
    int __unused retval = 0;
    p = (threadLock *) lock;
    pthread_mutex_lock(&(p->m));
    while (!p->s) {
        pthread_cond_wait(&(p->c), &(p->m));
    }
    p->s = (unsigned char) 0;
    pthread_mutex_unlock(&(p->m));
}
void notifyThreadLock(void *lock) {
    threadLock *p;
    p = (threadLock *) lock;
    pthread_mutex_lock(&(p->m));
    p->s = (unsigned char) 1;
    pthread_cond_signal(&(p->c));
    pthread_mutex_unlock(&(p->m));
}

void destroyThreadLock(void *lock) {
    threadLock *p;
    p = (threadLock *) lock;
    if (p == NULL)
        return;
    notifyThreadLock(p);
    pthread_cond_destroy(&(p->c));
    pthread_mutex_destroy(&(p->m));
    free(p);
}

Update01

So I played the data back through my device's speaker, and it just played a bunch of clicking sounds, so audio data is definitely coming through.
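
For what it's worth, clicking noise is what misinterpreted sample formats tend to sound like. A hypothetical way to audition exactly the bytes Watson would receive, reading the pipe as 16-bit 44.1 kHz mono PCM instead of handing it to the recognizer:

// Hypothetical debugging aid (not in the original post): if this also
// produces clicks, the bytes in the pipe are not valid 16-bit PCM.
int minBuf = AudioTrack.getMinBufferSize(44100,
        AudioFormat.CHANNEL_OUT_MONO, AudioFormat.ENCODING_PCM_16BIT);
AudioTrack track = new AudioTrack(AudioManager.STREAM_MUSIC, 44100,
        AudioFormat.CHANNEL_OUT_MONO, AudioFormat.ENCODING_PCM_16BIT,
        minBuf, AudioTrack.MODE_STREAM);
track.play();
try {
    byte[] chunk = new byte[4096];
    int n;
    while ((n = pipedInputStream.read(chunk)) > 0) {
        track.write(chunk, 0, n);
    }
} catch (IOException e) {
    Log.e("MainActivity", e.getMessage(), e);
}
track.stop();
track.release();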

Update02

I printed out the array while reading the audio data, and the same buffer is printed every time. So my guess is that, because of the way my system is structured, it is for some reason never getting a fresh buffer from the microphone.
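
One cheap way to confirm the repeated-buffer observation from the Java side would be to log a digest of every chunk as it enters the handler (hypothetical tweak, not in the original code):

// Hypothetical check: identical hashes on consecutive log lines would
// confirm that the same buffer is being delivered repeatedly.
public void handleData(byte[] incomingData) {
    Log.d("MyDataHandler", "chunk len=" + incomingData.length
            + " hash=" + java.util.Arrays.hashCode(incomingData));
    try {
        dataStream.write(incomingData, 0, incomingData.length);
        dataStream.flush();
    } catch (Exception e) {
        Log.e("MyDataHandler", e.getMessage(), e);
    }
}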

0 answers

No answers yet.