如何让 Android 上的 pocketsphinx 只识别大声说出的声音(单词)

时间:2016-03-16 16:25:51

标签: java android pocketsphinx-android

我正在开发一个Android应用程序,它必须一直监听声音,并捕捉一个关键词,例如“help”。我现在使用MediaRecorder来获取振幅,如果声音足够响(例如振幅超过20000),就调用pocketsphinx的SpeechRecognizer。问题是,当语音识别器捕获到(或没有捕获到)关键词之后,我无法切换回MediaRecorder,应用程序会崩溃。我的应用程序必须在后台全天候(每天24小时)运行,所以我把实现放在了Service中,MediaRecorder运行在单独的线程里。我知道pocketsphinx也可以检测振幅(尖叫声),但具体该怎么做?用pocketsphinx获取振幅来触发语音识别器,是不是更好的方案?下面是我的类,非常感谢您的帮助。

/**
 * Returns {@code null} for every bind request, i.e. no binder interface is exposed.
 *
 * @param intent the intent supplied by the binding client (not used)
 * @return always {@code null}
 */
@Override
public IBinder onBind(Intent intent) {
    return null;
}

/**
 * Handler attached to the looper it is given. Each message it receives sets the
 * recording output path and then runs the amplitude-monitoring routine.
 */
private final class ServiceHandler extends Handler {

    public ServiceHandler(Looper looper) {
        super(looper);
    }

    /**
     * Points {@code outputFile} at {@code record.3gp} under the external storage
     * root and calls {@code getVoiceRecord()}. The message content is not inspected.
     */
    @Override
    public void handleMessage(Message msg) {
        String storageRoot = Environment.getExternalStorageDirectory().getAbsolutePath();
        outputFile = storageRoot + "/record.3gp";
        getVoiceRecord();
    }
}
/**
 * Starts a background-priority {@link HandlerThread} and creates the
 * {@code ServiceHandler} on that thread's looper.
 */
@Override
public void onCreate() {
    HandlerThread worker = new HandlerThread(
            "ServiceStartArguments", Process.THREAD_PRIORITY_BACKGROUND);
    thread = worker;
    worker.start();

    // The handler is built on the worker's looper, so its messages are
    // handled on the worker thread rather than on the caller's thread.
    serviceLooper = worker.getLooper();
    serviceHandler = new ServiceHandler(serviceLooper);
}
/**
 * Posts one message carrying {@code startId} to the service handler, which
 * triggers the monitoring routine on the handler's thread.
 *
 * @param intent  the start intent (not used)
 * @param flags   start flags (not used)
 * @param startId identifier of this start request, stored in {@code Message.arg1}
 * @return {@link Service#START_NOT_STICKY}
 */
@Override
public int onStartCommand(Intent intent, int flags, int startId) {
    Log.d("tag", "on start command");

    Message startRequest = serviceHandler.obtainMessage();
    startRequest.arg1 = startId;
    serviceHandler.sendMessage(startRequest);

    // TODO do something useful; change to START_STICKY
    return Service.START_NOT_STICKY;
}

/**
 * Records from the microphone and polls the peak amplitude until a loud sound is
 * heard, then stops the recorder and hands over to speech recognition.
 *
 * <p>The method blocks the calling thread while it is monitoring and returns when
 * <ul>
 *   <li>a sample exceeds the scream threshold (after calling {@code getSpeech()}),</li>
 *   <li>the recorder could not be (re)started, i.e. {@code recorder} is {@code null}, or</li>
 *   <li>the thread is interrupted.</li>
 * </ul>
 *
 * <p>Fixes over the previous version: the loop used to keep spinning forever with a
 * {@code null} recorder after a scream was detected, and the periodic restart
 * dereferenced {@code recorder} inside an {@code if (recorder == null)} branch.
 */
private void getVoiceRecord() {
    final int screamThreshold = 20000;       // getMaxAmplitude() value treated as a scream
    final long restartAfterMillis = 50000;   // recording is restarted once it has run this long
    final long pollIntervalMillis = 100;     // pause between amplitude samples

    startRecorder();
    start = System.currentTimeMillis();
    Log.d("tag", "Time started at " + start);

    while (recorder != null) {
        amplitude = recorder.getMaxAmplitude();
        if (amplitude > screamThreshold) {
            Toast.makeText(getApplicationContext(), "Scream detected",
                    Toast.LENGTH_LONG).show();
            Log.d("tag", "Scream detected " + 20 * Math.log10(amplitude) + " amplitude: " + amplitude);
            // Release the microphone before the recognizer needs it.
            stopRecorder();
            Log.d("tag", "Finish recording");
            getSpeech();
            // Monitoring is over for now; leave the loop so this thread is free again.
            return;
        }

        finish = System.currentTimeMillis();
        if (finish - start > restartAfterMillis) {
            // Restart the recording periodically (presumably to keep the output
            // file from growing without bound - TODO confirm).
            stopRecorder();
            Log.d("tag", "Finish recording");
            startRecorder();
            start = System.currentTimeMillis();
        }

        try {
            // Sleep between samples instead of busy-spinning on getMaxAmplitude().
            Thread.sleep(pollIntervalMillis);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            stopRecorder();
            return;
        }
    }
}

/**
 * Makes sure a speech recognizer exists and (re)starts listening on the "menu" search.
 *
 * <p>The recognizer is created only once and then reused: building a new one on
 * every call would leave the previous instance (and its audio resources) without
 * a {@code shutdown()}. If the assets cannot be synced or the recognizer cannot be
 * built, the failure is logged and listening is not started, instead of the
 * exception being swallowed and {@code reset()} then running on a {@code null}
 * recognizer.
 */
private void getSpeech() {
    if (recognizer == null) {
        try {
            Assets assets = new Assets(ScreamService.this);
            File assetDir = assets.syncAssets();
            setupRecognizer(assetDir);
        } catch (IOException e) {
            Log.e("tag", "Speech recognizer setup failed", e);
            return;
        }
    }
    reset();
}

/**
 * Stops the active {@code MediaRecorder}, if any, and always releases it.
 *
 * <p>{@code MediaRecorder.stop()} may throw (an {@code IllegalStateException} when
 * the recorder was not started, or a {@code RuntimeException} when no valid data
 * was captured). In every case the recorder is released and the field is cleared,
 * so the microphone is freed and callers can rely on {@code recorder == null}
 * afterwards. Calling this method when no recorder exists is a no-op.
 */
private void stopRecorder() {
    if (recorder == null) {
        return;
    }
    try {
        recorder.stop();
        Log.d("tag", "Stop recording");
    } catch (RuntimeException e) {
        // Covers IllegalStateException as well; a failed stop() must not prevent release().
        Log.e("tag", "Media Recorder did not stop " + e, e);
    } finally {
        recorder.release();
        recorder = null;
    }
}

/**
 * Creates a {@code MediaRecorder} that records the microphone as AMR-NB audio in a
 * 3GPP container to {@code outputFile}, and starts it.
 *
 * <p>On success {@code recorder} refers to the running recorder. If preparation or
 * start fails, the half-configured recorder is released and {@code recorder} is set
 * to {@code null}, so callers can tell success from failure with a null check
 * instead of operating on an unusable object.
 */
private void startRecorder() {
    Log.d("tag", "Start recording... ");

    // Never overwrite a live recorder without releasing it first.
    if (recorder != null) {
        recorder.release();
        recorder = null;
    }

    boolean started = false;
    try {
        recorder = new MediaRecorder();
        recorder.setAudioSource(MediaRecorder.AudioSource.MIC);
        recorder.setOutputFormat(MediaRecorder.OutputFormat.THREE_GPP);
        recorder.setAudioEncoder(MediaRecorder.AudioEncoder.AMR_NB);
        recorder.setOutputFile(outputFile);
        recorder.prepare();
        recorder.start();
        started = true;
    } catch (IOException e) {
        Log.e("tag", "Media Recorder did not start IOExeption " + e, e);
    } catch (IllegalStateException e) {
        Log.e("tag", "Media Recorder did not start Ilegal State Trace" + e, e);
    } finally {
        if (!started && recorder != null) {
            recorder.release();
            recorder = null;
        }
    }
}

/**
 * Tears the service down: stops the worker thread from handling further messages
 * and shuts the speech recognizer down if one was ever created.
 */
@Override
public void onDestroy() {
    super.onDestroy();

    if (thread != null) {
        // Drop pending messages and cut short any interruptible wait on the worker.
        thread.quit();
        thread.interrupt();
    }

    // The recognizer only exists after the first loud sound was detected.
    if (recognizer != null) {
        recognizer.cancel();
        recognizer.shutdown();
    }

    Toast.makeText(this, "Scream Service & recognizer Stopped.", Toast.LENGTH_SHORT).show();
}

/**
 * Recognizer callback for intermediate hypotheses; intentionally does nothing.
 *
 * @param hypothesis the partial hypothesis (ignored)
 */
@Override
public void onPartialResult(Hypothesis hypothesis) {
}

/**
 * This callback is called when we stop the recognizer.
 *
 * <p>When the recognized text is "help" or "help me", recognition is stopped, the
 * {@code SendMessage} service is started and amplitude monitoring is requested
 * again. The request is posted to {@code serviceHandler} rather than calling
 * {@code getVoiceRecord()} directly: that method blocks while it monitors, and
 * running it inside a recognizer callback would freeze the thread that delivers
 * the callbacks.
 *
 * @param hypothesis the final hypothesis, or {@code null} if nothing was recognized
 */
@Override
public void onResult(Hypothesis hypothesis) {
    if (hypothesis == null) {
        Log.d("tag", "onResult is null");
        return;
    }

    String text = hypothesis.getHypstr();
    Log.d("tag", "onResult " + text);

    // Constant-first equals keeps this safe should the hypothesis string be null.
    if ("help".equals(text) || "help me".equals(text)) {
        recognizer.stop();
        recognizer.cancel();
        this.startService(new Intent(this, SendMessage.class));
        // Resume monitoring on the handler's thread, not on the callback thread.
        serviceHandler.sendMessage(serviceHandler.obtainMessage());
    }
}

/**
 * Recognizer callback for the start of an utterance; intentionally does nothing.
 */
@Override
public void onBeginningOfSpeech() {
    //Log.d("tag", "onBeginningOfSpeech ");
}

/**
 * Recognizer callback for the end of an utterance.
 *
 * <p>Up to five utterances simply restart listening. On the sixth, recognition is
 * cancelled, the counter is cleared and amplitude monitoring is requested again.
 * The request is posted to {@code serviceHandler} rather than calling
 * {@code getVoiceRecord()} directly: that method blocks while it monitors, and
 * running it inside a recognizer callback would freeze the thread that delivers
 * the callbacks (and the log line below would never be reached).
 */
@Override
public void onEndOfSpeech() {
    counter++;
    if (counter > 5) {
        recognizer.cancel();
        counter = 0;
        Log.d("tag", "Speech recognizer is killed");
        // Resume monitoring on the handler's thread, not on the callback thread.
        serviceHandler.sendMessage(serviceHandler.obtainMessage());
    } else {
        reset();
    }
}

/**
 * Restarts recognition: stops the recognizer and starts listening again on the
 * search named "menu" (the grammar search registered in {@code setupRecognizer}).
 */
private void reset() {
    recognizer.stop();
    recognizer.startListening("menu");
}

/**
 * Builds the speech recognizer from the synced asset directory, registers this
 * object as its listener and adds the grammar search named "menu".
 *
 * @param assetsDir directory that contains the acoustic model ("en-us-ptm"), the
 *                  dictionary ("cmudict-en-us.dict") and the grammar ("menu.gram")
 * @throws IOException declared by this method; presumably raised when the
 *                     recognizer cannot be created from the given files
 */
private void setupRecognizer(File assetsDir) throws IOException {
    Log.d("tag", "default setup");
    recognizer = defaultSetup()
            .setAcousticModel(new File(assetsDir, "en-us-ptm"))
            .setDictionary(new File(assetsDir, "cmudict-en-us.dict"))
    // To disable logging of raw audio comment out this call (takes a lot of space on the device)
            .setRawLogDir(assetsDir)
    // Threshold to tune for keyphrase to balance between false alarms and misses
            .setKeywordThreshold(1e-45f)
    // Use context-independent phonetic search, context-dependent is too slow for mobile
            .setBoolean("-allphone_ci", true)
            .getRecognizer();
    recognizer.addListener(this);

    // Create the grammar-based search; its name "menu" is the one passed to startListening()
    File menuGrammar = new File(assetsDir, "menu.gram");
    recognizer.addGrammarSearch("menu", menuGrammar);
}

/**
 * Recognizer error callback; only logs the error message.
 *
 * @param error the exception reported by the recognizer
 */
@Override
public void onError(Exception error) {
    Log.d("tag", "error "+error.getMessage());
}

/**
 * Recognizer timeout callback; only logs that the timeout occurred.
 */
@Override
public void onTimeout() {
    Log.d("tag", "onTimeout");
}

1 个答案:

答案 0 :(得分:0)

您可以修改pocketsphinx的源码,在把录制的音频数据传给识别器之前,先计算其振幅。在SpeechRecognizer.java的RecognizerThread类中:

    .........
    while (!interrupted()
            && ((timeoutSamples == NO_TIMEOUT) || (remainingSamples > 0))) {
        int nread = recorder.read(buffer, 0, buffer.length);

        if (-1 == nread) {
            throw new RuntimeException("error reading audio buffer");
        } else if (nread > 0) {

            // int max = 0;
            // for (int i = 0; i < nread; i++) {
            //     max = Math.max(max, Math.abs(buffer[i]));
            // }
            // Log.e("!!!!!!!!", "Level is: " + max);
            // You can decide to skip buffer here

            decoder.processRaw(buffer, nread, false, false);
    ......