语音识别onBeginningOfSpeech和onEndOfSpeech执行没有任何时间间隔

时间:2016-09-19 19:36:26

标签: android speech-recognition

我正在尝试将语音识别内置到我的应用程序中,并且我看到onBeginningOfSpeech和onEndOfSpeech在1秒内相继被触发。

另外,当我说完话、稍作停顿后,语音识别会立即结束。通常情况下,ASR 在停止语音识别之前会等待大约3-5秒。

即使在手机上的其他应用程序中,此代码实际上也会破坏其余的语音识别功能。

有人遇到过类似的情况吗?

这就是我的代码的样子。

这是我服务的onCreate方法。我正在使用服务来调用语音识别

/**
 * Builds the reusable recognizer intent once at service creation:
 * free-form language model, streaming partial results, and dictation mode.
 */
@Override
public void onCreate() {
    super.onCreate();
    createSR();
    final Intent recognizerIntent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
    recognizerIntent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL,
            RecognizerIntent.LANGUAGE_MODEL_FREE_FORM);
    recognizerIntent.putExtra(RecognizerIntent.EXTRA_PARTIAL_RESULTS, true);
    // NOTE(review): undocumented Google extra — appears to keep the session
    // open for long dictation; confirm on target recognizer implementations.
    recognizerIntent.putExtra("android.speech.extra.DICTATION_MODE", true);
    recognizerIntent.putExtra(RecognizerIntent.EXTRA_CALLING_PACKAGE, getPackageName());
    mSpeechRecognizerIntent = recognizerIntent;
}

这是识别监听器代码。

/**
 * Listener for Android SpeechRecognizer callbacks used by the enclosing ASR
 * service. Relays recognizer events to bound clients through Messenger
 * messages and stitches streaming partial results into one running sentence.
 *
 * NOTE(review): depends on outer-class members not visible in this chunk
 * (mIsListening, mServerMessenger, partialResult, partialBundleData,
 * partialResultsTimer, timeoutTaskRunner, timerRunning, the MSG_RECOGNIZER_*
 * constants, sendMessageToClients(), cancelSR(), startTimer()) — verify
 * against the full service class.
 */
protected class SpeechRecognitionListener implements RecognitionListener {

    private static final String TAG = "SRecognitionListener";
    // True while onRmsChanged() considers the user to be actively speaking.
    private boolean isUserSpeaking;
    // Millis timestamp of the most recent loud audio frame; -1 until speech is first heard.
    private long userSpokeAt=-1;
    // Millis timestamp at which the user was last judged silent; -1 while speaking.
    private long userStoppedSpeakingAt = -1;
    // Reset in onReadyForSpeech(); otherwise unused within this listener.
    private String completeSegment;
    // The partial-result segment the recognizer is currently refining.
    private String recognizingSegment;
    // Segments considered finished; merged in front of the current segment.
    private ArrayList<String> recognizedSegments;

    /** Recognizer signaled start of speech. Only logged; speaking state is tracked in onRmsChanged(). */
    @Override
    public void onBeginningOfSpeech() {
        Log.d(TAG, "onBeginingOfSpeech"); //$NON-NLS-1$
    }

    /** Raw audio buffers are ignored. */
    @Override
    public void onBufferReceived(byte[] buffer) {

    }

    /** Recognizer signaled end of speech. Only logged. */
    @Override
    public void onEndOfSpeech() {
        Log.d(TAG, "onEndOfSpeech"); //$NON-NLS-1$
    }

    /**
     * Recognition error. ERROR_NO_MATCH is ignored; any other error marks the
     * service as not listening and asks the service Messenger to restart
     * listening.
     */
    @Override
    public void onError(int error) {
        Log.d(TAG, "onError: " + error);
        if (error == SpeechRecognizer.ERROR_NO_MATCH) {
            return;
        }

        mIsListening = false;
        Message message = Message.obtain(null, MSG_RECOGNIZER_START_LISTENING);
        try {
            mServerMessenger.send(message);
        } catch (RemoteException e) {
            // NOTE(review): swallowed — the restart request is silently lost
            // if the Messenger is dead; consider logging.
        }
        Log.d(TAG, "error = " + error); //$NON-NLS-1$
    }

    /** Recognizer-specific events are ignored. */
    @Override
    public void onEvent(int eventType, Bundle params) {

    }

    /* TODO
    * There needs to be a boolean variable that would make sure that the translated message from the partialResults would be a fresh message by refreshing the entire data in the bundle data
    * Replace the recognizingSegment to have an empty string before doing anything
    * */
    /**
     * Streams partial hypotheses to clients. Each new top hypothesis is
     * compared against the segment currently being recognized: when the
     * leading words match (see doWordsMatch) it replaces that segment,
     * otherwise the old segment is archived in recognizedSegments and a new
     * one begins. The merged sentence is stored in the bundle under the key
     * "PartialSentence" and forwarded as MSG_RECOGNIZER_PART_RESULT.
     */
    @Override
    public void onPartialResults(Bundle partialResults) {
        ArrayList<String> matches = partialResults.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION);
        if (matches != null && matches.size() != 0) {
            Log.d(TAG, "onPartialResults: " + matches.get(0));
            partialBundleData = partialResults;
            String nextPartResult=matches.get(0);

            if (!recognizingSegment.equals("")){
                String[] nextPartWords = nextPartResult.split(" ");
                String[] recWords;
                String previousSegments="";
                recWords = recognizingSegment.split(" "); //The last recognized segment
                if (recognizedSegments.size()>0){
                    previousSegments=mergeSegments(recognizedSegments);
                }
                // Length heuristic: the new hypothesis is long enough (within
                // two words) to be a refinement of the current segment.
                if (nextPartWords.length+2>=recWords.length){    //Most definitely the same segment
                    Log.d(TAG, "onPartialResults: matching "+recognizingSegment+" with "+nextPartResult);
                    if (doWordsMatch(recWords,nextPartWords)) { //Since the words match this is probably the same segment
                        recognizingSegment = nextPartResult;
                        partialResult = previousSegments + " " + recognizingSegment;
                        Log.d(TAG, "onPartialResults: Same segment - " + partialResult);
                        partialResults.putString("PartialSentence", partialResult);
                    }else{  //Since the words don't match this is probably a new segment
                        recognizedSegments.add(recognizingSegment);
                        partialResult=previousSegments+" "+recognizingSegment+" "+nextPartResult;
                        Log.d(TAG, "onPartialResults: New segment - " + partialResult);
                        partialResults.putString("PartialSentence",partialResult);
                        recognizingSegment=nextPartResult;
                    }
                }else{  //This must be a new segment
                    Log.d(TAG, "onPartialResults: matching "+recognizingSegment+" with "+nextPartResult);
                    if (!doWordsMatch(recWords, nextPartWords)) {   //Since the words don't match this is probably a new segment
                        recognizedSegments.add(recognizingSegment);
                        partialResult = previousSegments + " " + recognizingSegment + " " + nextPartResult;
                        Log.d(TAG, "onPartialResults: New segment - " + partialResult);
                        partialResults.putString("PartialSentence", partialResult);
                        recognizingSegment = nextPartResult;
                    }else{  //Since the words match this is probably the same segment
                        recognizingSegment = nextPartResult;
                        partialResult = previousSegments + " " + recognizingSegment;
                        Log.d(TAG, "onPartialResults: Same segment - " + partialResult);
                        partialResults.putString("PartialSentence", partialResult);
                    }
                }
            }else{
                // First hypothesis of the utterance — nothing to merge yet.
                partialResult=nextPartResult;
                Log.d(TAG, "onPartialResults: First segment - " + partialResult);
                recognizingSegment=nextPartResult;
                partialResults.putString("PartialSentence",nextPartResult);
            }
            Message message = new Message();
            message.what = ASRService.MSG_RECOGNIZER_PART_RESULT;

            message.setData(partialResults);
            sendMessageToClients(message);
        } else {
            Log.d(TAG, "onPartialResults: No Results");
        }
    }

    /**
     * Heuristic segment comparison: looks at the first few words (up to 3,
     * capped by the shorter phrase) and reports a match when at least two of
     * them agree, or when all of the compared words agree (covers phrases
     * shorter than two words).
     */
    private boolean doWordsMatch(String[] phraseA, String[] phraseB){
        int noOfWordsToMatch=3;
        if (phraseA.length<noOfWordsToMatch){
            noOfWordsToMatch=phraseA.length;
        }
        if (phraseB.length<noOfWordsToMatch){
            noOfWordsToMatch=phraseB.length;
        }
        boolean wordsMatch=false;
        int noOfMatchingWords=0;
        for (int i=0; i<noOfWordsToMatch; i++){
            if (phraseA[i].equals(phraseB[i])){
                noOfMatchingWords++;
            }
        }
        Log.d(TAG, "onPartialResults: noOfMatchingWords - "+noOfMatchingWords);
        if (noOfMatchingWords>=2 || noOfMatchingWords>=noOfWordsToMatch){
            wordsMatch=true;
        }
        return wordsMatch;
    }

    /** Joins the archived segments with single spaces (no trailing space). */
    private String mergeSegments(ArrayList<String> segments){
        StringBuilder mergedSegments=new StringBuilder();
        for (String segment: segments){
            mergedSegments.append(segment+" ");
        }
        return mergedSegments.toString().trim();
    }


    /** Recognizer is ready for audio: notify clients and reset per-utterance state. */
    @Override
    public void onReadyForSpeech(Bundle params) {
        Log.d(TAG, "onReadyForSpeech"); //$NON-NLS-1$
        Message message = new Message();
        message.what = ASRService.MSG_RECOGNIZER_STARTED_LISTENING;
        sendMessageToClients(message);
        userSpokeAt=-1;
        completeSegment ="";
        recognizingSegment="";
        recognizedSegments=new ArrayList<>();
    }

    /** Final results: forward the bundle as MSG_RECOGNIZER_RESULT, then cancel recognition. */
    @Override
    public void onResults(Bundle results) {
        ArrayList<String> matches = results.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION);
        if (matches != null && matches.size() != 0) {
            Log.d(TAG, "onResults: " + matches.get(0));
            Message message = new Message();
            message.what = ASRService.MSG_RECOGNIZER_RESULT;
            message.setData(results);
            sendMessageToClients(message);
        } else {
            Log.d(TAG, "onResults: No Results");
        }
        cancelSR();
    }

    /**
     * Custom speaking-state detector driven by audio level. RMS above 20 dB
     * counts as speech; 1700 ms below it counts as a pause. Speaking-state
     * transitions are broadcast as MSG_RECOGNIZER_USER_SPEAKING_STATE_CHANGED
     * (arg1: 1 = speaking, 0 = silent). The first loud frame also arms a 70 s
     * watchdog that cancels recognition outright.
     * NOTE(review): the 20 dB threshold and 1700 ms gap are magic numbers —
     * likely device-dependent; confirm on target hardware.
     */
    @Override
    public void onRmsChanged(float rmsdB) {

        if (rmsdB > 20) {
            if (userSpokeAt==-1) {   //The user spoke the first time
                partialResultsTimer = new AsyncTask<Void, Void, Void>() {
                    @Override
                    protected Void doInBackground(Void... params) {
                        try {
                            Thread.sleep(70000);     //We wait for a max duration of this time to cancel the speech recognition because the service automatically times out anyway.
                            partialResultsTimer=null;
                            cancelSR();
                        } catch (InterruptedException e) {
                            // NOTE(review): interrupt swallowed and thread not
                            // re-interrupted; acceptable only if cancellation
                            // is the sole interrupter.
                        }
                        return null;
                    }
                }.execute();
            }

            userSpokeAt = System.currentTimeMillis();
            if (!isUserSpeaking) {
                Log.d(TAG, "User started speaking");
                isUserSpeaking = true;
                if (userStoppedSpeakingAt != -1) {
                    long gap = userSpokeAt - userStoppedSpeakingAt;
                    Log.d(TAG, "User spoke after " + gap + " millis");
                }
                userStoppedSpeakingAt = -1;
                // Speech resumed — abort the pending silence-timeout timer.
                if (timeoutTaskRunner != null) {
                    Log.d(TAG, "Speech Recognition timer canceling");
                    timeoutTaskRunner.cancel();
                    timerRunning = false;
                }
                Message message = new Message();
                message.what = ASRService.MSG_RECOGNIZER_USER_SPEAKING_STATE_CHANGED;
                message.arg1 = 1; //1 means true
                sendMessageToClients(message);
            }


        } else if (isUserSpeaking) {
            long currentTimeMillis = System.currentTimeMillis();
            // Declare silence only after 1700 ms without a loud frame.
            if (currentTimeMillis - userSpokeAt > 1700) {
                isUserSpeaking = false;
                Log.d(TAG, "User isn't speaking after: " + (currentTimeMillis - userSpokeAt));
                userStoppedSpeakingAt = currentTimeMillis;
                startTimer();
                Message message = new Message();
                message.what = ASRService.MSG_RECOGNIZER_USER_SPEAKING_STATE_CHANGED;
                message.arg1 = 0; //0 means false
                sendMessageToClients(message);
            }
        }

    }


}

/** Hands bound clients the service Messenger's binder for sending commands. */
@Override
public IBinder onBind(Intent intent) {
    Log.d("ASRService", "onBind");  //$NON-NLS-1$
    final IBinder serviceBinder = mServerMessenger.getBinder();
    return serviceBinder;
}
}

0 个答案:

没有答案