如何使用 SpeechRecognition API 检测用户何时在说话?

时间:2017-03-11 20:13:56

标签: javascript webkit webspeech-api webkitspeechrecognition

我想在不停止识别的情况下,检测用户何时停顿、何时重新开始说话。下面是我目前的代码,它识别用户的语音并将识别出的文本输出到页面:



// Shared dictation state; read and written by the handlers below and by
// startDictation().
var recognizing = false;
var final_transcript;

// Only wire up recognition when the prefixed constructor is present on window.
if ('webkitSpeechRecognition' in window) {
  var recognition = new webkitSpeechRecognition();

  // Continuous mode, with interim (non-final) results delivered as well.
  recognition.interimResults = true;
  recognition.continuous = true;

  // Track whether a recognition session is currently running.
  recognition.onstart = () => {
    recognizing = true;
  };
  recognition.onend = () => {
    recognizing = false;
  };

  // Errors are only logged to the console.
  recognition.onerror = (event) => {
    console.log(event.error);
  };

  // Append finalized text to final_transcript, collect the still-changing
  // text separately, then render both into the page.
  recognition.onresult = (event) => {
    const { results } = event;
    let interimText = '';

    // Walk the results starting at event.resultIndex.
    for (let idx = event.resultIndex; idx < results.length; idx += 1) {
      const result = results[idx];
      const spoken = result[0].transcript;
      if (result.isFinal) {
        final_transcript += spoken;
      } else {
        interimText += spoken;
      }
    }

    final_transcript = capitalize(final_transcript);
    // final_span / interim_span are the elements with those ids in the page
    // markup, accessed here as globals.
    final_span.innerHTML = linebreak(final_transcript);
    interim_span.innerHTML = linebreak(interimText);
  };
}

// Patterns for two consecutive newlines and for a single newline.
var two_line = /\n\n/g;
var one_line = /\n/g;

/**
 * Convert newlines in a string to HTML markup: each pair of consecutive
 * newlines becomes '<p></p>', and every remaining newline becomes '<br>'.
 *
 * @param {string} s - Text to convert.
 * @returns {string} The text with newlines replaced by markup.
 */
function linebreak(s) {
  // Pairs must be handled first, otherwise they would turn into two <br>.
  const withParagraphs = s.replace(two_line, '<p></p>');
  return withParagraphs.replace(one_line, '<br>');
}

/**
 * Upper-case the first character of a string, leaving the rest untouched.
 *
 * The first character is taken as a whole Unicode code point, so a leading
 * character encoded as a surrogate pair is upper-cased correctly instead of
 * being split in half (which the previous `substr(0, 1)` approach did).
 *
 * @param {string} s - Text to capitalize.
 * @returns {string} `s` with its first character upper-cased; '' for ''.
 */
function capitalize(s) {
  if (s.length === 0) {
    return s;
  }
  // codePointAt(0) reads a full code point, even when it spans two UTF-16 units.
  const first = String.fromCodePoint(s.codePointAt(0));
  return first.toUpperCase() + s.slice(first.length);
}

/**
 * Toggle dictation: stop recognition if it is currently running, otherwise
 * clear the accumulated final transcript and start a new English session.
 *
 * Does nothing (apart from logging a warning) when no recognition object was
 * created, i.e. when the browser does not provide webkitSpeechRecognition.
 *
 * @param {Event} [event] - Unused by this function.
 */
function startDictation(event) {
  // `recognition` is only assigned inside the feature check; without this
  // guard an unsupported browser would throw a TypeError below.
  if (typeof recognition === 'undefined') {
    console.warn('Speech recognition is not supported in this browser.');
    return;
  }
  if (recognizing) {
    recognition.stop();
    return;
  }
  final_transcript = '';
  recognition.lang = 'en';
  recognition.start();
}
//startDictation();
<div id="results">
  <span id="final_span" class="final"></span>
  <span id="interim_span" class="interim"></span>
</div>

那么有没有办法检测用户何时暂停以及他暂停的时间长度?

1 个答案:

答案 0 :(得分:1)

这不是一个完整的答案,但可以作为一个起点。

https://jsfiddle.net/persianturtle/7uygdyy1/1/

我不确定 onspeechstart 事件是只触发一次,还是说如果我处在噪音较小的环境中,它会再次触发。

如果它会多次触发,那就很容易做到:我们可以记录每次 speech start 和 speech end 的时间,并据此计算出静默的时间段。

如果 onspeechstart 只触发一次,那么你可以设法估算给定文本的平均说话时长,并据此推算出静默的时间。

但是,每次捕获单词时都会触发一个事件:onresult

所以基本思路是定义一个 activity 数组,把语音事件数据 push 进去,然后据此分析静默的时间段。

代码:

// Chronological log of speech events, stored as 'Label:timestamp' strings.
let activity = [];

// Record the moment speech is first detected.
recognition.onspeechstart = (event) => {
  activity.push(`Started:${event.timeStamp}`);
};

// Every result event is logged and recorded as an 'Ended' marker.
recognition.onresult = (event) => {
  console.log(event);
  activity.push(`Ended:${event.timeStamp}`);
};

// Dump the collected log once recognition ends.
recognition.onend = (event) => {
  console.log(activity);
};