Question

我尝试使用 Watson Speech To Text 服务实现语音识别。我使用 "MediaStreamRecorder" 库在 JavaScript 中编写了一些代码。我通过 WebSocket 发送数据并遇到了这个问题：如果我使用 "content-type": "audio/wav"，Watson 只识别第一个 blob，并且 inactivity_timeout 被设为默认值，尽管我已将其设置为 2 秒。

我使用此代码打开websocket：

initWebSocket(startRecordingCallback) {
    var that = this;
    that.websocket = new WebSocket(that.wsURI);
    that.websocket.onopen = function (evt) {
        console.log("WebSocket: connection OK ");
        var message = {
            "action": "start",
            "content-type": "audio/wav",
            "interim_results": true,
            "continuous": true,
            "inactivity_timeout": 2
        };
        that.websocket.send(JSON.stringify(message));
    };
    that.websocket.onclose = function (evt) {
        if (event.wasClean) {
            console.log("WebSocket: connection closed clearly " + JSON.stringify(evt));

        } else {
            console.log("WebSocket: disconnect " + JSON.stringify(evt));
        }
    };
    that.websocket.onmessage = function (evt) {
      console.log(evt)
    };
    that.websocket.onerror = function (evt) {
        console.log("WebSocket: error " + JSON.stringify(evt));
    };
}

此代码用于录制音频：

startRecording() {
    var that = this;
    this.initWebSocket(function () {
      var mediaConstraints = {
          audio: true
      };
      function onMediaSuccess(stream) {
          that.mediaRecorder = new MediaStreamRecorder(stream);
          that.mediaRecorder.mimeType = 'audio/wav';
          that.mediaRecorder.ondataavailable = function (blob) {
              that.websocket.send(blob);
          };
          that.mediaRecorder.start(3000);
      }

      function onMediaError(e) {
          console.error('media error', e);
      }
      navigator.getUserMedia(mediaConstraints, onMediaSuccess, onMediaError);
    });
}

我需要通过 WebSocket 进行实时识别，并在 2 秒无活动后自动关闭套接字。请给我建议。

Answer 1

正如 @Daniel Bolanos 所说，即使转录文本为空的时间超过 inactivity_timeout 秒，也不会触发 inactivity_timeout。该服务使用另一种方式来检测是否存在语音，而不是依赖转录结果。

如果服务检测到语音，即使转录文本为空，也不会触发 inactivity_timeout。

以下是一段使用 speech-javascript-sdk 的代码，可以实现您想要做的事情。希望它能帮助将来尝试识别麦克风音频的 StackOverflow 用户。

// On click of #button: fetch a Watson token, start microphone recognition
// that writes into #output, and wire #stop to stop the stream.
document.querySelector('#button').onclick = function () {
  // you need to provide this endpoint to fetch a watson token
  fetch('/api/speech-to-text/token') 
  .then(function(response) {
    // fetch() only rejects on network failure; without this check an HTTP
    // error page would be passed on as if it were a token.
    if (!response.ok) {
      throw new Error('Token request failed with status ' + response.status);
    }
    return response.text();
  }).then(function (token) {
    var stream = WatsonSpeech.SpeechToText.recognizeMicrophone({
      token: token,
      outputElement: '#output' // CSS selector or DOM Element
    });

    stream.on('error', function(err) {
      console.log(err);
    });

    document.querySelector('#stop').onclick = function() {
      stream.stop();
    };
  }).catch(function(error) {
    console.log(error);
  });
};

演示：https://watson-speech.mybluemix.net/microphone-streaming.html

感谢编写该库的 @Nathan Friedly。

Watson 在识别 "audio/wav"

1 个答案: