我想使用 Google Speech API 来实现语音转文本的功能,但在前端这边我还不清楚应该怎么做。我在后端和前端都使用了 Socket.io Stream。
前端(JavaScript)
bindSendAudioMessage() {
let me = this;
me.sendAudioMessageButton = me.ele.find('#send-audio-message-btn');
me.sendAudioMessageButton.off('click').one('click', async function () {
let stream = await navigator.mediaDevices.getUserMedia({ audio : true});
me.recordingStarted(stream);
});
},
recordingStarted: function (inputStream) {
let serverStream = ss.createStream();
ss(chatBox.socketIO).emit('speech-to-text', serverStream);
inputStream.pipe(serverStream);
ss(chatBox.socketIO).on('speech-text', function (stream) {
console.log('receiving something');
console.log(stream);
stream.on('data', data => {
console.log(data);
})
})
},
后端(NodeJS)
// Imports the Google Cloud client library
const speech = require('@google-cloud/speech');
// Creates a client
const client = new speech.SpeechClient();
// Handle an incoming audio stream from the browser, run it through
// Google Cloud streaming speech recognition, and stream transcripts back
// to the client on the 'speech-text' event.
SocketStream(socket).on('speech-to-text', function (inputStream) {
  const request = {
    config: {
      encoding: 'LINEAR16',       // raw 16-bit PCM — client must send this format
      sampleRateHertz: 16000,
      languageCode: 'en-US',
    },
    interimResults: false,        // set to true for partial hypotheses
    // BUG FIX: the Node.js client expects camelCase; `single_utterance`
    // was silently ignored.
    singleUtterance: true,
  };

  // Open the return channel to the browser before results arrive.
  const outputStream = SocketStream.createStream();
  SocketStream(socket).emit('speech-text', outputStream);

  // BUG FIX: recognizeStream emits result OBJECTS, not bytes, so
  // `recognizeStream.pipe(outputStream)` cannot work on a binary
  // socket.io-stream. Instead, write the transcript text explicitly
  // from the 'data' handler, and close the output stream on end/error.
  const recognizeStream = client
    .streamingRecognize(request)
    .on('error', err => {
      console.error(err);
      outputStream.end();         // don't leave the client stream dangling
    })
    .on('data', data => {
      const result = data.results[0];
      const transcript =
        result && result.alternatives[0]
          ? result.alternatives[0].transcript
          : '';
      if (transcript) {
        outputStream.write(transcript);
      }
    })
    .on('end', () => outputStream.end());

  // Audio flows in via pipe; transcripts flow out via the handler above.
  inputStream.pipe(recognizeStream);
})
我确定流API中缺少某些内容,我知道的一个问题是navigator.mediaDevices.getUserMedia({ audio : true})
会给我一个MediaStream
,它与SocketIO Stream
不同。
如何准备音频MediaStream
才能将其流式传输到SocketIO Stream
?
当我从Google API获取响应时,该如何流回响应?
此行inputStream.pipe(recognizeStream).pipe(outputStream);
有意义吗?