I'm trying to use Google's Speech-to-Text Node.js library (https://github.com/googleapis/nodejs-speech), streaming audio from the client's microphone input via navigator.mediaDevices.getUserMedia.
I was able to pipe microphone audio through sox into streamingRecognize on the Node.js side, and it worked. I was also able to stream audio from the client and pipe it to speakers on the server side. However, when I pipe the client's stream into streamingRecognize, it doesn't recognize any words.
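For reference, the sox-based pipeline that does work looks roughly like this. This is a minimal sketch following the Google sample, not my exact code; it assumes sox is installed and uses the node-record-lpcm16 package:

// Minimal sketch of the working sox pipeline (based on the Google sample;
// assumes sox is installed and node-record-lpcm16 is available)
const recorder = require('node-record-lpcm16');
const speech = require('@google-cloud/speech');

const client = new speech.SpeechClient();

const recognizeStream = client
    .streamingRecognize({
        config: {
            encoding: 'LINEAR16',
            sampleRateHertz: 16000,
            languageCode: 'en-US',
        },
        interimResults: true,
    })
    .on('error', console.error)
    .on('data', data =>
        process.stdout.write(
            data.results[0] && data.results[0].alternatives[0] ?
                `Transcription: ${data.results[0].alternatives[0].transcript}\n` :
                '\n'
        )
    );

// Pipe raw 16 kHz LINEAR16 microphone audio straight into the recognizer
recorder
    .record({
        sampleRate: 16000, // must match sampleRateHertz in the config above
        threshold: 0,
        recordProgram: 'sox',
    })
    .stream()
    .on('error', console.error)
    .pipe(recognizeStream);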
Server side
var io = require("socket.io")(server); // "server" is an existing http server instance
const speech = require('@google-cloud/speech');
const speechClient = new speech.SpeechClient();

// Config values assumed here for completeness (they are defined elsewhere
// in my code); encoding and sample rate must match the audio the client sends
const encoding = 'LINEAR16';
const sampleRateHertz = 16000;
const languageCode = 'en-US';

const request = {
    config: {
        encoding: encoding,
        sampleRateHertz: sampleRateHertz,
        languageCode: languageCode,
    },
    interimResults: true,
    //singleUtterance: false
};

let recognizeStream = speechClient
    .streamingRecognize(request)
    .on('error', console.error)
    .on('data', data => {
        console.log(data);
        process.stdout.write(
            data.results[0] && data.results[0].alternatives[0] ?
                `Transcription: ${data.results[0].alternatives[0].transcript}\n` :
                `\n\nReached transcription time limit, press Ctrl+C\n`
        );
    });

io.on("connection", function (client) {
    // each "userSpeaking" message carries one chunk of raw PCM from the browser
    client.on("userSpeaking", function (data) {
        if (recognizeStream !== null) {
            recognizeStream.write(new Uint8Array(data));
        }
    });
});
Client side
// Convert Float32 samples in [-1, 1] from the Web Audio API
// into 16-bit signed PCM (LINEAR16)
function convertFloat32ToInt16(buffer) {
    let l = buffer.length;
    let buf = new Int16Array(l);
    while (l--) {
        // clamp to [-1, 1] before scaling to the Int16 range
        buf[l] = Math.max(-1, Math.min(1, buffer[l])) * 0x7FFF;
    }
    return buf.buffer;
}
AudioContext = window.AudioContext || window.webkitAudioContext;
context = new AudioContext();

// bufferSize must be a power of two; 4096 assumed here
// (it was defined elsewhere in my code)
const bufferSize = 4096;
processor = context.createScriptProcessor(bufferSize, 1, 1);
processor.connect(context.destination);
context.resume();

// Send each captured audio buffer to the server over socket.io
function microphoneProcess(e) {
    var left = e.inputBuffer.getChannelData(0);
    var left16 = convertFloat32ToInt16(left);
    socket.emit('userSpeaking', left16);
}
// Note: getUserMedia takes a single constraints object; the recorder-style
// options (sampleRate, mimeType, ...) I had passed as a second argument
// are not part of its API and were being ignored
navigator.mediaDevices
    .getUserMedia({
        video: false,
        audio: true
    })
    .then((stream) => {
        globalStream = stream;
        input = context.createMediaStreamSource(stream);
        input.connect(processor);
        processor.onaudioprocess = function (e) {
            microphoneProcess(e);
        };
    })
    .catch(console.error);