我需要对音频流进行语音识别,而 Google 的语音转文本(Speech-to-Text)服务看起来很合适。在文档和示例中摸索了一番之后,我终于借助 Google 提供的 Node.js 库和 OSX 上的 sox 成功捕获了麦克风音频。
识别本身工作正常,但遗憾的是,我找不到启用单词计时数据的方法;根据文档,只需将 enableWordTimeOffsets 选项设置为 true 即可。
这个问题只在流式识别模式下出现;上传文件的简单示例则能给出预期的结果。
那么,有人在流式识别中成功获得过单词计时数据吗?这会不会是 Google API 的 JavaScript 库中的一个 bug?
谢谢
Davide
PS:如果有帮助的话,流式示例的代码(比较乱,只是一个测试)如下:
// Handle to the spawned recording process; null until startRecording() assigns it
var cp = null
// spawn() launches the external audio capture command
var spawn = require('child_process').spawn
/**
 * Starts capturing audio from the default recording device with sox and
 * returns the stream of raw samples written to the process's stdout.
 *
 * The spawned process is stored in the module-level `cp` so that
 * stopRecording() can terminate it later.
 *
 * @param {Object} [options] - Overrides for the defaults declared below.
 * @param {number} [options.sampleRate=16000] - Sample rate handed to sox (-r).
 * @param {number} [options.channels=1] - Channel count handed to sox (-c).
 * @param {string} [options.device] - When set, exported to sox as AUDIODEV.
 * @param {boolean} [options.verbose=false] - Log chunk sizes and total time.
 * @returns {Object} stdout stream of the spawned sox process.
 */
function startRecording (options) {
  cp = null // Empty out possibly dead recording process

  // compress, threshold*, silence and recordProgram are accepted so existing
  // callers keep working, but they are not used to build the sox command.
  const defaults = {
    sampleRate: 16000,
    channels: 1,
    compress: false,
    threshold: 0,
    thresholdStart: null,
    thresholdEnd: null,
    silence: '1.0',
    verbose: false,
    recordProgram: 'sox'
  }
  // Merge into a fresh object instead of reassigning the parameter
  const settings = Object.assign({}, defaults, options)

  // Capture audio stream
  const cmd = 'sox'
  const cmdArgs = [
    '-q', // show no progress
    '-d', // use default recording device
    '-r', String(settings.sampleRate), // sample rate (was hard-coded to 16000)
    '-c', String(settings.channels), // channels (was hard-coded to 1)
    '-t', '.raw', // audio type ... was waveaudio
    // NOTE(review): no explicit '-e signed-integer' is passed, so the sample
    // encoding is left to sox's default — confirm it matches LINEAR16.
    '-b', '16', // precision (bits)
    '-' // pipe
  ]

  // Spawn audio capture command. spawn() has no `encoding` option, so the
  // previous { encoding: 'binary' } entry was dropped.
  const cmdOptions = {}
  if (settings.device) {
    cmdOptions.env = Object.assign({}, process.env, { AUDIODEV: settings.device })
  }
  cp = spawn(cmd, cmdArgs, cmdOptions)
  const rec = cp.stdout

  if (settings.verbose) {
    console.log('Recording', settings.channels, 'channels with sample rate',
      settings.sampleRate + '...')
    console.time('End Recording')
    rec.on('data', function (data) {
      console.log('Recording %d bytes', data.length)
    })
    rec.on('end', function () {
      console.timeEnd('End Recording')
    })
  }

  return rec
}
/**
 * Terminates the recording process held in the module-level `cp`, if any.
 *
 * @returns {Object|false} the process that kill() was called on, or false
 *   (after logging a hint) when no recording process exists.
 */
function stopRecording () {
  if (cp) {
    cp.kill() // no signal argument: the child process default is used
    return cp
  }
  console.log('Please start a recording first')
  return false
}
// Recognition parameters read by microphoneStream() when it builds its config
const languageCode = 'it-IT';
const sampleRateHertz = 16000;
const encoding = 'LINEAR16';

// Test-script safety net: stop the capture process ten seconds after load
setTimeout(function () {
  stopRecording()
}, 10000)
/**
 * Streams microphone audio to the Google Cloud Speech API and prints every
 * streaming response in full.
 *
 * Reads the module-level `encoding`, `sampleRateHertz` and `languageCode`
 * constants for the recognition config and uses startRecording() as the
 * audio source.
 */
function microphoneStream() { // (encoding, sampleRateHertz, languageCode) {
  // [START micStreamRecognize]
  // Imports the Google Cloud client library
  const speech = require('@google-cloud/speech');

  // Recognition settings; word timing is requested here
  const config = {
    encoding: encoding,
    sampleRateHertz: sampleRateHertz,
    languageCode: languageCode,
    enableWordTimeOffsets: true
  };

  // Streaming-level settings. enableWordTimeOffsets is a recognition-config
  // field, so it is set once in `config` and not repeated at this level.
  const request = {
    config,
    interimResults: true //Get interim results from stream
  };

  // Creates a client
  const client = new speech.SpeechClient();

  // Create a recognize stream
  const recognizeStream = client
    .streamingRecognize(request)
    .on('error', console.error)
    .on('data', data => {
      // console.log() stops expanding nested objects after two levels, which
      // hides results[].alternatives[].words; print the whole response.
      // NOTE(review): word offsets appear to be populated only on final
      // results (isFinal === true) — confirm against the API docs.
      console.dir(data, { depth: null });
    });

  // Start recording and send the microphone input to the Speech API.
  // startRecording's defaults name the rate `sampleRate`, so that key is used.
  startRecording({
    sampleRate: sampleRateHertz,
    threshold: 0.5, //silence threshold
    recordProgram: 'sox', // Try also "arecord" or "sox"
    silence: '5000.0' //seconds of silence before ending
  })
    .pipe(recognizeStream);

  console.log('Listening, press Ctrl+C to stop.');
  // [END micStreamRecognize]
}
// Start the streaming recognition test as soon as the script is loaded
microphoneStream();
下面是非流式识别的示例:
// Imports the Google Cloud client library
const speech = require('@google-cloud/speech');
const fs = require('fs');
// Speech API client used for the one-shot (non-streaming) request
const client = new speech.SpeechClient();

// Local audio file to transcribe
const fileName = './test.raw';

// Load the file synchronously and base64-encode it for inline upload
const file = fs.readFileSync(fileName);
const audioBytes = file.toString('base64');
const audio = { content: audioBytes };

// Encoding, sample rate in hertz and BCP-47 language code of the audio,
// plus the punctuation and word-timing switches
const config = {
  encoding: 'LINEAR16',
  sampleRateHertz: 16000,
  languageCode: 'it-IT',
  enableAutomaticPunctuation: true,
  enableWordTimeOffsets: true,
};

const request = { audio, config };

// Logs the raw reply, then the first alternative of every result, one per line
function printTranscription(data) {
  console.log(data);
  const response = data[0];
  const lines = [];
  for (const result of response.results) {
    lines.push(result.alternatives[0].transcript);
  }
  const transcription = lines.join('\n');
  console.log(`Transcription: ${transcription}`);
}

// Reports a failed request or a failure while printing the reply
function reportError(err) {
  console.error('ERROR:', err);
}

// Detects speech in the audio file
client.recognize(request).then(printTranscription).catch(reportError);