我想做的事情:在视频聊天期间,实时将语音转换为文本
使用:WebRTC、Web Speech API、Node.js v9.9.0
会发生什么:index.html 加载 > 视频元素正常显示,同时显示一个按钮,点击该按钮即开始语音转文本 > 单击按钮后,语音转文本正常工作。但是,当我打开另一个标签页并访问 localhost 时,第二个视频元素会显示在第一个旁边,而语音转文本却中止了。
我猜测的原因:有另一个优先级更高的程序开始执行了,我猜这个程序就是 Node.js 程序。
我需要知道的是:怎样才能让 HTML 中的脚本在 Node.js 程序(或者其他导致它中止的程序)开始执行之后,仍然能够继续运行。
我曾尝试在 HTML 中使用 Web Worker,但结果是语音转文本脚本根本没有执行。
index.html:
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <title>Realtime communication with WebRTC</title>
  <style>
    body {
      font-family: monospace;
      font-size: 22px;
    }
  </style>
  <link rel="stylesheet" href="/css/main.css" />
</head>
<body>
  <h1>Realtime communication with WebRTC</h1>
  <!-- #speech shows finalised text, #interim shows the still-changing hypothesis. -->
  <span id="speech"></span>
  <span id="interim"></span>
  <div id="videos">
    <video id="localVideo" autoplay></video>
    <video id="remoteVideo" autoplay></video>
  </div>
  <script id="STT">
    /**
     * Tells the user that this browser offers no speech recognition support.
     */
    function upgrade() {
      alert('Please use Google Chrome for best experience');
    }

    // Speech-to-text for the local microphone, built on the Web Speech API.
    // Recognition starts once the page has loaded, restarts itself whenever a
    // session ends without a fatal error, and can be started again by hand
    // with the #startSpeech button.
    (function () {
      // Errors after which calling start() again cannot succeed without the
      // user changing something (permission or device); restarting
      // automatically on these would loop forever.
      var FATAL_ERRORS = ['not-allowed', 'service-not-allowed', 'audio-capture'];

      // addEventListener instead of window.onload so that a later script
      // assigning window.onload cannot silently replace this handler.
      window.addEventListener('load', function () {
        // Resolve the constructor first. `new A() || B()` never reaches B,
        // because an object created with `new` is always truthy.
        var SpeechRecognitionImpl =
          window.SpeechRecognition || window.webkitSpeechRecognition;
        if (!SpeechRecognitionImpl) {
          upgrade();
          return;
        }

        var transcription = document.getElementById('speech');
        var interim_span = document.getElementById('interim');
        var startButton = document.getElementById('startSpeech');

        // True from the moment start() is requested until `onend` fires.
        var recognizing = false;
        // Cleared by a fatal error; set again when a start is requested.
        var autoRestart = true;

        interim_span.style.opacity = '0.5';

        var speech = new SpeechRecognitionImpl();
        speech.continuous = true;
        speech.interimResults = true;
        speech.lang = 'en-US'; // check google web speech example source for more languages

        /**
         * Starts a recognition session unless one is already running.
         * start() throws when called on an active session, so the state flag
         * is set before the call and rolled back if the call fails.
         */
        function start() {
          if (recognizing) {
            return;
          }
          recognizing = true;
          autoRestart = true;
          try {
            speech.start();
          } catch (err) {
            recognizing = false;
            console.error(err);
          }
        }

        /**
         * Clears both transcript spans and starts a fresh session.
         */
        function reset() {
          interim_span.textContent = '';
          transcription.textContent = '';
          start();
        }

        speech.onstart = function () {
          // When recognition begins
          recognizing = true;
        };

        speech.onresult = function (event) {
          // When recognition produces result
          var interim_transcript = '';
          var final_transcript = '';
          // Only results from resultIndex onwards are new or changed.
          for (var i = event.resultIndex; i < event.results.length; ++i) {
            if (event.results[i].isFinal) {
              final_transcript += event.results[i][0].transcript;
            } else {
              interim_transcript += event.results[i][0].transcript;
            }
          }
          // textContent: the transcript is plain text, never markup.
          transcription.textContent = final_transcript;
          interim_span.textContent = interim_transcript;
        };

        speech.onerror = function (event) {
          console.error(event.error);
          if (FATAL_ERRORS.indexOf(event.error) !== -1) {
            autoRestart = false;
          }
        };

        speech.onend = function () {
          // When recognition ends
          recognizing = false;
          if (autoRestart) {
            reset();
          }
        };

        // The button is a manual (re)start; it is a no-op while a session runs.
        if (startButton) {
          startButton.addEventListener('click', start);
        }

        start(); // enables recognition by default
      });
    })();
  </script>
  <button id="startSpeech" type="button">click</button>
  <script src="/socket.io/socket.io.js"></script>
  <script src="https://webrtc.github.io/adapter/adapter-latest.js"></script>
  <script src="js/main.js"></script>
</body>
</html>
index.js:
'use strict';
var os = require('os');
var nodeStatic = require('node-static');
var http = require('http');
var socketIO = require('socket.io');
var fs = require('fs');
var Connect = require('connect');
const threads = require('webworker-threads');
console.log("in index.js");

// Static file server used to answer every HTTP request.
const fileServer = new nodeStatic.Server();
console.log(' var fileServer = new(nodeStatic.Server)();' + fileServer);

/**
 * Request listener: logs the request and hands it to the static file server.
 * Note that the log line is printed once per incoming request.
 *
 * @param {http.IncomingMessage} req - incoming request
 * @param {http.ServerResponse} res - response to be filled by the file server
 */
function handleRequest(req, res) {
  console.log("creating a server");
  fileServer.serve(req, res);
}

// HTTP server listening on port 8000; socket.io is attached to it below.
const app = http.createServer(handleRequest).listen(8000);
console.log('app:' + app);

const io = socketIO.listen(app);
console.log('io:' + io);
// Signalling for one connected socket.io client: relays messages, manages
// rooms of at most two clients, and reports the server's IPv4 addresses.
io.sockets.on('connection', (socket) => {
  console.log('io.sockets.on(connection, function(socket)');
  console.log('recieved connection ');

  // Sends the given values to this client as a 'log' event, prefixed with a
  // fixed marker string, so server messages can be shown on the client.
  const log = (...parts) => {
    console.log('in log in index.js');
    const payload = ['Message from server:'];
    console.log('message from server:' + payload);
    payload.push(...parts);
    socket.emit('log', payload);
  };

  // Relay every client message to all other connected sockets.
  socket.on('message', (message) => {
    console.log('Client said: ', message);
    log('Client said: ', message);
    // for a real app, would be room-only (not broadcast)
    socket.broadcast.emit('message', message);
  });

  // The first client creates the room, the second joins it, any further
  // client is told the room is full.
  socket.on('create or join', (room) => {
    const requestNotice = 'Received request to create or join room ' + room;
    console.log(requestNotice);
    log(requestNotice);

    const clientsInRoom = io.sockets.adapter.rooms[room];
    console.log(clientsInRoom);
    const numClients = clientsInRoom
      ? Object.keys(clientsInRoom.sockets).length
      : 0;
    console.log(numClients);

    const occupancyNotice = 'Room ' + room + ' now has ' + numClients + ' client(s)';
    log(occupancyNotice);
    console.log(occupancyNotice);

    switch (numClients) {
      case 0:
        console.log('number of clients is 0');
        socket.join(room);
        log('Client ID ' + socket.id + ' created room ' + room);
        socket.emit('created', room, socket.id);
        break;
      case 1:
        console.log('number of clients is 1');
        log('Client ID ' + socket.id + ' joined room ' + room);
        // Announce the newcomer to the room before it joins, then tell the
        // newcomer it has joined and signal 'ready' to the whole room.
        io.sockets.in(room).emit('join', room);
        socket.join(room);
        socket.emit('joined', room, socket.id);
        io.sockets.in(room).emit('ready');
        break;
      default: // max two clients
        console.log('number of clients is max');
        socket.emit('full', room);
    }
  });

  // Reply with one 'ipaddr' event per IPv4 address other than 127.0.0.1.
  socket.on('ipaddr', () => {
    console.log('in ipaddr');
    const ifaces = os.networkInterfaces();
    Object.keys(ifaces).forEach((dev) => {
      ifaces[dev]
        .filter((details) => details.family === 'IPv4' && details.address !== '127.0.0.1')
        .forEach((details) => {
          socket.emit('ipaddr', details.address);
        });
    });
  });
});
index.html(使用 Web Worker 的版本):
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <title>Realtime communication with WebRTC</title>
  <style>
    body {
      font-family: monospace;
      font-size: 22px;
    }
  </style>
  <link rel="stylesheet" href="/css/main.css" />
</head>
<body>
  <h1>Realtime communication with WebRTC</h1>
  <!-- #speech shows finalised text, #interim shows the still-changing hypothesis. -->
  <span id="speech"></span>
  <span id="interim"></span>
  <div id="videos">
    <video id="localVideo" autoplay></video>
    <video id="remoteVideo" autoplay></video>
  </div>
  <!--
    Worker source. The non-JavaScript type keeps the browser from running it
    inline; the page script below loads its text into a Blob-backed Worker.
  -->
  <script id="worker" type="javascript/worker">
    // Worker side. A worker has no DOM and no SpeechRecognition, so it only
    // receives plain data: { results: [{ isFinal, transcript }, ...] }.
    // It joins the pieces into a final and an interim string and posts them
    // back to the page.
    self.onmessage = function (event) {
      var results = event.data.results;
      var final_transcript = '';
      var interim_transcript = '';
      for (var i = 0; i < results.length; ++i) {
        if (results[i].isFinal) {
          final_transcript += results[i].transcript;
        } else {
          interim_transcript += results[i].transcript;
        }
      }
      self.postMessage({
        'final_transcript_post': final_transcript,
        'interim_transcript_post': interim_transcript
      });
    };
  </script>
  <script>
    // Page side: owns the DOM nodes and the recogniser; the worker only sees
    // plain strings and booleans.
    var transcription = document.getElementById('speech');
    var interim_span = document.getElementById('interim');
    interim_span.style.opacity = '0.5';

    var blob = new Blob(
      [document.getElementById('worker').textContent],
      { type: 'text/javascript' }
    );
    var w = new Worker(window.URL.createObjectURL(blob));

    // Render whatever the worker assembled. textContent: plain text, not markup.
    w.onmessage = function (event) {
      transcription.textContent = event.data.final_transcript_post;
      interim_span.textContent = event.data.interim_transcript_post;
    };
    w.onerror = function (event) {
      console.error(event.message);
    };

    // NOTE(review): running the string assembly in a worker does not change
    // which context owns the microphone; whether recognition keeps running
    // next to the WebRTC capture is decided by the browser - verify there.
    (function startSpeechToText() {
      // Errors after which an automatic restart cannot succeed.
      var FATAL_ERRORS = ['not-allowed', 'service-not-allowed', 'audio-capture'];

      // Resolve the constructor first. `new A() || B()` never reaches B,
      // because an object created with `new` is always truthy.
      var SpeechRecognitionImpl =
        window.SpeechRecognition || window.webkitSpeechRecognition;
      if (!SpeechRecognitionImpl) {
        alert('Please use Google Chrome for best experience');
        return;
      }

      // True from the moment start() is requested until `onend` fires.
      var recognizing = false;
      // Cleared by a fatal error so that `onend` stops restarting.
      var autoRestart = true;

      var speech = new SpeechRecognitionImpl();
      speech.lang = 'en'; // check google web speech example source for more languages
      speech.continuous = true;
      speech.interimResults = true;

      // Starts a session unless one is already running; start() throws when
      // called on an active session, so the flag is rolled back on failure.
      function start() {
        if (recognizing) {
          return;
        }
        recognizing = true;
        try {
          speech.start();
        } catch (err) {
          recognizing = false;
          console.error(err);
        }
      }

      // Clears both transcript spans and starts a fresh session.
      function reset() {
        interim_span.textContent = '';
        transcription.textContent = '';
        start();
      }

      speech.onstart = function () {
        // When recognition begins
        recognizing = true;
      };

      speech.onresult = function (event) {
        // Copy the new results into plain objects: the event and its result
        // list cannot be posted to a worker, strings and booleans can.
        var results = [];
        for (var i = event.resultIndex; i < event.results.length; ++i) {
          results.push({
            isFinal: event.results[i].isFinal,
            transcript: event.results[i][0].transcript
          });
        }
        w.postMessage({ 'results': results });
      };

      speech.onerror = function (event) {
        console.error(event.error);
        if (FATAL_ERRORS.indexOf(event.error) !== -1) {
          autoRestart = false;
        }
      };

      speech.onend = function () {
        // When recognition ends
        recognizing = false;
        if (autoRestart) {
          reset();
        }
      };

      start(); // enables recognition by default
    })();
  </script>
  <script src="/socket.io/socket.io.js"></script>
  <script src="https://webrtc.github.io/adapter/adapter-latest.js"></script>
  <script src="js/main.js"></script>
</body>
</html>
答案 0 :(得分:0)
Web Speech API 无法在视频聊天的同时访问麦克风。如果您想在视频聊天中进行转录,应该将录制的音频以流的方式传输到外部转录服务,并显示其返回的结果。例如,Google Speech API 支持流式识别,但价格非常昂贵。