Stuttering / "robotic" audio in WAV recordings made with the Web Audio API

Asked: 2014-08-21 20:51:23

Tags: javascript web-audio

I'm using the Web Audio API to capture WAV audio clips in the browser while a user of our application holds down a particular key (think push-to-talk). (UPDATE: it also happens when push-to-talk isn't in use.) The audio in many of the recordings stutters; you can hear an example here (starting at about 5 seconds in) and a different example here. What can I do to diagnose (or fix) this? (I've already played with the buffer size passed to createScriptProcessor, to no avail.)

The machines running the application are all MacBook Pros with Chrome 36 on OS X 10.8 or 10.9 (UPDATE: also Chrome 39/40 on 10.10). Here is the chrome://version output from the machine that recorded the samples linked above:

Google Chrome:   36.0.1985.143 (Official Build 287914) 
OS:              Mac OS X 
Blink:           537.36 (@179211)
JavaScript:      V8 3.26.31.15
Flash:           14.0.0.177
User Agent:      Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.143 Safari/537.36
Command Line:    /Applications/Google Chrome.app/Contents/MacOS/Google Chrome --flag-switches-begin --flag-switches-end
Executable Path: /Applications/Google Chrome.app/Contents/MacOS/Google Chrome
Profile Path:    /Users/jason/Library/Application Support/Google/Chrome/Default
Variations:      e950616e-37fb3cc2
                 8afebf76-771ac34e
                 c70841c8-4866ef6e
                 195ce1b5-d93a0620
                 c4126e6a-ca7d8d80
                 9e5c75f1-ad69ceb0
                 262f996f-7075cd8
                 24dca50e-837c4893
                 ca65a9fe-91ac3782
                 8d790604-9cb2a91c
                 4ea303a6-3d47f4f4
                 d8f57532-3f4a17df
                 b2612322-f8cf70e2
                 5a3c10b5-e1cc0f14
                 244ca1ac-4ad60575
                 f47ae82a-86f22ee5
                 5e29d81-cf4f6ead
                 3ac60855-486e2a9c
                 246fb659-6e597ede
                 f296190c-65255996
                 4442aae2-6e597ede
                 ed1d377-e1cc0f14
                 75f0f0a0-a5822863
                 e2b18481-d7f6b13c
                 e7e71889-4ad60575
                 cbf0c14e-bf3e6cfd

The relevant parts of the code that makes the recordings are below (slightly simplified):

function startRecording() {
  navigator.getUserMedia({audio: true, video: false}, function (stream) {
    audioContext = audioContext || new window.webkitAudioContext();
    input = audioContext.createMediaStreamSource(stream);
    node = input.context.createScriptProcessor(4096, 1, 1);

    input.connect(node);
    node.connect(audioContext.destination);

    this.worker = new Worker(this.workerUrl); // see Web Worker code, below
    this.worker.addEventListener("message", this.handleWorkerMessage.bind(this));
    this.worker.postMessage({command: "init"});

    node.addEventListener("audioprocess", this.onAudioProcess);
  });
}

function stopRecording() {
  this.recording = false;
  this.worker.postMessage({command: "end"});
}

function onAudioProcess(evt) {
  if (!this.recording || stream.ended) return;

  var channelLeft = evt.inputBuffer.getChannelData(0);
  channelLeft = new Float32Array(channelLeft); // copy the samples; the input buffer is reused between callbacks
  this.worker.postMessage({command: "encode", buffer: channelLeft});
}

function handleWorkerMessage(evt) {
  var data = evt.data;
  switch (data.command) {
  case "end":
    this.appendToBuffer(data.buffer);
    var view;
    try {
      view = new DataView(this.buffer);
      var blob = new Blob([view], {type: this.mimeType});
      this.callback(blob);
    } finally {
      this.worker.terminate();
      node.removeEventListener("audioprocess", this.onAudioProcess);
    }
    break;
  }
}

function appendToBuffer(buffer) {
  if (!this.buffer) {
    this.buffer = buffer;
  } else {
    var tmp = new Uint8Array(this.buffer.byteLength + buffer.byteLength);
    tmp.set(new Uint8Array(this.buffer), 0);
    tmp.set(new Uint8Array(buffer), this.buffer.byteLength);
    this.buffer = tmp.buffer;
  }
}

Here is the web worker, which saves the buffers passed to it and builds the WAV at the end (this code borrows heavily from RecordRTC):

var buffers,
    length = 0,
    sampleRate = 44100;

function concatBuffers(buffers, totalLength) {
  var result = new Float32Array(totalLength);
  var offset = 0;
  var lng = buffers.length;
  for (var i = 0; i < lng; i++) {
    var buf = buffers[i];
    result.set(buf, offset);
    offset += buf.length;
  }
  return result;
}

function writeUTFBytes(view, offset, string) {
  var lng = string.length;
  for (var i = 0; i < lng; i++) {
    view.setUint8(offset + i, string.charCodeAt(i));
  }
}

this.addEventListener("message", function(evt) {
  var data = evt.data;

  switch (data.command) {
  case "init":
    buffers = [];
    break;
  case "encode":
    buffers.push(new Float32Array(data.buffer));
    length += data.buffer.length;
    break;
  case "end":
    var pcmBuffer = concatBuffers(buffers, length);
    var wavBuffer = new ArrayBuffer(44 + pcmBuffer.length * 2);
    var view = new DataView(wavBuffer);

    // RIFF chunk descriptor
    writeUTFBytes(view, 0, "RIFF");
    view.setUint32(4, 44 + pcmBuffer.length * 2, true);
    writeUTFBytes(view, 8, 'WAVE');

    // FMT sub-chunk
    writeUTFBytes(view, 12, 'fmt ');
    view.setUint32(16, 16, true);             // fmt sub-chunk size
    view.setUint16(20, 1, true);              // audio format: 1 = PCM

    view.setUint16(22, 1, true);              // one channel
    view.setUint32(24, sampleRate, true);     // sample rate
    view.setUint32(28, sampleRate * 2, true); // byte rate = sampleRate * channels * bytes per sample
    view.setUint16(32, 2, true);              // block align = channels * bytes per sample
    view.setUint16(34, 16, true);             // bits per sample

    // data sub-chunk
    writeUTFBytes(view, 36, 'data');
    view.setUint32(40, pcmBuffer.length * 2, true);

    // PCM samples
    var lng = pcmBuffer.length;
    var index = 44;
    var volume = 1;
    for (var i = 0; i < lng; i++) {
      view.setInt16(index, pcmBuffer[i] * (0x7FFF * volume), true);
      index += 2;
    }

    this.postMessage({command: "end", buffer: wavBuffer});
    break;
  }
});

UPDATE

We have started using the same technique in another product and are seeing the same behavior there, and that product does not use a push-to-talk system at all.

2 Answers:

Answer 0 (score: 2):

Based on the audio and the timing of the dropouts, I have a really strong feeling this is actually your key handler: it sounds like the code wrapped around the key press is causing a start/stop storm?
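
If that is the case, guarding the key handlers against keyboard auto-repeat is a quick way to rule it out. A minimal sketch (the spacebar key code, the flag, and the handler wiring here are hypothetical, not taken from the question's code):

// Hypothetical push-to-talk wiring: start the recorder once per key press
// and stop it once per release, ignoring keydown auto-repeat.
var pushToTalkActive = false;

document.addEventListener("keydown", function (evt) {
  if (evt.keyCode !== 32 || pushToTalkActive) return; // 32 = spacebar (assumed talk key); keydown repeats while held
  pushToTalkActive = true;
  startRecording();
});

document.addEventListener("keyup", function (evt) {
  if (evt.keyCode !== 32) return;
  pushToTalkActive = false;
  stopRecording();
});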

Answer 1 (score: 1):

I've noticed that, unlike Firefox, Chrome seems quite happy to drop audio frames when there is a lot going on in the page. You won't be notified when this happens, but if you use performance-measurement tools you can see what is causing the bottleneck.

Since JavaScript is single-threaded (and you can't create a script processor inside a worker), any event handler that takes a long time to finish, or anything else happening elsewhere in your application, makes the problem worse.
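
One rough way to check whether the main thread is keeping up is to time the interval between audioprocess callbacks; with a 4096-sample buffer at 44.1 kHz they should arrive about every 93 ms, so noticeably larger gaps suggest dropped frames. A sketch along those lines (it reuses node and audioContext from the question's code; the 1.5x threshold is an arbitrary choice):

// Warn when an audioprocess callback arrives much later than expected,
// which usually means the main thread was busy in between.
var lastCallbackTime = null;
var expectedIntervalMs = (4096 / audioContext.sampleRate) * 1000; // ~93 ms at 44.1 kHz

node.addEventListener("audioprocess", function () {
  var now = performance.now();
  if (lastCallbackTime !== null && now - lastCallbackTime > expectedIntervalMs * 1.5) {
    console.warn("audioprocess gap of " + (now - lastCallbackTime).toFixed(1) +
                 " ms (expected ~" + expectedIntervalMs.toFixed(1) + " ms)");
  }
  lastCallbackTime = now;
});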

If you increase the script processor's bufferSize to 16384 (the maximum), Chrome seems to drop fewer frames.
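
In the question's code that is a one-line change (16384 is the largest of the power-of-two buffer sizes createScriptProcessor accepts):

// Fewer, larger audioprocess callbacks; the trade-off is higher latency
// (roughly 372 ms of audio per callback at 44.1 kHz).
node = input.context.createScriptProcessor(16384, 1, 1);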