I am trying to implement a Django application in which a user can record a voice message, pass it to the Watson API, receive the transcription data, and have the result displayed to the user on a separate page. The recorder does not work as intended because I cannot save the file. If I want to deploy the application to Heroku, save the recording there, and then pass that audio file to the Watson API, what other changes should be made?
Code:
STT/STT/settings.py:
import os
# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/1.11/howto/deployment/checklist/
# SECURITY WARNING: keep the secret key used in production secret!
SECRET_KEY = 'my-secret-key'
# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = True
ALLOWED_HOSTS = ['*']
# Application definition
INSTALLED_APPS = [
'django.contrib.admin',
'django.contrib.auth',
'django.contrib.contenttypes',
'django.contrib.sessions',
'django.contrib.messages',
'django.contrib.staticfiles',
'record',
]
MIDDLEWARE = [
'django.middleware.security.SecurityMiddleware',
'django.contrib.sessions.middleware.SessionMiddleware',
'django.middleware.common.CommonMiddleware',
'django.middleware.csrf.CsrfViewMiddleware',
'django.contrib.auth.middleware.AuthenticationMiddleware',
'django.contrib.messages.middleware.MessageMiddleware',
'django.middleware.clickjacking.XFrameOptionsMiddleware',
]
ROOT_URLCONF = 'STT.urls'
TEMPLATES = [
{
'BACKEND': 'django.template.backends.django.DjangoTemplates',
'DIRS': [os.path.join(BASE_DIR, 'templates')],  # TEMPLATE_DIRS is ignored since Django 1.10; the path belongs here
'APP_DIRS': True,
'OPTIONS': {
'context_processors': [
'django.template.context_processors.debug',
'django.template.context_processors.request',
'django.contrib.auth.context_processors.auth',
'django.contrib.messages.context_processors.messages',
],
},
},
]
WSGI_APPLICATION = 'STT.wsgi.application'
# Database
# https://docs.djangoproject.com/en/1.11/ref/settings/#databases
DATABASES = {
'default': {
'ENGINE': 'django.db.backends.sqlite3',
'NAME': os.path.join(BASE_DIR, 'db.sqlite3'),
}
}
# Password validation
# https://docs.djangoproject.com/en/1.11/ref/settings/#auth-password-validators
AUTH_PASSWORD_VALIDATORS = [
{
'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
},
{
'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
},
{
'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
},
{
'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
},
]
# Internationalization
# https://docs.djangoproject.com/en/1.11/topics/i18n/
LANGUAGE_CODE = 'en-us'
TIME_ZONE = 'UTC'
USE_I18N = True
USE_L10N = True
USE_TZ = True
# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/1.11/howto/static-files/
STATIC_URL = '/static/'
WATSON_USERNAME = 'watson-username'
WATSON_PASSWORD = 'watson-password'
WATSON_ENDPOINT = 'https://stream.watsonplatform.net/speech-to-text/api/v1/recognize'
WATSON_DEFAULT_PARAMS = {
'continuous': True,
'timestamps': True,
'word_confidence': True,
}
WATSON_DEFAULT_HEADERS = {
'content-type': 'audio/wav'
}
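Since part of the question is where saved recordings should live, here is a minimal sketch of the upload-related settings that would normally go in this file, with the Watson credentials read from the environment so the same file works locally and on Heroku (the environment variable names are an assumption; set them with heroku config:set):
# Hypothetical additions to STT/STT/settings.py.
# Recordings saved through Django's storage API land under MEDIA_ROOT.
# Note: Heroku's dyno filesystem is ephemeral, so files written here only
# survive until the dyno restarts; durable storage (e.g. S3) is needed
# for anything that must persist.
MEDIA_ROOT = os.path.join(BASE_DIR, 'media')
MEDIA_URL = '/media/'
# Read credentials from the environment instead of hard-coding them.
WATSON_USERNAME = os.environ.get('WATSON_USERNAME', 'watson-username')
WATSON_PASSWORD = os.environ.get('WATSON_PASSWORD', 'watson-password')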
STT/STT/urls.py:
from django.conf.urls import include, url
from django.contrib import admin
urlpatterns = [
url(r'^admin/', admin.site.urls),
url(r'^record/', include('record.urls')),
]
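If recordings are stored under MEDIA_ROOT as sketched above, the development server also has to serve them. A common pattern, using Django's static() URL helper (it is a no-op when DEBUG is False, so it is safe to leave in):
# Hypothetical additions to STT/STT/urls.py for development.
from django.conf import settings
from django.conf.urls.static import static

# Serve saved recordings at MEDIA_URL while DEBUG is True.
urlpatterns += static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT)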
STT/STT/wsgi.py:
import os
from django.core.wsgi import get_wsgi_application
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "STT.settings")
application = get_wsgi_application()
STT/record/static/record/img/mic128.png: image file
STT/record/static/record/img/save.svg: image file
STT/record/static/record/js/recorderjs/recorder.js:
(function(window){
var WORKER_PATH = '/static/record/js/recorderjs/recorderWorker.js'; // resolved against the page URL, so it must be the served static path, not a source-relative one
var Recorder = function(source, cfg){
var config = cfg || {};
var bufferLen = config.bufferLen || 4096;
this.context = source.context;
if(!this.context.createScriptProcessor){
this.node = this.context.createJavaScriptNode(bufferLen, 2, 2);
} else {
this.node = this.context.createScriptProcessor(bufferLen, 2, 2);
}
var worker = new Worker(config.workerPath || WORKER_PATH);
worker.postMessage({
command: 'init',
config: {
sampleRate: this.context.sampleRate
}
});
var recording = false,
currCallback;
this.node.onaudioprocess = function(e){
if (!recording) return;
worker.postMessage({
command: 'record',
buffer: [
e.inputBuffer.getChannelData(0),
e.inputBuffer.getChannelData(1)
]
});
}
this.configure = function(cfg){
for (var prop in cfg){
if (cfg.hasOwnProperty(prop)){
config[prop] = cfg[prop];
}
}
}
this.record = function(){
recording = true;
}
this.stop = function(){
recording = false;
}
this.clear = function(){
worker.postMessage({ command: 'clear' });
}
this.getBuffers = function(cb) {
currCallback = cb || config.callback;
worker.postMessage({ command: 'getBuffers' })
}
this.exportWAV = function(cb, type){
currCallback = cb || config.callback;
type = type || config.type || 'audio/wav';
if (!currCallback) throw new Error('Callback not set');
worker.postMessage({
command: 'exportWAV',
type: type
});
}
this.exportMonoWAV = function(cb, type){
currCallback = cb || config.callback;
type = type || config.type || 'audio/wav';
if (!currCallback) throw new Error('Callback not set');
worker.postMessage({
command: 'exportMonoWAV',
type: type
});
}
worker.onmessage = function(e){
var blob = e.data;
currCallback(blob);
}
source.connect(this.node);
this.node.connect(this.context.destination); // if the script node is not connected to an output the "onaudioprocess" event is not triggered in chrome.
};
Recorder.setupDownload = function(blob, filename){
var url = (window.URL || window.webkitURL).createObjectURL(blob);
var link = document.getElementById("save");
link.href = url;
link.download = filename || 'output.wav';
}
window.Recorder = Recorder;
})(window);
STT/record/static/record/js/recorderjs/recorderWorker.js:
var recLength = 0,
recBuffersL = [],
recBuffersR = [],
sampleRate;
this.onmessage = function(e){
switch(e.data.command){
case 'init':
init(e.data.config);
break;
case 'record':
record(e.data.buffer);
break;
case 'exportWAV':
exportWAV(e.data.type);
break;
case 'exportMonoWAV':
exportMonoWAV(e.data.type);
break;
case 'getBuffers':
getBuffers();
break;
case 'clear':
clear();
break;
}
};
function init(config){
sampleRate = config.sampleRate;
}
function record(inputBuffer){
recBuffersL.push(inputBuffer[0]);
recBuffersR.push(inputBuffer[1]);
recLength += inputBuffer[0].length;
}
function exportWAV(type){
var bufferL = mergeBuffers(recBuffersL, recLength);
var bufferR = mergeBuffers(recBuffersR, recLength);
var interleaved = interleave(bufferL, bufferR);
var dataview = encodeWAV(interleaved);
var audioBlob = new Blob([dataview], { type: type });
this.postMessage(audioBlob);
}
function exportMonoWAV(type){
var bufferL = mergeBuffers(recBuffersL, recLength);
var dataview = encodeWAV(bufferL, true);
var audioBlob = new Blob([dataview], { type: type });
this.postMessage(audioBlob);
}
function getBuffers() {
var buffers = [];
buffers.push( mergeBuffers(recBuffersL, recLength) );
buffers.push( mergeBuffers(recBuffersR, recLength) );
this.postMessage(buffers);
}
function clear(){
recLength = 0;
recBuffersL = [];
recBuffersR = [];
}
function mergeBuffers(recBuffers, recLength){
var result = new Float32Array(recLength);
var offset = 0;
for (var i = 0; i < recBuffers.length; i++){
result.set(recBuffers[i], offset);
offset += recBuffers[i].length;
}
return result;
}
function interleave(inputL, inputR){
var length = inputL.length + inputR.length;
var result = new Float32Array(length);
var index = 0,
inputIndex = 0;
while (index < length){
result[index++] = inputL[inputIndex];
result[index++] = inputR[inputIndex];
inputIndex++;
}
return result;
}
function floatTo16BitPCM(output, offset, input){
for (var i = 0; i < input.length; i++, offset+=2){
var s = Math.max(-1, Math.min(1, input[i]));
output.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
}
}
function writeString(view, offset, string){
for (var i = 0; i < string.length; i++){
view.setUint8(offset + i, string.charCodeAt(i));
}
}
function encodeWAV(samples, mono){
var buffer = new ArrayBuffer(44 + samples.length * 2);
var view = new DataView(buffer);
/* RIFF identifier */
writeString(view, 0, 'RIFF');
/* file length */
view.setUint32(4, 32 + samples.length * 2, true);
/* RIFF type */
writeString(view, 8, 'WAVE');
/* format chunk identifier */
writeString(view, 12, 'fmt ');
/* format chunk length */
view.setUint32(16, 16, true);
/* sample format (raw) */
view.setUint16(20, 1, true);
/* channel count */
view.setUint16(22, mono?1:2, true);
/* sample rate */
view.setUint32(24, sampleRate, true);
/* byte rate (sample rate * block align) */
view.setUint32(28, sampleRate * 4, true);
/* block align (channel count * bytes per sample) */
view.setUint16(32, 4, true);
/* bits per sample */
view.setUint16(34, 16, true);
/* data chunk identifier */
writeString(view, 36, 'data');
/* data chunk length */
view.setUint32(40, samples.length * 2, true);
floatTo16BitPCM(view, 44, samples);
return view;
}
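encodeWAV() above writes a standard 44-byte RIFF/PCM header in front of the 16-bit samples. A small server-side sketch (standard library only; 'recording.wav' is a placeholder path) that unpacks that header is a quick way to confirm an uploaded file really is the WAV that the audio/wav content type sent to Watson promises:
import struct

def read_wav_header(path):
    # Unpack the 44-byte PCM header laid out by encodeWAV() above.
    with open(path, 'rb') as f:
        header = f.read(44)
    riff, _, wave = struct.unpack('<4sI4s', header[:12])
    (_, _, audio_fmt, channels, sample_rate,
     _, _, bits) = struct.unpack('<4sIHHIIHH', header[12:36])
    _, data_len = struct.unpack('<4sI', header[36:44])
    assert riff == b'RIFF' and wave == b'WAVE' and audio_fmt == 1  # PCM
    return {'channels': channels, 'sample_rate': sample_rate,
            'bits_per_sample': bits, 'data_bytes': data_len}

# read_wav_header('recording.wav')
# -> e.g. {'channels': 2, 'sample_rate': 44100, 'bits_per_sample': 16, ...}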
STT/record/static/record/js/audiodisplay.js:
function drawBuffer( width, height, context, data ) {
var step = Math.ceil( data.length / width );
var amp = height / 2;
context.fillStyle = "silver";
context.clearRect(0,0,width,height);
for(var i=0; i < width; i++){
var min = 1.0;
var max = -1.0;
for (var j = 0; j < step; j++) {  // declare j locally to avoid an implicit global
var datum = data[(i*step)+j];
if (datum < min)
min = datum;
if (datum > max)
max = datum;
}
context.fillRect(i,(1+min)*amp,1,Math.max(1,(max-min)*amp));
}
}
STT/record/static/record/js/main.js:
window.AudioContext = window.AudioContext || window.webkitAudioContext;
var audioContext = new AudioContext();
var audioInput = null,
realAudioInput = null,
inputPoint = null,
audioRecorder = null;
var rafID = null;
var analyserContext = null;
var analyserNode = null;  // assigned in gotStream(); declared here to avoid an implicit global
var zeroGain = null;      // muted gain node that keeps the graph connected to the destination
var canvasWidth, canvasHeight;
var recIndex = 0;
/* TODO:
- offer mono option
- "Monitor input" switch
*/
function saveAudio() {
audioRecorder.exportWAV( doneEncoding );
// could get mono instead by saying
// audioRecorder.exportMonoWAV( doneEncoding );
}
function gotBuffers( buffers ) {
var canvas = document.getElementById( "wavedisplay" );
drawBuffer( canvas.width, canvas.height, canvas.getContext('2d'), buffers[0] );
// the ONLY time gotBuffers is called is right after a new recording is completed -
// so here's where we should set up the download.
audioRecorder.exportWAV( doneEncoding );
}
function doneEncoding( blob ) {
Recorder.setupDownload( blob, "myRecording" + ((recIndex<10)?"0":"") + recIndex + ".wav" );
recIndex++;
}
function toggleRecording( e ) {
if (e.classList.contains("recording")) {
// stop recording
audioRecorder.stop();
e.classList.remove("recording");
audioRecorder.getBuffers( gotBuffers );
} else {
// start recording
if (!audioRecorder)
return;
e.classList.add("recording");
audioRecorder.clear();
audioRecorder.record();
}
}
function convertToMono( input ) {
var splitter = audioContext.createChannelSplitter(2);
var merger = audioContext.createChannelMerger(2);
input.connect( splitter );
splitter.connect( merger, 0, 0 );
splitter.connect( merger, 0, 1 );
return merger;
}
function cancelAnalyserUpdates() {
window.cancelAnimationFrame( rafID );
rafID = null;
}
function updateAnalysers(time) {
if (!analyserContext) {
var canvas = document.getElementById("analyser");
canvasWidth = canvas.width;
canvasHeight = canvas.height;
analyserContext = canvas.getContext('2d');
}
// analyzer draw code here
{
var SPACING = 3;
var BAR_WIDTH = 1;
var numBars = Math.round(canvasWidth / SPACING);
var freqByteData = new Uint8Array(analyserNode.frequencyBinCount);
analyserNode.getByteFrequencyData(freqByteData);
analyserContext.clearRect(0, 0, canvasWidth, canvasHeight);
analyserContext.fillStyle = '#F6D565';
analyserContext.lineCap = 'round';
var multiplier = analyserNode.frequencyBinCount / numBars;
// Draw rectangle for each frequency bin.
for (var i = 0; i < numBars; ++i) {
var magnitude = 0;
var offset = Math.floor( i * multiplier );
// gotta sum/average the block, or we miss narrow-bandwidth spikes
for (var j = 0; j< multiplier; j++)
magnitude += freqByteData[offset + j];
magnitude = magnitude / multiplier;
var magnitude2 = freqByteData[i * multiplier];
analyserContext.fillStyle = "hsl( " + Math.round((i*360)/numBars) + ", 100%, 50%)";
analyserContext.fillRect(i * SPACING, canvasHeight, BAR_WIDTH, -magnitude);
}
}
rafID = window.requestAnimationFrame( updateAnalysers );
}
function toggleMono() {
if (audioInput != realAudioInput) {
audioInput.disconnect();
realAudioInput.disconnect();
audioInput = realAudioInput;
} else {
realAudioInput.disconnect();
audioInput = convertToMono( realAudioInput );
}
audioInput.connect(inputPoint);
}
function gotStream(stream) {
inputPoint = audioContext.createGain();
// Create an AudioNode from the stream.
realAudioInput = audioContext.createMediaStreamSource(stream);
audioInput = realAudioInput;
audioInput.connect(inputPoint);
// audioInput = convertToMono( input );
analyserNode = audioContext.createAnalyser();
analyserNode.fftSize = 2048;
inputPoint.connect( analyserNode );
audioRecorder = new Recorder( inputPoint );
zeroGain = audioContext.createGain();
zeroGain.gain.value = 0.0;
inputPoint.connect( zeroGain );
zeroGain.connect( audioContext.destination );
updateAnalysers();
}
function initAudio() {
if (!navigator.getUserMedia)
navigator.getUserMedia = navigator.webkitGetUserMedia || navigator.mozGetUserMedia;
if (!window.cancelAnimationFrame)
window.cancelAnimationFrame = window.webkitCancelAnimationFrame || window.mozCancelAnimationFrame;
if (!window.requestAnimationFrame)
window.requestAnimationFrame = window.webkitRequestAnimationFrame || window.mozRequestAnimationFrame;
navigator.getUserMedia(
{
"audio": {
"mandatory": {
"googEchoCancellation": "false",
"googAutoGainControl": "false",
"googNoiseSuppression": "false",
"googHighpassFilter": "false"
},
"optional": []
}
}, gotStream, function(e) {
alert('Error getting audio');
console.log(e);
});
}
window.addEventListener('load', initAudio );
STT/record/static/record/style.css:
html { overflow: hidden; }
body {
font: 14pt Arial, sans-serif;
background: lightgrey;
display: flex;
flex-direction: column;
height: 100vh;
width: 100%;
margin: 0 0;
}
canvas {
display: inline-block;
background: #202020;
width: 95%;
height: 45%;
box-shadow: 0 0 10px blue;
}
#controls {
display: flex;
flex-direction: row;
align-items: center;
justify-content: space-around;
height: 20%;
width: 100%;
}
#record { height: 15vh; }
#record.recording {
background: red;
}
#save, #save img { height: 10vh; }
#save { opacity: 0.25;}
#save[download] { opacity: 1;}
#viz {
height: 80%;
width: 100%;
display: flex;
flex-direction: column;
justify-content: space-around;
align-items: center;
}
@media (orientation: landscape) {
body { flex-direction: row;}
#controls { flex-direction: column; height: 100%; width: 10%;}
#viz { height: 100%; width: 90%;}
}
STT/record/templates/record/index.html:
{% load static %}
<!doctype html>
<html>
<head>
<meta name="viewport" content="width=device-width,initial-scale=1">
<title>Audio Recorder</title>
<link rel="stylesheet" href="{% static 'record/style.css' %}"/>
<script src="{% static 'record/js/audiodisplay.js' %}"></script>
<script src="{% static 'record/js/recorderjs/recorder.js' %}"></script>
<script src="{% static 'record/js/main.js' %}"></script>
</head>
<body>
<div id="viz">
<canvas id="analyser" width="1024" height="500"></canvas>
<canvas id="wavedisplay" width="1024" height="500"></canvas>
</div>
<div id="controls">
<img id="record" src="{% static 'record/img/mic128.png' %}" onclick="toggleRecording(this);">
<a id="save" href="#"><img src="{% static 'record/img/save.svg' %}"></a>
</div>
</body>
</html>
STT/record/apps.py:
from django.apps import AppConfig
class RecordConfig(AppConfig):
name = 'record'
STT/record/urls.py:
from django.conf.urls import url
from . import views
urlpatterns = [
url(r'^$', views.index, name='index'),
]
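To open the transcript on a separate page, this URLconf needs a second route. A sketch (the results view name is an assumption, matching the views sketch at the end of this post):
from django.conf.urls import url
from . import views

urlpatterns = [
    url(r'^$', views.index, name='index'),
    # Hypothetical page that receives the recording and shows the transcript.
    url(r'^results/$', views.results, name='results'),
]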
STT/record/views.py:
from django.shortcuts import render
from pydub import AudioSegment
from glob import glob
from math import ceil
from os.path import basename, splitext, exists
import json
import requests
import csv
from STT.settings import WATSON_USERNAME, WATSON_PASSWORD, WATSON_ENDPOINT, WATSON_DEFAULT_PARAMS, \
WATSON_DEFAULT_HEADERS
def index(request):
    if request.method == 'GET':
        return render(request, 'record/index.html')
    # POST
    # via: http://www.propublica.org/podcast/item/how-a-reporter-pierced-the-hype-behind-theranos/
    DOWNLOAD_URL = 'https://api.soundcloud.com/tracks/247345268/download?client_id=cUa40O3Jg3Emvp6Tv4U6ymYYO50NUGpJ'
    AUDIO_FILENAME = 'podcast.mp3'
    AUDIO_SEGMENT_SECONDS = 300
    if not exists(AUDIO_FILENAME):
        print("Downloading from", DOWNLOAD_URL)
        resp = requests.get(DOWNLOAD_URL)
        with open(AUDIO_FILENAME, 'wb') as w:
            w.write(resp.content)
        print("Wrote audio file to", AUDIO_FILENAME)
    # Convert to WAV and split into segments
    audio = AudioSegment.from_mp3(AUDIO_FILENAME)
    xs = 0
    while xs < audio.duration_seconds:
        ys = min(xs + AUDIO_SEGMENT_SECONDS, ceil(audio.duration_seconds))
        fname = str(xs).rjust(5, '0') + '-' + str(ys).rjust(5, '0') + '.wav'
        audio[xs * 1000:ys * 1000].export(fname, format='wav')
        print("Saved", fname)
        xs = ys
    # Transcribe each WAV with Watson
    for fname in glob("*.wav"):
        # Download Watson's response
        tname = splitext(basename(fname))[0] + '.json'
        if exists(tname):
            print("Already transcribed", tname)
        else:
            print("Transcribing", fname)
            with open(fname, 'rb') as r:
                watson_response = requests.post(
                    WATSON_ENDPOINT,
                    data=r,
                    auth=(WATSON_USERNAME, WATSON_PASSWORD),
                    params=WATSON_DEFAULT_PARAMS,
                    headers=WATSON_DEFAULT_HEADERS,
                    stream=False
                )
            with open(tname, 'w') as w:
                w.write(watson_response.text)
            print("Wrote transcript to", tname)
    # Write out the raw transcript and word CSV
    rawfile = open("raw.txt", "w")
    wordsfile = open("words.csv", "w")
    csvfile = csv.writer(wordsfile)
    csvfile.writerow(['word', 'confidence', 'start', 'end'])
    for fname in sorted(glob("*.json")):
        with open(fname, 'r') as f:
            results = json.load(f)['results']
        for linenum, result in enumerate(results):  # each result is a line
            if result.get('alternatives'):  # each result may have many alternatives
                # just pick the best alternative
                lineobj = result.get('alternatives')[0]
                # rawfile.writeline(lineobj['transcript'])
                word_timestamps = lineobj['timestamps']
                if word_timestamps:
                    rawfile.write(lineobj['transcript'] + "\n")
                    word_confidences = lineobj['word_confidence']
                    for idx, wordts in enumerate(word_timestamps):
                        txt, tstart, tend = wordts
                        confidence = round(100 * word_confidences[idx][1])
                        csvfile.writerow([txt, confidence, tstart, tend])
    rawfile.close()
    wordsfile.close()
    # A Django view must return an HttpResponse for POST as well;
    # 'record/results.html' is a template that still needs to be written
    # (see the sketch after the closing question).
    with open("raw.txt") as f:
        transcript = f.read()
    return render(request, 'record/results.html', {'transcript': transcript})
In views.py, I need to be able to print rawfile as text on a new HTML results page, which should open after the user has recorded a message and it has been uploaded to Watson.
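For reference, doneEncoding() in main.js only wires up a client-side download link; nothing is ever POSTed to Django, which is why no file gets saved on the server. Assuming the page is changed to POST the exported WAV blob in a form field named 'audio' (both that field name and the record/results.html template are hypothetical), a sketch of the missing view:
from django.core.files.storage import default_storage
from django.shortcuts import render

def results(request):
    # Save the posted recording under MEDIA_ROOT via Django's storage API,
    # which also resolves filename collisions.
    upload = request.FILES['audio']  # assumed form-field name
    saved_path = default_storage.save('recordings/' + upload.name, upload)
    # ... run the same pydub/Watson steps as in index(), on saved_path
    #     instead of the downloaded podcast ...
    with open('raw.txt') as f:
        transcript = f.read()
    return render(request, 'record/results.html', {'transcript': transcript})
On Heroku, keep in mind that everything written to local disk here (the WAV segments, raw.txt, words.csv) disappears whenever the dyno restarts, so a deployed version would either keep the transcript in the database or session, or push the files to external storage such as S3.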