我正在从Amazon Streaming API获得此响应。有人可以帮我解决一下我在这里做错了什么吗。
b'\ x00 \ x00 \ x00 \ xa3 \ x00 \ x00 \ x00ah \ x10k \ xe1 \ x0f:exception-type \ x07 \ x00 \ x13BadRequestException \ r:content-type \ x07 \ x00 \ x10 \ x10application / json \ r:消息类型\ x07 \ x00 \ texception {“消息”:“收到意外的WebSocket框架。”} \ xbd \ xceK \ x8a'
:消息类型异常{“消息”:“接收到意外的WebSocket框架。”}½κ
我正在使用下面的代码
导入所有库
import asyncio
import websockets
import json
import sys, os, base64, datetime, hashlib, hmac, urllib
import pyaudio
import struct
import numpy as np
import wave
import argparse
import tempfile
import queue
import sys
import sounddevice as sd
import soundfile as sf
import numpy # Make sure NumPy is loaded before it is used in the callback
assert numpy # avoid "imported but unused" message (W0611)
使用Soundevice流音频的代码
def int_or_str(text):
"""Helper function for argument parsing."""
try:
return int(text)
except ValueError:
return text
parser = argparse.ArgumentParser(add_help=False)
parser.add_argument('-l', '--list-devices', action='store_true', help='show list of audio devices and exit')
args, remaining = parser.parse_known_args()
if args.list_devices:
print(sd.query_devices())
parser.exit(0)
parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter, parents=[parser])
parser.add_argument('filename', nargs='?', metavar='FILENAME',help='audio file to store recording to')
parser.add_argument('-d', '--device', type=int_or_str,help='input device (numeric ID or substring)')
parser.add_argument('-r', '--samplerate', type=int, help='sampling rate')
parser.add_argument('-c', '--channels', type=int, default=1, help='number of input channels')
parser.add_argument('-t', '--subtype', type=str, help='sound file subtype (e.g. "PCM_24")')
args = parser.parse_args(remaining)
q = queue.Queue()
def callback(indata, frames, time, status):
"""This is called (from a separate thread) for each audio block."""
if status:
print(status, file=sys.stderr)
q.put(indata.copy())
按照Amazon transcribe的要求创建用于连接的URl
def createPresignedUrl(data):
method = 'GET'
service = 'transcribe'
region = data['region']
host = 'transcribestreaming.' + region + '.amazonaws.com:8443'
endpoint = "wss://" + host
def sign(key, msg):
return hmac.new(key, msg.encode('utf-8'), hashlib.sha256).digest()
def getSignatureKey(key, dateStamp, regionName, serviceName):
kDate = sign(('AWS4' + key).encode('utf-8'), dateStamp)
kRegion = sign(kDate, regionName)
kService = sign(kRegion, serviceName)
kSigning = sign(kService, 'aws4_request')
return kSigning
access_key = data['key']
secret_key = data['secret']
if access_key == '' or secret_key == '':
print('No access key is available.')
sys.exit()
# Create a date for headers and the credential string
t = datetime.datetime.utcnow()
amz_date = t.strftime('%Y%m%dT%H%M%SZ') # Format date as YYYYMMDD'T'HHMMSS'Z'
datestamp = t.strftime('%Y%m%d') # Date w/o time, used in credential scope
canonical_uri = '/stream-transcription-websocket'
canonical_headers = 'host:' + host + '\n'
signed_headers = 'host'
algorithm = 'AWS4-HMAC-SHA256'
credential_scope = datestamp + '/' + region + '/' + service + '/' + 'aws4_request'
canonical_querystring = 'X-Amz-Algorithm=AWS4-HMAC-SHA256'
canonical_querystring += '&X-Amz-Credential=' + urllib.parse.quote_plus(access_key + '/' + credential_scope)
canonical_querystring += '&X-Amz-Date=' + amz_date
canonical_querystring += '&X-Amz-Expires=300'
canonical_querystring += '&X-Amz-SignedHeaders=' + signed_headers
canonical_querystring += '&language-code=' + data['languageCode']
canonical_querystring += '&media-encoding=pcm'
canonical_querystring += '&sample-rate=' + str(data['sampleRate'])
payload_hash = hashlib.sha256(('').encode('utf-8')).hexdigest()
# Step 6: Combine elements to create canonical request
canonical_request = method + '\n' + canonical_uri + '\n' + canonical_querystring + '\n' + canonical_headers + '\n' + signed_headers + '\n' + payload_hash
string_to_sign = algorithm + '\n' + amz_date + '\n' + credential_scope + '\n' + hashlib.sha256(canonical_request.encode('utf-8')).hexdigest()
# Create the signing key
signing_key = getSignatureKey(secret_key, datestamp, region, service)
# Sign the string_to_sign using the signing_key
signature = hmac.new(signing_key, (string_to_sign).encode("utf-8"), hashlib.sha256).hexdigest()
canonical_querystring += '&X-Amz-Signature=' + signature
request_url = endpoint + canonical_uri + "?" + canonical_querystring
return request_url
data = {
'key': 'Add your key',
'secret': 'Add your secret key',
'region': 'us-east-1',
'languageCode': 'en-US',
'sampleRate': 44100
}
进行PCM编码的代码
url = createPresignedUrl(data)
# FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100
CHUNK = 16000
frames = []
# stream = audio.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)
def pcmEncode(in_data):
offset = 0
input_len = len(in_data)
buffer = []
for i in range(input_len):
offset += 2
s = max(-1, min(1, in_data[i]))
b = (s * 32768) if (s < 0) else (s * 32767)
buffer.insert(offset, b)
buffer = np.array(buffer)
return buffer
def downsampleBuffer(buffer, outputSampleRate = 16000) :
if outputSampleRate == RATE :
return buffer
sampleRateRatio = RATE / outputSampleRate
newLength = round( len(buffer) / sampleRateRatio )
result = []
offsetResult = 0
offsetBuffer = 0
while offsetResult < newLength :
nextOffsetBuffer = round((offsetResult + 1) * sampleRateRatio)
accum = 0
count = 0
len_buffer = nextOffsetBuffer if nextOffsetBuffer < len(buffer) else len(buffer)
for i in range( offsetBuffer, len_buffer):
accum += buffer[i]
count += 1
result.append(accum / count)
offsetResult += 1
offsetBuffer = nextOffsetBuffer
return result
与AWS建立连接以进行转录
async def start_stream():
try:
connection = websockets.connect(url)
stream = sd.Stream(samplerate=args.samplerate, blocksize=None, device=args.device,
channels=args.channels, dtype=None, latency=None, extra_settings=None,
callback=None, finished_callback=None, clip_off=None, dither_off=None,
never_drop_input=None, prime_output_buffers_using_stream_callback=None)
stream.start()
while True:
a = stream.read(200)
buffer = downsampleBuffer(a[0])
result = pcmEncode(buffer)
async with connection as ws:
sent_data = {
"headers": {
":message-type": {"type": "string", "value": "event"},
":event-type": {"type": "string", "value": "AudioEvent"},
":content-type": {"type": "string", "value": "application/octet-stream"}
},
"body": str(result)
}
await ws.send(json.dumps(sent_data))
response = await ws.recv()
print(response)
print(response.decode('latin1'))
except KeyboardInterrupt:
parser.exit('\nInterrupted by user')
except Exception as e:
parser.exit(type(e).__name__ + ': ' + str(e))
if status:
parser.exit('Error during playback: ' + str(status))
asyncio.get_event_loop().run_until_complete(start_stream())
stream.stop()
stream.close()
答案 0 :(得分:0)
我现在相信 BadRequestException
指的是没有正确编码帧而不是音频数据错误。我发现您的代码存在一些问题:
您需要以特殊方式对标头/正文进行编码:https://docs.aws.amazon.com/transcribe/latest/dg/event-stream.html
您需要对发送的缓冲区非常谨慎。音频需要是 16 位/无符号 (int)/小端 (See here)。现在,您只是将浮点数(您的麦克风数据是浮点数吗??)转换为 16 位友好的值,但将其存储在位大小由系统决定的缓冲区中(实际上是 32 位或 64 位)并且您正在使用 JSON 字符串编码器对其进行编码。之后不太可能是正确的格式。基本上,您需要一个缓冲区库,让您可以使用指定的位大小(16)和字节序值(小)设置 int。例如,这是我的飞镖代码:
for (var i=0; i<audioChunk.length; i++) {
messageBytes.setInt16(offset, audioChunk[i], Endian.little);
offset += 2;
}
继续进行的最佳方法是编写 AWS 响应所需的解码函数,然后对您的编码帧进行解码,看看结果是否相同。使用像 [-32000, -100, 0, 200 31000] 之类的 audo 测试数据或类似的东西,这样你就可以测试字节顺序等都是正确的。