在Ruby中使用Amazon transcribe流式转录

时间:2019-05-06 04:06:45

标签: ruby-on-rails amazon-web-services rubygems aws-sdk

我希望使用 aws-sdk-transcribestreamingservice gem 从音频文件中获取转录文本。

我已按照他们的说明构建了所有内容,并以正确的格式提供了音频文件。

2.5.3 :375 > input_stream = Aws::TranscribeStreamingService::EventStreams::AudioStream.new
 => #<Aws::TranscribeStreamingService::EventStreams::AudioStream:0x00007fa8ec8fb2d0 @event_emitter=#<Aws::EventEmitter:0x00007fa8ec8fb280 @listeners={}, @validate_event=true, @status=:sleep, @signal_queue=#<Thread::Queue:0x00007fa8ec8fb230>>> 


2.5.3 :376 > output_stream = Aws::TranscribeStreamingService::EventStreams::TranscriptResultStream.new
 => #<Aws::TranscribeStreamingService::EventStreams::TranscriptResultStream:0x00007fa8e9edc800 @event_emitter=#<Aws::EventEmitter:0x00007fa8e9edc710 @listeners={}, @validate_event=true, @status=:sleep, @signal_queue=#<Thread::Queue:0x00007fa8e9edc6c0>>> 

2.5.3 :378 > FILES_TO_APPEND = ["public/podcast.wav"]
(irb):378: warning: already initialized constant FILES_TO_APPEND
(irb):343: warning: previous definition of FILES_TO_APPEND was here
 => ["public/podcast.wav"] 

2.5.3 :380 > Writer.new("public/append.wav", Format.new(:mono, :pcm_16, 16000)) do |writer|
2.5.3 :381 >       FILES_TO_APPEND.each do |file_name|
2.5.3 :382 >           Reader.new(file_name).each_buffer do |buffer|
2.5.3 :383 >               writer.write(buffer)
2.5.3 :384?>           end
2.5.3 :385?>       end
2.5.3 :386?>   end

 => #<WaveFile::Writer:0x00007fa8eca6a508 @io=#<File:public/append.wav (closed)>, @io_source=:file_name, @start_pos=0, @format=#<WaveFile::Format:0x00007fa8eca6a6c0 @channels=1, @sample_format=:pcm, @bits_per_sample=16, @sample_rate=16000, @block_align=2, @byte_rate=32000, @speaker_mapping=[:front_center]>, @closed=true, @total_sample_frames=549996, @pack_code="s<*"> 


2.5.3 :388 > output_stream.on_transcript_event_event do |event|
2.5.3 :389 >       unless event.transcript.results.empty?
2.5.3 :390?>         event.transcript.results.each do |result|
2.5.3 :391 >               result.alternatives.each {|alter| puts alter.transcript.inspect }
2.5.3 :392?>           end
2.5.3 :393?>       end
2.5.3 :394?>   end
 => [#<Proc:0x00007fa8eed0b710@(irb):388>] 

2.5.3 :395 > output_stream.on_bad_request_exception_event do |exception|
2.5.3 :396 >       input_stream.signal_end_stream
2.5.3 :397?>   end
 => [#<Proc:0x00007fa8ee879990@(irb):395>] 


2.5.3 :399 > async_resp = async_client.start_stream_transcription(
2.5.3 :400 >         language_code: "en-US",
2.5.3 :401 >         media_encoding: "pcm",
2.5.3 :402 >         media_sample_rate_hertz: 16000,
2.5.3 :403 >         input_event_stream_handler: input_stream,
2.5.3 :404 >         output_event_stream_handler: output_stream,
2.5.3 :405 >     
2.5.3 :406 >     )
 => #<Seahorse::Client::AsyncResponse:0x00007fa8eda93c40 @response=nil, @stream=#<HTTP2::Stream:0x00007fa8e9f147f0 @connection=#<HTTP2::Client:0x00007fa8ee9bf070 @stream_id=3, @state=:connected, @local_role=:client, @remote_role=:server, @local_settings={:settings_header_table_size=>4096, :settings_enable_push=>1, :settings_max_concurrent_streams=>100, :settings_initial_window_size=>65535, :settings_max_frame_size=>16384, :settings_max_header_list_size=>2147483647}, @remote_settings={:settings_header_table_size=>4096, :settings_enable_push=>1, :settings_max_concurrent_streams=>2147483647, :settings_initial_window_size=>65535, :settings_max_frame_size=>16384, :settings_max_header_list_size=>2147483647}, @compressor=#<HTTP2::Header::Compressor:0x00007fa8ee9bebe8 @cc=#<HTTP2::Header::EncodingContext:0x00007fa8ee9beb48 @table=[["content-length", "0"], ["authorization", "AWS4-HMAC-SHA256 Credential=XXXXXXXXXXXXXXXXX/20190506/us-west-2/transcribe/aws4_request, SignedHeaders=amz-sdk-invocation-id;host;x-amz-content-sha256;x-amz-date;x-amzn-transcribe-language-code;x-amzn-transcribe-media-encoding;x-amzn-transcribe-sample-rate, Signature=XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"], ["x-amz-content-sha256", "XXXXXXXXXXXXXXXXXXXXXXXXXXX"], ["x-amz-date", "20190506T024556Z"], ["host", "transcribestreaming.us-west-2.amazonaws.com"], ["x-amzn-transcribe-media-encoding", "pcm"], ["x-amzn-transcribe-sample-rate", "16000"], ["x-amzn-transcribe-language-code", "en-US"], ["user-agent", "aws-sdk-ruby3/3.48.3 ruby/2.5.3 x86_64-darwin17 aws-sdk-transcribestreamingservice/1.1.0"], ["amz-sdk-invocation-id", "a7191725-cc2e-4a12-8417-4758c4e22d35"], [":path", "/stream-transcription"]], @options={:huffman=>:shorter, :index=>:all, :table_size=>4096, :settings_max_concurrent_streams=>100}, @limit=4096>>, @decompressor=#<HTTP2::Header::Decompressor:0x00007fa8ee9be9e0 @cc=#<HTTP2::Header::EncodingContext:0x00007fa8ee9be968 @table=[], @options={:huffman=>:shorter, :index=>:all, :table_size=>4096, 
:settings_max_concurrent_streams=>100}, @limit=4096>>, @active_stream_count=1, @streams={1=>#<HTTP2::Stream:0x00007fa8e9f147f0 ...>}, @streams_recently_closed={}, @pending_settings=[[[:settings_max_concurrent_streams, 100]], [[:settings_max_concurrent_streams, 100]]], @framer=#<HTTP2::Framer:0x00007fa8ee9be788 @max_frame_size=16384>, @local_window_limit=65535, @local_window=65535, @remote_window_limit=65535, @remote_window=65535, @recv_buffer="", @send_buffer=[], @continuation=[], @error=nil, @h2c_upgrade=nil, @closed_since=nil, @listeners={:frame=>[#<Proc:0x00007fa8e9f03068@/Users/etiennelandro/.rvm/gems/ruby-2.5.3/gems/aws-sdk-core-3.48.3/lib/seahorse/client/h2/connection.rb:169>], :frame_sent=>[#<Proc:0x00007fa8e9f03018@/Users/etiennelandro/.rvm/gems/ruby-2.5.3/gems/aws-sdk-core-3.48.3/lib/seahorse/client/h2/connection.rb:179>], :frame_received=>[#<Proc:0x00007fa8e9f02fc8@/Users/etiennelandro/.rvm/gems/ruby-2.5.3/gems/aws-sdk-core-3.48.3/lib/seahorse/client/h2/connection.rb:182>]}>, @id=1, @weight=16, @dependency=0, @listeners={:priority=>[], :window=>[#<Proc:0x00007fa8e9f14480@/Users/etiennelandro/.rvm/gems/ruby-2.5.3/gems/http-2-0.10.1/lib/http/2/stream.rb:90>], :local_window=>[#<Proc:0x00007fa8e9f14430@/Users/etiennelandro/.rvm/gems/ruby-2.5.3/gems/http-2-0.10.1/lib/http/2/stream.rb:91>], :active=>[], :close=>[#<Proc:0x00007fa8e9f142c8@/Users/etiennelandro/.rvm/gems/ruby-2.5.3/gems/http-2-0.10.1/lib/http/2/emitter.rb:21>, #<Proc:0x00007fa8e9f02e88@/Users/etiennelandro/.rvm/gems/ruby-2.5.3/gems/aws-sdk-core-3.48.3/lib/seahorse/client/h2/handler.rb:93>], :frame=>[#<Proc:0x00007fa8e9f14278 (lambda)>], :headers=>[#<Proc:0x00007fa8e9f02f50@/Users/etiennelandro/.rvm/gems/ruby-2.5.3/gems/aws-sdk-core-3.48.3/lib/seahorse/client/h2/handler.rb:85>], :data=>[#<Proc:0x00007fa8e9f02f00@/Users/etiennelandro/.rvm/gems/ruby-2.5.3/gems/aws-sdk-core-3.48.3/lib/seahorse/client/h2/handler.rb:89>]}, @local_window_max_size=65535, @local_window=65535, @remote_window=65535, 
@parent=nil, @state=:open, @error=false, @closed=false, @send_buffer=[]>, @stream_mutex=#<Thread::Mutex:0x00007fa8e9f141d8>, @close_condition=#<Thread::ConditionVariable:0x00007fa8e9f141b0>, @sync_queue=#<Thread::Queue:0x00007fa8e9f14188>> 

我不知道这里发生了什么,转录过程成功了吗?(返回值的第一行显示 @response=nil,因此我认为没有成功,但我对这个流程并不熟悉。)如何获取实际的转录文本?有人熟悉这个 gem 吗?谢谢

1 个答案:

答案 0 :(得分:0)

库中的错误处理使调试变得很困难。确保您正确认证。以下代码对我有用:

require 'aws-sdk'

# Register static credentials in the SDK's global configuration.
# 'key' and 'secret' are placeholders for a real access key ID and secret key.
static_credentials = Aws::Credentials.new('key', 'secret')
Aws.config.update(credentials: static_credentials)

# Event-stream handlers: input_stream is used below to send audio events,
# output_stream is where the result callbacks are registered.
input_stream = Aws::TranscribeStreamingService::EventStreams::AudioStream.new
output_stream = Aws::TranscribeStreamingService::EventStreams::TranscriptResultStream.new

# Asynchronous client for the streaming transcription API, using us-east-1.
async_client = Aws::TranscribeStreamingService::AsyncClient.new(region: 'us-east-1')

# Register callbacks
# Print every alternative transcript carried by each incoming transcript event.
# Events without any results are skipped.
output_stream.on_transcript_event_event do |event|
  results = event.transcript.results
  next if results.empty?

  results.each do |result|
    result.alternatives.each do |alternative|
      puts alternative.transcript.inspect
    end
  end
end

# When the service rejects the request, report why on stderr and then end the
# input stream. Without the message the failure is invisible to the caller,
# which makes problems such as bad credentials or a wrong media format hard
# to diagnose.
output_stream.on_bad_request_exception_event do |exception|
  warn "Transcribe bad request: #{exception.message}"
  input_stream.signal_end_stream
end

# Stream the audio file to the transcription service in fixed-size chunks.
# Transcripts are printed by the callbacks registered on output_stream.
audio_file = File.new('./audio_files/some-mono_16k.wav', 'rb')

begin
  async_resp = async_client.start_stream_transcription(
    language_code: 'es-US',
    media_encoding: 'pcm',
    media_sample_rate_hertz: 16_000,
    input_event_stream_handler: input_stream,
    output_event_stream_handler: output_stream
  )

  begin
    # Send 5000 bytes per second until the file is exhausted.
    # NOTE(review): assuming 16-bit mono PCM at 16 kHz this is slower than
    # real time (~32,000 bytes/s) — confirm the intended pacing.
    until audio_file.eof?
      input_stream.signal_audio_event_event(audio_chunk: audio_file.read(5000))
      sleep(1)
    end
    # Short pause after the final chunk before signalling end-of-stream.
    sleep(0.5)
  ensure
    # Always signal end-of-stream once the request has started, even if
    # sending a chunk raised, so the request is not left open.
    input_stream.signal_end_stream
  end
ensure
  # Release the file handle on both the success and the failure path.
  audio_file.close
end

# Wait for the asynchronous request to finish and keep its final response.
sync_resp = async_resp.wait