RTMP实时转录

时间:2019-08-02 12:33:48

标签: speech-to-text google-cloud-speech

我想使用Google语音将实时rtmp流转录为文本。 在Google代码中,麦克风是音频流的输入源,但在这里我想使用rtmp而不是麦克风。

我正在使用xuggler读取字节数组并将其存储在sharedQueue中。

但是我的代码由于以下异常而失败。

  

io.grpc.StatusRuntimeException: CANCELLED: 无法读取消息。

public class DataLoader2 implements Runnable {

    // Scratch state retained from an earlier decoded-samples buffering approach;
    // kept (unused) so any external references to DataLoader2.data etc. still compile.
    static ArrayList<Byte> data = new ArrayList<Byte>();
    static byte[] audioChunk = new byte[1150];
    static ByteBuffer buff;

    /**
     * Opens the RTMP stream at {@code rtmpSourceUrl} with Xuggler and pushes a
     * fresh copy of every packet's payload onto {@code SpeechToText.sharedQueue}.
     * Blocks until the stream ends (i.e. {@code readPacket()} returns non-null).
     *
     * @param rtmpSourceUrl the RTMP URL to read audio packets from
     */
    private static void extractAudio(String rtmpSourceUrl) {
        IMediaReader mediaReader = ToolFactory.makeReader(rtmpSourceUrl);

        mediaReader.addListener(new MediaToolAdapter() {

            private IContainer container;

            @Override
            public void onReadPacket(IReadPacketEvent event) {
                ByteBuffer packetData = event.getPacket().getByteBuffer();
                // FIX: the original read a fixed 1150 bytes into one shared static
                // array. Packets smaller than 1150 bytes threw
                // BufferUnderflowException, and because the SAME array instance was
                // enqueued every time, each new packet overwrote chunks still
                // waiting in the queue. Copy exactly what the packet holds into a
                // fresh array per packet instead.
                byte[] chunk = new byte[packetData.remaining()];
                packetData.get(chunk);
                try {
                    SpeechToText.sharedQueue.put(chunk);
                } catch (InterruptedException e) {
                    // FIX: do not swallow interruption; restore the flag so the
                    // reader thread can observe and honor it.
                    Thread.currentThread().interrupt();
                }
            }

            @Override
            public void onOpenCoder(IOpenCoderEvent event) {
                buff = ByteBuffer.wrap(audioChunk);
                container = event.getSource().getContainer();
            }

            @Override
            public void onAudioSamples(IAudioSamplesEvent event) {
                // Intentionally empty: raw packets are forwarded from
                // onReadPacket rather than from decoded audio samples.
            }

            @Override
            public void onClose(ICloseEvent event) {
                // Nothing to clean up; the consumer drains any remaining chunks.
            }
        });
        // readPacket() returns null while more data is available; a non-null
        // IError signals end-of-stream or a read failure.
        while (mediaReader.readPacket() == null) {
        }
    }

    @Override
    public void run() {
        String rtmpSourceUrl = "rtmp://localhost:1935/livewowza/xyz";
        extractAudio(rtmpSourceUrl);
    }
}

公共类SpeechToText {

private static final int STREAMING_LIMIT = 10000; // 10 seconds

public static final String RED = "\033[0;31m";
public static final String GREEN = "\033[0;32m";
public static final String YELLOW = "\033[0;33m";

// Creating shared object
public static volatile BlockingQueue<byte[]> sharedQueue = new LinkedBlockingQueue();
private static TargetDataLine targetDataLine;
private static int BYTES_PER_BUFFER = 6400; // buffer size in bytes

private static int restartCounter = 0;
private static ArrayList<ByteString> audioInput = new ArrayList<ByteString>();
private static ArrayList<ByteString> lastAudioInput = new ArrayList<ByteString>();
private static int resultEndTimeInMS = 0;
private static int isFinalEndTime = 0;
private static int finalRequestEndTime = 0;
private static boolean newStream = true;
private static double bridgingOffset = 0;
private static boolean lastTranscriptWasFinal = false;
private static StreamController referenceToStreamController;
private static ByteString tempByteString;

private static void start() {
    ResponseObserver<StreamingRecognizeResponse> responseObserver = null;
    try (SpeechClient client = SpeechClient.create()) {

        ClientStream<StreamingRecognizeRequest> clientStream;

        responseObserver = new ResponseObserver<StreamingRecognizeResponse>() {

            ArrayList<StreamingRecognizeResponse> responses = new ArrayList<>();

            @Override
            public void onComplete() {
                System.out.println("!!!!!!!!!!!!!!!!!!!!!");

            }

            @Override
            public void onError(Throwable arg0) {
                System.out.println(arg0.getMessage());

            }

            @Override
            public void onResponse(StreamingRecognizeResponse response) {
                System.out.println("Inside onResponse ------------");
                responses.add(response);

                StreamingRecognitionResult result = response.getResultsList().get(0);

                Duration resultEndTime = result.getResultEndTime();

                resultEndTimeInMS = (int) ((resultEndTime.getSeconds() * 1000)
                        + (resultEndTime.getNanos() / 1000000));

                double correctedTime = resultEndTimeInMS - bridgingOffset + (STREAMING_LIMIT * restartCounter);
                DecimalFormat format = new DecimalFormat("0.#");

                SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
                if (result.getIsFinal()) {
                    System.out.print(GREEN);
                    System.out.print("\033[2K\r");
                    System.out.printf("%s: %s\n", format.format(correctedTime), alternative.getTranscript());

                    isFinalEndTime = resultEndTimeInMS;
                    lastTranscriptWasFinal = true;
                } else {
                    System.out.print(RED);
                    System.out.print("\033[2K\r");
                    System.out.printf("%s: %s", format.format(correctedTime), alternative.getTranscript());

                    lastTranscriptWasFinal = false;
                }

            }

            @Override
            public void onStart(StreamController controller) {
                referenceToStreamController = controller;

            }
        };
        clientStream = client.streamingRecognizeCallable().splitCall(responseObserver);
        RecognitionConfig recognitionConfig = RecognitionConfig.newBuilder()
                .setEncoding(RecognitionConfig.AudioEncoding.LINEAR16).setLanguageCode("en-US")
                .setSampleRateHertz(16000)
                .build();

        StreamingRecognitionConfig streamingRecognitionConfig = StreamingRecognitionConfig.newBuilder()
                .setConfig(recognitionConfig).setInterimResults(true).build();

        StreamingRecognizeRequest request = StreamingRecognizeRequest.newBuilder()
                .setStreamingConfig(streamingRecognitionConfig).build(); // The first request in a streaming call
                                                                            // has to be a config

        clientStream.send(request);
        System.out.println("Configuration request sent");
        long startTime = System.currentTimeMillis();

        while (true) {
            Thread.sleep(5000);
            long estimatedTime = System.currentTimeMillis() - startTime;

            if (estimatedTime >= STREAMING_LIMIT) {


                  clientStream.closeSend(); referenceToStreamController.cancel(); // remove

                  if (resultEndTimeInMS > 0) { finalRequestEndTime = isFinalEndTime; }
                  resultEndTimeInMS = 0;

                  lastAudioInput = null; lastAudioInput = audioInput; audioInput = new
                  ArrayList<ByteString>();

                  restartCounter++;

                  if (!lastTranscriptWasFinal) { System.out.print('\n'); }

                  newStream = true;

                  clientStream =
                  client.streamingRecognizeCallable().splitCall(responseObserver);

                  request = StreamingRecognizeRequest.newBuilder().setStreamingConfig(
                  streamingRecognitionConfig) .build();

                  System.out.println(YELLOW); System.out.printf("%d: RESTARTING REQUEST\n",
                  restartCounter * STREAMING_LIMIT);

                  startTime = System.currentTimeMillis();


            } else {

                if ((newStream) && (lastAudioInput.size() > 0)) {
                    // if this is the first audio from a new request
                    // calculate amount of unfinalized audio from last request
                    // resend the audio to the speech client before incoming audio
                    double chunkTime = STREAMING_LIMIT / lastAudioInput.size();
                    // ms length of each chunk in previous request audio arrayList
                    if (chunkTime != 0) {
                        if (bridgingOffset < 0) {
                            // bridging Offset accounts for time of resent audio
                            // calculated from last request
                            bridgingOffset = 0;
                        }
                        if (bridgingOffset > finalRequestEndTime) {
                            bridgingOffset = finalRequestEndTime;
                        }
                        int chunksFromMS = (int) Math.floor((finalRequestEndTime - bridgingOffset) / chunkTime);
                        // chunks from MS is number of chunks to resend
                        bridgingOffset = (int) Math.floor((lastAudioInput.size() - chunksFromMS) * chunkTime);
                        // set bridging offset for next request
                        for (int i = chunksFromMS; i < lastAudioInput.size(); i++) {

                            request = StreamingRecognizeRequest.newBuilder().setAudioContent(lastAudioInput.get(i))
                                    .build();
                            clientStream.send(request);
                        }
                    }
                    newStream = false;
                }


                tempByteString = ByteString.copyFrom(sharedQueue.take());



                request = StreamingRecognizeRequest.newBuilder().setAudioContent(tempByteString).build();

                audioInput.add(tempByteString);



            }

            clientStream.send(request);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}

public static void main(String args[]) {
    DataLoader2 dataLoader = new DataLoader2();
    Thread t = new Thread(dataLoader);
    t.start();
    SpeechToText.start();

}

}

用于PCM编码的FFmpeg命令。

ffmpeg -i rtmp://localhost:1935/liveapp/abc -c:a pcm_s16le -ac 1 -ar 16000 -f flv rtmp://localhost:1935/livewowza/xyz

0 个答案:

没有答案