I want to use Google Speech to transcribe a live RTMP stream to text. In Google's sample code the microphone is the input source for the audio stream, but here I want to use RTMP instead of the microphone.
I am using Xuggler to read byte arrays from the stream and store them in a sharedQueue.
However, my code fails with the following exception:
io.grpc.StatusRuntimeException: CANCELLED: Failed to read message.
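For reference, the hand-off between the two classes below is a plain producer/consumer pattern around a BlockingQueue. This is only a stripped-down sketch of that flow, not a fix; the class name PipelineSketch is just for illustration:

import com.google.protobuf.ByteString;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;

// Minimal sketch of the hand-off used below: DataLoader2 produces raw audio
// chunks from the RTMP stream, SpeechToText consumes them and wraps each one
// into the audioContent of a StreamingRecognizeRequest.
public class PipelineSketch {

    static final BlockingQueue<byte[]> sharedQueue = new LinkedBlockingQueue<>();

    public static void main(String[] args) throws InterruptedException {
        // Producer side (DataLoader2): chunks come from Xuggler's packet/sample events.
        byte[] audioChunk = new byte[6400];
        sharedQueue.put(audioChunk);

        // Consumer side (SpeechToText): each chunk becomes the payload of one request.
        ByteString content = ByteString.copyFrom(sharedQueue.take());
        System.out.println("Chunk of " + content.size() + " bytes ready to send");
    }
}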
public class DataLoader2 implements Runnable {

    static ArrayList<Byte> data = new ArrayList<Byte>();
    static byte[] audioChunk = new byte[1150];
    static ByteBuffer buff;

    private static void extractAudio(String rtmpSourceUrl) {
        IMediaReader mediaReader = ToolFactory.makeReader(rtmpSourceUrl);
        mediaReader.addListener(new MediaToolAdapter() {

            private IContainer container;

            @Override
            public void onReadPacket(IReadPacketEvent event) {
                event.getPacket().getByteBuffer().get(audioChunk);
                try {
                    SpeechToText.sharedQueue.put(audioChunk);
                } catch (InterruptedException e) {
                }
            }

            @Override
            public void onOpenCoder(IOpenCoderEvent event) {
                buff = ByteBuffer.wrap(audioChunk);
                container = event.getSource().getContainer();
            }

            @Override
            public void onAudioSamples(IAudioSamplesEvent event) {
                /*
                 * if (DataLoader2.data.size() < 6400) {
                 *     DataLoader2.data.add(event.getMediaData().getByteBuffer().get());
                 * } else {
                 *     for (byte audio : DataLoader2.data) { buff.put(audio); }
                 *     byte[] combined = buff.array();
                 *     try {
                 *         SpeechToText.sharedQueue.put(combined);
                 *     } catch (InterruptedException e) {
                 *         e.printStackTrace();
                 *     }
                 *     DataLoader2.data.clear();
                 *     buff.clear();
                 *     buff = ByteBuffer.wrap(audioChunk);
                 * }
                 */
                // System.out.println("Event:" + event.getMediaData().getByteBuffer().get());
                // SpeechToText.sharedQueue.put(event.getMediaData().getByteBuffer().get());
            }

            @Override
            public void onClose(ICloseEvent event) {
            }
        });

        while (mediaReader.readPacket() == null) {
        }
    }

    @Override
    public void run() {
        String rtmpSourceUrl = "rtmp://localhost:1935/livewowza/xyz";
        extractAudio(rtmpSourceUrl);
    }
}
public class SpeechToText {

    private static final int STREAMING_LIMIT = 10000; // 10 seconds

    public static final String RED = "\033[0;31m";
    public static final String GREEN = "\033[0;32m";
    public static final String YELLOW = "\033[0;33m";

    // Shared object between the reader thread (DataLoader2) and the Speech client
    public static volatile BlockingQueue<byte[]> sharedQueue = new LinkedBlockingQueue<byte[]>();

    private static TargetDataLine targetDataLine;
    private static int BYTES_PER_BUFFER = 6400; // buffer size in bytes

    private static int restartCounter = 0;
    private static ArrayList<ByteString> audioInput = new ArrayList<ByteString>();
    private static ArrayList<ByteString> lastAudioInput = new ArrayList<ByteString>();
    private static int resultEndTimeInMS = 0;
    private static int isFinalEndTime = 0;
    private static int finalRequestEndTime = 0;
    private static boolean newStream = true;
    private static double bridgingOffset = 0;
    private static boolean lastTranscriptWasFinal = false;
    private static StreamController referenceToStreamController;
    private static ByteString tempByteString;
    private static void start() {

        ResponseObserver<StreamingRecognizeResponse> responseObserver = null;

        try (SpeechClient client = SpeechClient.create()) {

            ClientStream<StreamingRecognizeRequest> clientStream;

            responseObserver = new ResponseObserver<StreamingRecognizeResponse>() {

                ArrayList<StreamingRecognizeResponse> responses = new ArrayList<>();

                @Override
                public void onComplete() {
                    System.out.println("!!!!!!!!!!!!!!!!!!!!!");
                }

                @Override
                public void onError(Throwable arg0) {
                    System.out.println(arg0.getMessage());
                }

                @Override
                public void onResponse(StreamingRecognizeResponse response) {
                    System.out.println("Inside onResponse ------------");
                    responses.add(response);
                    StreamingRecognitionResult result = response.getResultsList().get(0);
                    Duration resultEndTime = result.getResultEndTime();
                    resultEndTimeInMS = (int) ((resultEndTime.getSeconds() * 1000)
                            + (resultEndTime.getNanos() / 1000000));
                    double correctedTime = resultEndTimeInMS - bridgingOffset
                            + (STREAMING_LIMIT * restartCounter);
                    DecimalFormat format = new DecimalFormat("0.#");
                    SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
                    if (result.getIsFinal()) {
                        System.out.print(GREEN);
                        System.out.print("\033[2K\r");
                        System.out.printf("%s: %s\n", format.format(correctedTime), alternative.getTranscript());
                        isFinalEndTime = resultEndTimeInMS;
                        lastTranscriptWasFinal = true;
                    } else {
                        System.out.print(RED);
                        System.out.print("\033[2K\r");
                        System.out.printf("%s: %s", format.format(correctedTime), alternative.getTranscript());
                        lastTranscriptWasFinal = false;
                    }
                }

                @Override
                public void onStart(StreamController controller) {
                    referenceToStreamController = controller;
                }
            };

            clientStream = client.streamingRecognizeCallable().splitCall(responseObserver);

            RecognitionConfig recognitionConfig = RecognitionConfig.newBuilder()
                    .setEncoding(RecognitionConfig.AudioEncoding.LINEAR16)
                    .setLanguageCode("en-US")
                    .setSampleRateHertz(16000)
                    .build();
            StreamingRecognitionConfig streamingRecognitionConfig = StreamingRecognitionConfig.newBuilder()
                    .setConfig(recognitionConfig)
                    .setInterimResults(true)
                    .build();

            // The first request in a streaming call has to be a config
            StreamingRecognizeRequest request = StreamingRecognizeRequest.newBuilder()
                    .setStreamingConfig(streamingRecognitionConfig)
                    .build();
            clientStream.send(request);
            System.out.println("Configuration request sent");

            long startTime = System.currentTimeMillis();
            while (true) {

                Thread.sleep(5000);
                long estimatedTime = System.currentTimeMillis() - startTime;

                if (estimatedTime >= STREAMING_LIMIT) {

                    clientStream.closeSend();
                    referenceToStreamController.cancel(); // remove

                    if (resultEndTimeInMS > 0) {
                        finalRequestEndTime = isFinalEndTime;
                    }
                    resultEndTimeInMS = 0;

                    lastAudioInput = null;
                    lastAudioInput = audioInput;
                    audioInput = new ArrayList<ByteString>();

                    restartCounter++;

                    if (!lastTranscriptWasFinal) {
                        System.out.print('\n');
                    }

                    newStream = true;

                    clientStream = client.streamingRecognizeCallable().splitCall(responseObserver);
                    request = StreamingRecognizeRequest.newBuilder()
                            .setStreamingConfig(streamingRecognitionConfig)
                            .build();

                    System.out.println(YELLOW);
                    System.out.printf("%d: RESTARTING REQUEST\n", restartCounter * STREAMING_LIMIT);

                    startTime = System.currentTimeMillis();

                } else {

                    if ((newStream) && (lastAudioInput.size() > 0)) {
                        // if this is the first audio from a new request
                        // calculate amount of unfinalized audio from last request
                        // resend the audio to the speech client before incoming audio
                        double chunkTime = STREAMING_LIMIT / lastAudioInput.size();
                        // ms length of each chunk in previous request audio arrayList
                        if (chunkTime != 0) {
                            if (bridgingOffset < 0) {
                                // bridging Offset accounts for time of resent audio
                                // calculated from last request
                                bridgingOffset = 0;
                            }
                            if (bridgingOffset > finalRequestEndTime) {
                                bridgingOffset = finalRequestEndTime;
                            }
                            int chunksFromMS = (int) Math.floor((finalRequestEndTime - bridgingOffset) / chunkTime);
                            // chunks from MS is number of chunks to resend
                            bridgingOffset = (int) Math.floor((lastAudioInput.size() - chunksFromMS) * chunkTime);
                            // set bridging offset for next request
                            for (int i = chunksFromMS; i < lastAudioInput.size(); i++) {
                                request = StreamingRecognizeRequest.newBuilder()
                                        .setAudioContent(lastAudioInput.get(i))
                                        .build();
                                clientStream.send(request);
                            }
                        }
                        newStream = false;
                    }

                    tempByteString = ByteString.copyFrom(sharedQueue.take());
                    request = StreamingRecognizeRequest.newBuilder().setAudioContent(tempByteString).build();
                    audioInput.add(tempByteString);
                }

                clientStream.send(request);
            }

        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    public static void main(String args[]) {
        DataLoader2 dataLoader = new DataLoader2();
        Thread t = new Thread(dataLoader);
        t.start();
        SpeechToText.start();
    }
}
FFmpeg command I use to re-encode the stream to PCM:
ffmpeg -i rtmp://localhost:1935/liveapp/abc -c:a pcm_s16le -ac 1 -ar 16000 -f flv rtmp://localhost:1935/livewowza/xyz
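For reference, the byte-rate arithmetic behind the buffer sizes, assuming the re-streamed audio really is what the command above asks for (pcm_s16le, 1 channel, 16000 Hz) and what the RecognitionConfig expects (LINEAR16 at 16000 Hz). The class name PcmByteMath is only for illustration:

public class PcmByteMath {
    public static void main(String[] args) {
        int sampleRateHertz = 16000;
        int bytesPerSample = 2; // s16le
        int channels = 1;       // mono
        int bytesPerSecond = sampleRateHertz * bytesPerSample * channels; // 32000

        // BYTES_PER_BUFFER = 6400 bytes -> 200 ms of audio per chunk
        System.out.println(6400.0 / bytesPerSecond * 1000 + " ms");
        // the 1150-byte audioChunk would cover only ~36 ms *if* it held raw PCM
        System.out.println(1150.0 / bytesPerSecond * 1000 + " ms");
    }
}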