有没有办法获得句子的时间戳而不是从Google Speech API收到的文字记录中的单词?我目前正在实现以下代码。
#this python file deals with providing word offsets for seek operations
def transcribe_gcs(gcs_uri):
"""Asynchronously transcribes the audio file specified by the gcs_uri."""
from google.cloud import speech
from google.cloud.speech import enums
from google.cloud.speech import types
client = speech.SpeechClient()
audio = types.RecognitionAudio(uri=gcs_uri)
config = types.RecognitionConfig(
encoding=enums.RecognitionConfig.AudioEncoding.FLAC,
sample_rate_hertz=44100,
language_code='en-US',
enable_word_time_offsets=True)
operation = client.long_running_recognize(config, audio)
print('Waiting for operation to complete...')
result= operation.result(timeout=6000)
# Each result is for a consecutive portion of the audio. Iterate through
# them to get the transcripts for the entire audio file.
for result in result.results:
alternative = result.alternatives[0]
print('Transcript: {}'.format(alternative.transcript))
print('Confidence: {}'.format(alternative.confidence))
for word_info in alternative.words:
word = word_info.word
start_time = word_info.start_time
end_time = word_info.end_time
print('Word: {}, start_time: {}, end_time: {}'.format(word,start_time.seconds + start_time.nanos * 1e-9,end_time.seconds + end_time.nanos * 1e-9))
if __name__ == '__main__':
gcs_uri="gs://speechmldemo/DirtyAudioExample.flac"
transcribe_gcs(gcs_uri)r code here