有人知道如何通过使用Google语音文本异步API将wav或MP3格式的长语音转换为文本吗?我正在使用下面的代码,但是在发现upload_blob()函数时总是会出错。
谢谢。
from pydub import AudioSegment
import io
import os
from google.cloud import speech
from google.cloud.speech import enums
from google.cloud.speech import types
import wave
from google.cloud import storage
filepath = "/home/marte/Documentos/Python_Scripts/Law/"
output_filepath = "/home/marte/Documentos/Python_Scripts/Law/"
bucketname = "callsaudiofiles"
def frame_rate_channel(audio_file_name):
with wave.open(audio_file_name, "rb") as wave_file:
frame_rate = wave_file.getframerate()
channels = wave_file.getnchannels()
return frame_rate,channels
def google_transcribe(audio_file_name):
file_name = filepath + audio_file_name
frame_rate, channels = frame_rate_channel(audio_file_name)
bucket_name = bucketname
source_file_name = filepath + audio_file_name
destination_blob_name = audio_file_name
upload_blob(bucket_name, source_file_name, destination_blob_name)
gcs_uri = 'gs://' + bucketname + '/' + audio_file_name
transcript = ''
client = speech.SpeechClient()
audio = types.RecognitionAudio(uri=gcs_uri)
config = types.RecognitionConfig(
encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
sample_rate_hertz=frame_rate,
language_code='es-CO')
operation = client.long_running_recognize(config, audio)
response = operation.result(timeout=10000)
for result in response.results:
transcript += result.alternatives[0].transcript
delete_blob(bucket_name, destination_blob_name)
return transcript
def write_transcripts(transcript_filename,transcript):
f= open(output_filepath + transcript_filename,"w+")
f.write(transcript)
f.close()
sentencia = google_transcribe("onu-santos.wav")
write_transcripts("onu-santos.txt",sentencia)
答案 0 :(得分:0)
我终于设法通过在我的Google Cloud Storage帐户中设置一个新的“段”并通过以下方式将凭据设置为本地变量来解决了该问题:
export GOOGLE_APPLICATION_CREDENTIALS='/home/marte/Documentos/Python_Scripts/Speech/sequoia-31501208ed06.json'
此链接很有用: https://cloud.google.com/docs/authentication/getting-started