Python passes over/ignores the `create_bucket` and `upload_blob` functions for Google Cloud Storage when the code runs, giving a 404 error

Asked: 2019-07-11 21:06:31

Tags: python google-cloud-storage

I'm running into a problem when I run my code. It passes over/ignores the `create_bucket()` and `upload_blob()` functions and returns a NotFound: 404 No such object: gcspeechstorage/output.wav error. I have isolated the parts of the code that work, which leaves these two problematic functions. I should also note that in my previous question I was told to test `transcribe_gcs` on its own, and it did work, but only when I uploaded the file manually. So my problem is creating the bucket and then uploading the file to it. Thanks for your help.

Expected result: create a bucket, upload a WAV file to the GCS bucket, then retrieve that file for transcription and finally analyze its sentiment.

Actual result: it records the audio, then crashes with the 404 error shown above.

Full code (to give you a better picture):

import pyaudio
import wave
import pprint
import argparse
import datetime
import io
import json
import os
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from google.cloud import storage
import sys
from oauth2client.service_account import ServiceAccountCredentials

CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100
RECORD_SECONDS = 10
WAVE_OUTPUT_FILENAME = "output.wav"

p = pyaudio.PyAudio()

stream = p.open(format=FORMAT,
                channels=CHANNELS,
                rate=RATE,
                input=True,
                frames_per_buffer=CHUNK)

print("* recording")

frames = []

for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
    data = stream.read(CHUNK)
    frames.append(data)

print("* done recording")

stream.stop_stream()
stream.close()
p.terminate()

wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
wf.setnchannels(CHANNELS)
wf.setsampwidth(p.get_sample_size(FORMAT))
wf.setframerate(RATE)
wf.writeframes(b''.join(frames))
wf.close()

os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'C:/Users/Dave/Desktop/mizu/Project Mizu-7e2ecd8c5804.json'

bucket_name = "gcspeechstorage"
source_file_name = "C:/Users/Dave/Desktop/mizu/output.wav"
destination_blob_name = "output.wav"
gcs_uri = "gs://gcspeechstorage/output.wav"

def create_bucket(bucket_name):
    """Creates a new bucket."""
    storage_client = storage.Client()
    bucket = storage_client.create_bucket(bucket_name)
    print('Bucket {} created'.format(bucket.name))

def upload_blob(bucket_name, source_file_name, destination_blob_name):
    """Uploads a file to the bucket."""
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)

    blob.upload_from_filename(source_file_name)

    print('File {} uploaded to {}.'.format(
        source_file_name,
        destination_blob_name))

# [START speech_transcribe_async_gcs]
def transcribe_gcs(gcs_uri):
    """Asynchronously transcribes the audio file specified by the gcs_uri."""
    from google.cloud import speech
    from google.cloud.speech import enums
    from google.cloud.speech import types
    client = speech.SpeechClient()

    audio = types.RecognitionAudio(uri=gcs_uri)
    config = types.RecognitionConfig(
        encoding= 'LINEAR16',
        sample_rate_hertz=44100,
        language_code='en-US')

    operation = client.long_running_recognize(config, audio)

    print('Waiting for operation to complete...')
    response = operation.result(timeout=90)

    # Each result is for a consecutive portion of the audio. Iterate through
    # them to get the transcripts for the entire audio file.
    for result in response.results:
        # The first alternative is the most likely one for this portion.
        print(u'Transcript: {}'.format(result.alternatives[0].transcript))

        transcribedSpeechFile = open('speechToAnalyze.txt', 'a+')  # this is where a text file is made with the transcribed speech

        transcribedSpeechFile.write(format(result.alternatives[0].transcript))

        transcribedSpeechFile.close()

        print('Confidence: {}'.format(result.alternatives[0].confidence))
# [END speech_transcribe_async_gcs]


if __name__ == '__main__':
    transcribe_gcs(gcs_uri)


audio_rec = open('speechToAnalyze.txt', 'r')

sid = SentimentIntensityAnalyzer()
for sentence in audio_rec:
    ss = sid.polarity_scores(sentence)
    for k in ss:
        print('{0}: {1}, '.format(k, ss[k]), end='')
    print()

Error message:

C:\Users\Dave\AppData\Local\Programs\Python\Python37\python.exe C:/Users/Dave/Desktop/mizu/FrankensteinedFile.py
* recording
* done recording
Traceback (most recent call last):
  File "C:\Users\Dave\AppData\Roaming\Python\Python37\site-packages\google\api_core\grpc_helpers.py", line 57, in error_remapped_callable
    return callable_(*args, **kwargs)
  File "C:\Users\Dave\AppData\Roaming\Python\Python37\site-packages\grpc\_channel.py", line 565, in __call__
    return _end_unary_response_blocking(state, call, False, None)
  File "C:\Users\Dave\AppData\Roaming\Python\Python37\site-packages\grpc\_channel.py", line 467, in _end_unary_response_blocking
    raise _Rendezvous(state, None, None, deadline)
grpc._channel._Rendezvous: <_Rendezvous of RPC that terminated with:
    status = StatusCode.NOT_FOUND
    details = "No such object: gcspeechstorage/output.wav"
    debug_error_string = "{"created":"@1562878577.907000000","description":"Error received from peer ipv6:[2607:f8b0:4000:806::200a]:443","file":"src/core/lib/surface/call.cc","file_line":1052,"grpc_message":"No such object: gcspeechstorage/output.wav","grpc_status":5}"
>

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "C:/Users/Dave/Desktop/mizu/FrankensteinedFile.py", line 112, in <module>
    transcribe_gcs(gcs_uri)
  File "C:/Users/Dave/Desktop/mizu/FrankensteinedFile.py", line 90, in transcribe_gcs
    operation = client.long_running_recognize(config, audio)
  File "C:\Users\Dave\AppData\Local\Programs\Python\Python37\lib\site-packages\google\cloud\speech_v1\gapic\speech_client.py", line 326, in long_running_recognize
    request, retry=retry, timeout=timeout, metadata=metadata
  File "C:\Users\Dave\AppData\Roaming\Python\Python37\site-packages\google\api_core\gapic_v1\method.py", line 143, in __call__
    return wrapped_func(*args, **kwargs)
  File "C:\Users\Dave\AppData\Roaming\Python\Python37\site-packages\google\api_core\retry.py", line 273, in retry_wrapped_func
    on_error=on_error,
  File "C:\Users\Dave\AppData\Roaming\Python\Python37\site-packages\google\api_core\retry.py", line 182, in retry_target
    return target()
  File "C:\Users\Dave\AppData\Roaming\Python\Python37\site-packages\google\api_core\timeout.py", line 214, in func_with_timeout
    return func(*args, **kwargs)
  File "C:\Users\Dave\AppData\Roaming\Python\Python37\site-packages\google\api_core\grpc_helpers.py", line 59, in error_remapped_callable
    six.raise_from(exceptions.from_grpc_error(exc), exc)
  File "<string>", line 3, in raise_from
google.api_core.exceptions.NotFound: 404 No such object: gcspeechstorage/output.wav

1 Answer:

Answer 0 (score: 0):

It is not passing over or ignoring your functions. You are simply not calling them inside the `if __name__ == '__main__':` block. The way the script is written, when you run it with `python FrankensteinedFile.py` it only tries to call `transcribe_gcs(gcs_uri)`, which is pointless on its own: nothing has been recorded and uploaded, so that specific file can never be found in the bucket.
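As a quick, standalone illustration of how that guard behaves (a minimal sketch, unrelated to the GCS code; the file name and function are made up for the example):

# demo_main_guard.py
def greet():
    print("hello")

if __name__ == '__main__':
    # This block runs only when the file is executed directly
    # (python demo_main_guard.py); it is skipped when the module is imported.
    greet()

Anything you want the script to actually do when it is run has to be called from inside that block (or from a function that is called from it).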

Check this link to understand how the `if __name__ == '__main__':` block works. Here is how your code can be structured so that it actually calls everything you wrote:

import pyaudio
import wave
import pprint
import argparse
import datetime
import io
import json
import os
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from google.cloud import storage
import sys
from oauth2client.service_account import ServiceAccountCredentials


def record_audio(chunk, format, channels, rate, record_seconds, wave_output_filename):
  p = pyaudio.PyAudio()

  stream = p.open(format=format,
                  channels=channels,
                  rate=rate,
                  input=True,
                  frames_per_buffer=chunk)

  print("* recording")

  frames = []

  for i in range(0, int(rate / chunk * record_seconds)):
      data = stream.read(chunk)
      frames.append(data)

  print("* done recording")

  stream.stop_stream()
  stream.close()
  p.terminate()

  wf = wave.open(wave_output_filename, 'wb')
  wf.setnchannels(channels)
  wf.setsampwidth(p.get_sample_size(format))
  wf.setframerate(rate)
  wf.writeframes(b''.join(frames))
  wf.close()

def create_bucket(bucket_name):
    """Creates a new bucket."""
    storage_client = storage.Client()
    bucket = storage_client.create_bucket(bucket_name)
    print('Bucket {} created'.format(bucket.name))

def upload_blob(bucket_name, source_file_name, destination_blob_name):
    """Uploads a file to the bucket."""
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)

    blob.upload_from_filename(source_file_name)

    print('File {} uploaded to {}.'.format(
        source_file_name,
        destination_blob_name))

# [START speech_transcribe_async_gcs]
def transcribe_gcs(gcs_uri):
    """Asynchronously transcribes the audio file specified by the gcs_uri."""
    from google.cloud import speech
    from google.cloud.speech import enums
    from google.cloud.speech import types
    client = speech.SpeechClient()

    audio = types.RecognitionAudio(uri=gcs_uri)
    config = types.RecognitionConfig(
        encoding= 'LINEAR16',
        sample_rate_hertz=44100,
        language_code='en-US')

    operation = client.long_running_recognize(config, audio)

    print('Waiting for operation to complete...')
    response = operation.result(timeout=90)

    # Each result is for a consecutive portion of the audio. Iterate through
    # them to get the transcripts for the entire audio file.
    for result in response.results:
        # The first alternative is the most likely one for this portion.
        print(u'Transcript: {}'.format(result.alternatives[0].transcript))

        transcribedSpeechFile = open('speechToAnalyze.txt', 'a+')  # this is where a text file is made with the transcribed speech

        transcribedSpeechFile.write(format(result.alternatives[0].transcript))

        transcribedSpeechFile.close()

        print('Confidence: {}'.format(result.alternatives[0].confidence))
# [END speech_transcribe_async_gcs]

def analyze_text():
  audio_rec = open('speechToAnalyze.txt', 'r')

  sid = SentimentIntensityAnalyzer()
  for sentence in audio_rec:
      ss = sid.polarity_scores(sentence)
      for k in ss:
          print('{0}: {1}, '.format(k, ss[k]), end='')
      print()

if __name__ == '__main__':
  os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'C:/Users/Dave/Desktop/mizu/Project Mizu-7e2ecd8c5804.json'

  bucket_name = "gcspeechstorage"
  source_file_name = "C:/Users/Dave/Desktop/mizu/output.wav"
  destination_blob_name = "output.wav"
  gcs_uri = "gs://gcspeechstorage/output.wav"  

  chunk = 1024
  audio_format = pyaudio.paInt16  # renamed from `format` so it does not shadow the built-in format() used in transcribe_gcs
  channels = 1
  rate = 44100
  record_seconds = 10
  wave_output_filename = "output.wav"

  record_audio(chunk, audio_format, channels, rate, record_seconds, wave_output_filename)
  create_bucket(bucket_name)
  upload_blob(bucket_name, source_file_name, destination_blob_name)
  transcribe_gcs(gcs_uri)
  analyze_text()
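One thing to keep in mind with this structure: create_bucket(bucket_name) will raise a 409 Conflict the second time the script runs, because the bucket already exists by then. A hedged sketch of a get-or-create variant (the helper name get_or_create_bucket is my own, not part of the original code):

from google.cloud import storage
from google.cloud.exceptions import Conflict

def get_or_create_bucket(bucket_name):
    """Returns the bucket, creating it only if it does not exist yet."""
    storage_client = storage.Client()
    bucket = storage_client.lookup_bucket(bucket_name)  # returns None if the bucket is missing
    if bucket is None:
        try:
            bucket = storage_client.create_bucket(bucket_name)
            print('Bucket {} created'.format(bucket.name))
        except Conflict:
            # Created by another process between lookup and create; just fetch it.
            bucket = storage_client.get_bucket(bucket_name)
    return bucket

With something like that in place, the `__main__` block could call get_or_create_bucket(bucket_name) instead of create_bucket(bucket_name) and the rest of the pipeline would stay the same.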