Question

如何使用google.cloud.speech python将语音转换为文本。我想使用SpeechClient将audio.raw文件数据转换为text。在client.py文件中，读取每个332 characters音频数据并通过套接字发送。在server.py中，从套接字接收332个字符，然后使用SpeechClient将其转换为文本。

这是我的示例代码

client.py

# coding:utf-8
from google.cloud import speech
from google.cloud.speech import enums
from google.cloud.speech import types
from contextlib import closing
import socket
import time
import sys
import os
import struct
import logging

def send():
    sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    with closing(sock):
        with open('audio.raw', 'rb') as f:
            packets = iter(lambda: f.read(332), b'')
            for packet in packets:
                try:
                    sock.sendto(packet, ('192.168.10.64', 12345))
                    time.sleep(0.02)
                except Exception as e:
                    print(e)
    return


if __name__ == '__main__':
    print('==================Testing=======================')
    send()
    print('===============Finished Test===============')

server.py

# coding:utf-8
from google.cloud import speech
from google.cloud.speech import enums
from google.cloud.speech import types
from contextlib import closing
import socket
import time
import sys
import os
import struct
import logging

def socket_open():
    print('=========================================')

    client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code='en-US')
    streaming_config = types.StreamingRecognitionConfig(
        config=config)

    sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    with closing(sock):
        sock.bind(('192.168.10.64', 12345))
        print('socket ok')

        first_packet = None
        while True:
            try:
                first_packet = sock.recv(332)
                stream = [first_packet]
                requests = (types.StreamingRecognizeRequest(audio_content=chunk)
                            for chunk in stream)
                responses = client.streaming_recognize(
                    streaming_config, requests)
                for response in responses:
                    for result in response.results:
                        print('Finished: {}'.format(result.is_final))
                        print('Stability: {}'.format(result.stability))
                        alternatives = result.alternatives
                        for alternative in alternatives:
                            print('Confidence: {}'.format(
                                alternative.confidence))
                            print(u'Transcript: {}'.format(
                                alternative.transcript))
                sock.settimeout(300)
            except socket.timeout:
                print('socket timeout error')
                break
            except socket.error:
                print('socket error occured')
                break


if __name__ == '__main__':
    print('==================Open Socket=======================')
    socket_open()
    print('===============Finished Socket===============')

我得到的输出响应为空。

python将语音转换为文本

0 个答案: