Question

我目前从一个目录中读取单个音频文件作为输入，并把文件名和语音转换得到的文本保存到 CSV 文件中。但是该目录中共有 100 个文件（即 001.wav、002.wav、003.wav …… 100.wav）。

我想编写一个循环或函数，自动把每个音频文件的语音转文本结果写入 CSV 文件，每个文件占一行，并带上对应的文件名。

以下是代码：

import speech_recognition as sr
import csv
import os
# Transcribe a single audio file with two recognizers and append the result
# to a CSV file.
#
# Only `os` is imported, so the path helpers are reached through `os.path`.
# The directory is joined directly: os.path.dirname() on a directory path
# without a trailing separator would return its parent instead.
AUDIO_FILE = os.path.join('C:/path/to/directory', "001.wav")
file_name = os.path.basename(AUDIO_FILE)

# use the audio file as the audio source
r = sr.Recognizer()
with sr.AudioFile(AUDIO_FILE) as source:
    audio = r.record(source)  # read the entire audio file

# recognize speech using Google Speech Recognition
try:
    # for testing purposes, we're just using the default API key
    # to use another API key, use `r.recognize_google(audio, key="GOOGLE_SPEECH_RECOGNITION_API_KEY")`
    # instead of `r.recognize_google(audio)`
    a = r.recognize_google(audio)
except sr.UnknownValueError:
    a = "Google Speech Recognition could not understand audio"
except sr.RequestError as e:
    a = "Could not request results from Google Speech Recognition service; {0}".format(e)

# recognize speech using Sphinx
try:
    b = r.recognize_sphinx(audio)
except sr.UnknownValueError:
    b = "Sphinx could not understand audio"
except sr.RequestError as e:
    b = "Sphinx error; {0}".format(e)

# newline='' lets the csv module control line endings itself (otherwise
# extra blank lines can appear between rows on Windows).
with open('speech_output.csv', 'a', newline='') as f:
    writer = csv.writer(f)
    # 'sphinx' is a column title, so it must be a string literal.
    writer.writerow(['file_name', 'google', 'sphinx'])
    writer.writerow([file_name, a, b])

参考代码：https://github.com/Uberi/speech_recognition/blob/master/examples/audio_transcribe.py

Answer 1

您可以使用os.walk获取目录和子目录的所有文件，我已将其包含在下面代码中的get_file_paths()中，这是一个示例：

import speech_recognition as sr
import csv
import os


# Directory that main() scans, including its subdirectories, for input files.
DIRNAME = r'c:\path\to\directory'
# CSV file that main() opens in append mode to store the transcriptions.
OUTPUTFILE = r'c:\path\to\outputfiledir\outputfile.csv'

def get_file_paths(dirname):
    """Return the paths of all files below ``dirname``.

    The directory tree is traversed with ``os.walk``, so files in
    subdirectories are included. Each returned path is the walked directory
    joined with the file name.

    Args:
        dirname: Root directory to search.

    Returns:
        A list of file paths in the order ``os.walk`` yields them; empty if
        no files are found.
    """
    return [
        os.path.join(parent, entry)
        for parent, _subdirs, entries in os.walk(dirname)
        for entry in entries
    ]

def process_file(file):
    """Transcribe one audio file with Google Speech Recognition.

    Args:
        file: Path of the audio file, passed to ``sr.AudioFile``.

    Returns:
        The recognized text, or an explanatory message string when the
        recognizer raises ``sr.UnknownValueError`` or ``sr.RequestError``.
    """
    recognizer = sr.Recognizer()
    with sr.AudioFile(file) as source:
        recording = recognizer.record(source)  # read the entire audio file
        try:
            return recognizer.recognize_google(recording)
        except sr.UnknownValueError:
            return "Google Speech Recognition could not understand audio"
        except sr.RequestError as err:
            return "Could not request results from Google Speech Recognition service; {0}".format(err)

def main():
    """Transcribe every '.wav' file under DIRNAME and append rows to OUTPUTFILE.

    All files below DIRNAME are collected with get_file_paths(); those whose
    extension is '.wav' are passed to process_file(), and one CSV row
    ``[file_name, transcription]`` is written per file.

    The header row is written once, and only when OUTPUTFILE does not exist
    yet or is empty, so repeated rows or repeated runs do not scatter header
    lines through the data.
    """
    # Keep only '.wav' files (extension compared case-insensitively) and sort
    # the paths so rows come out in a stable order (001.wav, 002.wav, ...).
    wav_files = sorted(
        path for path in get_file_paths(DIRNAME)
        if os.path.splitext(path)[1].lower() == '.wav'
    )
    if not wav_files:
        return  # nothing to transcribe; do not create an empty output file

    needs_header = (
        not os.path.exists(OUTPUTFILE) or os.path.getsize(OUTPUTFILE) == 0
    )

    # Open the output once for the whole batch. newline='' lets the csv
    # module control line endings (avoids blank lines between rows on Windows).
    with open(OUTPUTFILE, 'a', newline='') as f:
        writer = csv.writer(f)
        if needs_header:
            writer.writerow(['file_name', 'google'])
        for file in wav_files:
            file_name = os.path.basename(file)  # basename for the output row
            writer.writerow([file_name, process_file(file)])


# Run the batch transcription only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()

如果你想同时使用多个识别器，可以参考下面这种写法。请注意，这是一个未经测试的示例：

import speech_recognition as sr
import csv
import os


# Directory that main() scans, including its subdirectories, for input files.
DIRNAME = r'c:\path\to\directory'
# CSV file that main() opens in append mode to store the recognition results.
OUTPUTFILE = r'c:\path\to\outputfiledir\outputfile.csv'

def get_file_paths(dirname):
    """Collect the path of every file found under ``dirname``.

    Uses ``os.walk``, so the search descends into subdirectories.

    Args:
        dirname: Root directory to search.

    Returns:
        A list of paths (walked directory joined with file name), in the
        order ``os.walk`` produces them.
    """
    collected = []
    for parent, _subdirs, entries in os.walk(dirname):
        collected.extend(os.path.join(parent, entry) for entry in entries)
    return collected

def recog_multiple(file):
    """Transcribe one audio file with several recognizers.

    The audio is read from disk once and the same recording is handed to
    every recognizer method named in ``r_types``.

    Args:
        file: Path of the audio file, passed to ``sr.AudioFile``.

    Returns:
        A list with one string per recognizer, in the order of ``r_types``.
        Each string starts with the recognizer method name and ``': '``,
        followed by either the recognized text or an error message.
    """
    r = sr.Recognizer()
    r_types = ['recognize_google', 'recognize_sphinx']

    # Read the file a single time: the recording does not depend on which
    # recognizer is used, so re-reading it per recognizer is wasted work.
    with sr.AudioFile(file) as source:
        audio = r.record(source)

    results = []
    for r_type in r_types:
        try:
            # Look the recognizer method up by name on the Recognizer object.
            result = r_type + ': ' + str(getattr(r, r_type)(audio))
        except sr.UnknownValueError:
            result = r_type + ': Speech Recognition could not understand audio'
        except sr.RequestError as e:
            result = r_type + ': Could not request results from Speech Recognition service; {0}'.format(e)
        results.append(result)
    return results

def main():
    """Run all recognizers on every '.wav' file under DIRNAME and log to OUTPUTFILE.

    All files below DIRNAME are collected with get_file_paths(); those whose
    extension is '.wav' are passed to recog_multiple(), and one CSV row
    ``[file_name, results]`` is written per file.

    The header row is written once, and only when OUTPUTFILE does not exist
    yet or is empty, so header lines are not interleaved with the data.
    """
    # Keep only '.wav' files (extension compared case-insensitively) and sort
    # the paths so rows come out in a stable order (001.wav, 002.wav, ...).
    wav_files = sorted(
        path for path in get_file_paths(DIRNAME)
        if os.path.splitext(path)[1].lower() == '.wav'
    )
    if not wav_files:
        return  # nothing to transcribe; do not create an empty output file

    needs_header = (
        not os.path.exists(OUTPUTFILE) or os.path.getsize(OUTPUTFILE) == 0
    )

    # Open the output once for the whole batch. newline='' lets the csv
    # module control line endings (avoids blank lines between rows on Windows).
    with open(OUTPUTFILE, 'a', newline='') as f:
        writer = csv.writer(f)
        if needs_header:
            writer.writerow(['file_name', 'results'])
        for file in wav_files:
            file_name = os.path.basename(file)  # basename for the output row
            a = recog_multiple(file)
            # NOTE(review): `a` is a list, so csv writes its str() form into a
            # single cell; consider one column per recognizer if that is
            # what consumers of the file expect.
            writer.writerow([file_name, a])


# Run the batch transcription only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()

用于从目录中获取多个音频文件的函数循环

1 个答案: