Question

正如标题所说，我想通过麦克风连续录制原始音频。我的想法是在后台把一个简单的 C 程序作为服务运行，由它生成一个个音频块（chunk），并把这些文件送入 sphinx 语音识别引擎。

之后，我可以使用识别的单词进行一些处理。

问题在于（连续）识别。我不能简单地把我说的话录成一个个 10 秒钟的音频块，因为 chunk[33] -> chunk[34] 可能属于同一句话，这样 sphinx 就会输出类似下面的内容：

# Collect the distinct cluster_a values greater than -1.
# NOTE(review): -1 presumably marks "no cluster" — confirm against the data.
cluster_a_ids = df[df['cluster_a'] > -1]['cluster_a'].unique() 
other_clusters = ['cluster_b', 'cluster_c', 'cluster_d']
# Visit every distinct cluster_a value (note: `id` shadows the Python builtin).
for id in cluster_a_ids:
    # Rows of df that belong to this cluster_a value.
    id_rows = df[df['cluster_a'] == id]
    # Check each of the other cluster columns in turn.
    for cluster in other_clusters:
        # All rows of df whose value in this column equals the value found
        # in the FIRST row of id_rows for that same column.
        match_rows = df[df[cluster] == id_rows[cluster].values[0]]
        # Only the row COUNTS are compared here, not the row indices; when
        # they are equal, the column is overwritten with -1 for the matched
        # rows, modifying df in place.
        if match_rows.shape[0] == id_rows.shape[0]:
            df.loc[match_rows.index, cluster] = -1

另一种方法是连续录制音频，但是我无法用 sphinx 处理大型音频文件。

我使用了pocketsphinx的基本example：

recognized chunk[33] -> ["enable light"]
recognized chunk[34] -> ["5 with 50 percent"]

}

这里是使用 ffmpeg 创建简单音频文件/块的基本示例：

#include <pocketsphinx.h>

/**
 * Decode one raw audio file with PocketSphinx and print the hypothesis.
 *
 * Builds a decoder configuration from the en-us models under MODELDIR,
 * feeds the contents of "audiochunk_33.raw" to the decoder as a single
 * utterance in blocks of up to 512 samples, and prints the recognized
 * text to stdout.
 *
 * NOTE(review): the input is presumably headerless 16-bit PCM in the
 * format the acoustic model expects — confirm against how the chunks
 * are recorded.
 *
 * @param argc unused
 * @param argv unused
 * @return 0 on success, -1 if configuration, decoder creation, file
 *         access, or decoding fails.
 */
int main(int argc, char *argv[])
{
    static const char input_path[] = "audiochunk_33.raw";
    ps_decoder_t *ps = NULL;
    cmd_ln_t *config = NULL;
    FILE *fh = NULL;
    char const *hyp;
    int16 buf[512];
    size_t nsamp;
    int32 score;
    int rc = -1;

    (void)argc;
    (void)argv;

    config = cmd_ln_init(NULL, ps_args(), TRUE,
                         "-hmm", MODELDIR "/en-us/en-us",
                         "-lm", MODELDIR "/en-us/en-us.lm.bin",
                         "-dict", MODELDIR "/en-us/cmudict-en-us.dict",
                         NULL);
    if (config == NULL) {
        fprintf(stderr, "Failed to create config object, see log for details\n");
        goto out;
    }

    ps = ps_init(config);
    if (ps == NULL) {
        fprintf(stderr, "Failed to create recognizer, see log for details\n");
        goto out;
    }

    fh = fopen(input_path, "rb");
    if (fh == NULL) {
        /* Report the file that was actually requested. */
        fprintf(stderr, "Unable to open input file %s\n", input_path);
        goto out;
    }

    if (ps_start_utt(ps) < 0) {
        fprintf(stderr, "Failed to start utterance\n");
        goto out;
    }

    /*
     * Loop on the fread() result rather than feof(): feof() only becomes
     * true after a read has already hit end-of-file, and it never becomes
     * true on a read error, which would spin forever.
     */
    while ((nsamp = fread(buf, sizeof buf[0], sizeof buf / sizeof buf[0], fh)) > 0) {
        if (ps_process_raw(ps, buf, nsamp, FALSE, FALSE) < 0) {
            fprintf(stderr, "Failed to process audio from %s\n", input_path);
            goto out;
        }
    }
    if (ferror(fh)) {
        fprintf(stderr, "Error while reading %s\n", input_path);
        goto out;
    }

    if (ps_end_utt(ps) < 0) {
        fprintf(stderr, "Failed to end utterance\n");
        goto out;
    }

    /*
     * The hypothesis can be NULL when nothing was recognized; passing NULL
     * to %s is undefined behaviour, so print an empty string instead.
     */
    hyp = ps_get_hyp(ps, &score);
    printf("Recognized: %s\n", hyp != NULL ? hyp : "");
    rc = 0;

out:
    /* Single exit path: release whatever was acquired, in reverse order. */
    if (fh != NULL) {
        fclose(fh);
    }
    if (ps != NULL) {
        ps_free(ps);
    }
    if (config != NULL) {
        cmd_ln_free_r(config);
    }
    return rc;
}

BR 迈克尔

使用pocketsphinx / ffmpeg连续记录/识别音频

0 个答案: