问题:我正在尝试使用espeak文字转语音引擎。 所以我让它在Linux上运行得很好(下面的代码)。 现在我想将这个基本程序移植到Windows上,但这几乎是不可能的......
部分问题是windows dll只允许AUDIO_OUTPUT_SYNCHRONOUS,这意味着它需要回调,但我无法弄清楚如何从回调播放音频......首先它崩溃了,然后我意识到,我需要一个回调函数,现在我在回调函数中获取数据,但我不知道如何播放它...因为它既不是一个wav文件,也不像Linux那样自动播放。
sourceforge网站相当无用,因为它基本上说是使用SAPI版本,但是没有关于如何使用sapi espeak dll的例子......
无论如何,这是我的代码,有人可以帮忙吗?
#ifdef __cplusplus
#include <cstdio>
#include <cstdlib>
#include <cstring>
#else
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#endif
#include <assert.h>
#include <ctype.h>
//#include "speak_lib.h"
#include "espeak/speak_lib.h"
// libespeak-dev: /usr/include/espeak/speak_lib.h
// apt-get install libespeak-dev
// apt-get install libportaudio-dev
// g++ -o mine mine.cpp -lespeak
// g++ -o mine mine.cpp -I/usr/include/espeak/ -lespeak
// gcc -o mine mine.cpp -I/usr/include/espeak/ -lespeak
char voicename[40];
int samplerate;
int quiet = 0;
static char genders[4] = {' ','M','F',' '};
//const char *data_path = "/usr/share/"; // /usr/share/espeak-data/
const char *data_path = NULL; // use default path for espeak-data
int strrcmp(const char *s, const char *sub)
{
int slen = strlen(s);
int sublen = strlen(sub);
return memcmp(s + slen - sublen, sub, sublen);
}
char * strrcpy(char *dest, const char *source)
{
// Pre assertions
assert(dest != NULL);
assert(source != NULL);
assert(dest != source);
// tk: parentheses
while((*dest++ = *source++))
;
return(--dest);
}
const char* GetLanguageVoiceName(const char* pszShortSign)
{
#define LANGUAGE_LENGTH 30
static char szReturnValue[LANGUAGE_LENGTH] ;
memset(szReturnValue, 0, LANGUAGE_LENGTH);
for (int i = 0; pszShortSign[i] != '\0'; ++i)
szReturnValue[i] = (char) tolower(pszShortSign[i]);
const espeak_VOICE **voices;
espeak_VOICE voice_select;
voices = espeak_ListVoices(NULL);
const espeak_VOICE *v;
for(int ix=0; (v = voices[ix]) != NULL; ix++)
{
if( !strrcmp( v->languages, szReturnValue) )
{
strcpy(szReturnValue, v->name);
return szReturnValue;
}
} // End for
strcpy(szReturnValue, "default");
return szReturnValue;
} // End function getvoicename
void ListVoices()
{
const espeak_VOICE **voices;
espeak_VOICE voice_select;
voices = espeak_ListVoices(NULL);
const espeak_VOICE *v;
for(int ix=0; (v = voices[ix]) != NULL; ix++)
{
printf("Shortsign: %s\n", v->languages);
printf("age: %d\n", v->age);
printf("gender: %c\n", genders[v->gender]);
printf("name: %s\n", v->name);
printf("\n\n");
} // End for
} // End function getvoicename
int main()
{
printf("Hello World!\n");
const char* szVersionInfo = espeak_Info(NULL);
printf("Espeak version: %s\n", szVersionInfo);
samplerate = espeak_Initialize(AUDIO_OUTPUT_PLAYBACK,0,data_path,0);
strcpy(voicename, "default");
// espeak --voices
strcpy(voicename, "german");
strcpy(voicename, GetLanguageVoiceName("DE"));
if(espeak_SetVoiceByName(voicename) != EE_OK)
{
printf("Espeak setvoice error...\n");
}
static char word[200] = "Hello World" ;
strcpy(word, "TV-fäns aufgepasst, es ist 20 Uhr 15. Zeit für Rambo 3");
strcpy(word, "Unnamed Player wurde zum Opfer von GSG9");
int speed = 220;
int volume = 500; // volume in range 0-100 0=silence
int pitch = 50; // base pitch, range 0-100. 50=normal
// espeak.cpp 625
espeak_SetParameter(espeakRATE, speed, 0);
espeak_SetParameter(espeakVOLUME,volume,0);
espeak_SetParameter(espeakPITCH,pitch,0);
// espeakRANGE: pitch range, range 0-100. 0-monotone, 50=normal
// espeakPUNCTUATION: which punctuation characters to announce:
// value in espeak_PUNCT_TYPE (none, all, some),
espeak_VOICE *voice_spec = espeak_GetCurrentVoice();
voice_spec->gender=2; // 0=none 1=male, 2=female,
//voice_spec->age = age;
espeak_SetVoiceByProperties(voice_spec);
espeak_Synth( (char*) word, strlen(word)+1, 0, POS_CHARACTER, 0, espeakCHARS_AUTO, NULL, NULL);
espeak_Synchronize();
strcpy(voicename, GetLanguageVoiceName("EN"));
espeak_SetVoiceByName(voicename);
strcpy(word, "Geany was fragged by GSG9 Googlebot");
strcpy(word, "Googlebot");
espeak_Synth( (char*) word, strlen(word)+1, 0, POS_CHARACTER, 0, espeakCHARS_AUTO, NULL, NULL);
espeak_Synchronize();
espeak_Terminate();
printf("Espeak terminated\n");
return EXIT_SUCCESS;
}
/*
if(espeak_SetVoiceByName(voicename) != EE_OK)
{
memset(&voice_select,0,sizeof(voice_select));
voice_select.languages = voicename;
if(espeak_SetVoiceByProperties(&voice_select) != EE_OK)
{
fprintf(stderr,"%svoice '%s'\n",err_load,voicename);
exit(2);
}
}
*/
#ifdef __cplusplus
#include <cstdio>
#include <cstdlib>
#include <cstring>
#else
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#endif
#include <assert.h>
#include <ctype.h>
#include "speak_lib.h"
//#include "espeak/speak_lib.h"
// libespeak-dev: /usr/include/espeak/speak_lib.h
// apt-get install libespeak-dev
// apt-get install libportaudio-dev
// g++ -o mine mine.cpp -lespeak
// g++ -o mine mine.cpp -I/usr/include/espeak/ -lespeak
// gcc -o mine mine.cpp -I/usr/include/espeak/ -lespeak
char voicename[40];
int iSampleRate;
int quiet = 0;
static char genders[4] = {' ','M','F',' '};
//const char *data_path = "/usr/share/"; // /usr/share/espeak-data/
//const char *data_path = NULL; // use default path for espeak-data
const char *data_path = "C:\\Users\\Username\\Desktop\\espeak-1.43-source\\espeak-1.43-source\\";
int strrcmp(const char *s, const char *sub)
{
int slen = strlen(s);
int sublen = strlen(sub);
return memcmp(s + slen - sublen, sub, sublen);
}
char * strrcpy(char *dest, const char *source)
{
// Pre assertions
assert(dest != NULL);
assert(source != NULL);
assert(dest != source);
// tk: parentheses
while((*dest++ = *source++))
;
return(--dest);
}
const char* GetLanguageVoiceName(const char* pszShortSign)
{
#define LANGUAGE_LENGTH 30
static char szReturnValue[LANGUAGE_LENGTH] ;
memset(szReturnValue, 0, LANGUAGE_LENGTH);
for (int i = 0; pszShortSign[i] != '\0'; ++i)
szReturnValue[i] = (char) tolower(pszShortSign[i]);
const espeak_VOICE **voices;
espeak_VOICE voice_select;
voices = espeak_ListVoices(NULL);
const espeak_VOICE *v;
for(int ix=0; (v = voices[ix]) != NULL; ix++)
{
if( !strrcmp( v->languages, szReturnValue) )
{
strcpy(szReturnValue, v->name);
return szReturnValue;
}
} // End for
strcpy(szReturnValue, "default");
return szReturnValue;
} // End function getvoicename
void ListVoices()
{
const espeak_VOICE **voices;
espeak_VOICE voice_select;
voices = espeak_ListVoices(NULL);
const espeak_VOICE *v;
for(int ix=0; (v = voices[ix]) != NULL; ix++)
{
printf("Shortsign: %s\n", v->languages);
printf("age: %d\n", v->age);
printf("gender: %c\n", genders[v->gender]);
printf("name: %s\n", v->name);
printf("\n\n");
} // End for
} // End function getvoicename
/* Callback from espeak. Directly speaks using AudioTrack. */
#define LOGI(x) printf("%s\n", x)
static int AndroidEspeakDirectSpeechCallback(short *wav, int numsamples, espeak_EVENT *events)
{
char buf[100];
sprintf(buf, "AndroidEspeakDirectSpeechCallback: %d samples", numsamples);
LOGI(buf);
if (wav == NULL)
{
LOGI("Null: speech has completed");
}
if (numsamples > 0)
{
//audout->write(wav, sizeof(short) * numsamples);
sprintf(buf, "AudioTrack wrote: %d bytes", sizeof(short) * numsamples);
LOGI(buf);
}
return 0; // continue synthesis (1 is to abort)
}
static int AndroidEspeakSynthToFileCallback(short *wav, int numsamples,espeak_EVENT *events)
{
char buf[100];
sprintf(buf, "AndroidEspeakSynthToFileCallback: %d samples", numsamples);
LOGI(buf);
if (wav == NULL)
{
LOGI("Null: speech has completed");
}
// The user data should contain the file pointer of the file to write to
//void* user_data = events->user_data;
FILE* user_data = fopen ( "myfile1.wav" , "ab" );
FILE* fp = static_cast<FILE *>(user_data);
// Write all of the samples
fwrite(wav, sizeof(short), numsamples, fp);
return 0; // continue synthesis (1 is to abort)
}
int main()
{
printf("Hello World!\n");
const char* szVersionInfo = espeak_Info(NULL);
printf("Espeak version: %s\n", szVersionInfo);
iSampleRate = espeak_Initialize(AUDIO_OUTPUT_SYNCHRONOUS, 4096, data_path, 0);
if (iSampleRate <= 0)
{
printf("Unable to initialize espeak");
return EXIT_FAILURE;
}
//samplerate = espeak_Initialize(AUDIO_OUTPUT_PLAYBACK,0,data_path,0);
//ListVoices();
strcpy(voicename, "default");
// espeak --voices
//strcpy(voicename, "german");
//strcpy(voicename, GetLanguageVoiceName("DE"));
if(espeak_SetVoiceByName(voicename) != EE_OK)
{
printf("Espeak setvoice error...\n");
}
static char word[200] = "Hello World" ;
strcpy(word, "TV-fäns aufgepasst, es ist 20 Uhr 15. Zeit für Rambo 3");
strcpy(word, "Unnamed Player wurde zum Opfer von GSG9");
int speed = 220;
int volume = 500; // volume in range 0-100 0=silence
int pitch = 50; // base pitch, range 0-100. 50=normal
// espeak.cpp 625
espeak_SetParameter(espeakRATE, speed, 0);
espeak_SetParameter(espeakVOLUME,volume,0);
espeak_SetParameter(espeakPITCH,pitch,0);
// espeakRANGE: pitch range, range 0-100. 0-monotone, 50=normal
// espeakPUNCTUATION: which punctuation characters to announce:
// value in espeak_PUNCT_TYPE (none, all, some),
//espeak_VOICE *voice_spec = espeak_GetCurrentVoice();
//voice_spec->gender=2; // 0=none 1=male, 2=female,
//voice_spec->age = age;
//espeak_SetVoiceByProperties(voice_spec);
//espeak_SetSynthCallback(AndroidEspeakDirectSpeechCallback);
espeak_SetSynthCallback(AndroidEspeakSynthToFileCallback);
unsigned int unique_identifier;
espeak_ERROR err = espeak_Synth( (char*) word, strlen(word)+1, 0, POS_CHARACTER, 0, espeakCHARS_AUTO, &unique_identifier, NULL);
err = espeak_Synchronize();
/*
strcpy(voicename, GetLanguageVoiceName("EN"));
espeak_SetVoiceByName(voicename);
strcpy(word, "Geany was fragged by GSG9 Googlebot");
strcpy(word, "Googlebot");
espeak_Synth( (char*) word, strlen(word)+1, 0, POS_CHARACTER, 0, espeakCHARS_AUTO, NULL, NULL);
espeak_Synchronize();
*/
// espeak_Cancel();
espeak_Terminate();
printf("Espeak terminated\n");
system("pause");
return EXIT_SUCCESS;
}
答案 0 :(得分:2)
需要对源代码进行一些更改才能使Windows库具有与Linux上相同的功能。我列出了更改here。即用型二进制文件也可用。
所有补丁和描述也被发送到espeak维护者(公开,通过邮件列表和补丁跟踪器),所以将来可能会直接提供。
答案 1 :(得分:1)
您是否尝试将回调中获得的缓冲区传递给 sndplaysnd() ??
Declare Function sndPlaySound Lib "winmm.dll" Alias "sndPlaySoundA" (ByVal lpszSoundName As String, ByVal uFlags As Long) As Long
其标准winAPI如下:
sndPlaySound(buffer[0], SND_ASYNC | SND_MEMORY)
或者,如果你有一个包含音频的wav文件:
sndPlaySound(filename, SND_ASYNC)
playound 有一个ASYNC模式,在播放音频时不会阻止你的程序执行。
注意:我在VB中使用过它,上面的代码段用于VB。如果您使用VC ++编码,则可能需要相应地修改它们。但基本意图仍然是一样的;将缓冲区传递给sndPlaySound并设置ASYNC标志。
祝你好运!