我是编程和图像处理方面的新手。最近,我开发了一个系统,可以从视频输入中检测人脸并识别出这个人。如果此人已存在于数据库中,就把他/她的名字标注在视频帧上;如果此人是新面孔,则询问其姓名,拍摄足够多的照片并存入数据库,以便下次能够识别此人。我使用 Fisherfaces 算法来完成这项任务。现在我的问题是,我想让系统开口说话:让它说出最近识别到的人的名字。我可以用
// Helper whose constructor invokes talk(); the single static instance Once_
// therefore triggers exactly one talk() call when that object is initialized.
static class Once
{
public:
    Once()
    {
        talk();
    }
} Once_;
来调用一次 talk 函数。但这种做法不够自然,而且 talk 函数无法接收用户的输入。
有谁能给我建议一个解决方案,或者告诉我应该从哪里着手解决这个问题吗?
talk 函数的实现(代码中名为 speech):
int speech(char* value)
{
ISpVoice * pVoice = NULL;
if (FAILED(::CoInitialize(NULL)))
return FALSE;
HRESULT hr = CoCreateInstance(CLSID_SpVoice, NULL, CLSCTX_ALL, IID_ISpVoice, (void **)&pVoice);
if( SUCCEEDED( hr ) )
{
hr = pVoice->Speak(L"userINPUT", SPF_IS_XML, NULL);
pVoice->Release();
pVoice = NULL;
}
::CoUninitialize();
return TRUE;
}
答案 0 :(得分:0)
所以,这是我的建议:
// -- >8 ---------- speech.h --------------------------
#ifndef __speech_onboard__
#define __speech_onboard__
struct ISpVoice; // forward declaration only: mixing OpenCV and Windows headers is a recipe for disaster
namespace Speech
{
    // Thin wrapper around a SAPI ISpVoice text-to-speech interface.
    // The constructor creates the voice object and the destructor releases it.
    class Voice
    {
        ISpVoice * spVoice; // owned interface pointer; null when creation failed

        // Non-copyable: the destructor releases spVoice, so a copy would release
        // the same interface twice. Declared private and intentionally left
        // undefined (pre-C++11 idiom, matching the rest of this code).
        Voice( const Voice & );
        Voice & operator=( const Voice & );
    public:
        Voice();
        ~Voice();
        // Speaks txt; flags are SAPI speak flags (0 = default/synchronous,
        // 1 = SPF_ASYNC). Returns nonzero when SAPI reports S_OK, 0 otherwise.
        int speak( const char * txt, int flags=0 ) const ;
        // Supported values range from -10 to 10.
        // Returns nonzero when SAPI reports S_OK, 0 otherwise.
        int setRate( int s );
        // Supported values range from 0 to 100.
        // Returns nonzero when SAPI reports S_OK, 0 otherwise.
        int setVolume( int s );
    };
}
#endif // __speech_onboard__
// ---- >8 speech.cpp ------------------------------
#include <windows.h>
#include <sapi.h>
#include <vector>
#include "speech.h"
// Releases the COM interface pointer x if it is non-null, then resets x to NULL.
// Wrapped in do { } while (0) so the macro behaves as a single statement and
// `if (c) COM_RELEASE(p); else ...` compiles as expected.
// Note: x is evaluated more than once, so pass a plain pointer variable.
#define COM_RELEASE(x) do { if ((x)) (x)->Release(); (x) = NULL; } while (0)
namespace Speech
{
struct _ComUser
{
_ComUser() {CoInitialize(0);}
~_ComUser() {CoUninitialize();}
} _we_need_a_singleton_per_module;
inline int w2a( WCHAR *in, char *out )
{
out[0]=0;
return WideCharToMultiByte(CP_ACP, 0, in, -1, out, MAX_PATH, 0, 0);
}
// Converts the NUL-terminated ANSI (CP_ACP) string `in` to a wide string and
// writes the result into `out`.
//   in      - source string; a null pointer yields an empty result.
//   out     - destination buffer holding at least `outSize` WCHARs.
//   outSize - capacity of `out` in WCHARs; defaults to MAX_PATH, the limit the
//             original two-argument form always used.
// Returns the number of WCHARs written (terminator included), or 0 on failure.
// On failure `out` is left as an empty string.
inline int a2w( const char *in, WCHAR *out, int outSize = MAX_PATH )
{
    if ( !out || outSize <= 0 )
        return 0;
    out[0] = 0;
    if ( !in )
        return 0;
    const int written = MultiByteToWideChar( CP_ACP, 0, in, -1, out, outSize );
    if ( written == 0 )
        out[0] = 0; // a failed call may have left partial, unterminated data behind
    return written;
}
// Creates the in-process SAPI voice object. If creation fails, spVoice is kept
// null, which every other member function treats as "no voice available".
Voice::Voice()
    : spVoice(0)
{
    const HRESULT hr = CoCreateInstance( CLSID_SpVoice, NULL, CLSCTX_INPROC_SERVER,
                                         IID_ISpVoice, (LPVOID *)&(spVoice) );
    if ( FAILED( hr ) )
        spVoice = 0; // make the failure state explicit instead of ignoring hr
}
// Releases the voice interface (if one was created) and clears the pointer.
Voice::~Voice()
{
    if ( spVoice )
    {
        spVoice->Release();
    }
    spVoice = NULL;
}
//SPF_ASYNC = ( 1L << 0 ) ,
//SPF_PURGEBEFORESPEAK = ( 1L << 1 ) ,
//SPF_IS_FILENAME = ( 1L << 2 ) ,
//SPF_IS_XML = ( 1L << 3 ) ,
//SPF_IS_NOT_XML = ( 1L << 4 ) ,
//SPF_PERSIST_XML = ( 1L << 5 ) ,
//SPF_NLP_SPEAK_PUNC = ( 1L << 6 ) ,
//SPF_PARSE_SAPI = ( 1L << 7 ) ,
//SPF_PARSE_SSML = ( 1L << 8 ) ,
//SPF_PARSE_AUTODETECT = 0,
int Voice::speak( const char * txt, int flags ) const
{
if ( ! spVoice )
return 0;
WCHAR wtxt[800];
a2w(txt,wtxt);
ULONG pulstream = 0;
HRESULT hr = spVoice->Speak( wtxt, flags, &pulstream );
return hr==S_OK;
}
// Supported values range from -10 to 10
int Voice::setRate( int s )
{
if ( ! spVoice )
return 0;
HRESULT hr = spVoice->SetRate( s );
return hr==S_OK;
}
// Sets the output volume. Supported values range from 0 to 100.
// Returns 1 when SetVolume() reports S_OK; 0 when no voice object is
// available, `s` is outside 0..100, or SetVolume() reports anything else.
int Voice::setVolume( int s )
{
    if ( ! spVoice )
        return 0;
    // SetVolume takes a USHORT: reject out-of-range input before narrowing so
    // that negative or very large ints cannot wrap into the valid range.
    if ( s < 0 || s > 100 )
        return 0;
    HRESULT hr = spVoice->SetVolume ( static_cast<USHORT>( s ) );
    return hr==S_OK;
}
}
// ----- >8 main.cpp --------------------------------------------
#include "opencv2/core/core.hpp"
#include "opencv2/highgui/highgui.hpp"
using namespace cv;
#include "speech.h"
int main(int argc, char** argv)
{
Speech::Voice voice;
voice.speak("hello , oh, hello!", 1); // async
Mat img(300,300,CV_8UC3,Scalar(255,0,0));
namedWindow("Display window",0);
putText(img,"lala la",Point(20,120),0,2.5,Scalar(0,200,0),5);
imshow("Display window", img);
waitKey(0);
voice.speak("bye bye, see you later !"); // sync
return 0;
}