Question

我是编程和图像处理的新手。最近，我开发了一个系统，可以从视频输入中检测人脸并识别出这个人。如果此人已经在数据库中，就把他/她的名字标注在视频帧上；如果是新人，则询问其姓名，采集足够多的照片并存入数据库，以便下次能够识别此人。我使用 Fisherfaces 算法来完成这项任务。现在我的问题是，我想让系统说话：让它说出最近识别到的人的名字。我可以用

// Runs talk() once during static initialization: the constructor of the
// single file-local object Once_ makes the call.
static class Once
{
public:
    Once()
    {
        talk();
    }
} Once_;

来调用一次 talk 函数。但这种做法并不自然，而且 talk 函数不接受用户的输入。

谁能给我建议一个解决方案，或者告诉我该从哪里着手解决这个问题？

talk 函数

int speech(char* value)
{

ISpVoice * pVoice = NULL;

if (FAILED(::CoInitialize(NULL)))
    return FALSE;

HRESULT hr = CoCreateInstance(CLSID_SpVoice, NULL, CLSCTX_ALL, IID_ISpVoice, (void **)&pVoice);
if( SUCCEEDED( hr ) )
{
    hr = pVoice->Speak(L"userINPUT", SPF_IS_XML, NULL);
    pVoice->Release();
    pVoice = NULL;
}

::CoUninitialize();
return TRUE;
}

Answer 1

所以，这是我的建议：

// -- >8 ---------- speech.h --------------------------
#ifndef SPEECH_ONBOARD_H
#define SPEECH_ONBOARD_H


// Forward declaration only: keeps <windows.h>/<sapi.h> out of client code,
// since mixing OpenCV and Windows headers is a recipe for disaster.
struct ISpVoice;

namespace Speech
{
    // Thin wrapper around one SAPI ISpVoice instance.
    //
    // The constructor tries to create the voice; if that fails the object
    // holds no voice and every method below returns 0. The destructor
    // releases the voice.
    class Voice
    {
        ISpVoice * spVoice;

        // Not copyable: a copy would release the same COM pointer twice.
        // Declared private and left undefined on purpose.
        Voice( const Voice & );
        Voice & operator=( const Voice & );

    public:

        Voice();
        ~Voice();


        // Speaks `txt` (NUL-terminated, ANSI code page). `flags` is passed
        // through to ISpVoice::Speak: 0 speaks synchronously, 1 (SPF_ASYNC)
        // returns immediately.
        // Returns non-zero when Speak reported S_OK, 0 otherwise.
        int speak( const char * txt, int flags=0 ) const ;

        // Sets the speaking rate. Supported values range from -10 to 10.
        // Returns non-zero when the call reported S_OK, 0 otherwise.
        int setRate( int s );

        // Sets the output volume. Supported values range from 0 to 100.
        // Returns non-zero when the call reported S_OK, 0 otherwise.
        int setVolume( int s );
    };
}


#endif // SPEECH_ONBOARD_H



// ---- >8 speech.cpp ------------------------------
#include <windows.h>
#include <sapi.h>

#include <vector>

#include "speech.h"


// Releases the COM interface pointer `x` if it is non-null, then sets it to NULL.
// Wrapped in do { } while (0) so the macro acts as a single statement and can
// be used safely in an unbraced if/else. `x` is evaluated more than once, so
// pass a plain pointer variable, not an expression with side effects.
#define COM_RELEASE(x) do { if ((x)) (x)->Release(); (x) = NULL; } while (0)


namespace Speech
{
    struct _ComUser
    {
        _ComUser()  {CoInitialize(0);}
        ~_ComUser() {CoUninitialize();}
    } _we_need_a_singleton_per_module;


    // Converts the NUL-terminated wide string `in` to the ANSI code page (CP_ACP).
    //
    // `out` must have room for at least MAX_PATH chars, because that is the
    // buffer size reported to WideCharToMultiByte.
    // Returns the number of bytes written (terminator included), or 0 on
    // failure; on failure `out` is left as an empty string.
    inline int w2a( WCHAR *in, char *out )
    {
        if ( ! out )
            return 0;

        out[0]=0;
        if ( ! in )
            return 0;

        const int written = WideCharToMultiByte(CP_ACP, 0, in, -1, out, MAX_PATH, 0, 0);
        if ( written == 0 )
            out[0]=0; // a failed conversion may leave partial, unterminated data behind
        return written;
    }

    // Converts the NUL-terminated ANSI (CP_ACP) string `in` to a wide string.
    //
    // `out` must have room for at least MAX_PATH WCHARs, because that is the
    // buffer size reported to MultiByteToWideChar.
    // Returns the number of WCHARs written (terminator included), or 0 on
    // failure; on failure `out` is left as an empty string.
    inline int a2w( const char *in, WCHAR *out )
    {
        if ( ! out )
            return 0;

        out[0]=0;
        if ( ! in )
            return 0;

        const int written = MultiByteToWideChar(CP_ACP, 0, in, -1, out, MAX_PATH);
        if ( written == 0 )
            out[0]=0; // a failed conversion may leave partial, unterminated data behind
        return written;
    }




    // Creates the in-process SAPI voice object. When creation fails the
    // pointer is forced back to NULL, which the other methods treat as
    // "no voice available" and answer with 0.
    Voice::Voice()
        : spVoice(0)
    {
        const HRESULT hr = CoCreateInstance( CLSID_SpVoice, NULL, CLSCTX_INPROC_SERVER, IID_ISpVoice, reinterpret_cast<LPVOID *>(&spVoice) );
        if ( FAILED(hr) )
            spVoice = NULL;
    }


    // Releases the SAPI voice, if one was created, and clears the pointer.
    Voice::~Voice()
    {
        if ( spVoice )
            spVoice->Release();
        spVoice = NULL;
    }

    //SPF_ASYNC = ( 1L << 0 ) ,
    //SPF_PURGEBEFORESPEAK  = ( 1L << 1 ) ,
    //SPF_IS_FILENAME   = ( 1L << 2 ) ,
    //SPF_IS_XML    = ( 1L << 3 ) ,
    //SPF_IS_NOT_XML    = ( 1L << 4 ) ,
    //SPF_PERSIST_XML   = ( 1L << 5 ) ,
    //SPF_NLP_SPEAK_PUNC    = ( 1L << 6 ) ,
    //SPF_PARSE_SAPI    = ( 1L << 7 ) ,
    //SPF_PARSE_SSML    = ( 1L << 8 ) ,
    //SPF_PARSE_AUTODETECT  = 0,
    int Voice::speak( const char * txt, int flags ) const 
    {
        if ( ! spVoice )
            return 0;

        WCHAR wtxt[800];
        a2w(txt,wtxt);

        ULONG pulstream = 0;
        HRESULT hr = spVoice->Speak( wtxt, flags, &pulstream );

        return hr==S_OK; 
    }


    // Supported values range from -10 to 10 
    int Voice::setRate( int s )
    {
        if ( ! spVoice )
            return 0;

        HRESULT hr = spVoice->SetRate( s );

        return hr==S_OK; 
    }

    // Sets the output volume; supported values range from 0 to 100.
    // Values outside that range are rejected here, before the narrowing
    // conversion to USHORT, so they cannot wrap around into the valid range.
    // Returns 1 when ISpVoice::SetVolume reported S_OK, 0 otherwise
    // (no voice available, out-of-range value, or any other HRESULT).
    int Voice::setVolume( int s )
    {
        if ( ! spVoice )
            return 0;

        if ( s < 0 || s > 100 )
            return 0;

        HRESULT hr = spVoice->SetVolume ( static_cast<USHORT>(s) );

        return hr==S_OK;
    }
}



// ----- >8 main.cpp --------------------------------------------

#include "opencv2/core/core.hpp"
#include "opencv2/highgui/highgui.hpp"

using namespace cv;

#include "speech.h"

int main(int argc, char** argv)
{
    Speech::Voice voice;
    voice.speak("hello , oh, hello!", 1); // async

    Mat img(300,300,CV_8UC3,Scalar(255,0,0));
    namedWindow("Display window",0);
    putText(img,"lala la",Point(20,120),0,2.5,Scalar(0,200,0),5);
    imshow("Display window", img);
    waitKey(0);

    voice.speak("bye bye, see you later !"); // sync
    return 0;
}

自主实时人脸识别系统

1 个答案: