使用 Kinect 和 C# 同时进行语音识别与音频录制

时间:2016-05-30 15:59:50

标签: c# audio kinect grammar speech

单词识别本身可以正常工作,但是一旦开始执行录音,程序就再也无法识别词典(语法)中的单词。

是否可以同时运行这两个功能?这是我的代码:

using Microsoft.Kinect;
namespace PowerPointKinect
{
 /// <summary>
 /// Interaction logic for MainWindow.xaml
 /// </summary>
public partial class MainWindow : Window
{
    // Kinect sensor instance used to reach the device's color, skeleton and audio features.
    // Assigned in Window_Loaded_1 and cleared in Current_Exit.
    KinectSensor miKinect;

    // Stream returned by miKinect.AudioSource.Start(); it is handed to the speech engine
    // and is also the stream RecordAudio reads from.
    Stream audioStream;

    // Bitmap shown in the video image control and the reusable pixel buffer that is
    // copied into it on every color frame.
    WriteableBitmap bitmapImagenColor = null;
    byte[] bytesColor;

    // Skeleton data copied from the most recent skeleton frame.
    Skeleton[] esqueleto = null;

    // Latches for the gesture / voice driven modes. Each flag remembers that the
    // corresponding action has already been triggered or that a mode is switched on.
    bool movimientoAdelanteActivo = false;
    bool movimientoAtrasActivo = false;
    bool movimientoMinimizarActivo = false;
    bool movimientoMaximizarActivo = false;
    bool movimientoRatonActivo = false;
    bool movimientoDibujarActivo = false;
    bool movimientoResaltarActivo = false;
    bool movimientoAudioActivo = false;

    // NOTE(review): not referenced anywhere in the visible part of this class.
    long contador = 0;

    // Fill brushes for the joint markers: active marker, inactive hand, inactive head.
    SolidColorBrush brushActivo = new SolidColorBrush(Colors.Green);
    SolidColorBrush brushInactivo = new SolidColorBrush(Colors.Red);
    SolidColorBrush brushInactivoCabeza = new SolidColorBrush(Colors.Blue);

    // Primary screen size, used to scale hand offsets into cursor coordinates.
    int screenWidth = (int)SystemParameters.PrimaryScreenWidth;
    int screenHeight = (int)SystemParameters.PrimaryScreenHeight;

    /// <summary>
    /// Searches the installed speech recognizers for the first one whose additional
    /// info marks it as a Kinect recognizer and whose culture is es-ES.
    /// </summary>
    /// <returns>
    /// The matching RecognizerInfo, or <code>null</code> when none is installed.
    /// </returns>
    private static RecognizerInfo GetKinectRecognizer()
    {
        return SpeechRecognitionEngine.InstalledRecognizers().FirstOrDefault(candidato =>
        {
            // A missing "Kinect" key leaves the value null, which simply fails the comparison.
            string marcaKinect;
            candidato.AdditionalInfo.TryGetValue("Kinect", out marcaKinect);

            return "True".Equals(marcaKinect, StringComparison.OrdinalIgnoreCase)
                && "es-ES".Equals(candidato.Culture.Name, StringComparison.OrdinalIgnoreCase);
        });
    }
    /// <summary>
    /// Speech recognition engine fed with audio data from the Kinect. It is only
    /// created in Window_Loaded_1 when a suitable recognizer is found.
    /// </summary>
    private SpeechRecognitionEngine speechEngine;

    // Raised at the end of RecordAudio once a recording pass is over.
    private event RoutedEventHandler FinishedRecording;

    /// <summary>
    /// Builds the window from its XAML and hooks the recording-finished notification.
    /// </summary>
    public MainWindow()
    {
        InitializeComponent();

        // Method-group subscription; equivalent to wrapping it in a RoutedEventHandler.
        FinishedRecording += MainWindow_FinishedRecording;
    }

    /// <summary>
    /// Window Loaded handler: picks the first Kinect sensor, starts it together with
    /// its color, skeleton and audio streams, and, when a matching recognizer is
    /// installed, starts continuous speech recognition over the Kinect audio stream.
    /// </summary>
    /// <param name="sender">Event source (unused).</param>
    /// <param name="e">Event data (unused).</param>
    private void Window_Loaded_1(object sender, RoutedEventArgs e)
    {
        // Keep the first Kinect that the runtime reports.
        miKinect = KinectSensor.KinectSensors.FirstOrDefault();
        if (miKinect == null)
        {
            MessageBox.Show("Esta aplicasion requiere de un sensor de kinect.");
            Application.Current.Shutdown();

            // Shutdown() returns to the caller, so without this return the code below
            // would dereference the null sensor and throw before the application closes.
            return;
        }

        // Start the sensor, then enable camera data and body tracking.
        miKinect.Start();
        miKinect.ColorStream.Enable();
        miKinect.SkeletonStream.Enable();

        // Single audio stream of the sensor; the speech engine below reads from it.
        audioStream = miKinect.AudioSource.Start();

        // What to do when a color image / a skeleton frame arrives.
        miKinect.ColorFrameReady += miKinect_ColorFrameReady;
        miKinect.SkeletonFrameReady += miKinect_SkeletonFrameReady;

        RecognizerInfo ri = GetKinectRecognizer();

        if (null != ri)
        {
            this.speechEngine = new SpeechRecognitionEngine(ri.Id);

            // The grammar is built programmatically (rather than loaded from a grammar
            // file): each spoken word maps to the semantic value that SpeechRecognized
            // switches on.
            var directions = new Choices();
            directions.Add(new SemanticResultValue("salir", "END"));
            directions.Add(new SemanticResultValue("zoom", "ZOOM"));
            directions.Add(new SemanticResultValue("anterior", "BACK"));
            directions.Add(new SemanticResultValue("siguiente", "NEXT"));
            directions.Add(new SemanticResultValue("raton", "MOUSE"));
            directions.Add(new SemanticResultValue("clic", "CLICK"));
            directions.Add(new SemanticResultValue("dibujar", "DRAW"));
            directions.Add(new SemanticResultValue("resaltar", "UNDERLINE"));
            directions.Add(new SemanticResultValue("audio", "AUDIO"));

            var gb = new GrammarBuilder { Culture = ri.Culture };
            gb.Append(directions);

            var g = new Grammar(gb);
            speechEngine.LoadGrammar(g);

            speechEngine.SpeechRecognized += SpeechRecognized;

            // For long recognition sessions (a few hours or more), it may be beneficial to
            // turn off adaptation of the acoustic model to keep accuracy from degrading:
            ////speechEngine.UpdateRecognizerSetting("AdaptationOn", 0);

            // 16 kHz, 16-bit, mono PCM (32000 bytes per second, block align 2).
            speechEngine.SetInputToAudioStream(
                audioStream, new SpeechAudioFormatInfo(EncodingFormat.Pcm, 16000, 16, 1, 32000, 2, null));
            speechEngine.RecognizeAsync(RecognizeMode.Multiple);
        }

        Application.Current.Exit += Current_Exit;
    }

    /// <summary>
    /// Application Exit handler: stops speech recognition first, then the Kinect
    /// audio source and the sensor itself, and finally drops the sensor reference.
    /// </summary>
    /// <param name="sender">Event source (unused).</param>
    /// <param name="e">Event data (unused).</param>
    void Current_Exit(object sender, ExitEventArgs e)
    {
        // The recognizer reads from the Kinect audio stream, so it is shut down before
        // its input goes away; speechEngine is null when no recognizer was installed.
        if (speechEngine != null)
        {
            speechEngine.SpeechRecognized -= SpeechRecognized;
            speechEngine.RecognizeAsyncStop();
        }

        if (miKinect != null)
        {
            // Stop audio if not null.
            if (miKinect.AudioSource != null)
            {
                miKinect.AudioSource.Stop();
            }

            miKinect.Stop();
            miKinect = null;
        }
    }

    /// <summary>
    /// Skeleton frame handler: refreshes the skeleton buffer, picks one tracked
    /// skeleton, optionally drives the mouse cursor from the right hand, repositions
    /// the three joint markers and evaluates the slide gestures.
    /// </summary>
    /// <param name="sender">Event source (unused).</param>
    /// <param name="e">Gives access to the skeleton frame.</param>
    void miKinect_SkeletonFrameReady(object sender, SkeletonFrameReadyEventArgs e)
    {
        using (SkeletonFrame cuadro = e.OpenSkeletonFrame())
        {
            if (cuadro != null)
            {
                esqueleto = new Skeleton[cuadro.SkeletonArrayLength];
                cuadro.CopySkeletonDataTo(esqueleto);
            }
        }

        // When the frame was null the data of the previous frame (if any) is used again.
        if (esqueleto == null)
        {
            return;
        }

        // Among tracked skeletons take the one with the smallest Z * |X|.
        Skeleton elegido = (from s in esqueleto
                            where s.TrackingState == SkeletonTrackingState.Tracked
                            orderby s.Position.Z * Math.Abs(s.Position.X)
                            select s).FirstOrDefault();

        if (elegido == null)
        {
            return;
        }

        var cabeza = elegido.Joints[JointType.Head];
        var manoDer = elegido.Joints[JointType.HandRight];
        var manoIzq = elegido.Joints[JointType.HandLeft];
        var pecho = elegido.Joints[JointType.ShoulderCenter];

        // Head and both hands must all be at least inferred to go on.
        bool articulacionesDisponibles =
            cabeza.TrackingState != JointTrackingState.NotTracked &&
            manoDer.TrackingState != JointTrackingState.NotTracked &&
            manoIzq.TrackingState != JointTrackingState.NotTracked;

        if (!articulacionesDisponibles)
        {
            return;
        }

        if (movimientoRatonActivo)
        {
            // In draw or highlight mode every frame issues a left click.
            if (movimientoDibujarActivo || movimientoResaltarActivo)
            {
                MouseLeftClick();
            }

            // Right-hand offset from the chest, scaled so that 0.35 spans the screen.
            float x = manoDer.Position.X - pecho.Position.X;
            float y = pecho.Position.Y - manoDer.Position.Y;
            int cursorX = (int)((x + 0.05) / 0.35 * screenWidth);
            int cursorY = (int)(y / 0.35 * screenHeight);
            SetCursorPos(cursorX, cursorY);
        }

        posicionEllipse(ellipseCabeza, cabeza, false, "cabeza");
        posicionEllipse(ellipseManoIzq, manoIzq, movimientoAtrasActivo, "manoIzq");
        posicionEllipse(ellipseManoDer, manoDer, movimientoAdelanteActivo, "manoDer");
        procesoAdelanteAtras(cabeza, manoDer, manoIzq);
    }

    /// <summary>
    /// Evaluates the four hand gestures against the head position and, for each one,
    /// sends its key stroke once when the gesture starts and re-arms it when it ends.
    /// </summary>
    /// <param name="cabeza">Head joint.</param>
    /// <param name="manoDer">Right hand joint.</param>
    /// <param name="manoIzq">Left hand joint.</param>
    private void procesoAdelanteAtras(Joint cabeza, Joint manoDer, Joint manoIzq)
    {
        // Right hand stretched out to the right of the head: next slide.
        bool gestoSiguiente = manoDer.Position.X > cabeza.Position.X + 0.45;
        aplicarGesto(gestoSiguiente, ref movimientoAdelanteActivo, "{Right}", "Siguiente ON");

        // Left hand stretched out to the left of the head: previous slide.
        bool gestoAnterior = manoIzq.Position.X < cabeza.Position.X - 0.45;
        aplicarGesto(gestoAnterior, ref movimientoAtrasActivo, "{Left}", "Anterior ON");

        // Hands crossed (left hand to the right of the right hand): ESC.
        bool gestoFinalizar = manoIzq.Position.X > manoDer.Position.X;
        aplicarGesto(gestoFinalizar, ref movimientoMinimizarActivo, "{ESC}", "Finalizar ON");

        // Both hands above the head: F5.
        bool gestoZoom = (manoIzq.Position.Y > cabeza.Position.Y) && (manoDer.Position.Y > cabeza.Position.Y);
        aplicarGesto(gestoZoom, ref movimientoMaximizarActivo, "{F5}", "Zoom ON");
    }

    // Latch shared by all gestures: fires keys/message only on the inactive->active
    // transition and clears the latch as soon as the gesture is no longer detected.
    private void aplicarGesto(bool detectado, ref bool activo, string teclas, string mensaje)
    {
        if (!detectado)
        {
            activo = false;
            return;
        }

        if (activo)
        {
            return;
        }

        activo = true;
        System.Windows.Forms.SendKeys.SendWait(teclas);
        textoInfo.Text = mensaje;
    }

    /// <summary>
    /// Sizes and colors the marker of one joint and moves it to the joint's position
    /// mapped into color-image coordinates.
    /// </summary>
    /// <param name="ellipse">Marker to update.</param>
    /// <param name="joint">Joint the marker follows.</param>
    /// <param name="activo">Whether the gesture tied to this joint is currently active.</param>
    /// <param name="parteCuerpo">"cabeza", "manoIzq" or "manoDer".</param>
    private void posicionEllipse(Ellipse ellipse, Joint joint, bool activo, string parteCuerpo)
    {
        bool esMano = (parteCuerpo == "manoIzq") || (parteCuerpo == "manoDer");

        // Hands are also highlighted while the two-hand gestures are active.
        bool resaltado = activo || (esMano && (movimientoMaximizarActivo || movimientoMinimizarActivo));

        double diametro;
        SolidColorBrush relleno;
        if (resaltado)
        {
            diametro = 60;
            relleno = brushActivo;
        }
        else if (parteCuerpo == "cabeza")
        {
            // The head keeps the large size even when inactive, with its own color.
            diametro = 60;
            relleno = brushInactivoCabeza;
        }
        else
        {
            diametro = 20;
            relleno = brushInactivo;
        }

        ellipse.Width = diametro;
        ellipse.Height = diametro;
        ellipse.Fill = relleno;

        var punto = miKinect.CoordinateMapper.MapSkeletonPointToColorPoint(
            joint.Position, miKinect.ColorStream.Format);

        // Center the marker on the mapped point.
        Canvas.SetLeft(ellipse, punto.X - ellipse.Width / 2);
        Canvas.SetTop(ellipse, punto.Y - ellipse.Height / 2);
    }


    /// <summary>
    /// Color frame handler: copies the frame's pixels into the reusable buffer, writes
    /// them into the shared bitmap and shows that bitmap in the video image control.
    /// </summary>
    /// <param name="sender">Event source (unused).</param>
    /// <param name="e">Gives access to the color frame.</param>
    void miKinect_ColorFrameReady(object sender, ColorImageFrameReadyEventArgs e)
    {
        // The frame is disposed as soon as its pixels have been copied out.
        using (ColorImageFrame cuadro = e.OpenColorImageFrame())
        {
            if (cuadro == null)
            {
                return;
            }

            int ancho = cuadro.Width;
            int alto = cuadro.Height;

            // (Re)allocate the byte buffer only when its size no longer fits the frame.
            bool bufferReutilizable = bytesColor != null && bytesColor.Length == cuadro.PixelDataLength;
            if (!bufferReutilizable)
            {
                bytesColor = new byte[cuadro.PixelDataLength];
            }

            cuadro.CopyPixelDataTo(bytesColor);

            // The bitmap is created once, from the dimensions of the first frame.
            if (bitmapImagenColor == null)
            {
                bitmapImagenColor = new WriteableBitmap(ancho, alto, 96, 96, PixelFormats.Bgr32, null);
            }

            var zona = new Int32Rect(0, 0, ancho, alto);
            int bytesPorFila = ancho * cuadro.BytesPerPixel;
            bitmapImagenColor.WritePixels(zona, bytesColor, bytesPorFila, 0);

            imagenVideo.Source = bitmapImagenColor;
        }
    }

    /// <summary>
    /// Handles a recognized voice command: maps the semantic value of the result to
    /// a key stroke, a mouse action, a mode toggle or the start of an audio recording.
    /// </summary>
    /// <param name="sender">Speech engine that raised the event (unused).</param>
    /// <param name="e">Recognition result with its confidence and semantic value.</param>
    private void SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
    {
        // Speech utterance confidence below which we treat speech as if it hadn't been heard
        const double ConfidenceThreshold = 0.3;

        if (e.Result.Confidence < ConfidenceThreshold)
        {
            return;
        }

        // A result without a semantic value cannot be mapped to a command.
        object valorSemantico = e.Result.Semantics.Value;
        if (valorSemantico == null)
        {
            return;
        }

        switch (valorSemantico.ToString())
        {
            case "END":
                System.Windows.Forms.SendKeys.SendWait("{ESC}");
                textoInfo.Text = "Finalizar ON";
                break;
            case "ZOOM":
                System.Windows.Forms.SendKeys.SendWait("{F5}");
                textoInfo.Text = "Zoom ON";
                break;
            case "BACK":
                // NOTE(review): this shares the gesture latch, so when the latch is already
                // set the command only clears it and sends no key - confirm that is intended.
                if (!movimientoAtrasActivo)
                {
                    movimientoAtrasActivo = true;
                    System.Windows.Forms.SendKeys.SendWait("{Left}");
                    textoInfo.Text = "Anterior ON";
                }
                else
                {
                    movimientoAtrasActivo = false;
                }
                break;
            case "NEXT":
                if (!movimientoAdelanteActivo)
                {
                    movimientoAdelanteActivo = true;
                    System.Windows.Forms.SendKeys.SendWait("{Right}");
                    textoInfo.Text = "Siguiente ON";
                }
                else
                {
                    movimientoAdelanteActivo = false;
                }
                break;
            case "MOUSE":
                if (!movimientoRatonActivo)
                {
                    // Mouse mode on: the skeleton handler starts moving the cursor.
                    movimientoRatonActivo = true;
                    textoInfo.Text = "Ratón ON";
                }
                else
                {
                    // Leaving mouse mode also leaves both click-per-frame modes, as the
                    // "draw off" branch does; otherwise automatic clicking would resume
                    // by surprise the next time the mouse is switched on.
                    movimientoRatonActivo = false;
                    movimientoDibujarActivo = false;
                    movimientoResaltarActivo = false;
                    textoInfo.Text = "Ratón Off";
                }
                break;
            case "CLICK":
                // A click is only issued while mouse mode is on.
                if (movimientoRatonActivo)
                {
                    MouseLeftClick();
                    textoInfo.Text = "Ratón ON - Click ON";
                }
                break;
            case "DRAW":
                if (movimientoRatonActivo)
                {
                    if (!movimientoDibujarActivo)
                    {
                        // Mouse mode on and "draw" spoken: send Ctrl+P and start drawing.
                        movimientoDibujarActivo = true;
                        System.Windows.Forms.SendKeys.SendWait("^{p}");
                        textoInfo.Text = "Dibujar ON";
                    }
                    else
                    {
                        movimientoDibujarActivo = false;
                        movimientoResaltarActivo = false;
                        System.Windows.Forms.SendKeys.SendWait("{ESC}");
                        textoInfo.Text = "Dibujar Off";
                    }
                }
                break;
            case "UNDERLINE":
                if (!movimientoResaltarActivo)
                {
                    movimientoResaltarActivo = true;
                    textoInfo.Text = "Ratón ON - Subrayador ON";
                }
                else
                {
                    movimientoResaltarActivo = false;
                    textoInfo.Text = "Ratón ON - Subrayador Off";
                }
                break;
            case "AUDIO":
                if (!movimientoAudioActivo)
                {
                    movimientoAudioActivo = true;

                    // Record on a separate thread. It is a background thread so that a
                    // recording in progress cannot keep the process alive after exit.
                    var t = new Thread(new ParameterizedThreadStart(RecordAudio));
                    t.IsBackground = true;
                    t.Start(miKinect);
                    textoInfo.Text = "Grabadora audio ON";
                }
                else
                {
                    // NOTE(review): this only clears the flag and updates the label; a
                    // recording that is already running is not interrupted.
                    movimientoAudioActivo = false;
                    textoInfo.Text = "Grabadora audio Off";
                }
                break;
        }
    }



    #region Mouse Controll

    //Move Mouse
    //SetCursorPos(10, 10);

    /// <summary>
    /// Issues a left-button press immediately followed by a release; both events
    /// carry the Absolute flag and zero coordinates.
    /// </summary>
    public void MouseLeftClick()
    {
        const MouseEventFlag pulsar = MouseEventFlag.LeftDown | MouseEventFlag.Absolute;
        const MouseEventFlag soltar = MouseEventFlag.LeftUp | MouseEventFlag.Absolute;

        mouse_event(pulsar, 0, 0, 0, UIntPtr.Zero);
        mouse_event(soltar, 0, 0, 0, UIntPtr.Zero);
    }

    /// <summary>
    /// Issues a left-button press without releasing it.
    /// </summary>
    public void MouseLeftDown()
    {
        const MouseEventFlag pulsar = MouseEventFlag.LeftDown;
        mouse_event(pulsar, 0, 0, 0, UIntPtr.Zero);
    }

    /// <summary>
    /// Issues a left-button release.
    /// </summary>
    public void MouseLeftUp()
    {
        const MouseEventFlag soltar = MouseEventFlag.LeftUp;
        mouse_event(soltar, 0, 0, 0, UIntPtr.Zero);
    }

    // user32.dll entry point used to place the cursor at the given screen coordinates.
    [DllImport("user32.dll")]
    static extern bool SetCursorPos(int X, int Y);
    // user32.dll entry point used to synthesize mouse button and motion events.
    [DllImport("user32.dll")]
    static extern void mouse_event(MouseEventFlag flags, int dx, int dy, uint data, UIntPtr extraInfo);
    // Bit flags passed as the first argument of mouse_event; they can be combined with |.
    // NOTE(review): values presumably mirror the Win32 MOUSEEVENTF_* constants - confirm
    // against the Windows documentation before changing any of them.
    [Flags]
    enum MouseEventFlag : uint
    {
        Move = 0x0001,
        LeftDown = 0x0002,
        LeftUp = 0x0004,
        RightDown = 0x0008,
        RightUp = 0x0010,
        MiddleDown = 0x0020,
        MiddleUp = 0x0040,
        XDown = 0x0080,
        XUp = 0x0100,
        Wheel = 0x0800,
        VirtualDesk = 0x4000,
        Absolute = 0x8000
    }
    #endregion




    /// <summary>
    /// Handler subscribed to FinishedRecording in the constructor. RecordAudio raises
    /// that event with null sender and null arguments, so neither parameter may be
    /// dereferenced here. The body is currently empty: both alternatives below are
    /// commented out.
    /// </summary>
    void MainWindow_FinishedRecording(object sender, RoutedEventArgs e)
    {
        //This is only required if recording on a separate thread to ensure that enabling the buttons
        //happens on the UI thread
        //Dispatcher.BeginInvoke(new ThreadStart(ReenableButtons));

        //use this if recording on the same thread
        //ReenableButtons(); 
    }

    /// <summary>
    /// Thread entry point with the object-typed signature required by
    /// ParameterizedThreadStart; casts the argument and forwards it to the typed overload.
    /// </summary>
    /// <param name="kinectSensor">The KinectSensor to record from, boxed as object.</param>
    private void RecordAudio(object kinectSensor)
    {
        RecordAudio((KinectSensor)kinectSensor);
    }

    /// <summary>
    /// Records up to ten seconds of 16 kHz, 16-bit mono audio from the shared Kinect
    /// audio stream into a time-stamped WAV file in the current directory, then
    /// raises FinishedRecording.
    /// </summary>
    /// <param name="kinectSensor">Sensor being recorded; a null value makes the call a no-op.</param>
    private void RecordAudio(KinectSensor kinectSensor)
    {
        if (kinectSensor == null)
        {
            return;
        }

        // The stream is owned by the window and is also the input of the speech
        // engine, so it must NOT be disposed here: closing it after a recording is
        // what left the recognizer permanently deaf.
        // NOTE(review): recorder and recognizer still read from the same stream, so
        // bytes consumed here never reach the recognizer while a recording is running.
        // Fully simultaneous operation needs a single reader that fans the data out.
        Stream origen = audioStream;
        if (origen == null)
        {
            return;
        }

        const int duracionAudio = 10;               // seconds
        const int bytesPorSegundo = 2 * 16000;      // 16-bit samples at 16 kHz, mono
        int recordingLength = duracionAudio * bytesPorSegundo;
        byte[] buffer = new byte[1024];

        string ficheroAudio = DateTime.Now.ToString("yyyyMMddHHmmss") + "_wav.wav";

        using (FileStream _fileStream = new FileStream(ficheroAudio, FileMode.Create))
        {
            WriteWavHeader(_fileStream, recordingLength);

            // Copy audio to the file. The length is checked before reading and every
            // read is capped, so no data is consumed and thrown away and the file
            // never holds more bytes than its header announces.
            int totalCount = 0;
            while (totalCount < recordingLength)
            {
                int pendiente = Math.Min(buffer.Length, recordingLength - totalCount);
                int count = origen.Read(buffer, 0, pendiente);
                if (count <= 0)
                {
                    break;
                }

                _fileStream.Write(buffer, 0, count);
                totalCount += count;
            }

            // The stream ended early: rewrite the fixed-size header with the real length.
            if (totalCount != recordingLength)
            {
                _fileStream.Seek(0, SeekOrigin.Begin);
                WriteWavHeader(_fileStream, totalCount);
            }
        }

        // Copy to a local first: this runs on the recording thread.
        RoutedEventHandler handler = FinishedRecording;
        if (handler != null)
        {
            handler(null, null);
        }
    }


    /// <summary>
    /// Writes a 46-byte WAV header (RIFF chunk, 18-byte WAVEFORMATEX "fmt " chunk and
    /// "data" chunk header) describing 16 kHz, 16-bit, mono PCM audio.
    /// </summary>
    /// <param name="stream">Destination stream; the header is written at its current position.</param>
    /// <param name="dataLength">Number of audio data bytes that follow the header.</param>
    static void WriteWavHeader(Stream stream, int dataLength)
    {
        const int cbFormat = 18;                // sizeof(WAVEFORMATEX)
        const ushort formatTag = 1;             // PCM
        const ushort channels = 1;
        const uint samplesPerSec = 16000;
        const uint avgBytesPerSec = 32000;
        const ushort blockAlign = 2;
        const ushort bitsPerSample = 16;
        const ushort cbSize = 0;

        // The header is built in a memory stream because closing the BinaryWriter also
        // closes the stream it wraps, and the caller's stream has to stay open.
        using (var memStream = new MemoryStream(64))
        using (var bw = new BinaryWriter(memStream))
        {
            // RIFF header. The chunk size is the file size minus the 8 bytes of the
            // "RIFF" tag and the size field itself:
            //   4 ("WAVE") + 8 + cbFormat ("fmt " chunk) + 8 + dataLength ("data" chunk)
            bw.Write(Encoding.ASCII.GetBytes("RIFF"));
            bw.Write(dataLength + cbFormat + 20);
            bw.Write(Encoding.ASCII.GetBytes("WAVE"));

            // "fmt " chunk holding the WAVEFORMATEX fields in declaration order.
            bw.Write(Encoding.ASCII.GetBytes("fmt "));
            bw.Write(cbFormat);
            bw.Write(formatTag);
            bw.Write(channels);
            bw.Write(samplesPerSec);
            bw.Write(avgBytesPerSec);
            bw.Write(blockAlign);
            bw.Write(bitsPerSample);
            bw.Write(cbSize);

            // "data" chunk header; the samples themselves are written by the caller.
            bw.Write(Encoding.ASCII.GetBytes("data"));
            bw.Write(dataLength);

            bw.Flush();
            memStream.WriteTo(stream);
        }
    }

    /// <summary>
    /// Writes the ASCII encoding of a string to a stream, with no length prefix and
    /// no terminator.
    /// </summary>
    /// <param name="stream">Destination stream.</param>
    /// <param name="s">Text to encode and write.</param>
    static void WriteString(Stream stream, string s)
    {
        Encoding ascii = Encoding.ASCII;

        // Encode into an exactly sized buffer, then emit it in a single write.
        int longitud = ascii.GetByteCount(s);
        byte[] codificado = new byte[longitud];
        ascii.GetBytes(s, 0, s.Length, codificado, 0);

        stream.Write(codificado, 0, longitud);
    }

    // Audio format description whose fields add up to 18 bytes
    // (2 + 2 + 4 + 4 + 2 + 2 + 2), in the order a WAV "fmt " chunk stores them.
    struct WAVEFORMATEX
    {
        public ushort wFormatTag;       // format code (this file writes 1)
        public ushort nChannels;        // number of channels
        public uint nSamplesPerSec;     // sample rate
        public uint nAvgBytesPerSec;    // average bytes per second
        public ushort nBlockAlign;      // bytes per sample block
        public ushort wBitsPerSample;   // bits per sample
        public ushort cbSize;           // size of extra format data (this file writes 0)
    }






}

}

0 个答案:

没有答案