Question

我的一个朋友和我目前正在Unity中从事VR项目，我们正在尝试将语音识别作为一项功能来实现。我们正在使用Unity版本2018.3.3f1。这个想法是，用户可以说出一个单词，语音识别功能将查看他们是否正确发音。我们选择为此使用Google云语音转文本服务，因为它支持目标语言（挪威语）。此外，该应用程序也是多人游戏，因此我们正在尝试使用Google云语音的流式传输版本。以下是其文档的链接：https://cloud.google.com/speech-to-text/docs/streaming-recognize

我们要做的是拥有一个实质上为我们运行语音识别的插件。它是上面链接中给出的示例代码的修改：

public Task<bool> StartSpeechRecognition()
    {
        return StreamingMicRecognizeAsync(20, "fantastisk");
    }

    static async Task<bool> StreamingMicRecognizeAsync(int inputTime, string inputWord)
    {
        bool speechSuccess = false;
        Stopwatch timer = new Stopwatch();


        Task delay = Task.Delay(TimeSpan.FromSeconds(1));

        if (NAudio.Wave.WaveIn.DeviceCount < 1)
        {
            //Console.WriteLine("No microphone!");
            return false;
        }

        var speech = SpeechClient.Create();
        var streamingCall = speech.StreamingRecognize();
        // Write the initial request with the config.
        await streamingCall.WriteAsync(
            new StreamingRecognizeRequest()
            {
                StreamingConfig = new StreamingRecognitionConfig()
                {
                    Config = new RecognitionConfig()
                    {
                        Encoding =
                        RecognitionConfig.Types.AudioEncoding.Linear16,
                        SampleRateHertz = 16000,
                        LanguageCode = "nb",
                    },
                    InterimResults = true,
                }
            });




        // Compare speech with the input word, finish if they are the same and speechSuccess becomes true.
        Task compareSpeech = Task.Run(async () =>
        {
            while (await streamingCall.ResponseStream.MoveNext(
                default(CancellationToken)))
            {
                foreach (var result in streamingCall.ResponseStream
                    .Current.Results)
                {
                    foreach (var alternative in result.Alternatives)
                    {
                        if (alternative.Transcript.Replace(" ", String.Empty).Equals(inputWord, StringComparison.InvariantCultureIgnoreCase))
                        {
                            speechSuccess = true;

                            return;
                        }

                    }
                }
            }
        });

        // Read from the microphone and stream to API.
        object writeLock = new object();
        bool writeMore = true;
        var waveIn = new NAudio.Wave.WaveInEvent();
        waveIn.DeviceNumber = 0;
        waveIn.WaveFormat = new NAudio.Wave.WaveFormat(16000, 1);
        waveIn.DataAvailable +=
            (object sender, NAudio.Wave.WaveInEventArgs args) =>
            {
                lock (writeLock)
                {
                    if (!writeMore) return;
                    streamingCall.WriteAsync(
                        new StreamingRecognizeRequest()
                        {
                            AudioContent = Google.Protobuf.ByteString
                                .CopyFrom(args.Buffer, 0, args.BytesRecorded)
                        }).Wait();
                }
            };

        waveIn.StartRecording();
        timer.Start();
        //Console.WriteLine("Speak now.");

        //Delay continues as long as a match has not been found between speech and inputword or time that has passed since recording is lower than inputTime.
        while (!speechSuccess && timer.Elapsed.TotalSeconds <= inputTime)
        {
            await delay;
        }

        // Stop recording and shut down.
        waveIn.StopRecording();
        timer.Stop();

        lock (writeLock) writeMore = false;

        await streamingCall.WriteCompleteAsync();
        await compareSpeech;


        //Console.WriteLine("Finished.");
        return speechSuccess;
    }

我们在Unity中创建了一个小项目，以测试该项目是否与具有以下脚本的多维数据集GameObject一起使用：

private CancellationTokenSource tokenSource;
VR_VoiceRecognition.VoiceRecognition voice = new VR_VoiceRecognition.VoiceRecognition();
IDisposable speech;

// Use this for initialization
void Start() {

    speech = Observable.FromCoroutine(WaitForSpeech).Subscribe();

}

// Update is called once per frame
void Update() {

}

IEnumerator WaitForSpeech()
{
    tokenSource = new CancellationTokenSource();
    CancellationToken token = tokenSource.Token;

    Debug.Log("Starting up");

    Task<bool> t = Task.Run(() => voice.StartSpeechRecognition());

    while (!(t.IsCompleted || t.IsCanceled))
    {
        yield return null;
    }


    if (t.Status != TaskStatus.RanToCompletion)
    {

        yield break;
    }
    else
    {
        bool result = t.Result;
        UnityEngine.Debug.Log(t.Result);
        yield return result;
    }

}

void OnApplicationQuit()
{
    print("Closing application.");
    speech.Dispose();
}

我们还使用了Unity支持向我们推荐的插件，他们认为该插件可能具有称为UniRx（https://assetstore.unity.com/packages/tools/integration/unirx-reactive-extensions-for-unity-17276）的变通方法。

目前，当您第一次在编辑器中播放它时，它可以正常工作。当语音识别返回false时，一切都很好（发生这种情况的两种情况是：找不到麦克风或用户未说出特定单词）。但是，如果成功，则仍然返回true，但是如果您在编辑器中退出播放模式并尝试再次播放，则Unity将冻结。 Unity支持人员怀疑它可能与Google .dll文件或Google API有关。我们不太确定从现在开始该怎么做，我们希望有人可以指出我们正确的方向。

Unity-Google云语音到文本语音识别，成功后Unity冻结

0 个答案: