我想使用 Microsoft 的 Speech API 识别一个特定的短语,并让它返回该短语在音频中被检测到的时间点。
例如:
短语:“我喜欢苹果”
音频:5分钟音频
假设该短语在 3 分 30 秒处被检测到,我想把这个时间点连同“音频中存在该短语”这一事实一起保存下来。这可以实现吗?
答案 0 :(得分:0)
我找到了一个使用 “AudioPosition” 的示例,看起来正是我要找的:
using System;
using System.Collections.Generic;
using Microsoft.Speech.Recognition;
namespace SampleRecognition
{
    /// <summary>
    /// Console sample that listens on the default audio device for phrases of the form
    /// "I want to fly from &lt;city&gt; to &lt;city&gt;" and prints details about every
    /// recognition, including where in the input audio the phrase was found.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Builds the "Book Flight" grammar, starts asynchronous recognition and keeps
        /// the process alive until the user presses Enter.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // Initialize a SpeechRecognitionEngine object.
            using (SpeechRecognitionEngine recognizer =
                new SpeechRecognitionEngine(new System.Globalization.CultureInfo("en-US")))
            {
                // Create SemanticResultValue objects that contain cities and airport codes.
                SemanticResultValue chicago = new SemanticResultValue("Chicago", "ORD");
                SemanticResultValue boston = new SemanticResultValue("Boston", "BOS");
                SemanticResultValue miami = new SemanticResultValue("Miami", "MIA");
                SemanticResultValue dallas = new SemanticResultValue("Dallas", "DFW");

                // Create a Choices object and add the SemanticResultValue objects.
                Choices cities = new Choices();
                cities.Add(new Choices(new GrammarBuilder[] { chicago, boston, miami, dallas }));

                // Build the phrase and add SemanticResultKeys.
                GrammarBuilder chooseCities = new GrammarBuilder();
                chooseCities.Append("I want to fly from");
                chooseCities.Append(new SemanticResultKey("origin", cities));
                chooseCities.Append("to");
                chooseCities.Append(new SemanticResultKey("destination", cities));

                // Build a Grammar object from the GrammarBuilder.
                Grammar bookFlight = new Grammar(chooseCities);
                bookFlight.Name = "Book Flight";

                // Add a handler for the SpeechRecognized event.
                recognizer.SpeechRecognized +=
                    new EventHandler<SpeechRecognizedEventArgs>(recognizer_SpeechRecognized);

                // Load the grammar object to the recognizer.
                recognizer.LoadGrammarAsync(bookFlight);

                // Set the input to the recognizer.
                recognizer.SetInputToDefaultAudioDevice();

                // Start recognition. RecognizeMode.Multiple keeps the recognizer running
                // after the first result; the parameterless overload performs a single
                // recognition operation and then stops, so later occurrences of the
                // phrase in a long audio stream would never be reported.
                recognizer.RecognizeAsync(RecognizeMode.Multiple);
                Console.WriteLine("Starting asynchronous recognition...");

                // Keep the console window open.
                Console.ReadLine();
            }
        }

        /// <summary>
        /// Handles the SpeechRecognized event by printing the recognized phrase, its
        /// semantic values, per-word details, audio timing, alternates and any
        /// replacement text produced by normalization.
        /// </summary>
        /// <param name="sender">The object that raised the event; not used.</param>
        /// <param name="e">Event data carrying the recognition result.</param>
        static void recognizer_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
        {
            Console.WriteLine("Recognition result summary:");
            Console.WriteLine(
                " Recognized phrase: {0}\n" +
                " Confidence score {1}\n" +
                " Grammar used: {2}\n",
                e.Result.Text, e.Result.Confidence, e.Result.Grammar.Name);

            // Display the semantic values in the recognition result.
            Console.WriteLine(" Semantic results:");
            foreach (KeyValuePair<String, SemanticValue> child in e.Result.Semantics)
            {
                Console.WriteLine(" The {0} city is {1}",
                    child.Key, child.Value.Value ?? "null");
            }
            Console.WriteLine();

            // Display information about the words in the recognition result.
            // Placeholders are listed in positional order; the printed text is unchanged.
            Console.WriteLine(" Word summary: ");
            foreach (RecognizedWordUnit word in e.Result.Words)
            {
                Console.WriteLine(
                    " Lexical form ({0})" +
                    " Pronunciation ({1})" +
                    " Display form ({2})",
                    word.LexicalForm, word.Pronunciation, word.DisplayAttributes);
            }

            // Display information about the audio in the recognition result.
            // NOTE(review): Audio is guarded against null because some results
            // (presumably emulated recognitions) may carry no audio - confirm against
            // the RecognitionResult.Audio documentation.
            RecognizedAudio audio = e.Result.Audio;
            if (audio != null)
            {
                // AudioPosition and Duration are TimeSpan values; convert them to
                // milliseconds so the numbers match the "mSec" labels.
                Console.WriteLine(" Input audio summary:\n" +
                    " Candidate Phrase at: {0} mSec\n" +
                    " Phrase Length: {1} mSec\n" +
                    " Input Start Time: {2}\n" +
                    " Input Format: {3}\n",
                    audio.AudioPosition.TotalMilliseconds,
                    audio.Duration.TotalMilliseconds,
                    audio.StartTime,
                    audio.Format.EncodingFormat);
            }
            else
            {
                Console.WriteLine(" Input audio summary: no audio available\n");
            }

            // Display information about the alternate recognitions in the recognition result.
            Console.WriteLine(" Alternate phrase collection:");
            foreach (RecognizedPhrase phrase in e.Result.Alternates)
            {
                Console.WriteLine(" Phrase: " + phrase.Text);
                Console.WriteLine(" Confidence score: " + phrase.Confidence);
            }

            // Display information about text that was replaced during normalization.
            if (e.Result.ReplacementWordUnits.Count != 0)
            {
                Console.WriteLine(" Replacement text:\n");
                foreach (ReplacementText rep in e.Result.ReplacementWordUnits)
                {
                    Console.WriteLine(" At index {0} for {1} words. Text: {2}\n",
                        rep.FirstWordIndex, rep.CountOfWords, rep.Text);
                }
            }
            else
            {
                Console.WriteLine();
                Console.WriteLine("No text was replaced");
            }
        }
    }
}