首先声明,我是C#编程的新手。我正在开发一个应用程序,使用C#和SAPI v5.4(SpeechLib)以编程方式修改Windows语音词典。到目前为止一切运行良好,但我需要更深入地了解字符串在合成(发声)时是如何被解释的。
我的理解是,在SAPI 5.4中,单词会被分解为音素表示(phoneme representations),而且我已经取得了一些成功:通过使用音素来"训练"单词,使其发音正确。我也知道可以手动把单词添加到Windows语音识别词典中,提供一段录音,然后提取该单词的发音(音素)……但这很麻烦。如果能探索单词在默认情况下(即没有我的任何输入)是如何被合成的,也会很有用(例如,合成器会如何解读"dolphins"(海豚)?)。
从编码的角度来看,这是我到目前为止所做的:
using System;
using System.Speech.Synthesis;
// Question snippet: speaks a single word through the default audio device.
// NOTE(review): this snippet is intentionally incomplete -- the assignment to
// myPronunciation below is a placeholder and will not compile as written.
namespace SpeechTest
{
class Program
{
// Entry point: configures a synthesizer, speaks myWord, then waits for console input.
static void Main(string[] args)
{
// Set up the speech synthesizer
// NOTE(review): the synthesizer is never disposed in this snippet.
SpeechSynthesizer synthesizer = new SpeechSynthesizer();
synthesizer.Volume = 100;
synthesizer.Rate = -2;
// Configure the audio output
synthesizer.SetOutputToDefaultAudioDevice();
// Initialize string to store word of interest (not in the speech dictionary)
string myWord = "dolphins";
// Speak the word of interest
synthesizer.Speak(myWord);
// Retrieve pronunciation of myWord
// Placeholder: the expression that should produce the pronunciation is the open question.
string myPronunciation = // *some code here*
// The prompt says "any key", but ReadLine() is what is called to wait for input.
Console.WriteLine("Press any key to exit...");
Console.ReadLine();
}
}
}
答案 0 :(得分:0)
多亏了Casey Chesnut的出色工作,我已经弄清楚了如何确定给定字符串的IPA音素(phones)。现在我只需要弄清楚如何把IPA音素转换为SAPI符号,不过这属于另一个单独的话题(关于如何从文本字符串中获取SAPI音素,请参阅此处(here)的链接)。
using System;
using System.Collections.ObjectModel;
using System.ComponentModel;
using System.IO;
using System.Speech.Recognition;
using System.Speech.Synthesis;
using System.Text;
using System.Windows.Forms;
namespace SpeechTest
{
/// <summary>
/// Demonstrates how to obtain the pronunciation (phonemes) that the speech
/// engines associate with a piece of text: the text is synthesized into an
/// in-memory wave stream, and that audio is then fed to a recognizer whose
/// grammar contains only the same text. The pronunciation is read from the
/// recognized word units.
/// </summary>
class Program
{
    /// <summary>
    /// Pronunciation produced by the most recent recognition pass. It is updated
    /// by the hypothesis/rejection event handlers and by
    /// <see cref="GetPronunciationFromText"/>.
    /// </summary>
    public static string recoPhonemes;

    /// <summary>
    /// Looks up the pronunciation of a sample word and shows it in a message box.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        // Word (or words) of interest.
        string text = "dolphins";

        // Pronunciation of the trimmed text as reported by the recognizer.
        string pronunciation = GetPronunciationFromText(text.Trim());

        // Show the text next to its pronunciation.
        MessageBox.Show(text + " = " + pronunciation);
    }

    /// <summary>
    /// Synthesizes <paramref name="MyWord"/> to an in-memory wave stream and runs
    /// speech recognition over that audio to recover the pronunciation.
    /// </summary>
    /// <param name="MyWord">The word or phrase whose pronunciation is wanted.</param>
    /// <returns>
    /// The concatenated pronunciation of the recognized words. If recognition
    /// returns no result, whatever the hypothesis/rejection handlers last stored
    /// (possibly an empty string) is returned.
    /// </returns>
    public static string GetPronunciationFromText(string MyWord)
    {
        // This is a trick to figure out the phonemes used by the synthesis engine.
        using (MemoryStream audioStream = new MemoryStream())
        {
            // Step 1: text -> wav.
            using (SpeechSynthesizer synth = new SpeechSynthesizer())
            {
                synth.SetOutputToWaveStream(audioStream);
                synth.Speak(MyWord);
                // Detach the stream from the synthesizer before reading it back.
                synth.SetOutputToNull();
            }

            // Rewind so the recognizer reads the audio from the beginning.
            audioStream.Position = 0;

            // Step 2: wav -> recognized words (for their pronunciation).
            recoPhonemes = String.Empty;

            // The recognition engine is disposable; release it deterministically.
            using (SpeechRecognitionEngine reco = new SpeechRecognitionEngine())
            {
                reco.SpeechHypothesized += reco_SpeechHypothesized;
                reco.SpeechRecognitionRejected += reco_SpeechRecognitionRejected;

                // Only use the single-phrase grammar built from the input text.
                // TODO: the hard letters to recognize are 'g' and 'e'.
                reco.UnloadAllGrammars();
                reco.LoadGrammar(new Grammar(new GrammarBuilder(MyWord)));

                reco.SetInputToWaveStream(audioStream);
                RecognitionResult result = reco.Recognize();
                reco.SetInputToNull();

                if (result != null)
                {
                    recoPhonemes = StringFromWordArray(result.Words, WordType.Pronunciation);
                }
            }

            return recoPhonemes;
        }
    }

    /// <summary>
    /// Concatenates one representation (text, lexical form or pronunciation) of
    /// each recognized word, applying the word's display attributes for spacing.
    /// </summary>
    /// <param name="words">The recognized word units to join.</param>
    /// <param name="type">Which representation of each word to use.</param>
    /// <returns>The joined string; empty when <paramref name="words"/> is empty.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="words"/> is null.</exception>
    /// <exception cref="InvalidEnumArgumentException">
    /// <paramref name="type"/> is not a defined <see cref="WordType"/> value
    /// (raised when the first word is processed).
    /// </exception>
    public static string StringFromWordArray(ReadOnlyCollection<RecognizedWordUnit> words, WordType type)
    {
        if (words == null)
        {
            throw new ArgumentNullException("words");
        }

        StringBuilder text = new StringBuilder();
        foreach (RecognizedWordUnit word in words)
        {
            string wordText;
            switch (type)
            {
                // WordType.Normalized has the same value as WordType.Text,
                // so this case covers both.
                case WordType.Text:
                    wordText = word.Text;
                    break;
                case WordType.Lexical:
                    wordText = word.LexicalForm;
                    break;
                case WordType.Pronunciation:
                    wordText = word.Pronunciation;
                    break;
                default:
                    // The original format string "[0}" was malformed and made
                    // String.Format throw FormatException instead of this exception.
                    throw new InvalidEnumArgumentException(String.Format("{0}: is not a valid input", type));
            }

            // Apply the display attributes to decide on surrounding whitespace.
            DisplayAttributes attributes = word.DisplayAttributes;
            if ((attributes & DisplayAttributes.OneTrailingSpace) != 0)
            {
                wordText += " ";
            }
            if ((attributes & DisplayAttributes.TwoTrailingSpaces) != 0)
            {
                // Two spaces, as the attribute name states.
                wordText += "  ";
            }
            if ((attributes & DisplayAttributes.ConsumeLeadingSpaces) != 0)
            {
                wordText = wordText.TrimStart();
            }
            if ((attributes & DisplayAttributes.ZeroTrailingSpaces) != 0)
            {
                wordText = wordText.TrimEnd();
            }

            text.Append(wordText);
        }

        return text.ToString();
    }

    /// <summary>
    /// Stores the pronunciation of a tentative (hypothesized) recognition result.
    /// </summary>
    /// <param name="sender">The recognition engine raising the event.</param>
    /// <param name="e">Event data carrying the hypothesized result.</param>
    public static void reco_SpeechHypothesized(object sender, SpeechHypothesizedEventArgs e)
    {
        if (e == null || e.Result == null)
        {
            return;
        }

        recoPhonemes = StringFromWordArray(e.Result.Words, WordType.Pronunciation);
    }

    /// <summary>
    /// Stores the pronunciation of a rejected recognition result so that it is
    /// still available when recognition returns no final result.
    /// </summary>
    /// <param name="sender">The recognition engine raising the event.</param>
    /// <param name="e">Event data carrying the rejected result.</param>
    public static void reco_SpeechRecognitionRejected(object sender, SpeechRecognitionRejectedEventArgs e)
    {
        if (e == null || e.Result == null)
        {
            return;
        }

        recoPhonemes = StringFromWordArray(e.Result.Words, WordType.Pronunciation);
    }
}
/// <summary>
/// Selects which representation of a recognized word is used when word units
/// are joined into a string.
/// </summary>
public enum WordType
{
    /// <summary>The word's display text.</summary>
    Text = 0,

    /// <summary>Alias of <see cref="Text"/>; shares the same underlying value.</summary>
    Normalized = Text,

    /// <summary>The word's lexical form.</summary>
    Lexical = 1,

    /// <summary>The word's pronunciation.</summary>
    Pronunciation = 2
}
}
// Credit for method of retrieving IPA pronunciation from a string goes to Casey Chesnut (http://www.mperfect.net/speechSamples/)