我想创建一个程序来识别 .wav 文件中的语音。
我尝试了下面的代码，但它抛出了如下异常：
Exception in thread "main" java.lang.OutOfMemoryError: GC overhead limit exceeded
即使我的“eclipse.ini”有这个属性:
--launcher.XXMaxPermSize 2048M
--launcher.XXMaxPermSize 2048m
-Xms2048m
-Xmx2048m
如何修复此异常?
Java代码:
import java.net.MalformedURLException;
import java.net.URL;
import edu.cmu.sphinx.frontend.util.AudioFileDataSource;
import edu.cmu.sphinx.recognizer.Recognizer;
import edu.cmu.sphinx.result.Result;
import edu.cmu.sphinx.util.props.ConfigurationManager;
public class TestRecognizer {
public static void main(String[] args) {
ConfigurationManager cm;
if (args.length > 0) {
cm = new ConfigurationManager(args[0]);
} else {
cm = new ConfigurationManager("english_use_LexTreeLinguist.xml");
}
URL audioURL = null;
try {
audioURL = new URL("file:./10001-90210-01803.wav");
} catch (MalformedURLException e) {
e.printStackTrace();
}
if(audioURL == null)
throw new IllegalArgumentException("Given audio file doesn't exist.");
// allocate the recognizer
System.out.println("Loading recognizer");
Recognizer recognizer = (Recognizer) cm.lookup("recognizer");
recognizer.allocate();
System.out.println("Loading audio");
AudioFileDataSource dataSource = (AudioFileDataSource) cm.lookup("audioFileDataSource");
dataSource.setAudioFile(audioURL, null);
// loop the recognition until the programm exits.
Result result;
System.out.println("recognizing");`enter code here`
while ((result = recognizer.recognize())!= null) {
String resultText = result.getBestResultNoFiller();
System.out.println(resultText);
}
}
}
XML文件配置:
<config>
<!-- ******************************************************** -->
<!-- frequently tuned properties -->
<!-- ******************************************************** -->
<!-- Search beams. absoluteBeamWidth was -1, which (per the Sphinx-4 active-list
     documentation) leaves the number of active hypotheses per frame uncapped; with a
     large-vocabulary lex-tree linguist that lets the search grow until the heap is
     exhausted. It is now capped.
     NOTE(review): 5000 is a starting point, not a measured optimum; tune it against
     recognition accuracy and heap usage. -->
<property name="absoluteBeamWidth" value="5000"/>
<property name="relativeBeamWidth" value="1E-80"/>

<!-- Language-model and insertion weights referenced by the linguist. -->
<property name="wordInsertionProbability" value=".1"/>
<property name="languageWeight" value="8"/>
<property name="silenceInsertionProbability" value="1"/>
<property name="fillerInsertionProbability" value="1E-10"/>

<property name="logLevel" value="WARNING"/>

<!-- Names of the components selected for each role; referenced below as ${...}. -->
<property name="recognizer" value="recognizer"/>
<property name="linguist" value="lexTreeLinguist"/>
<property name="frontend" value="mfcFrontEnd"/>
<!-- ******************************************************** -->
<!-- The Recognizer configuration -->
<!-- ******************************************************** -->
<!-- Recognizer looked up by the application; it delegates to the "decoder" component
     and registers no monitors. -->
<component name="recognizer" type="edu.cmu.sphinx.recognizer.Recognizer">
    <property name="decoder" value="decoder"/>
    <propertylist name="monitors"/>
</component>
<!-- ******************************************************** -->
<!-- The Decoder configuration -->
<!-- ******************************************************** -->
<!-- Decoder: only names the search manager it uses. -->
<component
    name="decoder"
    type="edu.cmu.sphinx.decoder.Decoder">
    <property name="searchManager" value="searchManager"/>
</component>

<!-- Search manager: the linguist comes from the global "linguist" property; the pruner,
     scorer and active-list factory refer to the components declared below. -->
<component name="searchManager" type="edu.cmu.sphinx.decoder.search.SimpleBreadthFirstSearchManager">
    <property name="logMath" value="logMath"/>
    <property name="linguist" value="${linguist}"/>
    <property name="pruner" value="trivialPruner"/>
    <property name="scorer" value="threadedScorer"/>
    <property name="activeListFactory" value="activeList"/>
</component>

<!-- Active-list factory: both beam widths are taken from the global properties. -->
<component name="activeList" type="edu.cmu.sphinx.decoder.search.SortingActiveListFactory">
    <property name="logMath" value="logMath"/>
    <property name="absoluteBeamWidth" value="${absoluteBeamWidth}"/>
    <property name="relativeBeamWidth" value="${relativeBeamWidth}"/>
</component>

<!-- Pruner used by the search manager; declared without any properties. -->
<component name="trivialPruner" type="edu.cmu.sphinx.decoder.pruner.SimplePruner"/>

<!-- Acoustic scorer: reads features from the front end selected by the global
     "frontend" property. -->
<component name="threadedScorer" type="edu.cmu.sphinx.decoder.scorer.ThreadedAcousticScorer">
    <property name="frontend" value="${frontend}"/>
    <property name="isCpuRelative" value="true"/>
    <property name="numThreads" value="0"/>
    <property name="minScoreablesPerThread" value="10"/>
    <property name="scoreablesKeepFeature" value="true"/>
</component>
<!-- ******************************************************** -->
<!-- The linguist configuration -->
<!-- ******************************************************** -->
<!-- Lex-tree linguist: combines the "wsj" acoustic model, the "trigramModel" language
     model and the "englishDict" dictionary. The insertion probabilities and the
     language weight are taken from the global properties. -->
<component name="lexTreeLinguist" type="edu.cmu.sphinx.linguist.lextree.LexTreeLinguist">
    <property name="logMath" value="logMath"/>
    <property name="acousticModel" value="wsj"/>
    <property name="languageModel" value="trigramModel"/>
    <property name="dictionary" value="englishDict"/>
    <property name="addFillerWords" value="false"/>
    <property name="fillerInsertionProbability" value="${fillerInsertionProbability}"/>
    <property name="generateUnitStates" value="false"/>
    <property name="wantUnigramSmear" value="true"/>
    <property name="unigramSmearWeight" value="1"/>
    <property name="wordInsertionProbability" value="${wordInsertionProbability}"/>
    <property name="silenceInsertionProbability" value="${silenceInsertionProbability}"/>
    <property name="languageWeight" value="${languageWeight}"/>
    <property name="unitManager" value="unitManager"/>
</component>
<!-- ******************************************************** -->
<!-- The Language Model configuration -->
<!-- ******************************************************** -->
<!-- N-gram language model (maxDepth 3) loaded from a classpath resource; it shares the
     "englishDict" dictionary with the linguist. -->
<component name="trigramModel" type="edu.cmu.sphinx.linguist.language.ngram.large.LargeTrigramModel">
    <property name="unigramWeight" value=".5"/>
    <property name="maxDepth" value="3"/>
    <property name="logMath" value="logMath"/>
    <property name="dictionary" value="englishDict"/>
    <property name="location" value="resource:/edu/cmu/sphinx/models/language/en-us.lm.dmp"/>
</component>
<!-- ******************************************************** -->
<!-- The Dictionary configuration -->
<!-- ******************************************************** -->
<!-- Pronunciation dictionary and filler (noise) dictionary, both loaded from the
     WSJ 8 kHz model resources on the classpath. -->
<component name="englishDict" type="edu.cmu.sphinx.linguist.dictionary.FastDictionary">
    <property name="dictionaryPath"
              value="resource:/WSJ_8gau_13dCep_8kHz_31mel_200Hz_3500Hz/dict/cmudict.0.6d"/>
    <property name="fillerPath"
              value="resource:/WSJ_8gau_13dCep_8kHz_31mel_200Hz_3500Hz/noisedict"/>
    <property name="addSilEndingPronunciation" value="false"/>
    <!-- A literal '<' is not allowed inside an XML attribute value, so the silence
         token is written with entities; the parser still delivers the text <sil>. -->
    <property name="wordReplacement" value="&lt;sil&gt;"/>
    <property name="unitManager" value="unitManager"/>
</component>
<!-- ******************************************************** -->
<!-- The acoustic model configuration -->
<!-- ******************************************************** -->
<!-- Acoustic model referenced by the linguist; the actual loading is delegated to
     "wsjLoader". -->
<component name="wsj" type="edu.cmu.sphinx.linguist.acoustic.tiedstate.TiedStateAcousticModel">
    <property name="loader" value="wsjLoader"/>
    <property name="unitManager" value="unitManager"/>
</component>

<!-- Loader that reads the model data from the WSJ 8 kHz resource directory. -->
<component
    name="wsjLoader"
    type="edu.cmu.sphinx.linguist.acoustic.tiedstate.Sphinx3Loader">
    <property name="logMath" value="logMath"/>
    <property name="unitManager" value="unitManager"/>
    <property name="location" value="resource:/WSJ_8gau_13dCep_8kHz_31mel_200Hz_3500Hz"/>
</component>
<!-- ******************************************************** -->
<!-- The unit manager configuration -->
<!-- ******************************************************** -->
<!-- Unit manager shared by the linguist, the dictionary and the acoustic model. -->
<component name="unitManager" type="edu.cmu.sphinx.linguist.acoustic.UnitManager"/>
<!-- ******************************************************** -->
<!-- The frontend configuration -->
<!-- ******************************************************** -->
<!-- Front-end pipeline: the stages run in the order listed, starting at the audio file
     data source. The stream-based source is kept commented out as an alternative. -->
<component
    name="mfcFrontEnd"
    type="edu.cmu.sphinx.frontend.FrontEnd">
    <propertylist name="pipeline">
        <!--item>streamDataSource </item-->
        <item>audioFileDataSource </item>
        <item>preemphasizer </item>
        <item>windower </item>
        <item>fft </item>
        <item>melFilterBank </item>
        <item>dct </item>
        <item>batchCMN </item>
        <item>featureExtraction </item>
    </propertylist>
</component>
<!-- Stream-based audio source (16000 Hz, 16-bit, signed, little-endian). It is declared
     here but not listed in the active "mfcFrontEnd" pipeline. -->
<component name="streamDataSource" type="edu.cmu.sphinx.frontend.util.StreamDataSource">
    <property name="sampleRate" value="16000"/>
    <property name="bitsPerSample" value="16"/>
    <property name="bigEndianData" value="false"/>
    <property name="signedData" value="true"/>
</component>
<!-- File-based audio source; the application looks it up by name and points it at the
     WAV file. -->
<component
    name="audioFileDataSource"
    type="edu.cmu.sphinx.frontend.util.AudioFileDataSource"/>

<!-- Signal-processing stages declared without properties, so their built-in defaults
     apply. -->
<component name="preemphasizer" type="edu.cmu.sphinx.frontend.filter.Preemphasizer"/>
<component name="windower" type="edu.cmu.sphinx.frontend.window.RaisedCosineWindower"/>
<component name="fft" type="edu.cmu.sphinx.frontend.transform.DiscreteFourierTransform"/>
<!-- Mel filter bank matched to the acoustic model. The model directory name
     (WSJ_8gau_13dCep_8kHz_31mel_200Hz_3500Hz) encodes 31 mel filters covering
     200-3500 Hz, so these values are set explicitly instead of relying on the
     component defaults.
     NOTE(review): the defaults presumably target wideband audio; confirm against the
     MelFrequencyFilterBank documentation. -->
<component name="melFilterBank"
           type="edu.cmu.sphinx.frontend.frequencywarp.MelFrequencyFilterBank">
    <property name="numberFilters" value="31"/>
    <property name="minimumFrequency" value="200"/>
    <property name="maximumFrequency" value="3500"/>
</component>
<!-- Final feature stages: cosine transform, batch cepstral mean normalization and delta
     feature extraction; all are declared without properties. -->
<component name="dct" type="edu.cmu.sphinx.frontend.transform.DiscreteCosineTransform"/>
<component name="batchCMN" type="edu.cmu.sphinx.frontend.feature.BatchCMN"/>
<component name="featureExtraction" type="edu.cmu.sphinx.frontend.feature.DeltasFeatureExtractor"/>
<!-- ******************************************************* -->
<!-- Miscellaneous components -->
<!-- ******************************************************* -->
<!-- Log-domain math helper referenced by the search, linguist, language-model and
     loader components. -->
<component
    name="logMath"
    type="edu.cmu.sphinx.util.LogMath">
    <property name="logBase" value="1.0001"/>
    <property name="useAddTable" value="true"/>
</component>
</config>
答案 0 :(得分:0)
您的配置完全错误：搜索束宽（beam）设置得太宽，前端的配置也不正确。
如果要修改配置，应以默认配置 default.config.xml 为基础；或者直接使用不需要 XML 文件的高级 API。为获得最佳解码精度，您需要使用下载页面中提供的 en-us-8khz 声学模型。
如果您想转录8khz音频,您还需要调用recognizer.setSampleRate(8000);