从result.getTimedBestResult()获取wordToken时间戳

时间:2014-02-14 08:52:53

标签: sphinx4

我正在为通用视频开发一个字幕生成器。主要的障碍之一是获取每个单词的时间戳,以便与视频对齐,这让我有些头疼。Result 类有一个 getTimedBestResult() 函数,它应该以“单词(时间戳)”的形式返回结果,例如:word(0:20-0:22)。但目前调用它时,我得到的输出类似于“WordToken one null”,其中 one 是被识别出的单词,而 null 的位置本应是时间戳。我已经阅读了文档,但目前仍未找到解决方案。

// Ask the recognition result for its best hypothesis as text with per-word timing.
// NOTE(review): the two boolean arguments are presumably (wantFiller, wordTokenFirst) — confirm against the Sphinx-4 Result API.
String resultText = result.getTimedBestResult(false,false);

我使用的是 hub4 模型和 CMUdict 0.6(如果这有什么关系的话),搜索管理器为 wordPruningBreadthFirstSearchManager(语言学家组件为 lexTree,即 LexTreeLinguist)。我不知道是不是我在 config.xml 中做错了什么:

<?xml version="1.0" encoding="UTF-8"?>
<!-- ********************************************************
     Sphinx-4 Configuration file
     ******************************************************** 
-->
<config>
    <!-- ******************************************************** -->
    <!-- frequently tuned properties -->
    <!-- ******************************************************** -->
    <!-- Global properties. Components further down pull these in through
         ${name} references, so each value is tuned in exactly one place. -->

    <!-- Beam widths: referenced by standardActiveListFactory (absolute/relative)
         and by wordPruningSearchManager (relative). -->
    <property name="absoluteBeamWidth" value="10000"/>
    <property name="relativeBeamWidth" value="1E-80"/>
    <!-- Word-level beam widths: referenced by wordActiveListFactory. -->
    <property name="absoluteWordBeamWidth" value="20"/>
    <property name="relativeWordBeamWidth" value="1E-60"/>
    <!-- Linguist weights: referenced by lexTreeLinguist. -->
    <property name="wordInsertionProbability" value="0.2"/>
    <property name="languageWeight" value="10.5"/>
    <property name="silenceInsertionProbability" value=".05"/>
    <!-- Names of the front end and recognizer components that the scorer,
         the batch driver and the monitors bind to. -->
    <property name="frontend" value="epFrontEnd"/>
    <property name="recognizer" value="recognizer"/>
    <!-- Not referenced through ${...} anywhere in this file; presumably read
         by the configuration manager itself (TODO confirm). -->
    <property name="showCreations" value="false"/>
    <!-- Declared directly under the root element like every other global
         property; a <config> element nested inside <config> is not part of
         the configuration format. -->
    <property name="logLevel" value="SEVERE"/>

    <!-- ************************************************** -->
    <!-- Batch mode -->
    <!-- ************************************************** -->
    <!-- Batch driver: binds to the shared recognizer and lists
         audioFileDataSource as its only input data processor. -->
    <component name="batch" type="edu.cmu.sphinx.tools.batch.BatchModeRecognizer">
        <property name="recognizer" value="${recognizer}"/>
        <property name="skip" value="0"/>
        <propertylist name="inputDataProcessors">
            <item>audioFileDataSource</item>
        </propertylist>
    </component>

    <!-- ******************************************************** -->
    <!-- word recognizer configuration -->
    <!-- ******************************************************** -->
    <!-- Top-level recognizer: delegates decoding to the "decoder" component and
         attaches the instrumentation monitors declared near the end of this file. -->
    <component name="recognizer" type="edu.cmu.sphinx.recognizer.Recognizer">
        <property name="decoder" value="decoder"/>
        <propertylist name="monitors">
            <!-- Item text is a component name; written without padding so it
                 matches the declared names exactly, as in the other lists. -->
            <item>accuracyTracker</item>
            <item>speedTracker</item>
            <item>memoryTracker</item>
            <item>recognizerMonitor</item>
        </propertylist>
    </component>

    <!-- ******************************************************** -->
    <!-- The Decoder configuration -->
    <!-- ******************************************************** -->
    <!-- Decoder: its only setting selects wordPruningSearchManager as the search manager. -->
    <component name="decoder" type="edu.cmu.sphinx.decoder.Decoder">
        <property name="searchManager" value="wordPruningSearchManager"/>
    </component>

    <!-- ******************************************************** -->
    <!-- The Search Manager -->
    <!-- ******************************************************** -->
    <!-- Word-pruning breadth-first search over the lexTreeLinguist search graph,
         wired to the pruner, scorer and active-list manager declared below. -->
    <component name="wordPruningSearchManager" type="edu.cmu.sphinx.decoder.search.WordPruningBreadthFirstSearchManager">
        <property name="logMath" value="logMath"/>
        <property name="linguist" value="lexTreeLinguist"/>
        <property name="pruner" value="trivialPruner"/>
        <property name="scorer" value="threadedScorer"/>
        <property name="activeListManager" value="activeListManager"/>
        <property name="growSkipInterval" value="0"/>
        <property name="checkStateOrder" value="false"/>
        <property name="buildWordLattice" value="false"/>
        <!-- Keep every token in the predecessor chain instead of only the word
             tokens. Result.getTimedBestResult() derives word times from the
             feature frames attached to the scored tokens between words, so
             those tokens must stay reachable from the best token. Costs extra
             memory on long utterances.
             NOTE(review): timing also requires the scorer to leave the feature
             on each token (its scoreablesKeepFeature property); verify. -->
        <property name="keepAllTokens" value="true"/>
        <property name="maxLatticeEdges" value="3"/>
        <property name="acousticLookaheadFrames" value="1.7"/>
        <property name="relativeBeamWidth" value="${relativeBeamWidth}"/>
    </component>

    <!-- ******************************************************** -->
    <!-- The Active Lists -->
    <!-- ******************************************************** -->
    <!-- Ordered list of active-list factories: positions 2 and 3 use the
         word-level factory, every other position the standard one.
         NOTE(review): presumably one entry per search-state type in the
         linguist's state order; confirm before reordering. -->
    <component name="activeListManager" type="edu.cmu.sphinx.decoder.search.SimpleActiveListManager">
        <propertylist name="activeListFactories">
            <item>standardActiveListFactory</item>
            <item>wordActiveListFactory</item>
            <item>wordActiveListFactory</item>
            <item>standardActiveListFactory</item>
            <item>standardActiveListFactory</item>
            <item>standardActiveListFactory</item>
        </propertylist>
    </component>

    <!-- Standard factory: bound to the global absoluteBeamWidth / relativeBeamWidth. -->
    <component name="standardActiveListFactory" type="edu.cmu.sphinx.decoder.search.PartitionActiveListFactory">
        <property name="logMath" value="logMath"/>
        <property name="absoluteBeamWidth" value="${absoluteBeamWidth}"/>
        <property name="relativeBeamWidth" value="${relativeBeamWidth}"/>
    </component>

    <!-- Word factory: same implementation class, but bound to the separate
         absoluteWordBeamWidth / relativeWordBeamWidth globals. -->
    <component name="wordActiveListFactory" type="edu.cmu.sphinx.decoder.search.PartitionActiveListFactory">
        <property name="logMath" value="logMath"/>
        <property name="absoluteBeamWidth" value="${absoluteWordBeamWidth}"/>
        <property name="relativeBeamWidth" value="${relativeWordBeamWidth}"/>
    </component>

    <!-- ******************************************************** -->
    <!-- The Pruner -->
    <!-- ******************************************************** -->
    <!-- Pruner referenced by wordPruningSearchManager; no properties are overridden. -->
    <component name="trivialPruner" type="edu.cmu.sphinx.decoder.pruner.SimplePruner"/>

    <!-- ******************************************************** -->
    <!-- The Scorer -->
    <!-- ******************************************************** -->
    <!-- Acoustic scorer: pulls features from the configured front end and
         scores the active tokens, optionally across several threads. -->
    <component name="threadedScorer" type="edu.cmu.sphinx.decoder.scorer.ThreadedAcousticScorer">
        <property name="frontend" value="${frontend}"/>
        <property name="isCpuRelative" value="false"/>
        <property name="numThreads" value="10"/>
        <property name="minScoreablesPerThread" value="10"/>
        <!-- Must be true for word timing: it leaves the scored feature frame
             attached to each token, and Result.getTimedBestResult() reads the
             timestamps from those frames. With "false" the tokens carry no data
             and the time field of every word comes back as null.
             NOTE(review): the search manager must also retain the non-word
             tokens (its keepAllTokens property) for the frames to be reachable;
             verify that setting as well. -->
        <property name="scoreablesKeepFeature" value="true"/>
    </component>
    <!-- ******************************************************** -->
    <!-- The linguist configuration -->
    <!-- ******************************************************** -->
    <!-- Linguist: combines the acoustic model ("wsj"), the language model
         ("language") and the dictionary ("dict") declared below. The three
         insertion/weight settings at the end come from the global properties. -->
    <component name="lexTreeLinguist" type="edu.cmu.sphinx.linguist.lextree.LexTreeLinguist">
        <property name="logMath" value="logMath"/>
        <property name="acousticModel" value="wsj"/>
        <property name="languageModel" value="language"/>
        <property name="dictionary" value="dict"/>
        <property name="addFillerWords" value="false"/>
        <property name="fillerInsertionProbability" value="1E-10"/>
        <property name="generateUnitStates" value="true"/>
        <property name="wantUnigramSmear" value="true"/>
        <property name="unigramSmearWeight" value="1"/>
        <property name="wordInsertionProbability" value="${wordInsertionProbability}"/>
        <property name="silenceInsertionProbability" value="${silenceInsertionProbability}"/>
        <property name="languageWeight" value="${languageWeight}"/>
        <property name="unitManager" value="unitManager"/>
    </component>

    <!-- ******************************************************** -->
    <!-- The Dictionary configuration -->
    <!-- ******************************************************** -->
    <!-- Pronunciation dictionary. Both paths are absolute file: URLs on one
         particular Windows machine and must be adjusted when the configuration
         is used anywhere else. -->
    <component name="dict" type="edu.cmu.sphinx.linguist.dictionary.FullDictionary">
        <property name="dictionaryPath" value="file:///C:/Users/Asgard/Documents/workspace/AsgardGit/asgard/dict/cmudict.06d.dict"/> 
        <property name="fillerPath"     value="file:///C:/Users/Asgard/Documents/workspace/AsgardGit/asgard/dict/filler.filler"/>
        <property name="addSilEndingPronunciation" value="false"/>
        <!-- The value is the literal text <sil>, escaped for use inside an attribute. -->
        <property name="wordReplacement" value="&lt;sil&gt;"/>
        <property name="allowMissingWords" value="false"/>
        <property name="unitManager" value="unitManager"/>
    </component>

    <!-- ******************************************************** -->
    <!-- The Language Model configuration -->
    <!-- ******************************************************** -->

    <!-- N-gram language model with maxDepth 3, read from a binary DMP file at an
         absolute, machine-specific location; it shares the "dict" dictionary. -->
    <component name="language" type="edu.cmu.sphinx.linguist.language.ngram.large.LargeTrigramModel">
        <property name="logMath" value="logMath"/>
        <property name="maxDepth" value="3"/>
        <property name="unigramWeight" value=".5"/>
        <property name="dictionary" value="dict"/>
        <property name="location" value="file:///C:/Users/Asgard/Documents/workspace/lang/language_model.arpaformat.DMP"/>
    </component>


    <!-- ******************************************************** -->
    <!-- The acoustic model configuration-->
    <!-- ******************************************************** -->
    <!-- Acoustic model. The component names say "wsj", but the loader below
         points at a hub4 model directory; the names are only labels that
         lexTreeLinguist refers to. -->
    <component name="wsj" type="edu.cmu.sphinx.linguist.acoustic.tiedstate.TiedStateAcousticModel">
        <property name="loader" value="wsjLoader"/>
        <property name="unitManager" value="unitManager"/>
    </component>

    <!-- Loader for the model files; the location is an absolute, machine-specific file: URL. -->
    <component name="wsjLoader" type="edu.cmu.sphinx.linguist.acoustic.tiedstate.Sphinx3Loader">
        <property name="logMath" value="logMath"/>
        <property name="unitManager" value="unitManager"/>
        <property name="location" value="file:///C:/Users/Asgard/Documents/workspace/acc/hub4_cd_continuous_8gau_1s_c_d_dd/"/>
    </component>


    <!-- ******************************************************** -->
    <!-- The unit manager configuration -->
    <!-- ******************************************************** -->
    <!-- Single unit manager shared by the linguist, the dictionary, the
         acoustic model and its loader. -->
    <component name="unitManager" type="edu.cmu.sphinx.linguist.acoustic.UnitManager"/>
    <!-- ******************************************************** -->
    <!-- The frontend configuration -->
    <!-- ******************************************************** -->
    <!-- Front-end pipeline. Item order is the processing order: audio source,
         blocking, the speech classifier / marker / non-speech filter stages,
         then the signal-processing chain ending in feature extraction. -->
    <component name="epFrontEnd" type="edu.cmu.sphinx.frontend.FrontEnd">
        <propertylist name="pipeline">
            <!-- Item text is a component name; written without padding so it
                 matches the declared names exactly, as in the other lists. -->
            <item>audioFileDataSource</item>
            <item>dataBlocker</item>
            <item>speechClassifier</item>
            <item>speechMarker</item>
            <item>nonSpeechDataFilter</item>
            <item>preemphasizer</item>
            <item>windower</item>
            <item>fft</item>
            <item>melFilterBank</item>
            <item>dct</item>
            <item>liveCMN</item>
            <item>featureExtraction</item>
        </propertylist>
    </component>
    <!-- Front-end processors, declared in the order the epFrontEnd pipeline
         lists them. None of them overrides any property here. -->
    <component name="audioFileDataSource" type="edu.cmu.sphinx.frontend.util.AudioFileDataSource"/>
    <component name="dataBlocker" type="edu.cmu.sphinx.frontend.DataBlocker"/>

    <!-- Endpointing stages -->
    <component name="speechClassifier" type="edu.cmu.sphinx.frontend.endpoint.SpeechClassifier"/>
    <component name="speechMarker" type="edu.cmu.sphinx.frontend.endpoint.SpeechMarker"/>
    <component name="nonSpeechDataFilter" type="edu.cmu.sphinx.frontend.endpoint.NonSpeechDataFilter"/>

    <!-- Signal-processing chain -->
    <component name="preemphasizer" type="edu.cmu.sphinx.frontend.filter.Preemphasizer"/>
    <component name="windower" type="edu.cmu.sphinx.frontend.window.RaisedCosineWindower"/>
    <component name="fft" type="edu.cmu.sphinx.frontend.transform.DiscreteFourierTransform"/>
    <component name="melFilterBank" type="edu.cmu.sphinx.frontend.frequencywarp.MelFrequencyFilterBank"/>
    <component name="dct" type="edu.cmu.sphinx.frontend.transform.DiscreteCosineTransform"/>
    <component name="liveCMN" type="edu.cmu.sphinx.frontend.feature.LiveCMN"/>
    <component name="featureExtraction" type="edu.cmu.sphinx.frontend.feature.DeltasFeatureExtractor"/>


    <!-- ******************************************************* -->
    <!-- monitors -->
    <!-- ******************************************************* -->
    <!-- Accuracy monitor attached to the recognizer; it is configured to show
         both raw and aligned results and uses the confidence scorer declared
         right below. -->
    <component name="accuracyTracker" type="edu.cmu.sphinx.instrumentation.BestConfidenceAccuracyTracker">
        <property name="confidenceScorer" value="confidenceScorer"/>
        <property name="recognizer" value="${recognizer}"/>
        <property name="showRawResults" value="true"/>
        <property name="showAlignedResults" value="true"/>
    </component>

    <!-- Confidence scorer used by accuracyTracker.
         NOTE(review): the search manager has buildWordLattice set to false;
         confirm this scorer works without a word lattice. -->
    <component name="confidenceScorer" type="edu.cmu.sphinx.result.SausageMaker"/>

    <!-- Memory monitor attached to the recognizer; detail and summary output are both switched off. -->
    <component name="memoryTracker" type="edu.cmu.sphinx.instrumentation.MemoryTracker">
        <property name="recognizer" value="${recognizer}"/>
        <property name="showDetails" value="false"/>
        <property name="showSummary" value="false"/>
    </component>

    <!-- Speed monitor bound to both the recognizer and the front end; detail output is switched off. -->
    <component name="speedTracker" type="edu.cmu.sphinx.instrumentation.SpeedTracker">
        <property name="recognizer" value="${recognizer}"/>
        <property name="frontend" value="${frontend}"/>
        <property name="showDetails" value="false"/>
    </component>

    <!-- Recognizer monitor; configMonitor is registered in its allocatedMonitors list.
         NOTE(review): presumably those monitors run once the recognizer has
         been allocated; confirm. -->
    <component name="recognizerMonitor" type="edu.cmu.sphinx.instrumentation.RecognizerMonitor">
        <property name="recognizer" value="${recognizer}"/>
        <propertylist name="allocatedMonitors">
            <item>configMonitor</item>
        </propertylist>
    </component>

    <!-- Configuration monitor with showConfig switched off. -->
    <component name="configMonitor" type="edu.cmu.sphinx.instrumentation.ConfigMonitor">
        <property name="showConfig" value="false"/>
    </component>


    <!-- ******************************************************* -->
    <!-- Miscellaneous components -->
    <!-- ******************************************************* -->
    <!-- Shared log-math helper referenced by the search manager, both
         active-list factories, the linguist, the language model and the
         acoustic model loader. -->
    <component name="logMath" type="edu.cmu.sphinx.util.LogMath">
        <property name="logBase" value="1.0001"/>
        <property name="useAddTable" value="true"/>
    </component>
</config>

感谢您的回复

0 个答案:

没有答案