将StandardTokenizerFactory与货币一起使用

时间:2016-11-29 23:19:34

标签: solr lucene

此问题中描述的fieldType配置适用于检测货币(例如,包含“$ 30”的文档)。但是,我们希望使用StandardTokenizerFactory,而不是WhitespaceTokenizerFactory;而该配置在改用StandardTokenizerFactory后会返回误报(例如,匹配到只包含数字30、不带$符号的文档)。有什么解决方案?

谢谢

How do I find documents containing digits and dollar signs in Solr?

1 个答案:

答案 0 :(得分:0)

通过在Solr用户邮件组发帖解决了这个问题: http://lucene.472066.n3.nabble.com/How-to-use-the-StandardTokenizer-with-currency-td4308072.html#a4308097

这是我的配置

<!--
  text_curr_3 (VB): modelled on text_general, with two additions: a literal "$" is kept
  through analysis so currency amounts can be matched, and autoGeneratePhraseQueries is on.
  Both analyzer chains rewrite "$" to the placeholder "xxdollarxx" ahead of the tokenizer
  and turn the placeholder back into "$" in the tokens that come out of it.
-->
<fieldType name="text_curr_3"
           class="solr.TextField"
           positionIncrementGap="100"
           autoGeneratePhraseQueries="true">

  <!-- Index-time chain: the word delimiter filter also catenates words and numbers. -->
  <analyzer type="index">
    <charFilter class="solr.MappingCharFilterFactory" mapping="mapping.txt"/>
    <!-- Replace "$" with a placeholder before tokenization. -->
    <charFilter class="solr.PatternReplaceCharFilterFactory"
                pattern="\$"
                replacement="xxdollarxx"/>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <!-- Put "$" back wherever the placeholder appears in a token. -->
    <filter class="solr.PatternReplaceFilterFactory"
            pattern="xxdollarxx"
            replacement="\$"
            replace="all"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="stopwords.txt"
            enablePositionIncrements="true"/>
    <!-- NOTE(review): presumably word-delim-types.txt classifies "$" so that this filter
         does not split it off again; that file is not shown here, so confirm. -->
    <filter class="solr.WordDelimiterFilterFactory"
            generateWordParts="1"
            generateNumberParts="1"
            catenateWords="1"
            catenateNumbers="1"
            catenateAll="0"
            splitOnCaseChange="1"
            types="word-delim-types.txt"/>
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>

  <!-- Query-time chain: same "$" handling, plus synonym expansion, and no catenation
       in the word delimiter filter. -->
  <analyzer type="query">
    <charFilter class="solr.MappingCharFilterFactory" mapping="mapping.txt"/>
    <!-- Replace "$" with a placeholder before tokenization. -->
    <charFilter class="solr.PatternReplaceCharFilterFactory"
                pattern="\$"
                replacement="xxdollarxx"/>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <!-- Put "$" back wherever the placeholder appears in a token. -->
    <filter class="solr.PatternReplaceFilterFactory"
            pattern="xxdollarxx"
            replacement="\$"
            replace="all"/>
    <filter class="solr.SynonymFilterFactory"
            synonyms="synonyms.txt"
            ignoreCase="true"
            expand="true"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="stopwords.txt"
            enablePositionIncrements="true"/>
    <filter class="solr.WordDelimiterFilterFactory"
            generateWordParts="1"
            generateNumberParts="1"
            catenateWords="0"
            catenateNumbers="0"
            catenateAll="0"
            splitOnCaseChange="1"
            types="word-delim-types.txt"/>
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>
</fieldType>