Solr 多语言搜索不起作用

时间:2017-01-20 06:52:49

标签: apache solr full-text-search

这是我的schema.xml

<?xml version="1.0" encoding="UTF-8"?>
<!-- multi language in single core R&D Pallav Jha  -->
<schema name="Pallav" version="1.14">
  <!-- Field whose value identifies a document uniquely. -->
  <uniqueKey>SolrId</uniqueKey>
  <!-- Field searched when a query does not name one.
       NOTE(review): Name is declared further down with the un-analyzed `string`
       type, so unqualified queries only match the complete, exact value.
       Confirm that this is intended. -->
  <defaultSearchField>Name</defaultSearchField>
  <!-- Query terms are combined with OR unless the query itself says otherwise. -->
  <solrQueryParser defaultOperator="OR"/>
  <!-- Primitive (non-text) field types. Of these, only `int` and `long` are
       referenced by a field or dynamicField in this schema. -->
  <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
  <!-- NOTE(review): configured identically to `boolean`. If the plural name is meant
       to denote a multi-valued type it would need multiValued="true"; confirm. -->
  <fieldType name="booleans" class="solr.BoolField" sortMissingLast="true"/>
  <fieldType name="date" class="solr.TrieDateField" positionIncrementGap="0" precisionStep="6"/>
  <fieldType name="float" class="solr.TrieFloatField" positionIncrementGap="0" precisionStep="0"/>
  <!-- Unlike the other numeric types, `int` also sets omitNorms="true". -->
  <fieldType name="int" class="solr.TrieIntField" omitNorms="true" positionIncrementGap="0" precisionStep="0"/>
  <fieldType name="long" class="solr.TrieLongField" positionIncrementGap="0" precisionStep="0"/>
  <!-- Prefix-matching type for whole values: KeywordTokenizer keeps the entire input
       as a single token, which the index analyzer then expands into edge n-grams.
       No field or dynamicField in this schema currently uses this type.
       NOTE(review): the two analyzers are not symmetric. The query side adds
       synonyms plus German2 and Porter stemming that the index side never applies,
       and it does not strip the !!...!! spans. Several stemmers are also chained
       back to back. Confirm that index-time and query-time tokens can still match. -->
  <fieldType name="nGramAttributes" class="solr.TextField" positionIncrementGap="100">
    <analyzer type="index">
      <!-- Remove every span delimited by a pair of "!!" markers before tokenizing. -->
      <charFilter class="solr.PatternReplaceCharFilterFactory" pattern="!!.*?!!" replacement=""/>
      <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
      <tokenizer class="solr.KeywordTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
      <!-- Edge n-grams of 1 to 10 characters. Every filter below this line runs on
           the grams, not on the original value. -->
      <filter class="solr.EdgeNGramFilterFactory" maxGramSize="10" minGramSize="1"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <!-- NOTE(review): two French stemmers applied one after the other. -->
      <filter class="solr.FrenchLightStemFilterFactory"/>
      <filter class="solr.SnowballPorterFilterFactory" language="French" />
      <filter class="solr.ASCIIFoldingFilterFactory"/>
    </analyzer>
    <analyzer type="query">
      <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
      <tokenizer class="solr.KeywordTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
      <!-- Synonym expansion happens at query time only. -->
      <filter class="solr.SynonymFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <!-- NOTE(review): German2, Porter and two French stemmers all run in sequence
           on every query token, whatever language it is in. -->
      <filter class="solr.SnowballPorterFilterFactory" language="German2"/>
      <filter class="solr.PorterStemFilterFactory"/>
      <filter class="solr.FrenchLightStemFilterFactory"/>
      <filter class="solr.SnowballPorterFilterFactory" language="French" />
      <filter class="solr.ASCIIFoldingFilterFactory"/>
    </analyzer>
  </fieldType>
  <!-- Tokenized text type used by the nGramContent field.
       Index side: strip HTML, apply the character mapping file, tokenize, drop
       stopwords, lowercase, emit edge n-grams, then stem and phonetically encode.
       Query side: same chain without HTML stripping and without n-gramming, plus
       synonym expansion.
       NOTE(review): Porter, Soundex, French light and French Snowball filters are
       chained over every token (and over the n-grams at index time). Stacking
       stemmers for different languages in one analyzer is unlikely to give
       language-correct results; one analyzed field type per language is the usual
       alternative. Confirm the intent. -->
  <fieldType name="nGramtext" class="solr.TextField" positionIncrementGap="100">
    <analyzer type="index">
      <charFilter class="solr.HTMLStripCharFilterFactory"/>
      <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <!-- Edge n-grams of 3 to 15 characters; the filters below see the grams. -->
      <filter class="solr.EdgeNGramFilterFactory" maxGramSize="15" minGramSize="3"/>
      <filter class="solr.PorterStemFilterFactory"/>
      <filter class="solr.PhoneticFilterFactory" encoder="Soundex" inject="true"/>
      <filter class="solr.FrenchLightStemFilterFactory"/>
      <filter class="solr.SnowballPorterFilterFactory" language="French" />
      <filter class="solr.ASCIIFoldingFilterFactory"/>
    </analyzer>
    <analyzer type="query">
      <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
      <!-- Synonym expansion happens at query time only. -->
      <filter class="solr.SynonymFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.PorterStemFilterFactory"/>
      <filter class="solr.PhoneticFilterFactory" encoder="Soundex" inject="true"/>
      <filter class="solr.FrenchLightStemFilterFactory"/>
      <filter class="solr.SnowballPorterFilterFactory" language="French" />
      <filter class="solr.ASCIIFoldingFilterFactory"/>
    </analyzer>
  </fieldType>
  <!-- Verbatim string type (solr.StrField). No analyzer is attached, so values are
       neither tokenized nor normalized and only match as complete, exact values.
       Every named field except _version_ and nGramContent uses this type. -->
  <fieldType name="string" class="solr.StrField" omitNorms="true" sortMissingLast="true"/>
  <!-- General analyzed text type: tokenize, drop stopwords, lowercase, Porter-stem
       and Soundex-encode. The index side additionally strips HTML; the query side
       additionally expands synonyms.
       No field or dynamicField in this schema currently uses this type. -->
  <fieldType name="text" class="solr.TextField" positionIncrementGap="100">
    <analyzer type="index">
      <charFilter class="solr.HTMLStripCharFilterFactory"/>
      <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.PorterStemFilterFactory"/>
      <filter class="solr.PhoneticFilterFactory" encoder="Soundex" inject="true"/>
    </analyzer>
    <analyzer type="query">
      <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
      <!-- Synonym expansion happens at query time only. -->
      <filter class="solr.SynonymFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.PorterStemFilterFactory"/>
      <filter class="solr.PhoneticFilterFactory" encoder="Soundex" inject="true"/>
    </analyzer>
  </fieldType>
  <!-- Lightly analyzed text type: tokenize, drop stopwords and lowercase, with no
       stemming or phonetic encoding. The query side also expands synonyms.
       No field or dynamicField in this schema currently uses this type.
       NOTE(review): presumably intended for a spell-check field such as the
       SpellContent target of the disabled copyField near the end of the schema;
       confirm. -->
  <fieldType name="textSpell" class="solr.TextField" positionIncrementGap="100">
    <analyzer type="index">
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
      <filter class="solr.LowerCaseFilterFactory"/>
    </analyzer>
    <analyzer type="query">
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
      <filter class="solr.SynonymFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/>
      <filter class="solr.LowerCaseFilterFactory"/>
    </analyzer>
  </fieldType>
  <!-- Mandatory fields: the document key (referenced by uniqueKey) and the default
       search field. Both are exact-match `string` fields. -->
  <field name="SolrId" type="string" indexed="true" stored="true" required="true"/>
  <field name="Name" type="string" indexed="true" stored="true" required="true"/>

  <!-- Optional per-language name fields (prefixes look like ISO 639-1 language
       codes; confirm).
       NOTE(review): these use the un-analyzed `string` type, so a query only
       matches the complete, exact value, and multi-word values must be quoted.
       Language-aware matching (for example French stemming on fr_Name) would need
       an analyzed solr.TextField type, followed by a reindex. -->
  <field name="en_Name" type="string" indexed="true" stored="true" required="false"/>
  <field name="nl_Name" type="string" indexed="true" stored="true" required="false"/>
  <field name="fr_Name" type="string" indexed="true" stored="true" required="false"/>
  <field name="hi_Name" type="string" indexed="true" stored="true" required="false"/>

  <!-- Long-typed version field (presumably Solr's internal document version; confirm). -->
  <field name="_version_" type="long" indexed="true" stored="true"/>
  <!-- Multi-valued, index-only (not stored) field analyzed with nGramtext.
       No active copyField feeds it, so the indexing client has to populate it. -->
  <field name="nGramContent" type="nGramtext" indexed="true" stored="false" required="false" multiValued="true"/>
  <!-- Dynamic fields, matched by name pattern. Declaration order is kept as is. -->
  <!-- Integer-typed prefixes. -->
  <dynamicField name="CDO_*" type="int" indexed="true" stored="true" required="false"/>
  <dynamicField name="MDO_*" type="int" indexed="true" stored="true" required="false"/>
  <!-- Exact-match string prefixes; pa_* and f_* are explicitly multi-valued. -->
  <dynamicField name="pa_*" type="string" indexed="true" stored="true" required="false" multiValued="true"/>
  <dynamicField name="cp_*" type="string" indexed="true" stored="true" required="false"/>
  <dynamicField name="f_*" type="string" indexed="true" stored="true" required="false" multiValued="true"/>
  <!-- Catch-all suffix for ad-hoc exact-match strings. -->
  <dynamicField name="*_s" type="string" indexed="true" stored="true"/>
  <!-- Disabled copyField; its destination, SpellContent, is not declared anywhere
       in this schema:
       <copyField source="Name" dest="SpellContent"/> -->

</schema>

我正在尝试实现多语言搜索,先用法语进行测试。但它不起作用,我没有得到任何结果。有人能帮帮我吗?我做错了什么?

这是我用法语搜索得到的结果(截图:solr french search result)。

2 个答案:

答案 0 :(得分:0)

问题在于,字段 fr_Name 的类型是 string,这意味着它不会被分析或分词。如果要搜索包含空格的内容(例如 Apple Mac Book Pro),就需要用双引号括起来做完全匹配。因此,查询 "fq": "fr_Name:\"Apple Mac Book Pro\"" 应该适合您。

Solr wiki的一些参考资料:

  

字符串(UTF-8 编码的字符串或 Unicode)。string 类型用于较小的字段,不会以任何方式进行分词或分析。它的硬性长度上限略低于 32K。

答案 1 :(得分:0)

添加此字段类型,然后把 fr_Name 字段的 type 改为 text_fr 并重新索引:

<!-- French text type. The single untyped analyzer is used for both indexing and
     querying. -->
<fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <!-- Strip the elided articles listed in lang/contractions_fr.txt. -->
    <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_fr.txt"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <!-- Drop French stopwords; the word list file is in Snowball format. -->
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fr.txt" format="snowball"/>
    <!-- One French stemmer only, applied last. -->
    <filter class="solr.FrenchLightStemFilterFactory"/>
  </analyzer>
</fieldType>

这对我有效。