我一直在尝试为我的网站添加一个基于 Apache Solr 的搜索建议(suggest)功能。在大多数情况下,一切都如我预期的那样正常工作。但是,当我搜索带撇号的单词时(例如 don't、isn't 等等),就遇到了麻烦。
出于某种原因,建议器(suggester)会在撇号处把单词拆开,然后返回拼合(collation)后的结果。例如,查询 "don't y" 会被拆成 "don"、"t"、"y" 三个词分别给出建议,因此它给出的 collation 是按 "don" -> "don't"、"t" -> "the"、"y" -> "you" 拼出来的。
我已经用分析器(Analysis)工具测试过,看起来分词器/过滤器并没有在撇号处进行拆分,所以我不确定为什么会出现这种情况。如能给予任何指点,将不胜感激!
schema.xml中:
<!--
  textSpell: text field type with separate index-time and query-time analyzers.
  Both chains use the standard tokenizer, remove the words listed in
  naughtywords.txt (case-insensitively), lowercase the tokens and then apply
  the standard filter. The query chain additionally expands synonyms from
  synonyms.txt before the stop-word step.
  NOTE(review): presumably this is the type of the spellCheckContent field;
  the field declaration is not shown here, confirm in schema.xml.
-->
<fieldType class="solr.TextField"
           name="textSpell"
           omitNorms="true"
           positionIncrementGap="100">

  <!-- Index-time analysis. Filter order is significant; keep it as listed. -->
  <analyzer type="index">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.StopFilterFactory" words="naughtywords.txt" ignoreCase="true"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.StandardFilterFactory"/>
  </analyzer>

  <!-- Query-time analysis: same chain as above, preceded by synonym expansion. -->
  <analyzer type="query">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.SynonymFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/>
    <filter class="solr.StopFilterFactory" words="naughtywords.txt" ignoreCase="true"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.StandardFilterFactory"/>
  </analyzer>

</fieldType>
solrconfig.xml中:
<!--
  Spell-check search component registered under the name "suggest".
  It declares a single spellchecker, also named "suggest", that uses the
  Suggester class with the TST lookup implementation and reads its terms
  from the spellCheckContent field.

  NOTE(review): a query such as "don't y" being split into "don", "t" and "y"
  probably happens before the field analyzer is involved. The default
  spellcheck query converter is believed to extract words with a pattern that
  does not treat the apostrophe as a word character. A top-level
  queryConverter element using org.apache.solr.spelling.SuggestQueryConverter
  may avoid this; verify against the documentation of the Solr version in use.
-->
<searchComponent name="suggest" class="solr.SpellCheckComponent">
  <lst name="spellchecker">
    <!-- Dictionary name; referenced by the spellcheck.dictionary request parameter. -->
    <str name="name">suggest</str>
    <str name="classname">org.apache.solr.spelling.suggest.Suggester</str>
    <str name="lookupImpl">org.apache.solr.spelling.suggest.tst.TSTLookupFactory</str>
    <!-- Source field for the suggestion terms. -->
    <str name="field">spellCheckContent</str>
    <!-- NOTE(review): presumably the minimum fraction of documents a term must
         appear in to be included in the dictionary; confirm. -->
    <float name="threshold">0.001</float>
    <str name="buildOnCommit">true</str>
  </lst>
</searchComponent>
<!--
  Request handler mounted at /suggest.
  Its defaults switch spell checking on, select the "suggest" dictionary and
  request collations; the handler runs the "suggest" and "query" components,
  in that order.
-->
<requestHandler name="/suggest" class="org.apache.solr.handler.component.SearchHandler">

  <!-- Default request parameters; a request may override any of them. -->
  <lst name="defaults">
    <str name="spellcheck">true</str>
    <str name="spellcheck.dictionary">suggest</str>
    <str name="spellcheck.onlyMorePopular">true</str>
    <!-- At most 5 suggestions per input term. -->
    <str name="spellcheck.count">5</str>
    <!-- Collation settings: up to 5 collations, extended output, 10 tries. -->
    <str name="spellcheck.collate">true</str>
    <str name="spellcheck.maxCollations">5</str>
    <str name="spellcheck.collateExtendedResults">true</str>
    <str name="spellcheck.maxCollationTries">10</str>
    <!-- Default search field. -->
    <str name="df">spellCheckContent</str>
  </lst>

  <!-- Components executed for this handler, in order. -->
  <arr name="components">
    <str>suggest</str>
    <str>query</str>
  </arr>

</requestHandler>
建议结果:
"spellcheck": {
"suggestions": [
"don",
{
"numFound": 4,
"startOffset": 0,
"endOffset": 3,
"suggestion": [
"don't",
"don’t",
"done",
"donating"
]
},
"t",
{
"numFound": 5,
"startOffset": 4,
"endOffset": 5,
"suggestion": [
"the",
"to",
"this",
"talk",
"that"
]
},
"y",
{
"numFound": 4,
"startOffset": 6,
"endOffset": 7,
"suggestion": [
"you",
"your",
"years",
"year"
]
}
]
}