我正在尝试创建一个应用了多个过滤器的自定义分析器。
问题是仅应用了最后一个过滤器(LowerCaseFilter)。
public class CustomAnalyzer : Analyzer
{
protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
Tokenizer tokenizer = new KeywordTokenizer(reader);
//Remove basic stop words a, an, the, in, on etc
TokenStream result = new StopFilter(GlobalVariables.LuceneVersion, tokenizer, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
////Remove tile/tiles
CharArraySet stopWords = new CharArraySet(GlobalVariables.LuceneVersion, 1, true)
{
"test",
}
result = new StopFilter(GlobalVariables.LuceneVersion, tokenizer, stopWords);
//Make case insenstive
result = new LowerCaseFilter(GlobalVariables.LuceneVersion, tokenizer);
return new TokenStreamComponents(tokenizer, result);
}
}
答案 0 :(得分:1)
不要将令牌生成器传递到每个过滤器中,而要传递先前的过滤器。
Tokenizer tokenizer = new KeywordTokenizer(reader);
TokenStream result = new StopFilter(GlobalVariables.LuceneVersion, tokenizer, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
CharArraySet stopWords = new CharArraySet(GlobalVariables.LuceneVersion, 1, true)
result = new StopFilter(GlobalVariables.LuceneVersion, result, stopWords);
result = new LowerCaseFilter(GlobalVariables.LuceneVersion, result);
return new TokenStreamComponents(tokenizer, result);