/**
 * Analyzer built on a {@link ClassicTokenizer} whose output is passed, in this
 * order, through a {@link ShingleFilter} (constructed with sizes 2 and 3), a
 * {@link ClassicFilter} and a {@link LowerCaseFilter}.
 */
public class CustomAnalyzer extends Analyzer {

    /** Default value for the maximum token length given to the tokenizer. */
    public static final int DEFAULT_MAX_TOKEN_LENGTH = 255;

    /** Maximum token length applied to the tokenizer; initialised to the default. */
    private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;

    /**
     * Builds the tokenizer and filter chain for a field.
     *
     * @param fieldName name of the field being analyzed (not consulted here)
     * @param reader    source of the text to tokenize
     * @return components wrapping the tokenizer and the end of the filter chain
     */
    @Override
    protected Analyzer.TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
        final ClassicTokenizer tokenizer = new ClassicTokenizer(getVersion(), reader);
        tokenizer.setMaxTokenLength(maxTokenLength);

        // Innermost filter runs first: shingles -> classic -> lower-case.
        final TokenStream chain =
                new LowerCaseFilter(new ClassicFilter(new ShingleFilter(tokenizer, 2, 3)));
        // Disabled synonym stage, kept for reference:
        // chain = new SynonymFilter(chain, SynonymDictionary.getSynonymMap(), true);

        return new Analyzer.TokenStreamComponents(tokenizer, chain) {
            @Override
            protected void setReader(final Reader reader) throws IOException {
                // Re-apply the analyzer's current limit before switching to the new input.
                tokenizer.setMaxTokenLength(CustomAnalyzer.this.maxTokenLength);
                super.setReader(reader);
            }
        };
    }
}
/**
 * Demo that prints the tokens {@link CustomAnalyzer} emits for the text
 * "cup board", then prints the query that {@code QueryParser} builds from the
 * same text using the same analyzer.
 */
public class Test {

    /**
     * Runs the demo.
     *
     * @param args ignored
     * @throws Exception if the index cannot be opened, analysis fails, or the
     *                   query text cannot be parsed
     */
    public static void main(String[] args) throws Exception {
        // Directory, reader and analyzer are all Closeable; release them on exit.
        try (Directory dir = new NIOFSDirectory(new File("/home/local/test"));
             IndexReader indexReader = DirectoryReader.open(dir);
             CustomAnalyzer analyzer1 = new CustomAnalyzer()) {

            // Opened as in the original demo; nothing below runs a search yet.
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);

            // Use the same analyzer instance for both the token dump and the parser,
            // so the two outputs are directly comparable. The stream must be ended
            // and closed before the analyzer is reused by the parser.
            try (TokenStream ts = analyzer1.tokenStream("n", new StringReader("cup board"))) {
                // The attribute instance is shared across tokens, so look it up once.
                CharTermAttribute term = ts.getAttribute(CharTermAttribute.class);
                ts.reset();
                System.out.println("Tokens are :");
                while (ts.incrementToken()) {
                    System.out.print(term + ", ");
                }
                ts.end();
            }

            // The query syntax splits on whitespace before analysis, so "cup" and
            // "board" are analyzed as separate clauses and no shingle is produced
            // here; quote the text to have it analyzed as a single unit.
            QueryParser parser = new QueryParser("n", analyzer1);
            Query query = parser.parse("cup board");
            System.out.println("\nQuery is");
            System.out.println(query.toString());
        }
    }
}
我使用的是 Lucene 4.10.4。上面代码的输出如下:
Tokens are :
cup, cup board, board
Query is
n:cup n:board
我希望得到的查询是 n:cup n:board n:cup board,但 ShingleFilter 生成的词元(token)并没有出现在查询中,我只得到 n:cup n:board。我哪里做错了?
答案 0 :(得分:0)
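这些词并不是由分析器拆分的,而是先被 QueryParser 的查询语法拆分的。由于查询子句以空格分隔,"cup" 和 "board" 是两个独立的查询子句,而不是同一子句中的两个词项(term)。每个子句会单独交给分析器处理,ShingleFilter 一次只能看到一个词,因此无法生成 "cup board" 这个 shingle。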
可以尝试使用短语查询来观察差异:parser.parse("\"cup board\"");