我想在Lucene中使用PrefixQuery来实现自动完成功能。我写了一个我认为应该能通过的简单测试,但它并没有通过。我索引了一些简单的字符串,并使用KeywordAnalyzer确保它们不会被分词,但我的搜索仍然匹配不到任何内容。我应该如何索引和搜索该字段才能实现前缀匹配?
这是我用来测试的单元测试。除了autocomplete和singleTerm方法之外,一切都通过了。
package com.sample.index;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.KeywordAnalyzer;
import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
import java.util.HashMap;
import static junit.framework.Assert.assertEquals;
import static junit.framework.Assert.assertFalse;
import static junit.framework.Assert.assertTrue;
/**
 * JUnit test from the question: indexes a handful of names into an in-memory
 * Lucene index and checks term and prefix queries against it.
 *
 * As posted, every test passes except {@code autocomplete} and
 * {@code singleTerm}, which find no hits.
 */
public class TestIndexStuff {
// Field holding the whole name as a single, un-tokenized term (prefix-search target).
public static final String FIELD_AUTOCOMPLETE = "autocomplete";
// Field holding the name analyzed by the wrapper's default StandardAnalyzer.
public static final String FIELD_NORMAL = "normal";
private IndexSearcher searcher;
private PerFieldAnalyzerWrapper analyzer;
/**
 * Builds a fresh in-memory index before each test and opens a searcher on it.
 */
@Before
public void init() throws IOException {
RAMDirectory idx = new RAMDirectory();
HashMap<String, Analyzer> fieldAnalyzers = new HashMap<String, Analyzer>();
// KeywordAnalyzer for the autocomplete field; every other field falls back to StandardAnalyzer.
fieldAnalyzers.put(FIELD_AUTOCOMPLETE, new KeywordAnalyzer());
analyzer = new PerFieldAnalyzerWrapper(new StandardAnalyzer(Version.LUCENE_35), fieldAnalyzers);
IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35, analyzer);
IndexWriter writer = new IndexWriter(idx, config);
addDocs(writer);
writer.close();
searcher = new IndexSearcher(IndexReader.open(idx));
}
/**
 * Adds one document per sample name, storing the name in both fields.
 */
private void addDocs(IndexWriter writer) throws IOException {
for (String text : new String[]{"Fred Rogers", "Toni Reed Preckwinkle", "Randy Savage", "Kathryn Janeway", "Madonna", "Fred Savage"}) {
Document doc = new Document();
doc.add(new Field(FIELD_NORMAL, text, Field.Store.YES, Field.Index.ANALYZED));
// NOT_ANALYZED: the original mixed-case text is indexed as-is, as one term.
doc.add(new Field(FIELD_AUTOCOMPLETE, text, Field.Store.YES, Field.Index.NOT_ANALYZED));
writer.addDocument(doc);
}
}
/**
 * Checks that a trailing wildcard is parsed into a PrefixQuery and a plain term is not.
 */
@Test
public void prefixParser() throws ParseException {
Query prefixQuery = new QueryParser(Version.LUCENE_35, FIELD_AUTOCOMPLETE, analyzer).parse("Fre*");
assertTrue(prefixQuery instanceof PrefixQuery);
Query normalQuery = new QueryParser(Version.LUCENE_35, FIELD_AUTOCOMPLETE, analyzer).parse("Fred");
assertFalse(normalQuery instanceof PrefixQuery);
}
/**
 * Searches the analyzed field for "Fred" and expects the two documents containing it.
 */
@Test
public void normal() throws ParseException, IOException {
Query query = new QueryParser(Version.LUCENE_35, FIELD_NORMAL, analyzer).parse("Fred");
TopDocs topDocs = searcher.search(query, 10);
assertEquals(2, topDocs.totalHits);
}
/**
 * Expects two prefix hits for "Fre*" on the autocomplete field.
 * Reported as failing in this version: the field is indexed in mixed case,
 * while QueryParser lowercases prefix-query terms by default
 * (see QueryParser.setLowercaseExpandedTerms).
 */
@Test
public void autocomplete() throws IOException, ParseException {
Query query = new QueryParser(Version.LUCENE_35, FIELD_AUTOCOMPLETE, analyzer).parse("Fre*");
TopDocs topDocs = searcher.search(query, 10);
assertEquals(2, topDocs.totalHits);
}
/**
 * Expects one prefix hit for "Mado*" on the autocomplete field.
 * Reported as failing in this version, for the same reason as autocomplete().
 */
@Test
public void singleTerm() throws ParseException, IOException {
Query query = new QueryParser(Version.LUCENE_35, FIELD_AUTOCOMPLETE, analyzer).parse("Mado*");
TopDocs topDocs = searcher.search(query, 10);
assertEquals(1, topDocs.totalHits);
}
}
编辑:为方便以后阅读本文的人,这里附上根据@jpountz的回答修改后的完整测试代码。我选择在索引时把这些值转换为小写,而不是保留大小写混合的形式。我还添加了一个单元测试,以确保出现在中间的词不会被匹配,因为这里只应匹配以搜索词开头的内容。
package com.sample.index;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.KeywordAnalyzer;
import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
import java.util.HashMap;
import static junit.framework.Assert.assertEquals;
import static junit.framework.Assert.assertFalse;
import static junit.framework.Assert.assertTrue;
/**
 * Revised JUnit test for prefix ("autocomplete") matching with Lucene 3.5.
 *
 * <p>The autocomplete field is indexed as a single lower-cased, un-analyzed
 * term per document, so the lower-cased prefix terms produced by
 * {@link QueryParser} can match it.
 *
 * <p>Each test gets a fresh in-memory index from {@link #init()}, and
 * {@link #tearDown()} releases the searcher, reader and directory afterwards.
 */
public class TestIndexStuff {
    public static final String FIELD_AUTOCOMPLETE = "autocomplete";
    public static final String FIELD_NORMAL = "normal";

    /** Maximum number of hits requested from each search. */
    private static final int MAX_HITS = 10;

    private RAMDirectory directory;
    private IndexReader reader;
    private IndexSearcher searcher;
    private PerFieldAnalyzerWrapper analyzer;

    /**
     * Builds the in-memory index and opens a searcher on it.
     *
     * @throws IOException if the index cannot be written or opened
     */
    @Before
    public void init() throws IOException {
        directory = new RAMDirectory();

        // KeywordAnalyzer for the autocomplete field; StandardAnalyzer for everything else.
        HashMap<String, Analyzer> fieldAnalyzers = new HashMap<String, Analyzer>();
        fieldAnalyzers.put(FIELD_AUTOCOMPLETE, new KeywordAnalyzer());
        analyzer = new PerFieldAnalyzerWrapper(new StandardAnalyzer(Version.LUCENE_35), fieldAnalyzers);

        IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_35, analyzer));
        try {
            addDocs(writer);
        } finally {
            // Close the writer even if indexing fails, so it is not left open.
            writer.close();
        }

        // Keep the reader so it can be closed explicitly in tearDown().
        reader = IndexReader.open(directory);
        searcher = new IndexSearcher(reader);
    }

    /**
     * Releases the searcher, reader and directory opened by {@link #init()}.
     * Null checks cover the case where {@code init()} failed part-way.
     *
     * @throws IOException if closing the searcher or reader fails
     */
    @After
    public void tearDown() throws IOException {
        try {
            if (searcher != null) {
                searcher.close();
            }
        } finally {
            try {
                if (reader != null) {
                    reader.close();
                }
            } finally {
                if (directory != null) {
                    directory.close();
                }
            }
        }
    }

    /**
     * Adds one document per sample name: analyzed in {@link #FIELD_NORMAL},
     * and lower-cased but un-analyzed in {@link #FIELD_AUTOCOMPLETE}.
     */
    private void addDocs(IndexWriter writer) throws IOException {
        String[] names = {"Fred Rogers", "Toni Reed Preckwinkle", "Randy Savage", "Kathryn Janeway", "Madonna", "Fred Savage"};
        for (String text : names) {
            Document doc = new Document();
            doc.add(new Field(FIELD_NORMAL, text, Field.Store.YES, Field.Index.ANALYZED));
            // Lower-cased at index time so it lines up with the parser's lower-cased prefix terms.
            doc.add(new Field(FIELD_AUTOCOMPLETE, text.toLowerCase(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            writer.addDocument(doc);
        }
    }

    /** Parses {@code queryText} with {@code field} as the default field. */
    private Query parse(String field, String queryText) throws ParseException {
        return new QueryParser(Version.LUCENE_35, field, analyzer).parse(queryText);
    }

    /** Runs {@code queryText} against {@code field} and returns the total hit count. */
    private int countHits(String field, String queryText) throws ParseException, IOException {
        TopDocs topDocs = searcher.search(parse(field, queryText), MAX_HITS);
        return topDocs.totalHits;
    }

    /** A trailing wildcard must parse to a PrefixQuery; a plain term must not. */
    @Test
    public void prefixParser() throws ParseException {
        assertTrue(parse(FIELD_AUTOCOMPLETE, "Fre*") instanceof PrefixQuery);
        assertFalse(parse(FIELD_AUTOCOMPLETE, "Fred") instanceof PrefixQuery);
    }

    /** A term query on the analyzed field finds both "Fred" documents. */
    @Test
    public void normal() throws ParseException, IOException {
        assertEquals(2, countHits(FIELD_NORMAL, "Fred"));
    }

    /** A prefix query on the autocomplete field finds both names starting with "Fre". */
    @Test
    public void autocomplete() throws IOException, ParseException {
        assertEquals(2, countHits(FIELD_AUTOCOMPLETE, "Fre*"));
    }

    /**
     * Only values that begin with the prefix may match: "Randy Savage" does,
     * while names that merely contain a word starting with "R" do not.
     */
    @Test
    public void beginningOnly() throws ParseException, IOException {
        assertEquals(1, countHits(FIELD_AUTOCOMPLETE, "R*"));
    }

    /** A prefix of a single-word value matches exactly that one document. */
    @Test
    public void singleTerm() throws ParseException, IOException {
        assertEquals(1, countHits(FIELD_AUTOCOMPLETE, "Mado*"));
    }
}
答案 0 :(得分:3)
默认情况下,QueryParser会把特殊查询(尤其是前缀查询)中的词项转换为小写。要禁用此行为,请参阅QueryParser.setLowercaseExpandedTerms。
替换
// Original statement: the parser's default settings lowercase the prefix term before searching.
Query query = new QueryParser(Version.LUCENE_35, FIELD_AUTOCOMPLETE, analyzer).parse("Mado*");
改为
// Keep the prefix term's original case so it can match the mixed-case, un-analyzed indexed values.
QueryParser qp = new QueryParser(Version.LUCENE_35, FIELD_AUTOCOMPLETE, analyzer);
qp.setLowercaseExpandedTerms(false);
Query query = qp.parse("Mado*");
这样就能修复你的测试。