下面的代码可以直接在 Lucene 7.3.1 下运行,
您只需要把索引的存储路径改成自己的路径即可。
import java.io.IOException;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.MultiTerms;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.RAMDirectory;
/**
 * Minimal Lucene demo: builds a small in-memory index of two Chinese sentences and
 * runs a three-field query (questionType, questionId, question) against it.
 */
public class Example {
    /** Writer used while {@link #makeIndex()} runs; it is closed before that method returns. */
    public static IndexWriter writer;
    /** In-memory directory that holds the index searched by {@link #main(String[])}. */
    public static RAMDirectory idxDir;
    /** Analyzer used both for indexing and for parsing the query. */
    public static SmartChineseAnalyzer analyzer;

    /**
     * Loads the on-disk index directory into memory and appends two sample documents.
     *
     * @throws IOException if the directory cannot be opened or the index cannot be written
     */
    public static void makeIndex() throws IOException {
        // RAMDirectory(FSDirectory, IOContext) copies the directory contents, so the
        // file-system handle can be released as soon as the copy has been made.
        try (FSDirectory fsDir = FSDirectory.open(Paths.get("C:\\Users\\gt\\Desktop\\example"))) {
            idxDir = new RAMDirectory(fsDir, IOContext.DEFAULT);
        }

        analyzer = new SmartChineseAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        iwc.setSimilarity(new BM25Similarity());
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);

        List<String> listSent = new ArrayList<String>();
        listSent.add("金古江湖是最好玩的金庸游戏1");
        listSent.add("金古江湖是最好玩的金庸游戏2");

        // try-with-resources guarantees the writer is closed even if indexing fails.
        try (IndexWriter indexWriter = new IndexWriter(idxDir, iwc)) {
            writer = indexWriter;
            for (String sent : listSent) {
                Document doc = new Document();
                // All three fields are TextFields, i.e. their values go through the analyzer.
                doc.add(new TextField("questionType", "A", Field.Store.YES));
                doc.add(new TextField("questionId", "62650ACA7FEB446B9140B088EE7C2FF0", Field.Store.YES));
                doc.add(new TextField("question", sent.trim(), Field.Store.YES));
                indexWriter.addDocument(doc);
            }
            indexWriter.commit();
        }
    }

    /**
     * Builds the index, searches it with one required clause per field and prints the
     * stored "question" value of each of the top five hits.
     *
     * @param args unused
     * @throws IOException    if the index cannot be built or read
     * @throws ParseException if one of the query strings cannot be parsed
     */
    public static void main(String[] args) throws IOException, ParseException {
        makeIndex();

        String[] stringQuery = { "A", "62650ACA7FEB446B9140B088EE7C2FF0aaaa", "金古江湖" };
        String[] fields = { "questionType", "questionId", "question" };
        Occur[] occ = { Occur.MUST, Occur.MUST, Occur.MUST };
        Query query = MultiFieldQueryParser.parse(stringQuery, fields, occ, analyzer);

        // The reader holds index resources; close it once the hits have been printed.
        try (IndexReader reader = DirectoryReader.open(idxDir)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs results = searcher.search(query, 5);
            for (ScoreDoc hit : results.scoreDocs) {
                Document doc = searcher.doc(hit.doc);
                System.out.println(doc.get("question"));
            }
        }
    }
}
在代码中,我添加了两个文档并为其创建索引。
然后我搜索文档。
我希望 questionId 字段必须完全匹配,但现在的搜索结果并不是完全匹配。
如何在搜索多个词时,让其中一个词必须完全匹配,而其他词可以使用模糊的搜索策略?
答案 0 :(得分:0)
它并没有执行任何形式的模糊搜索,而是您的分析器(analyzer)在尝试把字段内容拆分成单词。您的 questionId 62650ACA7FEB446B9140B088EE7C2FF0aaaa 被拆分成了多个词元(token)。
由于您希望它完全匹配,而且它本质上就是一个 ID,因此不应该对它使用普通的分析器。通常,这样的 ID 应该用 StringField 而不是 TextField 来索引,因为 StringField 的内容不会被分析(不分词)。
在查询方面,您可以只使用一个简单的TermQuery,然后通过BooleanQuery将其与查询的其余部分合并。或者,如果您想将其用于QueryParser,则需要使用PerFieldAnalyzerWrapper,类似于:
// Per-field analyzers: "questionId" is mapped to KeywordAnalyzer; every field
// without an entry falls back to the default SmartChineseAnalyzer.
Map<String, Analyzer> perFieldAnalyzers = new HashMap<>();
perFieldAnalyzers.put("questionId", new KeywordAnalyzer());
Analyzer defaultAnalyzer = new SmartChineseAnalyzer();
PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(defaultAnalyzer, perFieldAnalyzers);