我创建了一个 Lucene 索引，想要查找包含某个单词或短语的所有文档。但在搜索时我发现，包含该单词的文本越长，文档的得分就越低。
如何创建仅检查文档/字段中是否存在单词的查询?
以下是我创建索引的方式：
/// <summary>
/// Builds an in-memory Lucene index with one document per work item.
/// </summary>
/// <param name="workItems">
/// Work items to index. Each item's <c>Title</c> is written to the stored,
/// analyzed "Title" field with term vectors (positions and offsets).
/// </param>
/// <returns>The <see cref="RAMDirectory"/> that holds the newly written index.</returns>
public static Directory CreateIndex(IEnumerable<WorkItemDto> workItems)
{
    StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
    Directory index = new RAMDirectory();

    // The using block disposes the writer even when building or adding a
    // document throws; previously Dispose() was reached only on success.
    // The third argument (true) asks the writer to create a fresh index.
    using (IndexWriter writer = new IndexWriter(index, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED))
    {
        foreach (WorkItemDto workItemDto in workItems)
        {
            Document doc = new Document();

            // NOTE(review): Field.Index.ANALYZED keeps field norms, so the length of
            // the title influences the relevance score. Field.Index.ANALYZED_NO_NORMS
            // would remove that effect -- confirm before switching, since it changes scores.
            doc.Add(new Field("Title", workItemDto.Title, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
            //doc.Add(new NumericField("ID", Field.Store.YES, true).SetIntValue(workItemDto.Id));
            writer.AddDocument(doc);
        }
    }

    return index;
}
这就是我创建查询的方式:
// Parse the search text against the "Title" field with a StandardAnalyzer.
StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
QueryParser titleParser = new QueryParser(Version.LUCENE_30, "Title", analyzer);
Query query = titleParser.Parse("Some");

// Search the index directory and keep at most the top 10 hits.
IndexSearcher searcher = new IndexSearcher(indexDir);
TopDocs docs = searcher.Search(query, 10);
ScoreDoc[] hits = docs.ScoreDocs;