我们最近升级了正在使用的 CMS,因此不得不将 Lucene.net 从 V2.3.1.301 迁移到 V2.9.4.1。
我们在原有方案中使用了 CustomScoreQuery,用它实现了内置查询无法完成的各种过滤(地理位置、多个日期范围等)。
自从从旧版本迁移到新版本的 Lucene 之后,即使我们检查结果时发现某些文档的分数为 0 甚至是负数,它仍然会返回这些文档。
/// <summary>
/// Opens the on-disk index, runs the event-date query for 23-25 November 2015,
/// prints the number of matching documents and then waits for a key press.
/// </summary>
/// <remarks>
/// NOTE(review): <c>TotalHits</c> counts every document the query matches,
/// whatever score the custom score provider assigns. If hits with a score of
/// zero or below must be excluded, the search presumably has to go through a
/// collector that skips them (e.g. <c>PositiveScoresOnlyCollector</c>) -
/// confirm against the Lucene.Net version in use.
/// </remarks>
public LuceneTest()
{
    Lucene.Net.Store.Directory luceneIndexDirectory = FSDirectory.Open(new System.IO.DirectoryInfo(@"C:\inetpub\wwwroot\Project\build\Data\indexes\all_site_search_en"));
    try
    {
        // Second argument opens the underlying reader read-only.
        IndexSearcher searcher = new IndexSearcher(luceneIndexDirectory, true);
        try
        {
            Query dateQuery = ComposeEventDateQuery(new DateTime(2015, 11, 23), new DateTime(2015, 11, 25), searcher);

            BooleanQuery combinedQuery = new BooleanQuery();
            // Static, process-wide limit; raised before the query is executed.
            BooleanQuery.SetMaxClauseCount(10000);
            combinedQuery.Add(dateQuery, BooleanClause.Occur.MUST);

            // Search with the combined query that was assembled above rather than
            // leaving it as dead code; it wraps dateQuery in a single MUST clause,
            // so the set of matching documents is the same.
            TopDocs hitsFound = searcher.Search(combinedQuery, 1000);
            System.Console.WriteLine(String.Format("Found {0} matches with the date filters", hitsFound.TotalHits));
        }
        finally
        {
            // Release the searcher (and the reader it opened) even if the search throws.
            searcher.Close();
        }
    }
    finally
    {
        // Release the file-system directory handle.
        luceneIndexDirectory.Close();
    }

    System.Console.ReadKey();
}
/// <summary>
/// Builds a boolean query with a single MUST clause: an
/// <see cref="EventDateQuery1"/> wrapping a term query on
/// <c>_language</c> = <c>en</c>.
/// </summary>
/// <param name="fromDate">Start of the date window handed to the event-date query.</param>
/// <param name="ToDate">End of the date window handed to the event-date query.</param>
/// <param name="MySearcher">Searcher handed to the event-date query.</param>
/// <returns>The wrapping <see cref="BooleanQuery"/>.</returns>
public static Query ComposeEventDateQuery(DateTime fromDate, DateTime ToDate, IndexSearcher MySearcher)
{
    // Inner query: restricts candidates to the "en" language term.
    Query languageTerm = new TermQuery(new Lucene.Net.Index.Term("_language", "en"));

    // Custom-score wrapper; the final "false" is the shouldMatchNonEvents flag.
    Query dateWindow = new EventDateQuery1(languageTerm, MySearcher, fromDate, ToDate, false);

    BooleanQuery wrapper = new BooleanQuery();
    wrapper.Add(dateWindow, BooleanClause.Occur.MUST);
    return wrapper;
}
/// <summary>
/// A <see cref="CustomScoreQuery"/> that carries a date window and a
/// "match non-events" flag and passes them to
/// <see cref="EventDateQueryCustomScoreProvider"/>, which produces the score
/// for each document.
/// </summary>
public class EventDateQuery1 : CustomScoreQuery
{
    // Format used for the dates embedded in the query's key/name.
    private const string DateFormat = "yyyyMMdd";

    // Stored by the constructors; not read anywhere inside this class.
    private readonly Searcher _searcher;
    private readonly DateTime _fromDT;
    private readonly DateTime _toDT;
    private readonly bool _shouldMatchNonEvents;

    /// <summary>
    /// Creates a query whose window starts at <paramref name="fromDT"/> and ends
    /// <paramref name="dateRange"/> days later.
    /// </summary>
    /// <param name="subQuery">Query whose matches are re-scored.</param>
    /// <param name="searcher">Searcher associated with this query.</param>
    /// <param name="fromDT">Window start; only the date part is kept.</param>
    /// <param name="shouldMatchNonEvents">Flag forwarded to the score provider.</param>
    /// <param name="dateRange">Window length in days (default 14).</param>
    public EventDateQuery1(Query subQuery, Searcher searcher, DateTime fromDT, bool shouldMatchNonEvents, int dateRange = 14)
        : this(subQuery, searcher, fromDT, fromDT.AddDays(dateRange), shouldMatchNonEvents)
    {
    }

    /// <summary>
    /// Creates a query with an explicit window start and end.
    /// </summary>
    /// <param name="subQuery">Query whose matches are re-scored.</param>
    /// <param name="searcher">Searcher associated with this query.</param>
    /// <param name="fromDT">Window start; only the date part is kept.</param>
    /// <param name="toDT">Window end; only the date part is kept.</param>
    /// <param name="shouldMatchNonEvents">Flag forwarded to the score provider.</param>
    public EventDateQuery1(Query subQuery, Searcher searcher, DateTime fromDT, DateTime toDT, bool shouldMatchNonEvents)
        : base(subQuery)
    {
        _searcher = searcher;
        _fromDT = fromDT.Date;
        _toDT = toDT.Date;
        _shouldMatchNonEvents = shouldMatchNonEvents;
    }

    /// <summary>Returns the unique key of this query.</summary>
    public override string ToString()
    {
        return GenerateUniqueKey();
    }

    /// <summary>Returns the unique key of this query; <paramref name="field"/> is ignored.</summary>
    public override string ToString(string field)
    {
        return GenerateUniqueKey();
    }

    /// <summary>Returns the unique key of this query as its name.</summary>
    public override string Name()
    {
        return GenerateUniqueKey();
    }

    /// <summary>
    /// Builds a key of the form <c>EventDateQuery_{from}_{to}_{flag}</c> with
    /// both dates rendered as <c>yyyyMMdd</c>.
    /// </summary>
    /// <returns>The key string.</returns>
    public string GenerateUniqueKey()
    {
        // Format with the invariant culture so the key does not change with the
        // thread's culture (e.g. cultures whose default calendar is not Gregorian).
        System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
        return String.Format(
            "EventDateQuery_{0}_{1}_{2}",
            _fromDT.ToString(DateFormat, invariant),
            _toDT.ToString(DateFormat, invariant),
            _shouldMatchNonEvents.ToString());
    }

    /// <summary>
    /// Two instances are equal when the base query considers them equal and
    /// they carry the same window and flag.
    /// </summary>
    public override bool Equals(object o)
    {
        // Checked here first so a null or foreign object never reaches the base comparison.
        EventDateQuery1 other = o as EventDateQuery1;
        if (object.ReferenceEquals(other, null))
        {
            return false;
        }

        return base.Equals(o)
            && _fromDT == other._fromDT
            && _toDT == other._toDT
            && _shouldMatchNonEvents == other._shouldMatchNonEvents;
    }

    /// <summary>Hash code consistent with <see cref="Equals(object)"/>.</summary>
    public override int GetHashCode()
    {
        unchecked
        {
            int hash = base.GetHashCode();
            hash = (hash * 31) + _fromDT.GetHashCode();
            hash = (hash * 31) + _toDT.GetHashCode();
            hash = (hash * 31) + _shouldMatchNonEvents.GetHashCode();
            return hash;
        }
    }

    /// <summary>
    /// Supplies the score provider for <paramref name="reader"/>, configured with
    /// this query's window and flag.
    /// </summary>
    protected override CustomScoreProvider GetCustomScoreProvider(IndexReader reader)
    {
        return new EventDateQueryCustomScoreProvider(reader, _fromDT, _toDT, _shouldMatchNonEvents);
    }
}
/// <summary>
/// Score provider used by <see cref="EventDateQuery1"/>. It keeps the date
/// window and flag it is given, but the current scoring is a placeholder that
/// looks only at the document number.
/// </summary>
public class EventDateQueryCustomScoreProvider : CustomScoreProvider
{
    private DateTime _fromDT;
    private DateTime _toDT;
    private readonly string _dateFormat = "yyyyMMdd";
    private bool _shouldMatchNonEvents = true;
    private float NoMatchFloat = 0f;
    private float MatchFloat = 1f;

    /// <summary>
    /// Stores the date part of both window bounds together with the flag.
    /// </summary>
    /// <param name="reader">Index reader passed to the base provider.</param>
    /// <param name="fromDT">Window start; only the date part is kept.</param>
    /// <param name="toDT">Window end; only the date part is kept.</param>
    /// <param name="shouldMatchNonEvents">Flag stored on the provider.</param>
    public EventDateQueryCustomScoreProvider(IndexReader reader, DateTime fromDT, DateTime toDT, bool shouldMatchNonEvents)
        : base(reader)
    {
        _shouldMatchNonEvents = shouldMatchNonEvents;
        _toDT = toDT.Date;
        _fromDT = fromDT.Date;
    }

    /// <summary>Scores <paramref name="doc"/> via <see cref="myScore"/>; both incoming scores are ignored.</summary>
    public override float CustomScore(int doc, float subQueryScore, float valSrcScore)
    {
        float score = myScore(doc);
        return score;
    }

    /// <summary>Scores <paramref name="doc"/> via <see cref="myScore"/>; both incoming scores are ignored.</summary>
    public override float CustomScore(int doc, float subQueryScore, float[] valSrcScores)
    {
        float score = myScore(doc);
        return score;
    }

    /// <summary>
    /// Placeholder scoring: 1 for document numbers below 10, 0 for all others.
    /// </summary>
    /// <param name="doc">Document number being scored.</param>
    /// <returns>1 when <paramref name="doc"/> is less than 10, otherwise 0.</returns>
    public float myScore(int doc)
    {
        // Fake implementation, present only to demonstrate the run.
        return doc < 10 ? 1F : 0F;
    }
}
如果有人能就如何让 Lucene 不返回这些文档提供建议,我们将不胜感激。提前谢谢。
答案 0 :(得分:6)
您可以编写自定义 Collector,仅收集分数 > 0 的文档,然后将此收集器的实例传递给 Search() 方法。这里(here)有一个此类 Collector 的实现。
但是,如果您并不需要全部结果,documentation(文档)不建议使用此方案。您的情况可能正是如此,因为您只选取了前 1000 个文档。