我在.NET中对lucene.net索引进行排序时遇到了问题。我尝试了stackoverflow上的几乎所有解决方案,也在谷歌上搜索过答案,但都没有成功。我正在使用Lucene.NET 2.9.2和ASP.NET 2.0。我想像在SQL中那样对字符串进行排序,即类似于在SQL中写'order by title desc [asc]'的效果。
下面给出我的代码,希望有人可以帮助我。
// Populate the Lucene document: each product column listed below becomes a
// field of the same name, stored in the index and run through the analyzer.
string[] productColumns = new string[]
{
    "prod_id", "prod_title", "prod_desc", "prod_author", "prod_publisher", "prod_price"
};
foreach (string column in productColumns)
{
    // The field value is the row's same-named column converted with ToString().
    doc.Add(new Field(column, row[column].ToString(), Field.Store.YES, Field.Index.ANALYZED));
}
//Then next I try to do search with sort option:
/// <summary>
/// Translates a caller-supplied sort key into the Lucene <c>Sort</c> to apply.
/// </summary>
/// <param name="_sort">
/// One of "UnitPriceGorss", "UnitPriceGorssDESC", "Title" or "TitleDESC".
/// Any other value (including null or the empty string) falls back to
/// ascending title order.
/// </param>
/// <returns>A single-field <c>Sort</c>; never null.</returns>
private static Sort SetSortForLucene(string _sort)
{
    // The third SortField argument is "reverse": false = ascending, true = descending.
    // NOTE(review): Lucene expects sort fields to be indexed but not tokenized;
    // "prod_title" is added with Field.Index.ANALYZED where the index is built,
    // so the title sorts here may still misbehave until a NOT_ANALYZED copy of
    // the title is indexed — confirm against the indexing code.
    switch (_sort)
    {
        case "UnitPriceGorss":
            return new Sort(new SortField("prod_price", SortField.DOUBLE, false));

        case "UnitPriceGorssDESC":
            return new Sort(new SortField("prod_price", SortField.DOUBLE, true));

        case "TitleDESC":
            // Descending means reverse = true (the flag used to be inverted).
            return new Sort(new SortField("prod_title", SortField.STRING, true));

        case "Title":
        default:
            // "Title", "", null and unknown keys all sort by title ascending.
            return new Sort(new SortField("prod_title", SortField.STRING, false));
    }
}
//Inside my query of lucene method:
// Open the index for searching.
// NOTE(review): neither the reader nor the searcher is closed in the code
// visible here — confirm they are closed once the results have been consumed.
StandardAnalyzer analizer = new StandardAnalyzer(Version.LUCENE_29);
IndexReader reader = IndexReader.Open(IndexPath);
Searcher searcher = new IndexSearcher(reader);

// Build the requested sort and a collector that keeps the top pageSize hits in that order.
Sort sort = SetSortForLucene(_sort);
TopFieldDocCollector collector = new TopFieldDocCollector(reader, sort, pageSize);

// Decide which document field the QueryParser searches by default.
// A name containing "_" is used as-is as an index field name;
// otherwise the friendly name is mapped to its index field.
string _luceneField;
if (luceneField.Contains("_"))
{
    _luceneField = luceneField;
}
else
{
    switch (luceneField)
    {
        case "Description": _luceneField = "prod_desc"; break;
        case "Author": _luceneField = "prod_author"; break;
        case "Publisher": _luceneField = "prod_publisher"; break;
        case "Title":
        default: _luceneField = "prod_title"; break;
    }
}

QueryParser parser = new QueryParser(_luceneField, analizer);
Query query = parser.Parse(luceneQuery);
searcher.Search(query, collector);

// Walk the collected hits and keep the ids of those scoring above the threshold.
ScoreDoc[] hits = collector.TopDocs().scoreDocs;
foreach (ScoreDoc hit in hits)
{
    if (hit.score > score)
    {
        // Load the stored document only for hits that pass the score filter.
        Document doc = searcher.Doc(hit.doc);
        int id;
        // Hits whose prod_id is not a valid integer are skipped.
        if (int.TryParse(doc.Get("prod_id"), out id))
        {
            tmpId.Add(id);
        }
    }
}
// Custom stop-word list used for full-text searching.
// The author suspected (unconfirmed) that this list was what broke sorting.
string[] stopWords =
{
    "a", "że", "w", "przy", "o", "bo", "co", "z", "za", "ze", "ta", "i", "no", "do"
};
我参考过stackoverflow上的这个链接,以及另一个写得不错的链接来尝试解决我的问题,但排序仍然失败,我不知道我的代码有什么问题。
几天后我终于找到了解决方案:当要排序的字段表示字符串值时,该字段不能被标记化(分词)。
例如,想按标题(升序/降序)对产品进行排序时,应该在建立索引时这样添加一个专门用于排序的字段:
// Dedicated sort field: the title with spaces replaced by underscores, followed by
// "_" and the product id. It is indexed as one un-analyzed term and is not stored.
string titleSortKey = row["prod_title"].ToString().Replace(" ", "_")
    + "_"
    + row[Product.PROD_ID].ToString();
doc.Add(new Field(Product.PROD_TITLE_SORT, titleSortKey, Field.Store.NO, Field.Index.NOT_ANALYZED));
我不明白的是:为什么这个字段既不存储、也不经过分析(analyze),lucene.net却仍然可以按这个新增的字段进行排序。我用lukeall-1.0.1.jar索引浏览器查看过,这个排序字段看起来甚至不在索引中!!
其次,您需要创建一个正确的排序方法:
/// <summary>
/// Translates a caller-supplied sort key into the Lucene <c>Sort</c> to apply.
/// </summary>
/// <param name="_sort">
/// One of "UnitPriceGorss", "UnitPriceGorssDESC", "Title" or "TitleDESC".
/// Any other value (including null or the empty string) selects the default
/// ordering by search score.
/// </param>
/// <returns>A single-field <c>Sort</c>; never null.</returns>
private static Sort SetSortForLucene(string _sort)
{
    // The third SortField argument is "reverse": false = ascending, true = descending.
    switch (_sort)
    {
        case "UnitPriceGorss":
            return new Sort(new SortField(PROD_PRICE, SortField.DOUBLE, false));

        case "UnitPriceGorssDESC":
            return new Sort(new SortField(PROD_PRICE, SortField.DOUBLE, true));

        case "Title":
            // Ascending by the sort-specific title field (the reverse flag used to
            // be inverted, unlike the price cases above).
            return new Sort(new SortField(PROD_TITLE_SORT, SortField.STRING, false));

        case "TitleDESC":
            // Descending by the sort-specific title field.
            return new Sort(new SortField(PROD_TITLE_SORT, SortField.STRING, true));

        default:
            // null, "" and unknown keys: order results by Lucene's search score.
            return new Sort(SortField.FIELD_SCORE);
    }
}
真正让我感到疑惑的是:价格字段是作为经过分析的全文索引字段建立索引的,但使用SortField.DOUBLE对它排序却依然可以正常工作。
我希望这篇文章能帮助那些在排序方面遇到类似问题的人。
答案 0 :(得分:6)
除非您在查询中返回数据,否则不需要存储该字段。但它仍然被添加到索引中。
不对要排序的字段进行分析的原因是:分析器会把字段拆分成多个独立的词项(term),这样同一个文档在索引中会对应多个词,显然无法据此在整个索引范围内排序,因此排序会变得非常困难。这适用于所有字段类型,无论字段的值是否只有一个词项。
我相信你可以存储该字段,但除非你想在查询中返回它,否则没有必要。
答案 1 :(得分:4)
关于排序,我认为有一点很重要:
它不适用于经过标记化(分析)的数据。