Lucene.Net:对字段(Field)调用SetBoost不会影响搜索结果

时间:2012-10-08 20:33:32

标签: .net lucene

我在为文档添加字段时编写了以下代码来测试SetBoost方法。

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using Lucene;
using Lucene.Net;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.QueryParsers;
using Lucene.Net.Search;
using Lucene.Net.Store;
using Directory = Lucene.Net.Store.Directory;
using Version = Lucene.Net.Util.Version;

namespace LuceneTest
{
    /// <summary>
    /// Console repro that indexes five small product documents, each with a
    /// boosted "title" field, and prints the score of every hit returned by a
    /// prefix search for "special" over the "title" and "synopsis" fields.
    /// </summary>
    public class LuceneTest
    {
        /// <summary>
        /// Builds the index under "&lt;current directory&gt;\Lucene", runs the
        /// search, and writes "Id title score" for each hit to the console.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            var products = new[]
            {
                CreateProduct("1", "Special One", "special synopsis"),
                CreateProduct("2", "Special Two", "special synopsis"),
                CreateProduct("3", "Normal One", "special synopsis"),
                CreateProduct("4", "Normal Two", "special synopsis"),
                CreateProduct("5", "Special Three", "normal synopsis")
            };

            // Path.Combine instead of a hard-coded "\\" separator.
            var indexPath = Path.Combine(Environment.CurrentDirectory, "Lucene");
            Directory directory = FSDirectory.Open(new DirectoryInfo(indexPath));
            try
            {
                Analyzer analyzer = new StandardAnalyzer();

                // create == true: any existing index in the directory is overwritten.
                var writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
                try
                {
                    foreach (var product in products)
                    {
                        writer.AddDocument(product);
                    }
                    writer.Optimize();
                }
                finally
                {
                    // Close the writer even when indexing fails so it is not left open.
                    writer.Close();
                }

                Console.WriteLine("searching...");
                var indexReader = IndexReader.Open(directory, true);
                try
                {
                    var indexSearcher = new IndexSearcher(indexReader);
                    try
                    {
                        // Match documents whose title OR synopsis has a term starting with "special".
                        var fieldQuery = new BooleanQuery();
                        fieldQuery.Add(new BooleanClause(new PrefixQuery(new Term("title", "special")), BooleanClause.Occur.SHOULD));
                        fieldQuery.Add(new BooleanClause(new PrefixQuery(new Term("synopsis", "special")), BooleanClause.Occur.SHOULD));

                        var query = new BooleanQuery();
                        query.Add(new BooleanClause((Query)fieldQuery, BooleanClause.Occur.MUST));

                        // The (Filter) cast selects the Search(Query, Filter, int) overload.
                        TopDocs results = indexSearcher.Search(query, (Filter)null, 200);

                        foreach (var hit in results.ScoreDocs)
                        {
                            var document = indexSearcher.Doc(hit.doc);
                            Console.WriteLine(document.Get("Id") + " " + document.Get("title") + " " + hit.score);
                        }
                    }
                    finally
                    {
                        indexSearcher.Close();
                    }
                }
                finally
                {
                    // The reader was opened here, so it is closed here as well.
                    indexReader.Close();
                }
            }
            finally
            {
                directory.Close();
            }

            Console.WriteLine("done...");
            Console.ReadLine();
        }

        /// <summary>
        /// Creates a product document with a stored, non-analyzed "Id" field and
        /// stored, analyzed "title" and "synopsis" fields. The title field is
        /// given a boost of 2.
        /// </summary>
        /// <param name="id">Value of the "Id" field.</param>
        /// <param name="title">Value of the boosted "title" field.</param>
        /// <param name="synopsis">Value of the "synopsis" field.</param>
        /// <returns>The populated document, ready to be added to an index.</returns>
        private static Document CreateProduct(string id, string title, string synopsis)
        {
            var product = new Document();
            product.Add(new Field("Id", id, Field.Store.YES, Field.Index.NOT_ANALYZED));

            var titleField = new Field("title", title, Field.Store.YES, Field.Index.ANALYZED);
            titleField.SetBoost(2f);
            product.Add(titleField);

            product.Add(new Field("synopsis", synopsis, Field.Store.YES, Field.Index.ANALYZED));
            return product;
        }
    }
}

我正在使用Lucene.Net 2.9.4.1版本。我在title(标题)字段上设置了提升(boost)。当我在标题和概要字段中搜索“special”一词时,我希望产品1、2和5排在最前面,但实际得到的结果如下:

searching...
1 Special One 1.414214
2 Special Two 1.414214
3 Normal One 0.3535534
4 Normal Two 0.3535534
5 Special Three 0.3535534
done...

尽管产品5是在标题(而不是概要)中包含“special”一词,它的得分却与产品3和4相同。

任何帮助或想法将不胜感激。 感谢

2 个答案:

答案 0 :(得分:4)

我认为问题在于您使用的是PrefixQueries。前缀查询被重写为常量评分查询。您可以自己设置重写方法,例如:

// Use a scoring rewrite instead of the default constant-score rewrite,
// so that the field boost contributes to the hit's score.
// Lucene.Net uses PascalCase method names: SetRewriteMethod, not setRewriteMethod.
PrefixQuery pquery = new PrefixQuery(new Term("title", "special"));
pquery.SetRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);

或者您可以尝试使用TermQuery而不是PrefixQuery。无论哪种方式,您都应该能看到字段级提升(field-level boost)生效。

哦,请注意,如果你想了解为什么结果会按照它们的方式得分,你应该看看Searcher.explain。评分变得复杂,这是一个非常方便的工具,用于理解和调整它。

答案 1 :(得分:0)

《Lucene in Action》第二版第49页的2.5.2节“提升字段”中写道:“但请记住,当您想要更改字段或文档的提升值时,必须先完全删除、然后重新添加整个文档,或者使用updateDocument方法,它执行的是相同的操作。”

当您使用相同的索引文件进行测试时,我认为您需要在打开索引之前调用writer.updateDocument。