Lucene.Net 2.9.2.2 的一个奇怪问题:无法搜索关键字“a”,其他关键字都可以

时间:2011-09-20 02:47:57

标签: c# lucene.net

添加索引代码:

/// <summary>
/// Builds the Lucene index stored in the "IndexDirectory" folder from a list of questions.
/// </summary>
public class IndexManage
{
    /// <summary>
    /// Creates the index in "IndexDirectory" and adds one document per item,
    /// then commits and optimizes it.
    /// </summary>
    /// <param name="itemList">Questions to index; each one becomes a single document.</param>
    public static void AddIndex(List<QuestionItem> itemList)
    {
        Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
        Lucene.Net.Store.FSDirectory fs = Lucene.Net.Store.FSDirectory.Open(new DirectoryInfo("IndexDirectory"));
        IndexWriter writer = new IndexWriter(fs, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
        try
        {
            foreach (var item in itemList)
            {
                AddDocument(writer, item);
            }
            writer.Commit();
            writer.Optimize();
        }
        finally
        {
            // Always close the writer, even when indexing fails part-way,
            // so it is not left open after an exception.
            writer.Close();
        }
    }

    /// <summary>
    /// Converts one question into a document with the stored, analyzed fields
    /// "qid", "title", "content" and "supply", and adds it to the writer.
    /// </summary>
    /// <param name="writer">Open writer that receives the document.</param>
    /// <param name="item">Question whose properties supply the field values.</param>
    private static void AddDocument(IndexWriter writer, QuestionItem item)
    {
        Document document = new Document();
        document.Add(new Field("qid", item.QID.ToString(), Field.Store.YES, Field.Index.ANALYZED));
        document.Add(new Field("title", item.Title, Field.Store.YES, Field.Index.ANALYZED));
        document.Add(new Field("content", item.Content, Field.Store.YES, Field.Index.ANALYZED));
        document.Add(new Field("supply", item.Supply, Field.Store.YES, Field.Index.ANALYZED));
        writer.AddDocument(document);
    }
}

搜索代码:

/// <summary>
/// Runs keyword searches against the Lucene index stored in the "IndexDirectory" folder.
/// </summary>
public class SearchManage
{
    /// <summary>
    /// Searches the "title", "content" and "supply" fields for the keyword
    /// (terms combined with OR) and returns up to 2500 matching questions.
    /// </summary>
    /// <param name="keyword">Query text handed to the query parser.</param>
    /// <returns>The matching questions rebuilt from the stored document fields.</returns>
    public static List<QuestionItem> Search(string keyword)
    {
        // NOTE(review): StandardAnalyzer is constructed here with its default stop-word
        // list; terms on that list (such as "a") are expected to yield no hits.
        Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
        Lucene.Net.Store.FSDirectory fs = Lucene.Net.Store.FSDirectory.Open(new DirectoryInfo("IndexDirectory"));
        IndexSearcher searcher = new IndexSearcher(fs, true);
        try
        {
            MultiFieldQueryParser parser = new MultiFieldQueryParser(
                Lucene.Net.Util.Version.LUCENE_29,
                new string[] { "title", "content", "supply" },
                analyzer);
            parser.SetDefaultOperator(QueryParser.Operator.OR);
            Query query = parser.Parse(keyword);

            var hits = searcher.Search(query, 2500);
            List<QuestionItem> itemList = new List<QuestionItem>();
            for (int i = 0; i < hits.scoreDocs.Length; i++)
            {
                var doc = searcher.Doc(hits.scoreDocs[i].doc);
                itemList.Add(new QuestionItem()
                {
                    QID = Int32.Parse(doc.Get("qid")),
                    Title = doc.Get("title"),
                    Content = doc.Get("content"),
                    Supply = doc.Get("supply")
                });
            }
            return itemList;
        }
        finally
        {
            // Close the searcher even if parsing the query or reading a document throws.
            searcher.Close();
        }
    }
}

QuestionItem模型是:

/// <summary>
/// Plain data holder for one question, used both as indexing input and as a search result.
/// </summary>
public class QuestionItem
{
    /// <summary>Numeric identifier of the question.</summary>
    public int QID
    {
        get;
        set;
    }

    /// <summary>Title text of the question.</summary>
    public string Title
    {
        get;
        set;
    }

    /// <summary>Content text of the question.</summary>
    public string Content
    {
        get;
        set;
    }

    /// <summary>Text of the "supply" field of the question.</summary>
    public string Supply
    {
        get;
        set;
    }
}

测试代码是:

/// <summary>
/// Rebuilds the sample index, searches it for "a" and prints every hit to the console.
/// </summary>
public static void Show()
{
    AddIndex();

    List<QuestionItem> results = SearchManage.Search("a");
    Console.WriteLine("search result:");

    foreach (QuestionItem hit in results)
    {
        // The four values are written back-to-back with no separator between them.
        string line = string.Concat(hit.QID, hit.Title, hit.Content, hit.Supply);
        Console.WriteLine(line);
    }
}

/// <summary>
/// Builds five sample questions and passes them to <c>IndexManage.AddIndex</c>.
/// </summary>
private static void AddIndex()
{
    var samples = new List<QuestionItem>();
    samples.Add(new QuestionItem { QID = 1, Title = "a", Content = "ab", Supply = "abc" });
    samples.Add(new QuestionItem { QID = 2, Title = "b", Content = "a", Supply = "fds a" });
    samples.Add(new QuestionItem { QID = 3, Title = "c", Content = "c defg", Supply = "as dfg hjk" });
    samples.Add(new QuestionItem { QID = 4, Title = "d", Content = "def a b", Supply = "kjhgf ds a" });
    samples.Add(new QuestionItem { QID = 5, Title = "e", Content = "ef ab c", Supply = "a sdf g hjkl" });

    IndexManage.AddIndex(samples);
}

现在的问题是:搜索“a”没有任何结果,但搜索“ab”、“b”、“c”都有结果。这个问题非常奇怪,谁能帮帮我?

1 个答案:

答案 0 :(得分:2)

StandardAnalyzer 使用默认的停用词列表,“a”就是其中之一,因此它在建立索引和解析查询时都会被过滤掉。如果不想使用停用词,可以调用以空集合作为第二个参数的构造函数(建立索引和搜索时必须使用相同的分析器,并且修改后需要重新建立索引):

Analyzer ana = new StandardAnalyzer(LUCENE_30, Collections.emptySet());

或者在 .NET 中这样写:

Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29, new Hashtable());