使用filepath作为搜索字段时无法获得搜索结果 - Lucene.NET 2.9

时间:2012-03-04 22:45:34

标签: c# .net lucene.net

我的目标是索引文件的PATH,以便我可以在之后进行搜索。 我有以下代码:

 /// <summary>
 /// Reproduction case from the question: indexes one file path into the
 /// "LuceneIndex" directory and then tries to find it again through QueryParser.
 /// As posted, the search returns no results (see the notes in SearchSomething).
 /// </summary>
 class Program
{
    /// <summary>
    /// Opens and closes a writer on the index, then indexes one path and searches
    /// for that same path.
    /// </summary>
    static void Main(string[] args)
    {
        // A writer is opened, optimized, committed and closed before any document is
        // added - presumably so the index exists before it is used; intent not stated.
        Directory directory = FSDirectory.Open(new DirectoryInfo("LuceneIndex"));
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_29);
        var writer = new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED);
        writer.Optimize();
        writer.Commit();
        writer.Close();
        String text1 = "C:\\Users\\Marto\\Desktop\\folder1\\file1.txt";
        WriteDocument(text1);
        SearchSomething("C:\\Users\\Marto\\Desktop\\folder1\\file1.txt");
        // Keeps the console window open until Enter is pressed.
        Console.ReadLine();
    }

    /// <summary>
    /// Adds one document whose only field, "path", holds <paramref name="text"/>.
    /// </summary>
    /// <param name="text">The file path to store in the "path" field.</param>
    private static void WriteDocument(String text)
    {
        // NOTE(review): this Directory is never closed, unlike the one in SearchSomething.
        Directory directory = FSDirectory.Open(new DirectoryInfo("LuceneIndex"));
        // Empty array: the analyzer is built with no stop words.
        string[] DEFAULT_STOP_WORDS = { };
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_29, DEFAULT_STOP_WORDS);
        var writer = new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED);
        var doc = new Document();
        // Stored (Field.Store.YES) and indexed with Field.Index.NOT_ANALYZED; per Lucene's
        // Field.Index documentation the value is then indexed as-is, without the analyzer.
        doc.Add(new Field("path", text, Field.Store.YES, Field.Index.NOT_ANALYZED));//text = "C:\\Users\\Marto\\Desktop\\folder1\\file1.txt"
        writer.AddDocument(doc);
        writer.Optimize();
        writer.Commit();
        writer.Close();
    }

    /// <summary>
    /// Parses <paramref name="searchText"/> with QueryParser against the "path" field,
    /// runs the query for up to 500 hits and prints each hit's score and stored path.
    /// </summary>
    /// <param name="searchText">The text handed to QueryParser.Parse.</param>
    private static void SearchSomething(String searchText)
    {
        Directory directory = FSDirectory.Open(new DirectoryInfo("LuceneIndex"));
        string[] DEFAULT_STOP_WORDS = { };
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_29, DEFAULT_STOP_WORDS);
        var parser = new QueryParser(Version.LUCENE_29, "path", analyzer);
        // This is where the mismatch appears: the field was indexed NOT_ANALYZED, but the
        // search text goes through QueryParser and StandardAnalyzer. The observed result
        // (below) has lost its backslashes and been lower-cased.
        Query query = parser.Parse(searchText);
        //searchText = "C:\\Users\\Marto\\Desktop\\folder1\\file1.txt"
        //but query = {C:usersmartodesktopfolder1file1.txt}
        // Second argument 'true' opens the searcher read-only.
        var searcher = new IndexSearcher(directory, true);
        TopDocs topDocs = searcher.Search(query, 500);
        int results = topDocs.ScoreDocs.Length;
        Console.WriteLine("Found {0} results", results);
        for (int i = 0; i < results; i++)
        {
            ScoreDoc scoreDoc = topDocs.ScoreDocs[i];
            float score = scoreDoc.Score;
            int docId = scoreDoc.Doc;
            // Load the stored document so the original path can be printed.
            Document doc = searcher.Doc(docId);
            Console.WriteLine("Result num {0}, score {1}", i + 1, score);
            Console.WriteLine("Text found: {0}\r\n", doc.Get("path"));
        }
        // NOTE(review): these Close calls are skipped if anything above throws.
        searcher.Close();
        directory.Close();
    }

}

但是当我搜索“C:\Users\Marto\Desktop\folder1\file1.txt”时,查询会删除字符“\”,并且不会返回任何结果。

我也尝试过使用其他Analyzer,但得到了相同的结果。如果有人能帮助我,我将非常感激!

4 个答案:

答案 0 :(得分:0)

您是按如下方式创建“path”字段的:

new Field("path", text, Field.Store.YES, Field.Index.NOT_ANALYZED)

您的最后一个参数(Field.Index.NOT_ANALYZED)告诉Lucene不要索引这个字段。Lucene是基于索引的搜索引擎,因此没有索引就意味着无法搜索。要解决此问题,只需将此参数更改为Field.Index.ANALYZED:

new Field("path", text, Field.Store.YES, Field.Index.ANALYZED)

答案 1 :(得分:0)

这是因为您在索引时指定了Field.Index.NOT_ANALYZED,但是您在搜索时使用了分析器。

在Lucene中,搜索时必须始终采用与索引数据时相同的方式。请在创建QueryParser时使用KeywordAnalyzer而不是StandardAnalyzer,或者在索引时将字段改为Field.Index.ANALYZED。

答案 2 :(得分:0)

好吧,最后我弄明白了。解决方案是使用Field.Index.NOT_ANALYZED,这样分析器就不会处理该字段。然后我使用了BooleanQuery,并向其中添加了TermQuery(而不是使用常规的Query),因此无需解析查询字符串。

非常感谢您的想法。

答案 3 :(得分:0)

解决方案:

/// <summary>
/// Indexes a file path as a single, un-analyzed term and finds it again with a
/// term-level query, so the search text never passes through QueryParser or an
/// analyzer.
/// </summary>
class Program
{
    /// <summary>Directory that holds the Lucene index; shared by every method.</summary>
    private const string IndexPath = "LuceneIndex";

    /// <summary>
    /// Makes sure the index exists, indexes one sample path and searches for it.
    /// </summary>
    static void Main(string[] args)
    {
        // Opening a writer creates the index when it does not exist yet, so the
        // searcher below always has something to open.
        WithIndexWriter(writer => { });

        String text1 = "C:\\Users\\Marto\\Desktop\\folder1\\file1.txt";
        WriteDocument(text1);
        SearchSomething(text1);
        Console.ReadLine();
    }

    /// <summary>
    /// Adds one document whose "path" field holds <paramref name="text"/>.
    /// </summary>
    /// <param name="text">The file path to index.</param>
    private static void WriteDocument(String text)
    {
        Document doc = new Document();
        // NOT_ANALYZED keeps the whole path as one term, backslashes and case included.
        doc.Add(new Field("path", text, Field.Store.YES, Field.Index.NOT_ANALYZED));
        WithIndexWriter(writer => writer.AddDocument(doc));
    }

    /// <summary>
    /// Searches the "path" field for <paramref name="searchText"/> and prints every hit.
    /// </summary>
    /// <param name="searchText">
    /// The path to look for. It is used as a WildcardQuery term, so '*' and '?' act as
    /// wildcards; text without them is matched as an exact term.
    /// </param>
    private static void SearchSomething(String searchText)
    {
        Directory directory = FSDirectory.Open(new DirectoryInfo(IndexPath));
        try
        {
            IndexSearcher searcher = new IndexSearcher(directory, true);
            try
            {
                // Search needs a positive hit limit, so an empty index is reported
                // without running a query.
                int maxDoc = searcher.MaxDoc();
                if (maxDoc <= 0)
                {
                    Console.WriteLine("Found {0} results", 0);
                    return;
                }

                // The term is built directly from the search text: no QueryParser,
                // no analyzer, hence nothing strips the backslashes.
                BooleanQuery booleanQuery = new BooleanQuery();
                booleanQuery.Add(new WildcardQuery(new Term("path", searchText)), BooleanClause.Occur.SHOULD);

                TopDocs topDocs = searcher.Search(booleanQuery, maxDoc);
                ScoreDoc[] hits = topDocs.ScoreDocs;

                Console.WriteLine("Found {0} results", hits.Length);
                for (int i = 0; i < hits.Length; i++)
                {
                    Document doc = searcher.Doc(hits[i].Doc);
                    Console.WriteLine("Result num {0}, score {1}", i + 1, hits[i].Score);
                    Console.WriteLine("Text found: {0}\r\n", doc.Get("path"));
                }
            }
            finally
            {
                searcher.Close();
            }
        }
        finally
        {
            directory.Close();
        }
    }

    /// <summary>
    /// Opens a writer on the index, runs <paramref name="action"/>, then optimizes and
    /// commits. The writer and the directory are closed even if the action throws.
    /// </summary>
    /// <param name="action">Work to perform with the open writer.</param>
    private static void WithIndexWriter(Action<IndexWriter> action)
    {
        Directory directory = FSDirectory.Open(new DirectoryInfo(IndexPath));
        try
        {
            StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_29);
            IndexWriter writer = new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED);
            try
            {
                action(writer);
                writer.Optimize();
                writer.Commit();
            }
            finally
            {
                writer.Close();
            }
        }
        finally
        {
            directory.Close();
        }
    }

}

感谢所有的想法!!!