用Lucene获取匹配字符串的起始索引

时间:2013-09-08 11:34:03

标签: java string-matching lucene fuzzy-search

我正在使用Lucene在大文本文件中查找模糊匹配字符串。我的问题是我需要的是匹配的起始和结束索引。我该如何实现它?我在我的java代码中使用Lucene,它的版本是3。

这是我当前的代码,它没有给出任何索引位置,只是告诉我找到了多少个匹配。我查看了一些关于高亮显示(highlighting)的问题,但其中的做法在我的代码中都不起作用。那样的类真的存在吗?在我看来,它像是javax中的类,而不是Lucene这个工具里的类,是吗?

也许值得一提的是,我目前的情况还不是很复杂:查询与正文之间的唯一区别是 \n、\r\n、\t、空格之类的空白字符,并没有字符本身的变化(例如正文中是 roam 而查询中是 rom)。但是,由于我以后可能需要处理更复杂的情况,我更倾向于使用Lucene。

TNX,

package pkg2_lucene_fuzzy_search;

    import java.io.BufferedReader;
    import java.io.File;
    import java.io.FileReader;
    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.Iterator;
    import org.apache.lucene.queryParser.*;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.search.TopScoreDocCollector;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.store.LockObtainFailedException;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.index.CorruptIndexException;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.store.RAMDirectory;
    import org.apache.lucene.util.Version;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;

    /**
     * Minimal Lucene 3.0 example: indexes one text into an in-memory index,
     * runs a phrase query against it and prints the matching documents.
     *
     * <p>Only the number of hits and the stored text of each hit are reported;
     * this class does not compute character offsets of the matches.
     */
    public class LuceneSimple {

        /** Name of the single field that is both indexed and searched. */
        private static final String FIELD_NAME = "title";

        /** Upper bound on the number of hits collected by one search. */
        private static final int HITS_PER_PAGE = 400;

        /**
         * Adds one document whose {@code "title"} field holds {@code value}.
         *
         * <p>The field is stored as well as analyzed so that {@code main} can
         * read the text back from a hit with {@code Document.get}.
         *
         * @param w     open writer that receives the document
         * @param value text to index
         * @throws IOException if the writer fails to add the document
         */
        private static void addDoc(IndexWriter w, String value) throws IOException {
            Document doc = new Document();
            doc.add(new Field(FIELD_NAME, value, Field.Store.YES, Field.Index.ANALYZED));
            w.addDocument(doc);
        }

        /**
         * Builds the in-memory index, searches it and prints the results.
         *
         * @param args ignored
         * @throws CorruptIndexException     if the index is corrupt
         * @throws LockObtainFailedException if the index write lock cannot be obtained
         * @throws IOException               on any other index I/O failure
         * @throws ParseException            if the query string cannot be parsed
         */
        public static void main(String[] args) throws CorruptIndexException, LockObtainFailedException, IOException, ParseException {
            String whole_novel = "my text";

            StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
            Directory index = new RAMDirectory();

            IndexWriter w = new IndexWriter(index, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
            try {
                w.setRAMBufferSizeMB(200);
                System.out.println(index.getClass() + " RamBuff:" + w.getRAMBufferSizeMB());
                addDoc(w, whole_novel);
            } finally {
                // Close the writer even when indexing fails so it is not leaked.
                w.close();
            }

            // The embedded double quotes are part of the query text handed to the parser.
            String querystr = "\"Dear Mary!\"";
            Query q = new QueryParser(Version.LUCENE_30, FIELD_NAME, analyzer).parse(querystr);

            IndexSearcher searcher = new IndexSearcher(index, true);
            try {
                TopScoreDocCollector collector = TopScoreDocCollector.create(HITS_PER_PAGE, true);
                searcher.search(q, collector);
                ScoreDoc[] hits = collector.topDocs().scoreDocs;

                System.out.println("Found " + hits.length + " hits.");
                for (int i = 0; i < hits.length; ++i) {
                    Document d = searcher.doc(hits[i].doc);
                    System.out.println((i + 1) + ". " + d.get(FIELD_NAME));
                }
            } finally {
                // Close the searcher even when searching or document loading fails.
                searcher.close();
            }
        }

    }

0 个答案:

没有答案