Lucene搜索价值

时间:2016-02-16 06:41:41

标签: java lucene

package com.org.test;

import java.io.File;
import java.io.FileReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Small demo that indexes a dictionary text file with Lucene and then looks
 * a single word up in that index.
 */
public class Testclass {

    /**
     * Runs one sample lookup for the literal input {@code "+water-"}.
     *
     * @param args not used
     */
    public static void main(String[] args) 
    {
        Testclass tclass = new Testclass();
        tclass.searchExactWord("+water-");
    }

    /**
     * Indexes {@code c://finaldictionary.txt} into the index directory
     * {@code c://spellchecker//} and searches the {@code dictionary} field
     * for the given word.
     *
     * <p>The whole dictionary file is added as ONE document, so the printed
     * hit count is the number of matching documents, not the number of
     * matching dictionary entries. The score of each hit (at most 5) is
     * printed first, followed by the hit count.
     *
     * <p>The word is escaped before parsing so that query-syntax characters
     * such as {@code +} or {@code -} are treated as literal text instead of
     * operators. The field is still tokenized by {@link StandardAnalyzer},
     * so punctuation is presumably dropped at analysis time; a different
     * analyzer is needed if such characters must be matched exactly.
     *
     * <p>NOTE(review): a new document is added on every call; unless the
     * writer is configured to recreate the index, repeated runs will
     * presumably accumulate duplicates. Confirm the intended open mode.
     *
     * <p>Any exception is caught and its stack trace is printed.
     *
     * @param singleWord the word to look up; surrounding whitespace is trimmed
     */
    private void searchExactWord(String singleWord) 
    {
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
        String dictionaryPath = "c://finaldictionary.txt";

        // try-with-resources guarantees the directory, writer (and its write
        // lock), file reader and index reader are released on every path.
        try (Directory indexDirectory = FSDirectory.open(new File("c://spellchecker//")))
        {
            // create Index
            // The dictionary is opened first so a missing file fails before
            // the index is touched.
            try (FileReader freader = new FileReader(new File(dictionaryPath));
                 IndexWriter indexWriter = new IndexWriter(indexDirectory,
                         new IndexWriterConfig(Version.LUCENE_40, analyzer)))
            {
                FieldType fieldtype = new FieldType();
                fieldtype.setIndexed(true);
                fieldtype.setOmitNorms(true);
                fieldtype.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
                fieldtype.setStored(false);
                fieldtype.setTokenized(true);
                fieldtype.freeze();

                Document document = new Document();
                document.add(new Field("dictionary", freader, fieldtype));

                indexWriter.addDocument(document);
            }

            // search Index
            try (IndexReader reader = DirectoryReader.open(indexDirectory))
            {
                IndexSearcher searcher = new IndexSearcher(reader);

                TopScoreDocCollector collector = TopScoreDocCollector.create(5, true);

                QueryParser queryParser = new QueryParser(Version.LUCENE_40, "dictionary", analyzer);
                // Escape query-syntax characters so the input is searched as
                // literal text rather than interpreted as operators.
                Query query = queryParser.parse(QueryParser.escape(singleWord.trim()));

                searcher.search(query, collector);
                ScoreDoc[] hits = collector.topDocs().scoreDocs;

                for (ScoreDoc hit : hits)
                {
                    System.out.println(hit.score);
                }

                System.out.println(hits.length);
            }
        }
        catch (Exception ex)
        {
            ex.printStackTrace();
        }
    }

}

我正在尝试使用Lucene进行基于字典的搜索。字典是文本文件(finaldictionary)格式。问题是,当我尝试搜索/查找时,Lucene似乎进行的是模糊搜索,而且它只返回命中次数。有没有办法确定Lucene是否从字典中找到了完全匹配的值?

1 个答案:

答案 0 :(得分:0)

它并没有执行模糊搜索。对于您提供的输入,结果查询将为:

+dictionary:water

它不会执行模糊匹配,它只会找到包含术语 "water" 的文档。

如果您希望在索引中找到术语 "+water-",那么您没有使用能够支持这一点的分析器。StandardAnalyzer旨在分离单词,并且会去除大部分标点符号。您必须以支持搜索要求的方式分析您的字段。

完成此操作后,您需要转义QueryParser的特殊字符(在这种情况下为+),或者完全不使用QueryParser,例如:

// Build the query directly from a single term in the "dictionary" field,
// bypassing QueryParser so that "+" and "-" are not parsed as operators.
Query query = new TermQuery(new Term("dictionary", "+water-"));