package com.org.test;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Small Lucene 4.0 demo: indexes a dictionary text file as a single document
 * and then runs a parsed query against it, printing the score of each hit
 * followed by the number of hits.
 */
public class Testclass {
    /**
     * Runs the demo with the hard-coded query string {@code "+water-"}.
     *
     * @param args ignored
     */
    public static void main(String[] args)
    {
        Testclass tclass = new Testclass();
        tclass.searchExactWord("+water-");
    }

    /**
     * Indexes {@code c://finaldictionary.txt} into the index directory
     * {@code c://spellchecker//} and searches the "dictionary" field for the
     * given word. Any failure is caught and its stack trace is printed; nothing
     * is propagated to the caller.
     *
     * @param singleWord query text; it is trimmed and handed to
     *                   {@link QueryParser}, so query-syntax characters in it
     *                   are parsed as operators rather than matched literally
     */
    private void searchExactWord(String singleWord)
    {
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
        String dictionaryPath = "c://finaldictionary.txt";
        try
        {
            File dir = new File("c://spellchecker//");
            Directory indexDirectory = FSDirectory.open(dir);
            try
            {
                indexDictionary(indexDirectory, analyzer, new File(dictionaryPath));
                printHits(indexDirectory, analyzer, singleWord);
            }
            finally
            {
                // Release the directory even when indexing or searching fails.
                indexDirectory.close();
            }
        }
        catch (Exception ex)
        {
            ex.printStackTrace();
        }
    }

    /**
     * Adds the content of the dictionary file to the index as one document
     * with a single tokenized, unstored "dictionary" field.
     *
     * @param indexDirectory directory holding the index
     * @param analyzer       analyzer used to tokenize the dictionary content
     * @param dictFile       dictionary text file to index
     * @throws IOException if the file cannot be read or the index cannot be written
     */
    private void indexDictionary(Directory indexDirectory, Analyzer analyzer, File dictFile) throws IOException
    {
        // NOTE(review): the writer is opened with the default configuration, which
        // presumably appends one more copy of the dictionary on every run - confirm
        // whether the index should be recreated instead.
        IndexWriter indexWriter = new IndexWriter(indexDirectory, new IndexWriterConfig(Version.LUCENE_40, analyzer));
        try
        {
            FileReader freader = new FileReader(dictFile);
            try
            {
                FieldType fieldtype = new FieldType();
                fieldtype.setIndexed(true);
                fieldtype.setOmitNorms(true);
                fieldtype.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
                fieldtype.setStored(false);
                fieldtype.setTokenized(true);
                fieldtype.freeze();

                Document document = new Document();
                document.add(new Field("dictionary", freader, fieldtype));
                indexWriter.addDocument(document);
            }
            finally
            {
                freader.close();
            }
        }
        finally
        {
            // Always close the writer so it is not left open when indexing fails.
            indexWriter.close();
        }
    }

    /**
     * Parses the given word as a query on the "dictionary" field, collects at
     * most five top-scoring hits and prints each score followed by the hit count.
     *
     * @param indexDirectory directory holding the index
     * @param analyzer       analyzer used by the query parser
     * @param singleWord     query text; trimmed before parsing
     * @throws IOException    if the index cannot be opened or searched
     * @throws ParseException if the query text cannot be parsed
     */
    private void printHits(Directory indexDirectory, Analyzer analyzer, String singleWord) throws IOException, ParseException
    {
        IndexReader reader = DirectoryReader.open(indexDirectory);
        try
        {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopScoreDocCollector collector = TopScoreDocCollector.create(5, true);
            QueryParser queryParser = new QueryParser(Version.LUCENE_40, "dictionary", analyzer);
            Query query = queryParser.parse(singleWord.trim());
            searcher.search(query, collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;
            for (int i = 0; i < hits.length; i++)
            {
                System.out.println(hits[i].score);
            }
            System.out.println(hits.length);
        }
        finally
        {
            reader.close();
        }
    }
}
我正在尝试使用 Lucene 进行基于字典的搜索。字典是一个文本文件(finaldictionary)。问题是,当我尝试搜索/查找时,Lucene 似乎进行了模糊搜索,而且只返回命中数。有没有办法确定 Lucene 是否在字典中找到了完全匹配的值?
答案 0 :(得分:0)
Lucene 并没有执行模糊搜索。对于您提供的输入,生成的查询将为:
+dictionary:water
它不会执行模糊匹配,只会找到包含词项 "water" 的文档。
如果您希望在索引中找到词项 "+water-",那么您使用的分析器并不支持这一点。StandardAnalyzer 旨在分隔单词,并且会去除大部分标点符号。您必须以能够满足搜索需求的方式来分析该字段。
完成此操作后,您需要转义 QueryParser 的特殊字符(在本例中为 +),或者完全不使用 QueryParser,例如:
Query query = new TermQuery(new Term("dictionary", "+water-"));