我是 Apache Lucene 的新手。我想用 Apache Lucene 写一段简单而高效的搜索代码,在许多文本文件中搜索某个单词。我希望得到该单词在每个文本文件中的位置,以及紧邻它之前和之后的单词,并且希望方法足够快。下面是我的代码,但它无法给出匹配结果之前/之后的单词。在此先行致谢。另外,就效率而言,这段代码写得好吗?
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Locale;
import java.util.Scanner;
import org.apache.lucene.analysis.core.SimpleAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/**
 * Console demo that indexes every regular file of a data directory with
 * Apache Lucene and then answers interactive fuzzy single-word queries.
 *
 * <p>For each query only the names of the matching files are printed; term
 * positions and the words surrounding a match are not reported.
 */
public class Main {

    /** On-disk location of the Lucene index, shared by indexing and searching. */
    private static final String INDEX_DIR = "C:\\Users\\Yasser\\Desktop\\index";

    /** Directory whose files are read and indexed. */
    private static final String DATA_DIR = "C:\\seminar\\test_data";

    /** Stored field holding the name of the indexed file. */
    private static final String FIELD_FILE_NAME = "FileName";

    /** Analyzed field holding the full text of the indexed file. */
    private static final String FIELD_CONTENTS = "contents";

    /** Maximum edit distance accepted by the fuzzy query. */
    private static final int MAX_EDITS = 2;

    /** Maximum number of hits fetched per query. */
    private static final int MAX_RESULTS = 10;

    /**
     * Rebuilds the index, then repeatedly reads a word from standard input and
     * searches for it until the user answers {@code n}/{@code N} or input ends.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        indexDirectory();

        // Deliberately not closed: closing the Scanner would also close System.in.
        Scanner input = new Scanner(System.in);

        System.out.print("Please type a word to search for.\nInput: ");
        if (!input.hasNext()) {
            return; // stdin exhausted before any query was typed
        }
        search(input.next());

        System.out.print("Search again? (type Y or N) Y = Yes, N = No: ");
        // Any answer other than "n"/"N" means "search again"; EOF ends the loop.
        while (input.hasNext() && !input.next().equalsIgnoreCase("n")) {
            System.out.print("Input: ");
            if (!input.hasNext()) {
                return;
            }
            search(input.next());
            System.out.print("Search again? (Y or N): ");
        }
    }

    /**
     * Deletes the previous index content and indexes every regular file found
     * directly inside {@link #DATA_DIR}. Failures are printed to standard error
     * and abort the indexing run; they are not propagated to the caller.
     */
    private static void indexDirectory() {
        File[] files = new File(DATA_DIR).listFiles();
        if (files == null) {
            // listFiles() returns null when the path is not a directory or cannot be read.
            System.err.println("Cannot list files in " + DATA_DIR);
            return;
        }

        Path indexPath = Paths.get(INDEX_DIR);
        // try-with-resources closes the writer (releasing the index write lock)
        // and the directory even when a file fails half-way through.
        try (Directory directory = FSDirectory.open(indexPath);
             IndexWriter indexWriter =
                     new IndexWriter(directory, new IndexWriterConfig(new SimpleAnalyzer()))) {
            indexWriter.deleteAll();
            for (File file : files) {
                if (!file.isFile()) {
                    continue; // sub-directories cannot be opened as a stream
                }
                Document doc = new Document();
                doc.add(new TextField(FIELD_FILE_NAME, file.getName(), Store.YES));
                doc.add(new TextField(FIELD_CONTENTS, readContents(file), Store.YES));
                indexWriter.addDocument(doc);
            }
        } catch (IOException | RuntimeException e) {
            e.printStackTrace();
            return;
        }
        System.out.println("indexing finished");
    }

    /**
     * Reads a whole text file into memory, normalising every line ending to
     * {@code "\n"}.
     *
     * @param file the file to read; decoded as UTF-8
     * @return the file content, each line terminated by {@code '\n'}
     * @throws IOException if the file cannot be opened or read
     */
    private static String readContents(File file) throws IOException {
        StringBuilder text = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                text.append(line).append('\n');
            }
        }
        return text.toString();
    }

    /**
     * Runs a fuzzy query for a single word against the {@code contents} field
     * and prints the names of up to {@link #MAX_RESULTS} matching files.
     * Failures are printed to standard error and not propagated, so the
     * interactive loop in {@code main} keeps running.
     *
     * @param text the word to look for
     */
    private static void search(String text) {
        // SimpleAnalyzer lower-cases tokens at index time, while FuzzyQuery uses
        // its term verbatim; lower-case the input so both sides agree.
        String termText = text.toLowerCase(Locale.ROOT);

        Path indexPath = Paths.get(INDEX_DIR);
        try (Directory directory = FSDirectory.open(indexPath);
             IndexReader indexReader = DirectoryReader.open(directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            FuzzyQuery query = new FuzzyQuery(new Term(FIELD_CONTENTS, termText), MAX_EDITS);
            TopDocs topDocs = indexSearcher.search(query, MAX_RESULTS);

            if (topDocs.totalHits > 0) {
                System.out.println("Found " + topDocs.totalHits + " result(s).");
                int rank = 0;
                for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                    Document document = indexSearcher.doc(scoreDoc.doc);
                    rank++;
                    System.out.println("Result #" + rank + " " + document.get(FIELD_FILE_NAME));
                }
            } else {
                System.out.println("No maches found!");
            }
        } catch (IOException | RuntimeException e) {
            e.printStackTrace();
        }
    }
}