Question

我是 Apache Lucene 的新手。我想用 Apache Lucene 写一段简单而高效的搜索代码，在许多文本文件中搜索某个单词。我希望得到该单词在每个文本文件中的位置，以及它前面和后面的单词，并且希望方法足够快。下面是我的代码，但它还无法找出搜索结果前后的单词。另外，从效率的角度看，这段代码写得好吗？先谢谢了。

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Locale;
import java.util.Scanner;

import org.apache.lucene.analysis.core.SimpleAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Console demo that indexes every regular file of a data directory with Apache Lucene and then
 * lets the user run fuzzy single-word searches against the "contents" field.
 *
 * <p>The index is rebuilt from scratch on every start. Each file becomes one document with two
 * stored text fields: "FileName" and "contents".
 */
public class Main {

    /** Location of the on-disk Lucene index; shared by indexing and searching. */
    private static final String INDEX_PATH = "C:\\Users\\Yasser\\Desktop\\index";

    /** Directory whose files are indexed. */
    private static final String DATA_PATH = "C:\\seminar\\test_data";

    /** Name of the stored field holding the file name. */
    private static final String FIELD_FILE_NAME = "FileName";

    /** Name of the stored field holding the file text. */
    private static final String FIELD_CONTENTS = "contents";

    /** Maximum edit distance accepted by the fuzzy query. */
    private static final int MAX_EDITS = 2;

    /** Maximum number of hits listed per search. */
    private static final int MAX_RESULTS = 10;

    /**
     * Rebuilds the index, then repeatedly reads a word from standard input and searches for it
     * until the user answers "n"/"N" to the "Search again?" prompt or standard input ends.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        indexDirectory();

        // Deliberately not closed: closing the Scanner would also close System.in.
        Scanner getInputWord = new Scanner(System.in);

        System.out.print("Please type a word to search for.\nInput: ");
        if (!getInputWord.hasNext()) {
            return; // end of input: nothing to search for
        }
        search(getInputWord.next());

        System.out.print("Search again? (type Y or N) Y = Yes, N = No: ");
        // Any answer other than "n"/"N" counts as "yes"; end of input counts as "no".
        while (getInputWord.hasNext() && !getInputWord.next().equalsIgnoreCase("n")) {
            System.out.print("Input: ");
            if (!getInputWord.hasNext()) {
                return;
            }
            search(getInputWord.next());
            System.out.print("Search again? (Y or N): ");
        }
    }

    /**
     * Replaces the content of the index at {@link #INDEX_PATH} with one document per regular
     * file found directly inside {@link #DATA_PATH}.
     *
     * <p>Failures are reported on the console and do not propagate; in that case the previously
     * committed index is left untouched because nothing is committed before all files were added.
     */
    private static void indexDirectory() {
        File[] files = new File(DATA_PATH).listFiles();
        if (files == null) {
            // listFiles() returns null when the path is not a directory or cannot be read.
            System.err.println("Cannot list files in " + DATA_PATH + "; nothing was indexed.");
            return;
        }

        // commitOnClose=false: if anything below throws, close() discards the half-built index
        // instead of committing the deleteAll() together with a partial set of documents.
        IndexWriterConfig config =
                new IndexWriterConfig(new SimpleAnalyzer()).setCommitOnClose(false);

        try (Directory directory = FSDirectory.open(Paths.get(INDEX_PATH));
                IndexWriter indexWriter = new IndexWriter(directory, config)) {
            indexWriter.deleteAll();
            for (File file : files) {
                if (!file.isFile()) {
                    continue; // sub-directories cannot be opened as a stream
                }
                Document doc = new Document();
                doc.add(new TextField(FIELD_FILE_NAME, file.getName(), Store.YES));
                doc.add(new TextField(FIELD_CONTENTS, readContents(file), Store.YES));
                indexWriter.addDocument(doc);
            }
            indexWriter.commit();
        } catch (IOException | RuntimeException e) {
            System.err.println("Indexing failed: " + e);
            e.printStackTrace();
            return;
        }
        System.out.println("indexing finished");
    }

    /**
     * Reads a whole text file, decoding it as UTF-8 and terminating every line with "\n".
     *
     * @param file the file to read
     * @return the file text with normalised line endings
     * @throws IOException if the file cannot be opened or read
     */
    private static String readContents(File file) throws IOException {
        StringBuilder contents = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                contents.append(line).append('\n');
            }
        }
        return contents.toString();
    }

    /**
     * Runs a fuzzy search for {@code text} in the "contents" field and prints the total hit
     * count followed by the file names of at most {@link #MAX_RESULTS} best hits.
     *
     * <p>Failures are reported on the console and do not propagate, so the interactive loop in
     * {@link #main(String[])} keeps running.
     *
     * @param text the word to look for
     */
    private static void search(String text) {
        // FuzzyQuery does not run the analyzer, while SimpleAnalyzer lower-cased every token at
        // index time; lower-case the input so that e.g. "HELLO" can still match "hello".
        String term = text.toLowerCase(Locale.ROOT);

        try (Directory directory = FSDirectory.open(Paths.get(INDEX_PATH));
                IndexReader indexReader = DirectoryReader.open(directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            FuzzyQuery query = new FuzzyQuery(new Term(FIELD_CONTENTS, term), MAX_EDITS);
            TopDocs topDocs = indexSearcher.search(query, MAX_RESULTS);

            if (topDocs.totalHits > 0) {
                System.out.println("Found " + topDocs.totalHits + " result(s).");
                int rank = 0;
                for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                    Document document = indexSearcher.doc(scoreDoc.doc);
                    rank++;
                    System.out.println("Result #" + rank + " " + document.get(FIELD_FILE_NAME));
                }
            } else {
                System.out.println("No maches found!");
            }
        } catch (IOException | RuntimeException e) {
            System.err.println("Search for \"" + text + "\" failed: " + e);
            e.printStackTrace();
        }
    }
}

Apache Lucene 高效的搜索方式

0 个答案: