如何在Lucene代码中实现与Luke等价的搜索

时间:2019-12-15 22:38:10

标签: java lucene luke

我正在使用Lucene 8.3,并试图先为CSV文档建立索引,然后对其进行搜索。我的大部分代码直接取自Lucene的官方演示,但仍然搜索不到任何结果。

我觉得问题出在查询或字段类型上,但我不确定,因为完全相同的查询在Luke中可以正常工作。在Luke中也可以看到所有字段和字段值都已被正确索引。

例如,在Luke中搜索mu*会返回106次匹配的正确结果,但我的代码中却有0次匹配。

LukeResults

我在这里做错了什么?

package CSVParsing;

import java.io.*;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.*;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.jetbrains.annotations.NotNull;

/**
 * Small driver that indexes a CSV file with Lucene and then runs a single
 * wildcard query against the freshly built index.
 *
 * <p>The first line of the CSV is used as the list of field names; every
 * following line becomes one {@link Document} whose fields are the
 * comma-separated values of that line.
 */
public class LuceneTester {

    public static final String FILE_NAME = "SLA_Classroom_Schedules_Fall_2019.csv";
    public static final String FILE_PATH = "src/main/resources/";
    public static final int MAX_SEARCH = 10;

    /** Upper bound on the number of hits requested from the searcher. */
    private static final int MAX_HITS = 763;

    public static void main(String[] args) {
        LuceneTester tester = new LuceneTester();
        tester.createIndex();
    }

    /**
     * Rebuilds the index from the CSV file, then searches the {@code Subject}
     * field for {@code mu*} and prints the number of hits.
     *
     * <p>I/O and query-parse failures are reported on standard error and are
     * not propagated to the caller.
     */
    private void createIndex() {
        // NOTE(review): the index is written into FILE_PATH, the same directory
        // that holds the CSV input; consider a dedicated index directory.
        try (Analyzer analyzer = new StandardAnalyzer();
             Directory dir = FSDirectory.open(Paths.get(FILE_PATH))) {

            IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
            // CREATE replaces any existing index, so no explicit deleteAll() is needed.
            iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

            // try-with-resources guarantees the writer (and its write lock) is
            // released even when adding a document fails.
            try (IndexWriter writer = new IndexWriter(dir, iwc)) {
                for (Document doc : indexCSVDoc(Paths.get(FILE_PATH + FILE_NAME))) {
                    writer.addDocument(doc);
                }
                writer.commit();
            }

            // Reuse the already-open directory instead of opening a second one.
            try (IndexReader reader = DirectoryReader.open(dir)) {
                IndexSearcher searcher = new IndexSearcher(reader);

                // QUERY
                Query query = new QueryParser("Subject", analyzer).parse("mu*");

                // SEARCH
                TopDocs topDocs = searcher.search(query, MAX_HITS);
                ScoreDoc[] hits = topDocs.scoreDocs;
                System.out.println("Found " + hits.length + " hits.");
            }

        } catch (IOException | ParseException e) {
            System.err.println("Indexing or searching failed: " + e);
        }
    }

    /**
     * Reads the CSV file and converts every data line into a document.
     *
     * <p>NOTE(review): lines are split on every comma, so quoted values that
     * contain commas are not handled.
     *
     * @param file path of the CSV file; its first line supplies the field names
     * @return the documents built from the data lines; empty if the file has no
     *         header line, and possibly partial if reading fails midway
     */
    @NotNull
    private ArrayList<Document> indexCSVDoc(@NotNull Path file) {
        ArrayList<Document> indexedDocs = new ArrayList<>();
        try (BufferedReader fileReader = Files.newBufferedReader(file)) {
            String header = fileReader.readLine();
            if (header == null) {
                // Empty file: nothing to index.
                return indexedDocs;
            }
            String[] fields = header.split("[,]");

            for (String line = fileReader.readLine(); line != null; line = fileReader.readLine()) {
                if (line.trim().isEmpty()) {
                    // Blank lines carry no record.
                    continue;
                }
                indexedDocs.add(indexCSVLine(fields, line.split("[,]")));
            }
        } catch (IOException e) {
            System.err.println("Failed to read " + file + ": " + e);
        }

        return indexedDocs;
    }

    /**
     * Builds one document from one CSV row.
     *
     * @param fields      field names taken from the header line
     * @param lineToIndex values of the row, in header order
     * @return a document with one stored {@link TextField} per column that is
     *         present in both arrays
     */
    @NotNull
    private Document indexCSVLine(@NotNull String [] fields, @NotNull String [] lineToIndex) {
        Document doc = new Document();
        // String.split drops trailing empty strings, so a row may have fewer
        // values than the header has names; only index the columns present.
        int columns = Math.min(fields.length, lineToIndex.length);
        for (int i = 0; i < columns; i++) {
            doc.add(new TextField(fields[i], lineToIndex[i], Field.Store.YES));
        }

        return doc;
    }
}

0 个答案:

没有答案