我正在使用Lucene 8.3,并试图建立索引,然后搜索CSV文档。我已经将大部分代码直接从Lucene的官方演示中删除,但仍然无法进行任何搜索。
我觉得问题出在查询或字段类型中,但是我不确定,因为确切的查询在Luke中可以正常工作。 Luke还为所有正确的字段和字段值建立了索引。
例如,在Luke中搜索mu*
会返回106次匹配的正确结果,但我的代码中却有0次匹配。
我在这里做什么错了?
package CSVParsing;
import java.io.*;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.*;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.jetbrains.annotations.NotNull;
public class LuceneTester {
public static final String FILE_NAME = "SLA_Classroom_Schedules_Fall_2019.csv";
public static final String FILE_PATH = "src/main/resources/";
public static final int MAX_SEARCH = 10;
public static void main(String[] args) {
LuceneTester tester = new LuceneTester();
tester.createIndex();
}
private void createIndex() {
try {
Directory dir = FSDirectory.open(Paths.get(FILE_PATH));
Analyzer analyzer = new StandardAnalyzer();
IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
IndexWriter writer = new IndexWriter(dir, iwc);
writer.deleteAll();
writer.commit();
for (Document doc : indexCSVDoc(Paths.get(FILE_PATH + FILE_NAME))) {
writer.addDocument(doc);
}
writer.commit();
writer.close();
IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(FILE_PATH)));
IndexSearcher searcher = new IndexSearcher(reader);
// QUERY
Query query = new QueryParser("Subject", analyzer).parse("mu*");
// SEARCH
TopDocs topDocs = searcher.search(query, 763);
ScoreDoc[] hits = topDocs.scoreDocs;
System.out.println("Found " + hits.length + " hits.");
} catch (IOException | ParseException e) {
System.out.println(e);
}
}
@NotNull
private ArrayList<Document> indexCSVDoc(@NotNull Path file) {
ArrayList<Document> indexedDocs = new ArrayList<>();
try {
BufferedReader fileReader = Files.newBufferedReader(file);
String [] fields = fileReader.readLine().split("[,]");
for (String line = fileReader.readLine(); line != null; line = fileReader.readLine()) {
indexedDocs.add(indexCSVLine(fields, line.split("[,]")));
}
fileReader.close();
} catch (IOException e) {
System.out.println(e);
}
return indexedDocs;
}
@NotNull
private Document indexCSVLine(@NotNull String [] fields, @NotNull String [] lineToIndex) {
Document doc = new Document();
for (int i = 0; i < fields.length; i++) {
doc.add(new TextField(fields[i], lineToIndex[i], Field.Store.YES));
}
return doc;
}
}