我创建了500个文件副本,并用 Apache Lucene 为它们全部建立了索引。这些文件依次命名为 "0.txt"、"1.txt"、"2.txt" …… "499.txt"。当我搜索某个特定单词时,结果只返回了 0 到 9 这 10 个文件,而它本应返回 0 到 499 全部 500 个文件。
这是我的索引器:
/**
 * Adds plain files to a Lucene index stored in a directory on disk.
 *
 * <p>Every indexed file becomes one document with three fields: the file
 * contents (supplied to Lucene through a reader), the file name, and the
 * canonical file path. Name and path are stored and indexed without analysis.
 */
public class Indexer {
    private final IndexWriter writer;

    /**
     * Opens an index writer on the given directory, creating the directory
     * (and any missing parents) first.
     *
     * @param indexDirectoryPath path of the directory that holds the index
     * @throws IOException if the directory or the index writer cannot be opened
     */
    public Indexer(String indexDirectoryPath) throws IOException {
        File indexLocation = new File(indexDirectoryPath);
        indexLocation.mkdirs();
        Directory indexDirectory = FSDirectory.open(indexLocation);
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, analyzer);
        writer = new IndexWriter(indexDirectory, config);
    }

    /**
     * Closes the underlying index writer.
     *
     * @throws CorruptIndexException if the index is corrupt
     * @throws IOException on a low-level I/O failure
     */
    public void close() throws CorruptIndexException, IOException {
        writer.close();
    }

    /**
     * Builds the document for one file.
     *
     * @param file     the file being indexed (used for its name and canonical path)
     * @param contents an open reader over the file contents; owned by the caller
     * @return a document with the contents, file-name and file-path fields
     * @throws IOException if the canonical path cannot be resolved
     */
    private Document getDocument(File file, FileReader contents) throws IOException {
        Document document = new Document();
        document.add(new Field(LuceneConstants.CONTENTS, contents));
        document.add(new Field(LuceneConstants.FILE_NAME, file.getName(),
                Field.Store.YES, Field.Index.NOT_ANALYZED));
        document.add(new Field(LuceneConstants.FILE_PATH, file.getCanonicalPath(),
                Field.Store.YES, Field.Index.NOT_ANALYZED));
        return document;
    }

    /**
     * Indexes a single file as one document.
     *
     * @param file the file to add to the index
     * @throws IOException if the file cannot be opened or read, or the index
     *                     cannot be written
     */
    public void indexFile(File file) throws IOException {
        // NOTE(review): FileReader decodes with the platform default charset;
        // confirm that matches the encoding of the indexed files.
        FileReader contents = new FileReader(file);
        try {
            writer.addDocument(getDocument(file, contents));
        } finally {
            // Release the file handle even when building or adding the document
            // fails. Closing a reader that is already closed has no effect, so
            // this is safe if the reader was closed while being consumed.
            contents.close();
        }
    }
}
这是我的搜索器:
/**
 * Runs text queries against the contents field of a Lucene index on disk.
 */
public class Searcher {
    IndexSearcher indexSearcher;
    QueryParser queryParser;
    // Most recently parsed query; overwritten on every search call.
    Query query;

    /**
     * Opens a searcher on the index in the given directory and prepares a
     * query parser for the contents field.
     *
     * @param indexDirectoryPath path of the directory that holds the index
     * @throws IOException if the index cannot be opened
     */
    @SuppressWarnings("deprecation")
    public Searcher(String indexDirectoryPath) throws IOException {
        File indexLocation = new File(indexDirectoryPath);
        Directory indexDirectory = FSDirectory.open(indexLocation);
        indexSearcher = new IndexSearcher(indexDirectory);
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        queryParser = new QueryParser(Version.LUCENE_36, LuceneConstants.CONTENTS, analyzer);
    }

    /**
     * Searches with the default hit limit, {@code LuceneConstants.MAX_SEARCH}.
     *
     * <p>At most that many hits are returned, however many documents match.
     * Use {@link #search(String, int)} to choose the limit per call.
     *
     * @param searchQuery raw text to search for; query syntax characters are escaped
     * @return the top hits, at most {@code LuceneConstants.MAX_SEARCH} of them
     * @throws IOException on a low-level I/O failure
     * @throws ParseException if the escaped text still cannot be parsed
     */
    public TopDocs search(String searchQuery) throws IOException,
            ParseException {
        return search(searchQuery, LuceneConstants.MAX_SEARCH);
    }

    /**
     * Searches and returns at most {@code maxHits} hits.
     *
     * @param searchQuery raw text to search for; query syntax characters are escaped
     * @param maxHits     upper bound on the number of hits returned; presumably
     *                    must be positive (TODO confirm against the Lucene version in use)
     * @return the top hits, at most {@code maxHits} of them
     * @throws IOException on a low-level I/O failure
     * @throws ParseException if the escaped text still cannot be parsed
     */
    public TopDocs search(String searchQuery, int maxHits) throws IOException,
            ParseException {
        String escaped = QueryParser.escape(searchQuery);
        query = queryParser.parse(escaped);
        return indexSearcher.search(query, maxHits);
    }

    /**
     * Loads the stored document that a hit refers to.
     *
     * @param scoreDoc a hit taken from a previous search result
     * @return the document with that hit's document id
     * @throws CorruptIndexException if the index is corrupt
     * @throws IOException on a low-level I/O failure
     */
    public Document getDocument(ScoreDoc scoreDoc)
            throws CorruptIndexException, IOException {
        return indexSearcher.doc(scoreDoc.doc);
    }

    /**
     * Closes the underlying index searcher.
     *
     * @throws IOException on a low-level I/O failure
     */
    public void close() throws IOException {
        indexSearcher.close();
    }
}
这是我的调用方式:
// Driver fragment: index 500 files, then search them and print the path of each hit.
// The opening `try {` that matches the catch below is not part of this excerpt.

// Index files/0.txt through files/499.txt, one document per file.
Indexer indexer = new Indexer(DIR_INDEX);
for (int i = 0; i < 500; i++) {
indexer.indexFile(new File("files/" + i + ".txt"));
}
// The writer is closed before the searcher is opened on the same index directory.
indexer.close();
Searcher searcher = new Searcher(DIR_INDEX);
// Searcher.search passes LuceneConstants.MAX_SEARCH as the hit limit, so
// hits.scoreDocs holds at most that many entries regardless of how many files match.
TopDocs hits = searcher.search("Saude");
for (ScoreDoc scoreDoc : hits.scoreDocs) {
org.apache.lucene.document.Document doc = searcher.getDocument(scoreDoc);
System.out.println(doc.get(LuceneConstants.FILE_PATH));
}
searcher.close();
// NOTE(review): if anything above throws, indexer/searcher are not closed on this path.
} catch (Exception e) {
e.printStackTrace();
}
答案 0 :(得分:0)
当你调用
indexSearcher.search(query, LuceneConstants.MAX_SEARCH);
时,第二个参数是返回的最大命中数。请检查 LuceneConstants.MAX_SEARCH 的值——它很可能是 10,所以无论有多少文件匹配,都只会返回 10 个结果。要得到全部 500 个文件,需要把这个值设为不小于 500。