目录 C:\logs 中有一些简单的 txt 日志文件。现在我们要为这些日志文件建立索引,然后用 regex 在索引中搜索。我的代码如下,它使用关键字 "Unknown error" 进行搜索。但是下面这行代码输出的是 "contents: null"。
System.out.println("contents: "+ dochit.get("content"));
有什么建议吗?
完整的代码:
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.IOException;
import java.util.Date;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
public class indexOOO {
public static void main(String[] args) throws Exception{
//fileDir is the directory that contains the text files to be indexed
File fileDir = new File("C:\\logs");
Analyzer luceneAnalyzer = new StandardAnalyzer(Version.LUCENE_45);
IndexWriterConfig indexwriterconfig = new IndexWriterConfig(Version.LUCENE_45, luceneAnalyzer);
//indexDir is the directory that hosts Lucene's index files
Directory indexDir = FSDirectory.open(new File("C:\\logs\\index"));
IndexWriter indexwriter = new IndexWriter(indexDir,indexwriterconfig);
File[] textFiles = fileDir.listFiles();
long startTime = new Date().getTime();
//Add documents to the index
for(int i = 0; i < textFiles.length; i++){
if(textFiles[i].isFile() && (textFiles[i].getName().endsWith(".log") || textFiles[i].getName().endsWith(".txt"))) {
System.out.println("File " + textFiles[i].getCanonicalPath()
+ " is being indexed");
FileReader textReader = new FileReader(textFiles[i]);
Document document = new Document();
document.add(new TextField("content",textReader));
document.add(new TextField("path",textFiles[i].getPath(),Field.Store.YES));
indexwriter.addDocument(document);
}
}
indexwriter.close();
long endTime = new Date().getTime();
System.out.println("It took " + (endTime - startTime)
+ " milliseconds to create an index for the files in the directory "
+ fileDir.getPath());
//2. search...
int hitsPerPage=10;
IndexReader reader = null;
try{
reader = DirectoryReader.open(indexDir);
} catch (IOException e) {
e.printStackTrace();
}
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_45);
//if argument is inputed, use it, otherwise search with the keyword "Unknown error" below
String queryString = args.length > 0 ? args[0] : "Unknown error";
Query query = new QueryParser(Version.LUCENE_45, "content", analyzer).parse(queryString);
System.out.println("Searching for:" + "content" + "->" + queryString);
IndexSearcher searcher = new IndexSearcher(reader);
TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
searcher.search(query,collector);
ScoreDoc[] hits = collector.topDocs().scoreDocs;
// 3. display results
if (hits.length> 0) {
System.out.println("Found: " + hits.length + " results!");
for (int i = 0; i < hits.length; i++) {//output
int docId = hits[i].doc;
Document dochit = searcher.doc(docId);
System.out.println("contents: "+ dochit.get("content"));
}
} else{
System.out.println("0 result!");
}
reader.close();
}
}
答案 0 :(得分:0)
用 Reader 构造的 TextField 不会被存储。未存储的字段可以被搜索,但无法从索引中取回其内容。如果您希望能够从索引中取回 content 字段,就必须像 "path" 字段那样将其存储(Field.Store.YES)。不过,这段代码的设计意图看起来是从索引中取出路径,再根据该路径从文件本身读取内容。