使用Lucene索引和搜索日志文件 - 用作grep?

时间:2013-11-19 08:34:10

标签: regex apache search lucene indexing

目录 C:\logs 中有一些简单的 txt 日志文件。现在我们要为这些日志文件建立索引,然后使用 regex 搜索该索引。我的代码如下,它使用关键字“Unknown error”进行搜索。但是下面这行代码输出的是“contents: null”。

 System.out.println("contents: "+ dochit.get("content"));

有何评论?

完整的代码:

 import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileReader;
 import java.io.InputStreamReader;
 import java.io.Reader;
 import java.io.IOException;
 import java.util.Date;

 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.StoredField;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.util.Version;
 import org.apache.lucene.search.TopScoreDocCollector;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.queryparser.classic.QueryParser;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreDoc;

 /**
  * Indexes the .log / .txt files found directly inside C:\logs into a Lucene
  * index stored at C:\logs\index, then runs a single query against the
  * "content" field and prints the hits.
  */
 public class indexOOO {

   /**
    * Builds the index and runs one search.
    *
    * @param args optional; args[0] is used as the query string, otherwise
    *             "Unknown error" is searched for
    * @throws Exception if the log directory cannot be listed, or if indexing,
    *                   query parsing or searching fails
    */
   public static void main(String[] args) throws Exception{
     //fileDir is the directory that contains the text files to be indexed
     File fileDir  = new File("C:\\logs");

     // listFiles() returns null (not an empty array) when fileDir does not
     // exist or is not a directory; fail with a clear message instead of an NPE.
     File[] textFiles  = fileDir.listFiles();
     if (textFiles == null) {
       throw new IOException("Cannot list files in directory " + fileDir.getPath());
     }

     Analyzer luceneAnalyzer = new StandardAnalyzer(Version.LUCENE_45);
     IndexWriterConfig indexwriterconfig = new IndexWriterConfig(Version.LUCENE_45, luceneAnalyzer);
     //indexDir is the directory that hosts Lucene's index files
     Directory indexDir = FSDirectory.open(new File("C:\\logs\\index"));

     //1. index...
     IndexWriter indexwriter = new IndexWriter(indexDir,indexwriterconfig);
     long startTime = new Date().getTime();
     try {
       //Add documents to the index
       for (int i = 0; i < textFiles.length; i++) {
         File textFile = textFiles[i];
         String name = textFile.getName();
         if (textFile.isFile() && (name.endsWith(".log") || name.endsWith(".txt"))) {
           System.out.println("File " + textFile.getCanonicalPath()
                  + " is being indexed");

           FileReader textReader = new FileReader(textFile);
           Document document = new Document();
           // A TextField built from a Reader is indexed (searchable) but NOT stored,
           // so its value cannot be read back from a search hit.
           document.add(new TextField("content",textReader));
           // "path" is stored, so it can be retrieved from a hit.
           document.add(new TextField("path",textFile.getPath(),Field.Store.YES));
           indexwriter.addDocument(document);
         }
       }
     } finally {
       // Always close the writer, even if indexing fails part-way.
       indexwriter.close();
     }
     long endTime = new Date().getTime();

     System.out.println("It took " + (endTime - startTime)
                + " milliseconds to create an index for the files in the directory "
                + fileDir.getPath());

     //2. search...
     int hitsPerPage=10;
     // Let an IOException propagate: continuing with a null reader would only
     // turn the real error into a NullPointerException further down.
     IndexReader reader = DirectoryReader.open(indexDir);
     try {
       //if argument is inputed, use it, otherwise search with the keyword "Unknown error" below
       String queryString = args.length > 0 ? args[0] : "Unknown error";
       // Parse the query with the same analyzer that was used for indexing.
       Query query = new QueryParser(Version.LUCENE_45, "content", luceneAnalyzer).parse(queryString);
       System.out.println("Searching for:" + "content" + "->" + queryString);

       IndexSearcher searcher = new IndexSearcher(reader);
       TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
       searcher.search(query,collector);
       ScoreDoc[] hits = collector.topDocs().scoreDocs;

       // 3. display results
       if (hits.length > 0) {
         System.out.println("Found: " + hits.length + " results!");
         for (int i = 0; i < hits.length; i++) {//output
           int docId = hits[i].doc;
           Document dochit = searcher.doc(docId);
           // "content" was not stored at index time, so get("content") yields null here.
           System.out.println("contents: "+ dochit.get("content"));
         }
       } else {
         System.out.println("0 result!");
       }
     } finally {
       reader.close();
     }
   }
 }

1 个答案:

答案 0 :(得分:0)

默认情况下,TextField 不会被存储。未存储的字段可以被搜索,但无法从索引中取回其内容。如果您希望能够从索引中取回 content 字段,就必须像“path”字段那样将其存储。不过,从代码来看,其设计意图似乎是从索引中取回路径,再通过该路径从文件本身读取内容。