索引文件时出错

时间:2014-02-11 11:44:02

标签: java apache indexing lucene

package lia.meetlucene;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Version;

import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.io.FileReader;

public class Indexer {

  public static void main(String[] args) throws Exception {
    if (args.length != 2) {
      throw new IllegalArgumentException("Usage: java " + Indexer.class.getName()
        + " <index dir> <data dir>");
    }
    String indexDir = args[0];         //1
    String dataDir = args[1];          //2

    long start = System.currentTimeMillis();
    Indexer indexer = new Indexer(indexDir);
    int numIndexed;
    try {
      numIndexed = indexer.index(dataDir);
    } finally {
      indexer.close();
    }
    long end = System.currentTimeMillis();

    System.out.println("Indexing " + numIndexed + " files took "
      + (end - start) + " milliseconds");
  }

  private IndexWriter writer;

  public Indexer(String indexDir) throws IOException {
    Directory dir = FSDirectory.open(new File(indexDir));
    writer = new IndexWriter(dir,            //3
                 new StandardAnalyzer(       //3
                     Version.LUCENE_30),//3
                 true,                       //3
                             IndexWriter.MaxFieldLength.UNLIMITED); //3
  }

  public void close() throws IOException {
    writer.close();                             //4
  }

  public int index(String dataDir)
    throws Exception {
try{
    File[] files = new File(dataDir).listFiles();

    for (File f: files) {                
    ************************************************        
         if(f.isDirectory())           // I added this if block which is causing error
        {
            index(dataDir);
        }
    ************************************************
       else if (!f.isDirectory() &&
          !f.isHidden() &&
          f.exists() &&
          f.canRead()
          ) {
        indexFile(f);
      }
    }
}
      catch (IOException e) {
            e.printStackTrace();
        }
    return writer.numDocs();                     //5
  }


  protected Document getDocument(File f) throws Exception {
    Document doc = new Document();
    doc.add(new Field("contents", new FileReader(f)));      //7
    doc.add(new Field("filename", f.getName(),              //8
                Field.Store.YES, Field.Index.NOT_ANALYZED));//8
    doc.add(new Field("fullpath", f.getCanonicalPath(),     //9
                Field.Store.YES, Field.Index.NOT_ANALYZED));//9
    return doc;
  }

  private void indexFile(File f) throws Exception {
    System.out.println("Indexing " + f.getCanonicalPath());
    Document doc = getDocument(f);
    writer.addDocument(doc);                              //10
  }
}

这是《Lucene in Action》一书中提供的程序。它只索引父文件夹中的文件,不索引子文件夹中的文件。所以我添加了一个 if 块来递归查找子文件夹中的文件。但是运行这个程序之后,它会创建 write.lock 文件,并且即使关闭命令提示符后仍在继续创建索引文件。代码有什么问题?

我刚接触 Lucene Java。之前我尝试使用 Apache Commons 来查找子文件夹,但遇到了“包不存在”的错误(package org.apache.commons.io does not exist)。

1 个答案:

答案 0 :(得分:0)

是的,它会一直运行下去,因为你每次递归传入的都是同一个路径。因此程序永远执行不到 close() 方法,这就是 write.lock 一直存在的原因。

这是您当前的代码。

if(f.isDirectory())           
{
     index(dataDir); // dataDir is the original path
}

你要做的就是这样:

if(f.isDirectory())           
{
     index(f.getAbsolutePath());
}