Apache Lucene 搜索代码打印出的文件名为 null

时间:2013-03-08 09:39:05

标签: java java-ee lucene

 Directory directory = FSDirectory.open(indexDir);
        IndexReader reader = DirectoryReader.open(directory);
        IndexSearcher searcher = new IndexSearcher(reader);

        // Parse queryStr against the "contents" field with a StandardAnalyzer.
        QueryParser parser = new QueryParser(
                Version.LUCENE_41, "contents", new StandardAnalyzer(Version.LUCENE_41));
        Query query = parser.parse(queryStr);
        System.out.println("Searching for: " + query.toString("contents"));

        // Ask the searcher for at most maxHits results.
        TopDocs results = searcher.search(query, maxHits);
        ScoreDoc[] hits = results.scoreDocs;

        System.out.println("\n\n\n-----------------------Results--------------------------\n\n\n");
        System.out.println(results.totalHits + " total matching documents");

        // Print whatever d.get("filename") yields for each returned hit, numbered
        // from 0; a null value is concatenated as the text "null".
        int position = 0;
        for (ScoreDoc hit : hits) {
            Document d = searcher.doc(hit.doc);
            System.out.println(position + ":File name is" + d.get("filename"));
            position++;
        }

        System.out.println("Found " + hits.length);

我在搜索模块中使用了上述代码。代码可以正常运行,但我得到了如下输出:

390:File name is/home/maclean/Installations/apache-tomcat-7.0.21/logs/localhost_access_log.2012-12-31.txt
391:File name isnull
392:File name isnull
393:File name isnull
394:File name isnull
395:File name isnull
396:File name isnull
397:File name isnull
398:File name isnull
399:File name isnull
400:File name is/home/maclean/Installations/apache-tomcat-7.0.21/logs/localhost_access_log.2013-01-09.txt
401:File name isnull
402:File name isnull
403:File name isnull
404:File name isnull
405:File name isnull
406:File name isnull
407:File name isnull
408:File name isnull
409:File name isnull
410:File name is/home/maclean/Installations/apache-tomcat-7.0.21/logs/localhost_access_log.2013-01-10.txt

这里我只想打印包含查询字符串的文件名,但是得到的结果太多,而且大多数结果的文件名都是 null。为什么会发生这种情况呢?

我使用以下代码建立索引:

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;



public class SimpleFileIndexer {

    public static void main() throws Exception {

        File dataDir = new File("/home/maclean/Installations/apache-tomcat-7.0.21/logs");
        File indexDir = new File("/home/maclean/NetBeansProjects/LogSearchEngine/Result");

        SimpleFileIndexer indexer = new SimpleFileIndexer();

        int numIndex = indexer.index(indexDir, dataDir);

        System.out.println("Total files indexed " + numIndex);

    }

    private int index(File indexDir, File dataDir) throws Exception {
    // API and code to convert text into indexable/searchable tokens.
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_41);
    //To store an index on disk
    Directory directory = FSDirectory.open(indexDir);
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_41, analyzer);
        int numIndexed;
        try (IndexWriter indexWriter = new IndexWriter(directory, config)) {
            indexDirectory(indexWriter, dataDir);
            numIndexed = indexWriter.maxDoc();
            indexWriter.close();

        }

        return numIndexed;


    }

    private void indexDirectory(IndexWriter indexWriter, File dataDir) throws IOException {

        File[] files = dataDir.listFiles();
        for (int i = 0; i < files.length; i++) {
            File f = files[i];
            if (f.isDirectory()) {
                indexDirectory(indexWriter, f);
            }
            else {
                indexFileWithIndexWriter(indexWriter, f);
            }
        }

    }

    private void indexFileWithIndexWriter(IndexWriter indexWriter, File file) throws IOException {

        FileInputStream fis = null;
        if (file.isHidden() || file.isDirectory() || !file.canRead() || !file.exists()) {
            return;
        }

        System.out.println("Indexing file " + file.getCanonicalPath());

        try {
          fis = new FileInputStream(file);
        } catch (FileNotFoundException fnfe) {
          System.out.println("File Not Found"+fnfe);

       }

        Document doc = new Document();
        doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));
        doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));

        if (indexWriter.getConfig().getOpenMode() == OpenMode.CREATE) {
          // New index, so we just add the document (no old document can be there):
           System.out.println("adding " + file);
          indexWriter.addDocument(doc);
       } else {
          // Existing index (an old copy of this document may have been indexed) so 
       // we use updateDocument instead to replace the old one matching the exact 
           // path, if present:
            System.out.println("updating " + file);
            indexWriter.updateDocument(new Term("path", file.getPath()), doc);
          }


         fis.close();




    }

}*

1 个答案:

答案 0 :(得分:1)

hits数组比numTotalHits长,所以你的for循环限制应该是numTotalHits而不是hits.length。