为什么Lucene程序会退出OutOfMemoryError?

时间:2015-11-16 01:41:25

标签: memory lucene out-of-memory information-retrieval

请有人帮助我。当我使用一小部分数据时,我的程序运行正常,但是我的实际数据是40MB,它给出了这个错误:java.lang.OutOfMemoryError: Java heap space

这是我的代码;

import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field; 
import org.apache.lucene.document.StringField; 
import org.apache.lucene.document.TextField;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;


public class RechercheEngine {

    static  ArrayList<String> resultat = new ArrayList<String>();
    final static String file ="Ressources/file_collection.txt";
    static LireFichierCollection rf = new LireFichierCollection(); 
    static LireRequete ls = new LireRequete();

public static void indexerEtRechercherDocument( boolean exchange) throws IOException, org.apache.lucene.queryparser.classic.ParseException{


    System.out.println("Analyzing documents...");
    Directory directory = new RAMDirectory();
    Analyzer analyzer = new StandardAnalyzer();
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    IndexWriter indexWriter  =  new IndexWriter(directory, config);

    Document doc = new Document(); 
    rf.readFile(file);
    System.out.println("Indexing documents...");
        for(Map.Entry <String, String> entry : rf.hashMap.entrySet()) {
            doc.add(new StringField("DocKey",entry.getKey(), Field.Store.YES));
            Stemming st = new Stemming();
            if (exchange){    
                doc.add(new TextField("DocContent",  st.stemmingAvecStopWord(entry.getValue()), Field.Store.NO));
            }
            else{
                doc.add(new TextField("DocContent",  entry.getValue(), Field.Store.NO));
            }
            indexWriter.addDocument(doc);
        }
    indexWriter.close();
    System.out.println("Indexing documents done");

    ls.readList();
    System.out.println("Researching documents...");

        for(Map.Entry <String, String> entry : ls.map.entrySet()) {
            Stemming st = new Stemming();
            Query query;
            if (exchange){    
                query = new QueryParser("DocContent", analyzer).parse(st.stemmingAvecStopWord(entry.getValue()));
            }
            else{
                query = new QueryParser("DocContent", analyzer).parse(entry.getValue());
            }
            IndexReader reader=DirectoryReader.open(directory);
            IndexSearcher searcher = new IndexSearcher(reader);
            ScoreDoc[] hits = searcher.search(query, 2).scoreDocs;
            for (int i = 0; i < hits.length; i++) {
                int docId = hits[i].doc;
                Document hitDoc = searcher.doc(docId);
                 String docKey = hitDoc.get("DocKey");
                HashMap<String, String> X= new HashMap<String, String>();
                for (IndexableField field : hitDoc.getFields())
                {
                    X.put(field.name(), field.stringValue() );
                }

             resultat.add(entry.getKey()+ " " + docKey+ " " + hits[i].score);
            }
            reader.close();
        }
}

}

0 个答案:

没有答案