为什么Lucene.NET在索引大文件时会导致OutOfMemoryException?

时间:2014-01-27 12:57:12

标签: lucene.net lucene

我已为IndexWriter添加了上面提到的代码。

我已经设置了

writer.SetRAMBufferSizeMB(32);
writer.MergeFactor = 1000;
writer.SetMaxFieldLength(Int32.MaxValue);
writer.UseCompoundFile = false;

避免OutOfMemoryException(OOMException)的所有属性。

此处的代码writer.AddDocument(document);显示了OOM异常。

你能指导我为什么我这个错误吗? 任何人都可以帮我解决这个问题吗?

我的机器配置:
系统类型:64位操作系统 RAM:4 GB(可用3.86 GB)
处理器:Intel i5 - 3230M CPU @ 2.60GHz

using System;
using System.Data.SqlClient;
using Lucene.Net.Documents;
using System.Data;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Search;
using Lucene.Net.Store;
using Lucene.Net.QueryParsers;

namespace ConsoleApplication1
{
    class Program
    {
        static String searchTerm = "";
        static void Main(string[] args) {
            /**
             * This will create dataset according to
             * connectingString and query
             **/
            Console.WriteLine("Connecting to Sql database server.");
            String connectionString = "Data Source=proxy-pc;Initial Catalog=Snomed; User          ID=SA;password=admin";
            String query = "SELECT * FROM DESCRIPTION";
            String INDEX_DIRECTORY = "c:\\DatabaseIndex";

            Console.WriteLine("Creating dataset.");
            DataSet dataSet = createDataset(connectionString, query);
            Console.WriteLine("Created dataset successfully.");

            Console.WriteLine("Creating document.");
            Document document = createDocument(dataSet);
            Console.WriteLine("Created document successfully.");

            var version = Lucene.Net.Util.Version.LUCENE_30;
            var length = Lucene.Net.Index.IndexWriter.MaxFieldLength.LIMITED;
            Lucene.Net.Analysis.Standard.StandardAnalyzer analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(version);
            Lucene.Net.Store.Directory directory = Lucene.Net.Store.FSDirectory.Open(new System.IO.DirectoryInfo(@INDEX_DIRECTORY));
            Lucene.Net.Index.IndexWriter writer = new Lucene.Net.Index.IndexWriter(directory, analyzer, length);
            writer.SetMergeScheduler(new Lucene.Net.Index.SerialMergeScheduler());
            writer.SetRAMBufferSizeMB(32);
            writer.MergeFactor = 1000;
            writer.SetMaxFieldLength(Int32.MaxValue);
            writer.UseCompoundFile = false;
            Console.WriteLine("Before Adding document");
            **writer.AddDocument(document); **
             Console.WriteLine("Indexing...");
            writer.Optimize();
            writer.Dispose();
            Console.WriteLine("Indexing finished");

            if (searchTerm == "")
            {
                searchTerm = "(keyword)";
            }

            Console.WriteLine("Searching '" + searchTerm + "'...");

            var occurance = searchKeyword(INDEX_DIRECTORY, version, searchTerm);

            if (occurance != -1)
            {
                Console.WriteLine("Your search found : " + occurance);
            }
            else
            {
                Console.WriteLine("Invalid index directory.");
            }

            Console.Read();
        }

        /**
         *   Method works as a searcher
        **/
        private static int searchKeyword(String index_Directory_Path, Lucene.Net.Util.Version version, String searchWord) {
            if (index_Directory_Path != null)
            {
                var standAnalyzer = new StandardAnalyzer(version);
                IndexSearcher searcher = new IndexSearcher(FSDirectory.Open(index_Directory_Path));

                // parse the query, "term" is the default field to search
                var parser = new QueryParser(version, "term", standAnalyzer);
                Query searchQuery = parser.Parse(searchWord);

                // search
                TopDocs hits = searcher.Search(searchQuery, 100);
                var total = hits.TotalHits;
                return total;
            }

            else
            {
                return -1;
            }
        }

        static DataSet createDataset(String connectionString, String query) {
            DataSet ds = new DataSet();

            using (SqlConnection connection = new SqlConnection(connectionString))
            using (SqlCommand command = new SqlCommand(query, connection))
            using (SqlDataAdapter adapter = new SqlDataAdapter(command))
            {
                adapter.Fill(ds);
            }

            return ds;
        }

        static Lucene.Net.Documents.Document createDocument(DataSet dataSet) {
            Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
            using (dataSet)
            {
                foreach (DataTable table in dataSet.Tables)
                {
                    foreach (DataRow row in table.Rows)
                    {
                        String id = row["id"].ToString();
                        String rTime = row["rTime"].ToString();
                        String active = row["active"].ToString();
                        String mId = row["mId"].ToString();
                        String cId = row["cId"].ToString();
                        String lCode = row["lCode"].ToString();
                        String tId = row["tId"].ToString();
                        String detail = row["detail"].ToString();
                        String sId = row["sId"].ToString();

                        Field idField = new Field("id", id, Field.Store.YES, Field.Index.ANALYZED);
                        Field rTimeField = new Field("rTime", rTime, Field.Store.YES, Field.Index.ANALYZED);
                        Field activeField = new Field("active", active, Field.Store.YES, Field.Index.ANALYZED);
                        Field mIdField = new Field("mId", mId, Field.Store.YES, Field.Index.ANALYZED);
                        Field cIdField = new Field("cId", cId, Field.Store.YES, Field.Index.ANALYZED);
                        Field lCodeField = new Field("lCode", lCode, Field.Store.YES, Field.Index.ANALYZED);
                        Field tIdField = new Field("tId", tId, Field.Store.YES, Field.Index.ANALYZED);
                        Field detailField = new Field("detail", detail, Field.Store.YES, Field.Index.ANALYZED);
                        Field sIdField = new Field("sId", sId, Field.Store.YES, Field.Index.ANALYZED);

                        doc.Add(idField);
                        doc.Add(rTimeField);
                        doc.Add(activeField);
                        doc.Add(mIdField);
                        doc.Add(cIdField);
                        doc.Add(lCodeField);
                        doc.Add(tIdField);
                        doc.Add(detailField);
                        doc.Add(sIdField);
                    }
                }
            }

            return doc;
        }
    }
}

1 个答案:

答案 0 :(得分:1)

看起来您将整个数据库添加为单个文档。

您是否尝试将每行添加为单独的文档?您也许可以将“createDocument”更改为“createDocuments”并每行生成一个Lucene.Net文档。这会使你当前的大部分代码保持不变......

希望这有帮助,