OrientDB1.0.1 - 无法处理3400万条记录

时间:2017-05-23 12:24:27

标签: orientdb

我们正在使用OrientDB 1.0.1,我需要处理一个包含3400万条记录的类/表。在我的8 GB内存的机器上,插入大约100万条记录后就会得到 “Exception in thread "main" java.lang.OutOfMemoryError: GC overhead limit exceeded” 错误,此时96%的物理内存被占用。将JVM堆大小增加到8 GB后可以处理约200万条记录,但之后仍然失败。我换用16 GB内存的机器,处理了380万条记录后又出现了同样的问题。我需要处理全部3400万条记录并找出唯一的ID。原因似乎很明显:当OrientDB的缓冲区被填满时就会出现这个问题。

// Pages through Table1 10,000 rows at a time and collects the distinct ids.
// Fixes vs. the original post: the SKIP concatenation was missing its closing
// quote (broken string literal), the last query referenced an undeclared
// variable "idsQueryStr", and the collections used raw types.
int skipRecordCount = 0;
String queryStr = "select id from Table1 WHERE id is not null SKIP " + Integer.toString(skipRecordCount) + " LIMIT 10000";
Set<String> uniqueIdsSet = new TreeSet<String>();
List<ODocument> idsResult = odb.db.query(new OSQLSynchQuery<ODocument>(queryStr));
while (!idsResult.isEmpty())
{
    for (ODocument id : idsResult)
    {
        uniqueIdsSet.add(id.field("id").toString());
    }
    // Advance the paging window by one page (matches the LIMIT above).
    skipRecordCount += 10000;
    queryStr = "select id from Table1 WHERE id is not null SKIP " + Integer.toString(skipRecordCount) + " LIMIT 10000";
    idsResult = odb.db.query(new OSQLSynchQuery<ODocument>(queryStr));
}

1 个答案:

答案 0 :(得分:1)

我已经针对 OrientDB 2.2.20 创建了一个测试用例。

我添加了一个索引哈希notunique,没有null,并成功执行所有测试,包含200万条记录,随机ID,在3分钟内,使用内存testdb,在16gb系统(osx)上,12gb heap max(不需要!),进程大小为4.9gb

当我将记录数改为3400万时,测试用例的插入阶段(随机ID)在33分钟内完成,进程大小为7.2GB(直接内存3GB);随后创建索引用了15分钟,进程大小为8.2GB(直接内存4GB);向TreeSet中添加唯一ID则很快完成,整个测试用例顺利结束。

我使用了以下语句:

"CREATE INDEX test.id NOTUNIQUE_HASH_INDEX METADATA {ignoreNullValues : true}"

"SELECT key FROM index:test.id WHERE key NOT IN [NULL] SKIP "+Integer.toString(skipRecordCount)+" LIMIT 10000"
import com.orientechnologies.orient.core.db.document.ODatabaseDocumentTx;
import com.orientechnologies.orient.core.metadata.schema.OClass;
import com.orientechnologies.orient.core.metadata.schema.OSchema;
import com.orientechnologies.orient.core.metadata.schema.OType;
import com.orientechnologies.orient.core.record.impl.ODocument;
import com.orientechnologies.orient.core.sql.OCommandSQL;
import com.orientechnologies.orient.core.sql.query.OSQLSynchQuery;

import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import java.util.Random;

/**
 * Test case for collecting the distinct values of the "id" property of the
 * "test" class from an in-memory OrientDB database, paging through an index
 * to avoid the full-scan OutOfMemoryError described in the question.
 *
 * <p>Flow: create schema, insert 34M random ids, build a NOTUNIQUE_HASH
 * index (ignoring nulls), then page over the index 10,000 keys at a time
 * into a TreeSet. Heap usage is printed after each phase.
 */
public class SelectUniqueIDs {

    /** Page size shared by the SELECT paging loop below. */
    private static final int PAGE_SIZE = 10000;

    /**
     * Pages over the ids of the "test" class and accumulates the distinct
     * values in a TreeSet, printing the final count.
     *
     * <p>The first page uses a plain class query (projects field "id");
     * every following page reads the "test.id" index directly (projects
     * field "key"), which avoids the full-scan warning OrientDB emits for
     * large SKIP values. The original code read {@code field("id")} on
     * every page, which is null for the index pages and would throw a
     * NullPointerException — we therefore fall back to "key" when "id"
     * is absent.
     *
     * @param db an open document database containing the "test" class and
     *           the "test.id" index
     */
    public static void createTreeSet(ODatabaseDocumentTx db) {

        int skipRecordCount = 0;
        String queryStr = "select id from test WHERE id is not null SKIP "
                + Integer.toString(skipRecordCount) + " LIMIT " + PAGE_SIZE;
        Set<String> uniqueIdsSet = new TreeSet<String>();
        List<ODocument> idsResult = db.query(new OSQLSynchQuery<ODocument>(queryStr));
        while (!idsResult.isEmpty()) {
            for (ODocument doc : idsResult) {
                // Class pages project "id"; index pages project "key".
                Object value = doc.field("id");
                if (value == null) {
                    value = doc.field("key");
                }
                if (value != null) {
                    uniqueIdsSet.add(value.toString());
                }
            }
            skipRecordCount += PAGE_SIZE;

            // Querying the index (instead of the class) keeps deep SKIPs fast;
            // a plain "WHERE id IS NOT NULL" query triggers OrientDB's
            // "fetched more than 50000 records" full-scan warning.
            queryStr = "SELECT key FROM index:test.id WHERE key NOT IN [NULL] SKIP "
                    + Integer.toString(skipRecordCount) + " LIMIT " + PAGE_SIZE;
            idsResult = db.query(new OSQLSynchQuery<ODocument>(queryStr));
        }
        System.out.println("**** Inserted " + uniqueIdsSet.size()
                + " ids in uniqueIdsSet TreeSet ****");
    }

    /** Prints a labelled snapshot of the JVM heap counters. */
    private static void printHeap(String label) {
        long maxMemory = Runtime.getRuntime().maxMemory();
        long totalMemory = Runtime.getRuntime().totalMemory();
        long freeMemory = Runtime.getRuntime().freeMemory();
        System.out.println("**** " + label + " Heap maxMemory=" + maxMemory
                + " , totalMemory=" + totalMemory + " , freeMemory=" + freeMemory + " ****");
    }

    public static void main(String[] args) {

        int i;

        printHeap("Initial");

        // In-memory database with a single class "test" holding a DOUBLE id.
        final ODatabaseDocumentTx db = new ODatabaseDocumentTx("memory:testdb").create();
        final OSchema schema = db.getMetadata().getSchema();
        final OClass clazz = schema.createClass("test");
        clazz.createProperty("id", OType.DOUBLE);

        // Insert 34M rows with random ids in [1, 100000].
        for (i = 0; i < 34000000; i++) {
            int r = (int) (Math.random() * 100000 + 1);
            db.command(new OCommandSQL("INSERT INTO test(id) VALUES (" + r + ")")).execute();
        }

        // Hash index over id, skipping nulls, so the paging query can use it.
        db.command(new OCommandSQL(
                "CREATE INDEX test.id NOTUNIQUE_HASH_INDEX METADATA {ignoreNullValues : true}"))
                .execute();

        System.out.println("**** Inserted " + i + " ids ****");
        printHeap("After insert");

        createTreeSet(db);

        final List<ODocument> count =
                db.query(new OSQLSynchQuery<ODocument>("SELECT count(*) as ids FROM test"));
        Long ids = (Long) count.get(0).field("ids");

        System.out.println("**** Counted " + ids + " ids ****");
        printHeap("After count");

        // Spot-check the first 10 stored values.
        final List<ODocument> docs =
                db.query(new OSQLSynchQuery<ODocument>("SELECT FROM test LIMIT 100"));

        for (i = 0; i < 10; i++) {
            Double value = (Double) docs.get(i).field("id");
            System.out.print(i + "=" + value + " ");
        }

        System.out.println();
        System.out.println("**** Selected " + i + " ids ****");
        printHeap("After select");
    }
}