我们正在使用 OrientDB 1.0.1,我需要处理一个包含 3400 万条记录的类/表。在我的 8 GB 内存的机器上,插入大约 100 万条记录后得到 “Exception in thread "main" java.lang.OutOfMemoryError: GC overhead limit exceeded” 错误,此时 96% 的物理内存被占用。将 JVM 堆大小增加到 8 GB 后可以处理约 200 万条记录,但之后仍然失败。我也尝试了 16 GB 内存的物理机器,处理了 380 万条记录后出现同样的问题。我需要处理全部 3400 万条记录并找出其中的唯一 ID。问题似乎发生在 OrientDB 的缓冲区被填满的时候。
// Paginated scan of Table1 collecting the distinct non-null ids into a sorted set.
// Fixes vs. the original snippet:
//  - the first query string was missing the closing quote and '+' around the
//    SKIP concatenation, so the literal text "+Integer.toString(skipRecordCount)+"
//    was sent to the server instead of the actual skip count;
//  - the loop re-queried with an undefined variable `idsQueryStr` instead of
//    `queryStr`, so the code did not compile;
//  - raw Set/TreeSet/List/OSQLSynchQuery types replaced with parameterized ones.
int skipRecordCount = 0;
String queryStr = "select id from Table1 WHERE id is not null SKIP "
        + Integer.toString(skipRecordCount) + " LIMIT 10000";
Set<String> uniqueIdsSet = new TreeSet<String>();
List<ODocument> idsResult = odb.db.query(new OSQLSynchQuery<ODocument>(queryStr));
while (!idsResult.isEmpty())
{
    for (ODocument id : idsResult)
    {
        uniqueIdsSet.add(id.field("id").toString());
    }
    // Advance by the page size and fetch the next page.
    skipRecordCount += 10000;
    queryStr = "select id from Table1 WHERE id is not null SKIP "
            + Integer.toString(skipRecordCount) + " LIMIT 10000";
    idsResult = odb.db.query(new OSQLSynchQuery<ODocument>(queryStr));
}
答案 0(得分:1)
我针对 2.2.20 版本创建了一个测试用例。我添加了一个 NOTUNIQUE 哈希索引(忽略 null 值),并成功执行了全部测试:200 万条随机 ID 的记录在 3 分钟内完成,使用内存数据库 testdb,在 16 GB 的系统(macOS)上,堆上限设为 12 GB(实际并不需要这么多),进程大小为 4.9 GB。
当我将记录数改为 3400 万时,测试用例的插入阶段(随机 ID)在 33 分钟内完成,进程大小为 7.2 GB(其中直接内存 3 GB);随后在 15 分钟内创建索引,进程大小为 8.2 GB(直接内存 4 GB);最后把唯一 ID 加入 TreeSet 的步骤很快完成,整个测试用例顺利通过。
所用的索引创建语句为:
"CREATE INDEX test.id NOTUNIQUE_HASH_INDEX METADATA {ignoreNullValues : true}"
和
"SELECT key FROM index:test.id WHERE key NOT IN [NULL] SKIP "+Integer.toString(skipRecordCount)+" LIMIT 10000"
import com.orientechnologies.orient.core.db.document.ODatabaseDocumentTx;
import com.orientechnologies.orient.core.metadata.schema.OClass;
import com.orientechnologies.orient.core.metadata.schema.OSchema;
import com.orientechnologies.orient.core.metadata.schema.OType;
import com.orientechnologies.orient.core.record.impl.ODocument;
import com.orientechnologies.orient.core.sql.OCommandSQL;
import com.orientechnologies.orient.core.sql.query.OSQLSynchQuery;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import java.util.Random;
/**
 * Test case: insert ~34M documents with random numeric ids into an in-memory
 * OrientDB database, index the id property with a NOTUNIQUE hash index, and
 * collect the set of distinct ids by paging through the index.
 *
 * <p>Heap usage is printed after each phase so the OutOfMemoryError reported
 * in the question can be investigated.
 */
public class SelectUniqueIDs {

    /** Page size for the SKIP/LIMIT scan over the index. */
    private static final int PAGE_SIZE = 10000;

    /**
     * Pages through the {@code test.id} index and accumulates every distinct
     * id value (as a String) into a TreeSet, then prints the set's size.
     *
     * <p>The index-backed query is used for every page, including the first.
     * The original code fetched the first page with
     * {@code select id from test WHERE id is not null} and then read
     * {@code field("id")} inside the loop — but all later pages come from
     * {@code SELECT key FROM index:test.id}, whose documents expose the value
     * under the field name {@code "key"}, so {@code field("id")} would return
     * null and {@code .toString()} would throw a NullPointerException.
     * Querying the index consistently (the index already exists when this
     * method is called from {@link #main}) fixes that and also avoids the
     * full-class scan the optimizer warns about once SKIP passes 50000.
     *
     * @param db an open document database containing the indexed "test" class
     */
    public static void createTreeSet(ODatabaseDocumentTx db) {
        int skipRecordCount = 0;
        Set<String> uniqueIdsSet = new TreeSet<String>();
        String queryStr = "SELECT key FROM index:test.id WHERE key NOT IN [NULL] SKIP "
                + Integer.toString(skipRecordCount) + " LIMIT " + PAGE_SIZE;
        List<ODocument> idsResult = db.query(new OSQLSynchQuery<ODocument>(queryStr));
        while (!idsResult.isEmpty()) {
            for (ODocument doc : idsResult) {
                // Index queries expose the indexed value as field "key".
                uniqueIdsSet.add(doc.field("key").toString());
            }
            skipRecordCount += PAGE_SIZE;
            queryStr = "SELECT key FROM index:test.id WHERE key NOT IN [NULL] SKIP "
                    + Integer.toString(skipRecordCount) + " LIMIT " + PAGE_SIZE;
            idsResult = db.query(new OSQLSynchQuery<ODocument>(queryStr));
        }
        System.out.println("**** Inserted "+uniqueIdsSet.size()+" ids in uniqueIdsSet TreeSet ****");
    }

    public static final void main(String[] args) {
        int i;
        long maxMemory = Runtime.getRuntime().maxMemory();
        long totalMemory = Runtime.getRuntime().totalMemory();
        long freeMemory = Runtime.getRuntime().freeMemory();
        System.out.println("**** Initial Heap maxMemory="+maxMemory+" , totalMemory="+totalMemory+" , freeMemory="+freeMemory+" ****");

        // In-memory database: everything lives on the heap/direct memory.
        // Switch the URL to "plocal:..." for an on-disk database.
        final ODatabaseDocumentTx db = new ODatabaseDocumentTx("memory:testdb").create();
        final OSchema schema = db.getMetadata().getSchema();
        final OClass clazz = schema.createClass("test");
        clazz.createProperty("id", OType.DOUBLE);

        // Insert phase: 34M rows with random ids in [1, 100000].
        for (i = 0; i < 34000000; i++) {
            int r = (int) (Math.random() * 100000 + 1);
            db.command(new OCommandSQL("INSERT INTO test(id) VALUES ("+r+")")).execute();
        }
        // Hash index so the paged SELECT ... FROM index:test.id avoids full scans.
        db.command(new OCommandSQL("CREATE INDEX test.id NOTUNIQUE_HASH_INDEX METADATA {ignoreNullValues : true}")).execute();

        maxMemory = Runtime.getRuntime().maxMemory();
        totalMemory = Runtime.getRuntime().totalMemory();
        long insertMemory = Runtime.getRuntime().freeMemory();
        System.out.println("**** Inserted "+i+" ids; Heap maxMemory="+maxMemory+" , totalMemory="+totalMemory+" , freeMemory="+insertMemory+" ****");

        createTreeSet(db);

        // Sanity check: total row count (not distinct ids).
        final List<ODocument> count = db.query(new OSQLSynchQuery<ODocument>("SELECT count(*) as ids FROM test"));
        Long ids = (Long) count.get(0).field("ids");
        maxMemory = Runtime.getRuntime().maxMemory();
        totalMemory = Runtime.getRuntime().totalMemory();
        long countMemory = Runtime.getRuntime().freeMemory();
        System.out.println("**** Counted "+ids+" ids; Heap maxMemory="+maxMemory+" , totalMemory="+totalMemory+" , freeMemory="+countMemory+" ****");

        // Spot-check the first few stored values (schema type is DOUBLE).
        final List<ODocument> docs = db.query(new OSQLSynchQuery<ODocument>("SELECT FROM test LIMIT 100"));
        for (i = 0; i < 10; i++) {
            Double value = (Double) docs.get(i).field("id");
            System.out.print(i+"="+value+" ");
        }
        System.out.println();

        maxMemory = Runtime.getRuntime().maxMemory();
        totalMemory = Runtime.getRuntime().totalMemory();
        long selectMemory = Runtime.getRuntime().freeMemory();
        System.out.println("**** Selected "+i+" ids; Heap maxMemory="+maxMemory+" , totalMemory="+totalMemory+" , freeMemory="+selectMemory+" ****");
    }
}