我正在尝试用一个 NodeJS 程序向 MongoDB 集合中插入一百万条(虚拟)记录。但遗憾的是,我的进程耗尽了内存:
这是我用 JavaScript 编写并通过 node 运行的代码:
var MongoClient = require('mongodb').MongoClient;
// Connects to the `course` database on localhost:27017 and fills the
// `students` collection with randomly generated score records.
// NOTE(review): every insert below is issued from one synchronous loop without
// a callback, so nothing ever waits for a write to finish before queuing the
// next one; this is presumably what exhausts the heap.
MongoClient.connect('mongodb://localhost:27017/course', function(err, db) {
// Abort on connection failure.
if(err) throw err;
// Drop any existing collection; no callback is passed, so the outcome of the
// drop is neither checked nor waited for.
db.collection('students').drop();
// Score type for each of the 4 grades of a class ('homework' appears twice).
var types = ['exam', 'quiz', 'homework', 'homework'];
// One iteration per student: 1,000,000 students.
for (var i = 0; i < 1000000; i++) {
// Each student takes 10 classes, so 10 records are inserted per student.
for (var class_counter = 0; class_counter < 10; class_counter ++) {
// `scores` is assigned without a declaration in the code shown here, so it
// becomes an implicit global (same for `class_id` and `record` below).
scores = [];
// Each class has 4 grades, one per entry in `types`, each with a random
// score in [0, 100).
for (var j = 0; j < 4; j++) {
scores.push({'type':types[j],'score':Math.random()*100});
}
// Pick a random class for this record.
class_id = Math.floor(Math.random()*501); // integer from 0 to 500 inclusive (501 possible values)
record = {'student_id':i, 'scores':scores, 'class_id':class_id};
// Fire-and-forget insert: no callback, so errors and completion are ignored.
db.collection('students').insert(record);
}
}
});
这是我的错误跟踪:
AMAC02PC0PHG3QP:25_Indexes_Insert macadmin$ node app.js
<--- Last few GCs --->
28373 ms: Scavenge 1397.8 (1457.4) -> 1397.8 (1457.4) MB, 1.1 / 0 ms (+ 151.3 ms in 1 steps since last GC) [allocation failure] [incremental marking delaying mark-sweep].
29444 ms: Mark-sweep 1397.8 (1457.4) -> 1397.7 (1457.4) MB, 1071.5 / 0 ms (+ 427.1 ms in 14 steps since start of marking, biggest step 202.5 ms) [last resort gc].
30486 ms: Mark-sweep 1397.7 (1457.4) -> 1397.6 (1457.4) MB, 1041.4 / 0 ms [last resort gc].
<--- JS stacktrace --->
==== JS stack trace =========================================
Security context: 0x23473037399 <JS Object>
1: /* anonymous */(aka /* anonymous */) [/Users/macadmin/Desktop/NodeJS_MongoDB/25_Indexes_Insert/app.js:~3] [pc=0x3f5d2b92c716] (this=0x23473004131 <undefined>,err=0x23473004131 <undefined>,db=0x1f851bb90029 <JS Object>)
2: /* anonymous */(aka /* anonymous */) [/Users/macadmin/Desktop/NodeJS_MongoDB/25_Indexes_Insert/node_modules/mongodb/lib/mongo_client.js:455] [pc=0x3f5d2b4da8bd] (thi...
FATAL ERROR: CALL_AND_RETRY_LAST Allocation failed - process out of memory
Abort trap: 6
我的笔记本电脑配置:
// Macbook Pro
// OS X 10.9.5
// 2.5 Ghz Intel Core i7
// 16 GB Ram DDR3
// SSD
答案 0 :(得分:4)
如果您的 MongoDB 服务器是 2.6 或更高版本,最好利用写入命令的 Bulk API 来执行批量插入(bulk insert)操作。Bulk API 只是构建在服务器之上的一层抽象,可以让您轻松地构建批量操作。这些批量操作主要有两种形式:有序(ordered)批量操作按顺序串行执行,遇到第一个错误即中止;无序(unordered)批量操作可以并行执行,不会因为单个错误而中止。
注意,对于低于 2.6 版本的服务器,API 会对这些操作进行向下转换(downconvert)。但无法做到 100% 的向下转换,因此在某些边缘情况下可能无法正确报告结果数字。
在您的情况下,您可以像这样实施 Bulk API :
var MongoClient = require('mongodb').MongoClient;
// Populates the `students` collection of the `course` database with dummy
// score records, using the Bulk API in bounded batches.
//
// Each batch is built, executed, and only after its callback fires is the next
// batch built. This keeps at most one batch of documents in memory at a time.
// Re-initialising the bulk op inside an async callback while a synchronous
// loop keeps inserting does not work: the callback cannot run until the loop
// has finished.
MongoClient.connect("mongodb://localhost:27017/course", function(err, db) {
    // Handle connection error
    if (err) throw err;

    var TOTAL_STUDENTS = 1000000,   // number of distinct student_id values
        CLASSES_PER_STUDENT = 10,   // one record per (student, class)
        STUDENTS_PER_BATCH = 100,   // 100 students * 10 classes = 1000 docs per batch
        col = db.collection('students'),
        types = ['exam', 'quiz', 'homework', 'homework'];

    // Close the connection before surfacing an error so the process can exit.
    function fail(error) {
        db.close();
        throw error;
    }

    // Build one record: a student's 4 grades in one randomly chosen class.
    // Fresh `scores` and record objects are created on every call so that
    // records never share (or accumulate into) the same array.
    function buildRecord(studentId) {
        var scores = [];
        for (var j = 0; j < types.length; j++) {
            scores.push({ 'type': types[j], 'score': Math.random() * 100 });
        }
        return {
            'student_id': studentId,
            'scores': scores,
            // integer class id from 0 to 500 inclusive
            'class_id': Math.floor(Math.random() * 501)
        };
    }

    // Insert the records for students [firstStudent, firstStudent + STUDENTS_PER_BATCH)
    // and, once the server has acknowledged them, continue with the next range.
    // Closes the connection when every student has been written.
    function insertBatch(firstStudent) {
        if (firstStudent >= TOTAL_STUDENTS) {
            // Always reached on success, regardless of whether the total is a
            // multiple of the batch size.
            db.close();
            return;
        }

        var bulk = col.initializeOrderedBulkOp(), // fresh batch for this range
            end = Math.min(firstStudent + STUDENTS_PER_BATCH, TOTAL_STUDENTS);

        for (var i = firstStudent; i < end; i++) {
            for (var class_counter = 0; class_counter < CLASSES_PER_STUDENT; class_counter++) {
                bulk.insert(buildRecord(i));
            }
        }

        bulk.execute(function(batchErr) {
            if (batchErr) return fail(batchErr);
            // Invoked asynchronously by the driver, so this does not grow the stack.
            insertBatch(end);
        });
    }

    // Drop the collection and wait for the drop to finish before inserting.
    // Dropping a collection that does not exist yet reports "ns not found"
    // (NamespaceNotFound, code 26); that is not a failure for our purposes.
    col.drop(function(dropErr) {
        var isMissingCollection = dropErr &&
            (dropErr.code === 26 || /ns not found/.test(String(dropErr.message)));
        if (dropErr && !isMissingCollection) return fail(dropErr);
        insertBatch(0);
    });
});
- 更新 -
感谢 @MarkusWMahlberg 的提示:如果需要生成虚拟数据,您可以试试 mgenerate 这个包。