I'm getting a JavaScript heap out of memory error in my Node.js application. I'm trying to insert 408,000 records into MongoDB in a single run. I have two loops: the first runs 24 times, and the second (nested inside the first) runs 17,000 times. The data comes from a NetCDF file: I parse the data from this file, build model objects, and insert them into MongoDB.
I've seen some posts on Stack Overflow about this problem, and I learned that I can increase Node's memory with --max_old_space_size. But I don't know whether that's the right approach. Maybe you have some suggestions for optimizing my code?
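For reference, the flag takes the heap size in megabytes and is passed when starting the script; the value and file name below are only illustrative:

node --max_old_space_size=4096 app.js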
Here is my loop:
for (var time_pos = 0; time_pos < 24; time_pos++) {

    // This array contains 17 000 data points
    var dataSliced = file.root.variables['pm10_conc'].readSlice(
        time_pos, time_size,
        level_pos, level_size,
        lat_from, lat_size,
        lng_from, lng_size
    );

    // Loop : 0 to 17 000
    for (var i = 0; i < dataSliced.length; i++) {
        var pollution = new Pollution();

        pollution.latitude  = current_lat;
        pollution.longitude = current_lng;
        pollution.country   = country_name;
        pollution.model     = model_name;
        pollution.data_type = type_name;
        pollution.level     = 0;
        pollution.datetime  = date;
        pollution.pollutants.pm10.description   = description;
        pollution.pollutants.pm10.units         = units;
        pollution.pollutants.pm10.concentration = dataSliced[i];

        pollution.save(function (err) {
            if (err) throw err;
            console.log("Data saved");
        });
    }
}
And here is my error:
<--- Last few GCs --->
56782 ms: Mark-sweep 1366.6 (1436.9) -> 1366.6 (1436.9) MB, 1943.5 / 0.0 ms [allocation failure] [GC in old space requested].
58617 ms: Mark-sweep 1366.6 (1436.9) -> 1366.6 (1436.9) MB, 1834.9 / 0.0 ms [allocation failure] [GC in old space requested].
60731 ms: Mark-sweep 1366.6 (1436.9) -> 1368.6 (1417.9) MB, 2114.3 / 0.0 ms [last resort gc].
62707 ms: Mark-sweep 1368.6 (1417.9) -> 1370.7 (1417.9) MB, 1975.8 / 0.0 ms [last resort gc].
<--- JS stacktrace --->
==== JS stack trace =========================================
Security context: 0x3a7c3fbcfb51 <JS Object>
1: fnWrapper [/var/www/html/Project/node_modules/hooks-fixed/hooks.js:185] [pc=0x6ccee7825d4] (this=0x3a7c3fbe6119 <JS Global Object>)
2: fn [/var/www/html/Project/node_modules/mongoose/lib/schema.js:~250] [pc=0x6ccee7d8ffe] (this=0xd29dd7fea11 <a model with map 0x994a88e5849>,next=0x1cbe49858589 <JS Function fnWrapper (SharedFunctionInfo 0x3d8ecc066811)>,done=0x1cbe498586...
FATAL ERROR: CALL_AND_RETRY_LAST Allocation failed - JavaScript heap out of memory
1: node::Abort() [node]
2: 0x1098b2c [node]
3: v8::Utils::ReportApiFailure(char const*, char const*) [node]
4: v8::internal::V8::FatalProcessOutOfMemory(char const*, bool) [node]
5: v8::internal::Factory::NewTransitionArray(int) [node]
6: v8::internal::TransitionArray::Insert(v8::internal::Handle<v8::internal::Map>, v8::internal::Handle<v8::internal::Name>, v8::internal::Handle<v8::internal::Map>, v8::internal::SimpleTransitionFlag) [node]
7: v8::internal::Map::CopyReplaceDescriptors(v8::internal::Handle<v8::internal::Map>, v8::internal::Handle<v8::internal::DescriptorArray>, v8::internal::Handle<v8::internal::LayoutDescriptor>, v8::internal::TransitionFlag, v8::internal::MaybeHandle<v8::internal::Name>, char const*, v8::internal::SimpleTransitionFlag) [node]
8: v8::internal::Map::CopyAddDescriptor(v8::internal::Handle<v8::internal::Map>, v8::internal::Descriptor*, v8::internal::TransitionFlag) [node]
9: v8::internal::Map::CopyWithField(v8::internal::Handle<v8::internal::Map>, v8::internal::Handle<v8::internal::Name>, v8::internal::Handle<v8::internal::FieldType>, v8::internal::PropertyAttributes, v8::internal::Representation, v8::internal::TransitionFlag) [node]
10: v8::internal::Map::TransitionToDataProperty(v8::internal::Handle<v8::internal::Map>, v8::internal::Handle<v8::internal::Name>, v8::internal::Handle<v8::internal::Object>, v8::internal::PropertyAttributes, v8::internal::Object::StoreFromKeyed) [node]
11: v8::internal::LookupIterator::PrepareTransitionToDataProperty(v8::internal::Handle<v8::internal::JSObject>, v8::internal::Handle<v8::internal::Object>, v8::internal::PropertyAttributes, v8::internal::Object::StoreFromKeyed) [node]
12: v8::internal::StoreIC::LookupForWrite(v8::internal::LookupIterator*, v8::internal::Handle<v8::internal::Object>, v8::internal::Object::StoreFromKeyed) [node]
13: v8::internal::StoreIC::UpdateCaches(v8::internal::LookupIterator*, v8::internal::Handle<v8::internal::Object>, v8::internal::Object::StoreFromKeyed) [node]
14: v8::internal::StoreIC::Store(v8::internal::Handle<v8::internal::Object>, v8::internal::Handle<v8::internal::Name>, v8::internal::Handle<v8::internal::Object>, v8::internal::Object::StoreFromKeyed) [node]
15: v8::internal::Runtime_StoreIC_Miss(int, v8::internal::Object**, v8::internal::Isolate*) [node]
16: 0x6ccee4092a7
Aborted
[nodemon] app crashed - waiting for file changes before starting...
Do you know whether there is a way to optimize my code, or is increasing Node's memory the best approach?
I now have a working solution. I first tried Mongoose's insertMany(), but I hit the same fatal "allocation failed" error. I then removed new Pollution and pushed plain objects into an array instead. After that I used collection.insert with async.each, like this:
var pollution = [];

for (var time_pos = 0; time_pos < 24; time_pos++) {

    // This array contains 17 000 data points
    var dataSliced = file.root.variables['pm10_conc'].readSlice(
        time_pos, time_size,
        level_pos, level_size,
        lat_from, lat_size,
        lng_from, lng_size
    );

    async.each(dataSliced, function (item, next) {
        pollution.push({
            'latitude'  : current_lat,
            'longitude' : current_lng,
            'country'   : country_name,
            'model'     : model_name,
            'data_type' : type_name,
            'level'     : 0,
            'datetime'  : date,
            'pollution' : {
                'pm10': {
                    'description'  : description,
                    'units'        : units,
                    'concentration': item
                }
            }
        });
        next();
    });
}

Pollution.collection.insert(pollution, function (err, docs) {
    if (err) throw err;
    console.log("Data saved");
});
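A side note on the snippet above: async.each is not strictly needed, since pushing to an array is synchronous. A leaner variant (just a sketch, assuming the same surrounding variables such as current_lat, current_lng, and the Pollution model are in scope) inserts each 17,000-element slice as soon as it is built, so the full 408,000-element array never has to live in memory at once:

var async = require('async');

// Sketch: process the 24 time slices one at a time, so only one
// slice's documents are in memory and in flight at any moment.
async.timesSeries(24, function (time_pos, next) {
    var dataSliced = file.root.variables['pm10_conc'].readSlice(
        time_pos, time_size,
        level_pos, level_size,
        lat_from, lat_size,
        lng_from, lng_size
    );

    // Build plain objects (no Mongoose documents, no per-document save()).
    var docs = [];
    for (var i = 0; i < dataSliced.length; i++) {
        docs.push({
            latitude : current_lat,
            longitude: current_lng,
            country  : country_name,
            model    : model_name,
            data_type: type_name,
            level    : 0,
            datetime : date,
            pollution: {
                pm10: {
                    description  : description,
                    units        : units,
                    concentration: dataSliced[i]
                }
            }
        });
    }

    // One bulk insert per slice instead of 17 000 individual saves.
    Pollution.collection.insert(docs, next);
}, function (err) {
    if (err) throw err;
    console.log("All 24 slices saved");
});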
If you have a better solution, feel free to post an answer.
Answer (score: 1):
I hope this helps you and others... :-)
I've been researching the best way to import large amounts of data into MongoDB. I've used mongoimport as well as Mongoose with the insertMany method (which uses the native MongoDB driver underneath). I've read that it's best to keep the batch size at about 100 for best performance. Here is my solution using insertMany. Using mongoimport is quite trivial (just one line of code), so I don't think it's necessary to post it here.
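For completeness, a typical mongoimport invocation looks something like the line below; the database, collection, and file names are illustrative:

mongoimport --db autoMDM --collection parts --type csv --headerline --file parts.csv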
In my example, 602,198 records were first parsed into an array of objects and then, in a second step, successfully imported into MongoDB.
Importing the parsed objects into MongoDB takes some memory, so you will usually need to allow Node to use more memory with the command below; you can read more here.
node --max_old_space_size=8000 partImportNew.js
To make this efficient, I split the array of objects into batches and rely on Promise.all, which resolves when all of the promises in the iterable argument have resolved (see bulkImportToMongo in mongodb.js below).
If you have even bigger files and run out of memory despite increasing Node's heap allowance, split the file instead. Remove the header line first, then add the column names back via the csv parser's options.
To split the file:
$ split -l numberoflines filename
ex. split -l 1000000 term2.csv
Let's say term2.csv has 5,000,001 lines and no header. The example above gives you 6 files: 5 files with one million lines each, and one file with a single line.
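For example (file names are illustrative), you could strip the header with tail before splitting, then supply the column names through the parser's headers option as in partImport.js below:

$ tail -n +2 term2.csv > term2-noheader.csv
$ split -l 1000000 term2-noheader.csv term2-part-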
Have a look at how I solved it in the function bulkImportToMongo in the mongodb.js file.
Console
➜ database git:(master) ✗ node --max_old_space_size=8000 partImport.js
Connected to db!
Time to parse file: : 5209.325ms
Disconnected from db!
Time to import parsed objects to db: : 153606.545ms
➜ database git:(master) ✗
parseCSV.js
const csv = require("fast-csv");

// Parses the whole CSV into memory and resolves with the array of records.
function promiseCSV(filePath, options) {
  return new Promise((resolve, reject) => {
    console.time("Time to parse file");
    var records = [];
    csv
      .fromPath(filePath, options)
      .on("data", record => {
        records.push(record);
      })
      .on("end", () => {
        console.timeEnd("Time to parse file");
        resolve(records);
      });
  });
}

module.exports = promiseCSV;
mongodb.js
const mongoose = require("mongoose");
mongoose.Promise = global.Promise;

function connectToMongo(databaseName) {
  mongoose.connect(`mongodb://localhost:27017/${databaseName}`, {
    keepAlive: true,
    reconnectTries: Number.MAX_VALUE,
    useMongoClient: true
  });
  console.log("Connected to db!");
}

function disconnectFromMongo() {
  mongoose.disconnect();
  console.log("Disconnected from db!");
}

function bulkImportToMongo(arrayToImport, mongooseModel) {
  const Model = require(`../../../models/${mongooseModel}`);
  const batchSize = 100;
  const batchCount = Math.ceil(arrayToImport.length / batchSize);
  let ops = [];
  let counter = 0;

  // Queue one insertMany promise per batch of 100 documents.
  for (let i = 0; i < batchCount; i++) {
    let batch = arrayToImport.slice(counter, counter + batchSize);
    counter += batchSize;
    ops.push(Model.insertMany(batch));
  }

  // Resolves when every batch has been inserted.
  return Promise.all(ops);
}

module.exports.bulkImportToMongo = bulkImportToMongo;
module.exports.connectToMongo = connectToMongo;
module.exports.disconnectFromMongo = disconnectFromMongo;
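One caveat (my observation, not part of the original answer): Promise.all starts all of the insertMany calls at once, so every batch is in flight simultaneously. If that still exhausts memory, a sequential variant along these lines (a sketch, assuming Node 8+ for async/await) inserts one batch at a time, trading some speed for a lower peak:

// Sketch: sequential alternative to the Promise.all fan-out above;
// only one batch is in flight at a time.
async function bulkImportSequentially(arrayToImport, Model, batchSize = 100) {
  const results = [];
  for (let i = 0; i < arrayToImport.length; i += batchSize) {
    // Wait for each batch to finish before slicing the next one.
    results.push(await Model.insertMany(arrayToImport.slice(i, i + batchSize)));
  }
  return results;
}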
partImport.js
const path = require("path");
const parseCSV = require("./helpers/parseCSV");
const {
  connectToMongo,
  disconnectFromMongo,
  bulkImportToMongo
} = require("./helpers/mongodb");

const filePath = path.join(__dirname, "../../data/parts.csv");
const options = {
  delimiter: ";",
  noheader: true,
  headers: [
    "facility",
    "partNumber",
    "partName",
    "partDescription",
    "netWeight",
    "customsTariff"
  ]
};

connectToMongo("autoMDM");

parseCSV(filePath, options)
  .then(records => {
    console.time("Time to import parsed objects to db");
    return bulkImportToMongo(records, "parts.js");
  })
  /* .then(result =>
       console.log("Total batches inserted: ", result, result.length)
     ) */
  .then(() => {
    disconnectFromMongo();
    console.timeEnd("Time to import parsed objects to db");
  })
  .catch(error => console.log(error));