我有400,000行要进入,我需要将其分解。不幸的是,在完成所有操作之前,我无法退出此脚本。当然,它总是耗尽记忆。我认为在.on('end',function()设置一个值会很有用但是.on数据完成后我看不到那个值。
'use strict';
var mongoose = require('mongoose');
var fs = require('fs');
var parse = require('csv-parse');
var Schema = mongoose.Schema;
var done;
mongoose.connect('mongodb://127.0.0.1:27017/auth');
var userSchema = new mongoose.Schema({
username: {
type: String,
unique: true
},
password: String,
email: {
type: String,
unique: true
},
isActive: String,
roles: {
account: {
type: mongoose.Schema.Types.ObjectId,
ref: 'Account'
}
},
timeCreated: {
type: Date,
default: Date.now
},
search: [String]
});
var accountSchema = new mongoose.Schema({
user: {
id: {
type: mongoose.Schema.Types.ObjectId,
ref: 'User'
},
name: {
type: String,
default: ''
}
},
isVerified: {
type: String,
default: ''
},
verificationToken: {
type: String,
default: ''
},
name: {
first: {
type: String,
default: ''
},
middle: {
type: String,
default: ''
},
last: {
type: String,
default: ''
},
full: {
type: String,
default: ''
}
},
company: {
type: String,
default: ''
},
phone: {
type: String,
default: ''
},
zip: {
type: String,
default: ''
},
memberid: {
type: String,
default: ''
},
status: {
id: {
type: String,
ref: 'Status'
},
name: {
type: String,
default: ''
},
userCreated: {
id: {
type: mongoose.Schema.Types.ObjectId,
ref: 'User'
},
name: {
type: String,
default: ''
},
time: {
type: Date,
default: Date.now
}
}
},
userCreated: {
id: {
type: mongoose.Schema.Types.ObjectId,
ref: 'User'
},
name: {
type: String,
default: ''
},
time: {
type: Date,
default: Date.now
}
},
search: [String]
});
var User = mongoose.model('User', userSchema);
var Account = mongoose.model('Account', accountSchema);
fs.createReadStream('./ipart')
.pipe(parse({
delimiter: ','
}))
.on("data-invalid", function(data) {})
.on('data', function(csvrow) {
var u = {
isActive: 'yes',
username: csvrow[0],
email: csvrow[0],
search: [
csvrow[1] + ' ' + csvrow[2],
csvrow[0],
]
};
User.create(u, function(err, createdUser) {
if (err) {
console.log(err);
return;
}
var user = createdUser;
var displayName = csvrow[1] + ' ' + csvrow[2] || '';
var nameParts = displayName.split(' ');
var acct = {
isVerified: 'no',
'name.first': nameParts[0],
'name.last': nameParts[1] || '',
'name.full': displayName,
user: {
id: user._id,
name: user.username
},
search: [
nameParts[0],
nameParts[1] || ''
]
};
Account.create(acct, function(err, account) {
if (err) {
return workflow.emit('exception', err);
}
var fieldstoset = {
roles: {
account: account._id
}
};
User.findByIdAndUpdate(account.user.id, fieldstoset, function(err, user) {
if (err) throw err;
});
});
});
})
.on('end', function() {
console.log('complete');
});
答案 0 :(得分:0)
你真的需要使用批量插入,我在某个地方找到了这个代码并为你粘贴
var Potato = mongoose.model('Potato', PotatoSchema);
var potatoBag = [/* a humongous amount of potato objects */];
Potato.collection.insert(potatoBag, onInsert);
function onInsert(err, docs) {
if (err) {
// TODO: handle error
} else {
console.info('%d potatoes were successfully stored.', docs.length);
}
}
答案 1 :(得分:0)
我建议您分解将CSV数据导入以下步骤的整个逻辑: 1.编写一个简单的脚本文件,将CSV导入临时集合,如下所示:
YourImportScript
#!/bin/bash
mongoimport -d YourDBName -c YourTempCollectionName --drop --type csv --file pathToYourCSVFile.csv --headerline
2。在创建用户之前运行脚本:
var exec = require('child_process').exec;
function importCSV(callback) {
exec("./pathToYourImportScript/YourImportScript", function (error, stdout, stderr) {
console.log(stdout);
if (error !== null)
console.log('exec error: ' + error);
});
callback()
}
MongoImport会很快导入CSV。
async.series([ function (callback) { //CSV Import function }, function (callback) { //User Manupulation function }]);
最好将标题放入CSV列,因为您可以在从临时集合中导入文档时创建模型,并且更容易通过username:myCSVModel.username
等列标题获取用户的属性username: csvrow[0]
。