Question

我有 400,000 行数据需要导入，因此需要把它们分批处理。不幸的是，我无法让这个脚本在全部处理完成之前退出，结果当然是它总会耗尽内存。我原以为在 `.on('end', function() {...})` 中设置一个值会有用，但在 `.on('data', ...)` 处理完成之后，我看不到那个值。

'use strict';
var mongoose = require('mongoose');
var fs = require('fs');
var parse = require('csv-parse');
var Schema = mongoose.Schema;
// NOTE(review): `done` is declared here but is never assigned or read anywhere
// in the visible script — presumably a leftover from an attempt to flag
// completion from the 'end' handler; confirm before removing.
var done;

// Open mongoose's connection to the `auth` database on the local MongoDB
// instance (127.0.0.1:27017). No callback or error handler is attached here.
mongoose.connect('mongodb://127.0.0.1:27017/auth');

/**
 * Schema for a login record.
 *
 * `username` and `email` are both declared `unique`; `roles.account` holds a
 * reference to the matching Account document; `search` is a list of strings.
 */
var userSchema = new Schema({
  username: { type: String, unique: true },
  password: String,
  email: { type: String, unique: true },
  // Stored as a string rather than a boolean (the import script writes 'yes').
  isActive: String,
  roles: {
    account: { type: Schema.Types.ObjectId, ref: 'Account' }
  },
  // Defaults to the time the document is created.
  timeCreated: { type: Date, default: Date.now },
  search: [String]
});

/**
 * Schema for the profile ("account") attached to a User.
 *
 * The repeated sub-shapes (empty-string fields, User references and
 * "created by" stamps) are produced by small local factories so every path
 * gets its own fresh options object. The resulting definition has the same
 * paths, types, refs and defaults, in the same order, as a hand-written one.
 */
var accountSchema = (function() {
  // A string path that defaults to the empty string.
  function blankString() {
    return { type: String, default: '' };
  }

  // An ObjectId path pointing at the User model.
  function userRef() {
    return { type: Schema.Types.ObjectId, ref: 'User' };
  }

  // Who created something, and when (time defaults to creation time).
  function creationStamp() {
    return {
      id: userRef(),
      name: blankString(),
      time: { type: Date, default: Date.now }
    };
  }

  return new Schema({
    user: {
      id: userRef(),
      name: blankString()
    },
    isVerified: blankString(),
    verificationToken: blankString(),
    name: {
      first: blankString(),
      middle: blankString(),
      last: blankString(),
      full: blankString()
    },
    company: blankString(),
    phone: blankString(),
    zip: blankString(),
    memberid: blankString(),
    status: {
      // Unlike the other references, the status id is a plain string.
      id: { type: String, ref: 'Status' },
      name: blankString(),
      userCreated: creationStamp()
    },
    userCreated: creationStamp(),
    search: [String]
  });
})();

var User = mongoose.model('User', userSchema);
var Account = mongoose.model('Account', accountSchema);

/**
 * Imports one CSV row: creates the User, creates its Account, then stores the
 * Account id back on the User.
 *
 * Expected columns: [0] e-mail (also used as username), [1] first name,
 * [2] last name. Missing name columns are treated as empty strings.
 *
 * @param {string[]} csvrow   One parsed CSV record.
 * @param {function(?Error)} callback  Called exactly once when the row is
 *     fully written, or with the error of the first step that failed.
 */
function importRow(csvrow, callback) {
  // Apply the fallback per column; `a + ' ' + b || ''` never falls back
  // because `+` binds tighter than `||`.
  var displayName = (csvrow[1] || '') + ' ' + (csvrow[2] || '');
  var nameParts = displayName.split(' ');

  var u = {
    isActive: 'yes',
    username: csvrow[0],
    email: csvrow[0],
    search: [
      displayName,
      csvrow[0]
    ]
  };

  User.create(u, function(err, createdUser) {
    if (err) {
      return callback(err);
    }

    var acct = {
      isVerified: 'no',
      'name.first': nameParts[0],
      'name.last': nameParts[1] || '',
      'name.full': displayName,
      user: {
        id: createdUser._id,
        name: createdUser.username
      },
      search: [
        nameParts[0],
        nameParts[1] || ''
      ]
    };

    Account.create(acct, function(err, account) {
      if (err) {
        // The original referenced an undefined `workflow` object here, which
        // turned any Account error into a ReferenceError.
        return callback(err);
      }
      var fieldstoset = {
        roles: {
          account: account._id
        }
      };
      // Report the error instead of throwing: a throw inside this callback
      // cannot be caught by the caller and would abort the whole import.
      User.findByIdAndUpdate(account.user.id, fieldstoset, function(err) {
        callback(err || null);
      });
    });
  });
}

// Keep a handle on the parser so the 'data' handler can pause and resume it.
var parser = parse({
  delimiter: ','
});

fs.createReadStream('./ipart')
  .pipe(parser)
  // NOTE(review): 'data-invalid' looks like a fast-csv event; csv-parse may
  // never emit it — confirm before relying on this handler.
  .on("data-invalid", function(data) {})
  .on('error', function(err) {
    // Malformed CSV would otherwise surface as an uncaught 'error' event.
    console.log(err);
  })
  .on('data', function(csvrow) {
    // Backpressure: stop the stream until this row is written. Without this,
    // all 400,000 rows queue their database operations at once and the
    // process runs out of memory.
    parser.pause();
    importRow(csvrow, function(err) {
      if (err) {
        // A failed row (e.g. a duplicate e-mail) is logged and skipped.
        console.log(err);
      }
      parser.resume();
    });
  })
  .on('end', function() {
    // Rows are processed one at a time, so every write has finished by now.
    console.log('complete');
    // Close the connection so the script can exit.
    mongoose.disconnect();
  });

Answer 1

你确实应该使用批量插入。我在别处找到了下面这段代码，贴在这里供你参考：

         var Potato = mongoose.model('Potato', PotatoSchema);
         var potatoBag = [/* a humongous amount of potato objects */];
         // Send the whole array to MongoDB as one bulk write through the native
         // driver collection (this bypasses mongoose validation and casting).
         // `insertMany` replaces the deprecated `insert`. Note the driver rejects
         // an empty array, so only call this with at least one document.
         Potato.collection.insertMany(potatoBag, onInsert);

         /**
          * Reports the outcome of the bulk insert.
          *
          * @param {?Error} err     Set when the bulk write failed.
          * @param {Object}  result Driver result object; `insertedCount` is the
          *                         number of documents written.
          */
         function onInsert(err, result) {
           if (err) {
             // Surface the failure instead of silently dropping it.
             console.error('Bulk insert failed:', err);
             return;
           }
           console.info('%d potatoes were successfully stored.', result.insertedCount);
         }

Answer 2

我建议您把导入 CSV 数据的整个逻辑分解为以下几个步骤：

1. 编写一个简单的脚本文件，将 CSV 导入临时集合，如下所示：

YourImportScript

#!/bin/bash
# Load the CSV file into a temporary collection. --drop empties the collection
# first, and --headerline takes the field names from the first CSV line.
mongoimport \
  --db YourDBName \
  --collection YourTempCollectionName \
  --drop \
  --type csv \
  --file pathToYourCSVFile.csv \
  --headerline

2. 在创建用户之前运行该脚本：

var exec = require('child_process').exec;
/**
 * Runs the external CSV import script and reports back once it has finished.
 *
 * @param {function(?Error)} callback  Called exactly once, after the script
 *     exits: with `null` on success, or with the exec error if the script
 *     could not be run or exited with a non-zero status.
 */
function importCSV(callback) {
    exec("./pathToYourImportScript/YourImportScript", function (error, stdout, stderr) {
        console.log(stdout);
        if (error !== null) {
            console.log('exec error: ' + error);
        }
        // Signal completion from inside the exec callback; calling it right
        // after exec() would let the next step start before the import ends.
        callback(error);
    });
}

mongoimport 导入 CSV 的速度非常快。

3. 从临时集合中读取文档，并将其插入到用户集合中。您还可以使用 async 模块来清晰地控制代码的执行流程：

// Run the two steps strictly one after the other. Each step is a placeholder
// to be filled in.
async.series([
  function importCsvStep(callback) {
    // CSV import function
  },
  function manipulateUsersStep(callback) {
    // User manipulation function
  }
]);

最好在 CSV 文件中加入列标题行，因为这样在从临时集合读取文档时可以为其创建模型，并且可以通过列标题更方便地获取用户属性，例如使用 `username: myCSVModel.username`，而不是 `username: csvrow[0]`。

如何在mongo db write

2 个答案: