为什么我的代码使用insertMany()跳过一些记录并多次插入相同的记录?

时间:2017-09-26 20:53:07

标签: javascript mongoose promise

我在csv文件中有9577条唯一记录。

此代码插入9800条记录并插入并非所有记录,但重复其中一些记录。知道为什么它不插入唯一的9577记录,还有一些重复吗?下面我还插入代码的剩余部分,以便全面了解

function bulkImportToMongo(arrayToImport, mongooseModel) {
  const Model = require(`../../../models/${mongooseModel}`);
  let batchCount = Math.ceil(arrayToImport.length / 100);
  console.log(arrayToImport.length);
  let ops = [];

  for (let i = 0; i < batchCount; i++) {
    // console.log(i);
    let batch = arrayToImport.slice(i, i + 100);
    console.log(batch.length);
    ops.push(Model.insertMany(batch));
  }
  return ops;

  return Promise.all(ops).then(results => {
    // results is an array of results for each batch
    console.log("results: ", results);
  });
}

我解析csv文件

const Promise = require("bluebird");
const csv = require("fast-csv");
const path = require("path");
const fs = Promise.promisifyAll(require("fs"));

const promiseCSV = Promise.method((filePath, options) => {
  return new Promise((resolve, reject) => {
    var records = [];
    csv
      .fromPath(filePath, options)
      .on("data", record => {
        records.push(record);
      })
      .on("end", () => {
        // console.log(records);
        resolve(records);
      });
  });
});

以下是连接它们的脚本:

const path = require("path");
const promiseCSV = require("./helpers/ImportCSVFiles");
const {
  connectToMongo,
  bulkImportToMongo
} = require("./helpers/mongoOperations");

const filePath = path.join(__dirname, "../../data/parts.csv");
const options = {
  delimiter: ";",
  noheader: true,
  headers: [
    "facility",
    "partNumber",
    "partName",
    "partDescription",
    "netWeight",
    "customsTariff"
  ]
};

connectToMongo("autoMDM");
promiseCSV(filePath, options).then(records => {
  bulkImportToMongo(records, "parts.js");
});

2 个答案:

答案 0 :(得分:0)

//看起来你的问题只是i ++。也许你的意思是我+ = 100?

for (let i = 0; i < batchCount; i+=100 /* NOT i++ */) {
    //...
}

答案 1 :(得分:0)

我解决了。

我希望这有助于其他......: - )

我有两个错误,在函数promiseCSV(更改为parseCSV),第二个我在bulkImportToMongo中有错误的逻辑。

完整的解决方案:

我解析并导入了602.198个对象,这是使用8GB内存的MacBook Pro上的节点--max_old_space_size = 8000花了多长时间。

<强>控制台

➜  database git:(master) ✗ node --max_old_space_size=8000  partImport.js
Connected to db!
Time to parse file: : 5209.325ms
Disconnected from db!
Time to import parsed objects to db: : 153606.545ms
➜  database git:(master) ✗

<强> parseCSV.js

const csv = require("fast-csv");

function promiseCSV(filePath, options) {
  return new Promise((resolve, reject) => {
    console.time("Time to parse file");
    var records = [];
    csv
      .fromPath(filePath, options)
      .on("data", record => {
        records.push(record);
      })
      .on("end", () => {
        console.timeEnd("Time to parse file");
        resolve(records);
      });
  });
}

module.exports = promiseCSV;

<强> mongodb.js

const mongoose = require("mongoose");
mongoose.Promise = global.Promise;

function connectToMongo(databaseName) {
  mongoose.connect(`mongodb://localhost:27017/${databaseName}`, {
    keepAlive: true,
    reconnectTries: Number.MAX_VALUE,
    useMongoClient: true
  });
  console.log("Connected to db!");
}

function disconnectFromMongo() {
  mongoose.disconnect();
  console.log("Disconnected from db!");
}

function bulkImportToMongo(arrayToImport, mongooseModel) {
  const Model = require(`../../../models/${mongooseModel}`);
  const batchSize = 100;
  let batchCount = Math.ceil(arrayToImport.length / batchSize);
  let recordsLeft = arrayToImport.length;
  let ops = [];
  let counter = 0;
  for (let i = 0; i < batchCount; i++) {
    let batch = arrayToImport.slice(counter, counter + batchSize);
    counter += batchSize;
    ops.push(Model.insertMany(batch));
  }
  return Promise.all(ops);
}

module.exports.bulkImportToMongo = bulkImportToMongo;
module.exports.connectToMongo = connectToMongo;
module.exports.disconnectFromMongo = disconnectFromMongo;

<强> partImport.js

const path = require("path");
const parseCSV = require("./helpers/parseCSV");
const {
  connectToMongo,
  disconnectFromMongo,
  bulkImportToMongo
} = require("./helpers/mongodb");

const filePath = path.join(__dirname, "../../data/parts.csv");
const options = {
  delimiter: ";",
  noheader: true,
  headers: [
    "facility",
    "partNumber",
    "partName",
    "partDescription",
    "netWeight",
    "customsTariff"
  ]
};

connectToMongo("autoMDM");
parseCSV(filePath, options)
  .then(records => {
    console.time("Time to import parsed objects to db");
    return bulkImportToMongo(records, "parts.js");
  })
  /*   .then(result =>
    console.log("Total batches inserted: ", result, result.length)
  ) */
  .then(() => {
    disconnectFromMongo();
    console.timeEnd("Time to import parsed objects to db");
  })
  .catch(error => console.log(error));