insertMany处理重复错误

时间:2017-11-06 16:09:18

标签: node.js mongodb mongoose

我想将(对象数组)批量插入到我的文档中,但我想防止重复记录,无法找到使用insertMany执行此操作的方法。

// Bulk-insert two songs and send whatever insertMany() resolves with back as JSON.
// NOTE(review): `res` is not defined in this snippet — presumably the response
// object of an enclosing HTTP handler; confirm against the surrounding code.
const Song = require('../models/song');
// The second object literal was missing its closing brace, which made the
// whole statement a syntax error.
Song.insertMany([{id:1, name:"something"},{id:2, name:"something else"}])
    .then((result) => {
      res.json({
        result
      })
    })

以上代码有效,但如果记录相同,它仍会被插入。

1 个答案:

答案 0 :(得分:1)

实际上,只要涉及“唯一键”,MongoDB“默认”就不会创建重复数据,_id 就是这样的键(mongoose 把它别名为 id,但 insertMany() 会忽略这个别名,所以需要小心)。不过这背后还有更多细节,你确实需要了解。

这里的根本问题是:“mongoose”的 insertMany() 和底层驱动程序的实现目前都有点“borked”(有毛病),这还算是客气的说法。驱动程序在“Bulk”(批量)操作中传递错误响应的方式不太一致,而“mongoose”又没有真正“在正确的地方”查找实际的错误信息,使问题更加复杂。

你缺少的“关键”部分,是给“Bulk”操作加上 { ordered: false } 选项,而 .insertMany() 只是对该操作的一层封装调用。设置此项可确保这“一批”请求真正被“完整”提交,并且在发生错误时不会停止执行。

但是,由于“mongoose”不能很好地处理这种情况(驱动程序的处理也不够“一致”),我们实际上需要在“响应(result)”中查找可能的“错误”,而不是依赖底层回调的“错误(err)”参数。

作为示范:

// Load mongoose and take the Schema constructor off it.
const mongoose = require('mongoose');
const { Schema } = mongoose;

// Use the native Promise implementation and switch on mongoose's 'debug' setting.
mongoose.Promise = global.Promise;
mongoose.set('debug', true);

// Connection string and connection options used by this listing.
const uri = 'mongodb://localhost/test';
const options = { useMongoClient: true };

// A song document: numeric _id supplied by the caller, plus a string name.
const songSchema = new Schema({ _id: Number, name: String });

// Model registered under the name 'Song'.
const Song = mongoose.model('Song', songSchema);

/**
 * Print a value to the console as JSON indented by two spaces.
 *
 * @param {*} data - value to serialise and print
 */
function log(data) {
  const pretty = JSON.stringify(data, null, 2);
  console.log(pretty);
}

// Sample batch: the third entry repeats _id 2, so this listing exercises the
// duplicate key path while the remaining documents are still written.
const docs = [
  { _id: 1, name: "something" },
  { _id: 2, name: "something else" },
  { _id: 2, name: "something else entirely" },
  { _id: 3, name: "another thing" }
];

// Write-error code that is tolerated below (duplicate key).
const DUPLICATE_KEY = 11000;

/**
 * Insert `documents` unordered through the model's underlying collection,
 * tolerating duplicate key write errors.
 *
 * @param {Object[]} documents - documents to insert
 * @returns {Promise<Object>} resolves with the bulk result when every reported
 *   write error (if any) is a duplicate key error; rejects when there is no
 *   result at all or when any other kind of write error is reported
 */
function insertSkippingDuplicates(documents) {
  return new Promise((resolve, reject) => {
    Song.collection.insertMany(documents, { ordered: false }, (err, result) => {
      // Without a result there is nothing to inspect; reject instead of
      // throwing a TypeError on result.hasWriteErrors() inside the callback.
      if (!result) {
        reject(err || new Error('insertMany returned neither an error nor a result'));
        return;
      }

      if (result.hasWriteErrors()) {
        const writeErrors = result.getWriteErrors();

        // Log something just for the sake of it
        console.log('Has Write Errors:');
        log(writeErrors);

        // Anything other than a duplicate key is a real failure. `err` can be
        // empty while the errors are carried by `result`, so fall back to an
        // Error that says what went wrong rather than rejecting with nothing.
        const unexpected = writeErrors.filter(({ code }) => code !== DUPLICATE_KEY);
        if (unexpected.length > 0) {
          reject(err || new Error(
            `insertMany failed with ${unexpected.length} write error(s) other than duplicate key`
          ));
          return;   // do not fall through to resolve()
        }
      }

      resolve(result);    // Only duplicate key errors (or none at all)
    });
  });
}

// Connect, empty the collection, run the tolerant insert, then list what was
// actually stored. The connection is closed on both the success and failure path.
mongoose.connect(uri,options)
  .then( () => Song.remove() )
  .then( () => insertSkippingDuplicates(docs) )
  .then( results => { log(results); return true; } )
  .then( () => Song.find() )
  .then( songs => { log(songs); mongoose.disconnect() })
  .catch( err => { console.error(err); mongoose.disconnect(); } );

或者写得更简洁一些,因为当前的 LTS 版 node.js 已经支持 async/await:

// Load mongoose and take the Schema constructor off it.
const mongoose = require('mongoose');
const { Schema } = mongoose;

// Use the native Promise implementation and switch on mongoose's 'debug' setting.
mongoose.Promise = global.Promise;
mongoose.set('debug', true);

// Connection string and connection options used by this listing.
const uri = 'mongodb://localhost/test';
const options = { useMongoClient: true };

// A song document: numeric _id supplied by the caller, plus a string name.
const songSchema = new Schema({ _id: Number, name: String });

// Model registered under the name 'Song'.
const Song = mongoose.model('Song', songSchema);

/**
 * Write a value to the console as two-space-indented JSON.
 *
 * @param {*} data - value to serialise and print
 */
function log(data) {
  const serialised = JSON.stringify(data, null, 2);
  console.log(serialised);
}

// Sample batch: the third entry repeats _id 2, so this listing exercises the
// duplicate key path while the remaining documents are still written.
const docs = [
  { _id: 1, name: "something" },
  { _id: 2, name: "something else" },
  { _id: 2, name: "something else entirely" },
  { _id: 3, name: "another thing" }
];

// Connect, empty the collection, insert the batch while tolerating duplicate
// key errors, then list what was actually stored.
(async function() {

  // Write-error code that is tolerated below (duplicate key).
  const DUPLICATE_KEY = 11000;

  try {
    await mongoose.connect(uri,options);

    await Song.remove();

    // The callback API is wrapped by hand so that the bulk result — which
    // carries both the success counts and the write errors — can be inspected.
    const results = await new Promise((resolve,reject) => {
      Song.collection.insertMany(docs,{ ordered: false },(err,result) => {
        // Without a result there is nothing to inspect; reject instead of
        // throwing a TypeError on result.hasWriteErrors() inside the callback.
        if (!result) {
          reject(err || new Error('insertMany returned neither an error nor a result'));
          return;
        }

        if (result.hasWriteErrors()) {
          const writeErrors = result.getWriteErrors();

          // Log something just for the sake of it
          console.log('Has Write Errors:');
          log(writeErrors);

          // Anything other than a duplicate key is a real failure. `err` can
          // be empty while the errors are carried by `result`, so fall back to
          // an Error that says what went wrong.
          const unexpected = writeErrors.filter(({ code }) => code !== DUPLICATE_KEY);
          if (unexpected.length > 0) {
            reject(err || new Error(
              `insertMany failed with ${unexpected.length} write error(s) other than duplicate key`
            ));
            return;   // do not fall through to resolve()
          }
        }

        resolve(result);    // Only duplicate key errors (or none at all)
      });
    });

    log(results);

    const songs = await Song.find();
    log(songs);

  } catch(e) {
    console.error(e);
  } finally {
    // Always close the connection, whether or not the steps above failed.
    await mongoose.disconnect();
  }

})()

无论用哪种写法,你都会得到相同的结果:所有写入都会继续进行,而我们有意“忽略”了与“重复键”相关的错误,即错误代码 11000。所谓“安全处理”,就是我们预期会出现这类错误并将其丢弃,同时留意是否存在我们真正需要关注的“其他错误”。我们还可以看到其余代码继续执行,并通过随后的 .find() 调用列出了实际插入的所有文档:

Mongoose: songs.remove({}, {})
Mongoose: songs.insertMany([ { _id: 1, name: 'something' }, { _id: 2, name: 'something else' }, { _id: 2, name: 'something else entirely' }, { _id: 3, name: 'another thing' } ], { ordered: false })
Has Write Errors:
[
  {
    "code": 11000,
    "index": 2,
    "errmsg": "E11000 duplicate key error collection: test.songs index: _id_ dup key: { : 2 }",
    "op": {
      "_id": 2,
      "name": "something else entirely"
    }
  }
]
{
  "ok": 1,
  "writeErrors": [
    {
      "code": 11000,
      "index": 2,
      "errmsg": "E11000 duplicate key error collection: test.songs index: _id_ dup key: { : 2 }",
      "op": {
        "_id": 2,
        "name": "something else entirely"
      }
    }
  ],
  "writeConcernErrors": [],
  "insertedIds": [
    {
      "index": 0,
      "_id": 1
    },
    {
      "index": 1,
      "_id": 2
    },
    {
      "index": 2,
      "_id": 2
    },
    {
      "index": 3,
      "_id": 3
    }
  ],
  "nInserted": 3,
  "nUpserted": 0,
  "nMatched": 0,
  "nModified": 0,
  "nRemoved": 0,
  "upserted": [],
  "lastOp": {
    "ts": "6485492726828630028",
    "t": 23
  }
}
Mongoose: songs.find({}, { fields: {} })
[
  {
    "_id": 1,
    "name": "something"
  },
  {
    "_id": 2,
    "name": "something else"
  },
  {
    "_id": 3,
    "name": "another thing"
  }
]

那么为什么要这样处理呢?原因是底层调用实际上会同时返回 err 和 result(如回调实现中所示),但返回的内容并不一致。这样做的主要目的是让你真正拿到“result”,它不仅包含成功操作的结果,还包含错误信息。

除错误信息外,还有 nInserted: 3,表示这“一批”中实际写入了多少条。这里的 insertedIds 基本可以忽略,因为本测试中 _id 值是我们自己提供的。如果是另一个带有“unique”约束的属性导致了错误,那么这里就只会包含实际写入成功的那些值。这有点误导人,但很容易自己测试验证。

如前所述,问题在于“不一致性”,这可以用另一个例子来证明(使用 async/await 只是为了让代码更简短):

// Load mongoose and take the Schema constructor off it.
const mongoose = require('mongoose');
const { Schema } = mongoose;

// Use the native Promise implementation and switch on mongoose's 'debug' setting.
mongoose.Promise = global.Promise;
mongoose.set('debug', true);

// Connection string and connection options used by this listing.
const uri = 'mongodb://localhost/test';
const options = { useMongoClient: true };

// A song document: numeric _id supplied by the caller, plus a string name.
const songSchema = new Schema({ _id: Number, name: String });

// Model registered under the name 'Song'.
const Song = mongoose.model('Song', songSchema);

/**
 * Dump a value to the console as JSON with a two-space indent.
 *
 * @param {*} data - value to serialise and print
 */
function log(data) {
  const text = JSON.stringify(data, null, 2);
  console.log(text);
}

// Sample batch with one repeated _id (2). The commented-out last entry can be
// enabled to add a second duplicate (_id 4).
const docs = [
  { _id: 1, name: "something" },
  { _id: 2, name: "something else" },
  { _id: 2, name: "something else entirely" },
  { _id: 3, name: "another thing" },
  { _id: 4, name: "different thing" },
  //{ _id: 4, name: "different thing again" }
];

// Same flow as before, but going through the model's own insertMany() so the
// shape of the rejected error can be observed.
(async function() {

  // True when an error-like object carries the duplicate key code.
  const isDuplicateKey = ({ code }) => code === 11000;

  try {
    await mongoose.connect(uri, options);
    await Song.remove();

    try {
      const inserted = await Song.insertMany(docs, { ordered: false });
      console.log('what? no result!');
      log(inserted);   // not reached when the batch contains a duplicate
    } catch (insertError) {
      // Log something for the sake of it
      console.log('Has write Errors:');

      // The rejection does not have one fixed shape, hence the two paths:
      // either a `writeErrors` list is present, or the error itself carries
      // the code. Anything that is not a duplicate key is rethrown.
      if (insertError.hasOwnProperty('writeErrors')) {
        log(insertError.writeErrors);
        if (!insertError.writeErrors.every(isDuplicateKey)) {
          throw insertError;
        }
      } else {
        if (!isDuplicateKey(insertError)) {
          throw insertError;
        }
        log(insertError);
      }
    }

    const songs = await Song.find();
    log(songs);

  } catch (e) {
    console.error(e);
  } finally {
    mongoose.disconnect();
  }

})()

做的事情完全相同,但请注意这里记录下来的错误是什么样子:

Has write Errors:
{
  "code": 11000,
  "index": 2,
  "errmsg": "E11000 duplicate key error collection: test.songs index: _id_ dup key: { : 2 }",
  "op": {
    "__v": 0,
    "_id": 2,
    "name": "something else entirely"
  }
}

请注意,这里没有任何“成功”信息,尽管代码同样继续执行了后续的 .find() 并输出了列表。这是因为该实现只在 Promise 被拒绝时“抛出错误”,而从不传递实际的 result 部分。因此,即使我们指定了 ordered: false,也无法得知哪些操作已经完成,除非像最初的代码示例那样自己包装回调并实现这段逻辑。

另一个重要的“不一致”发生在出现“多个错误”时。取消 _id: 4 那条附加数据的注释后,我们会得到:

Has write Errors:
[
  {
    "code": 11000,
    "index": 2,
    "errmsg": "E11000 duplicate key error collection: test.songs index: _id_ dup key: { : 2 }",
    "op": {
      "__v": 0,
      "_id": 2,
      "name": "something else entirely"
    }
  },
  {
    "code": 11000,
    "index": 5,
    "errmsg": "E11000 duplicate key error collection: test.songs index: _id_ dup key: { : 4 }",
    "op": {
      "__v": 0,
      "_id": 4,
      "name": "different thing again"
    }
  }
]

在这里可以看到,代码根据 e.writeErrors 是否存在进行了“分支”,而当只有一个错误时该属性并不存在。相比之下,前面的 result 响应对象始终同时具有 hasWriteErrors() 和 getWriteErrors() 方法,无论是否出现错误。因此它是更一致的接口,这也是你应该使用它、而不是只检查 err 响应的原因。

MongoDB 3.x驱动程序修复

这一行为实际上会在即将发布的驱动程序 3.x 版本中得到修复,该版本将与 MongoDB 3.6 服务器版本同步发布。变化在于:err 响应会更接近标准的 result,但它当然会被归类为 BulkWriteError,而不是目前的 MongoError。

在该版本发布之前(当然还要等这一依赖及其改动传递到“mongoose”的实现中),建议的做法是记住:有用的信息在 result 中,而不在 err 中。实际上,你的代码最好先在 result 中检查 hasErrors(),然后再回退去检查 err,以便兼容驱动程序即将做出的改动。

  

作者注:本文的大部分内容及相关阅读材料,其实已经在《Function insertMany() unordered: proper way to get both the errors and the result?》和《MongoDB Node.js native driver silently swallows bulkWrite exception》这两个问题中回答过。之所以在这里重复并加以详述,是希望大家最终能明白:在当前的驱动程序实现中,异常就应该这样处理。只要你查看正确的位置并编写相应的处理代码,它确实是有效的。