我的收藏有以下记录:
import random
import time


def example():
    """Sleep in 1-second steps until a random draw from [0, 10] hits 1.

    random.randint(0, 10) is inclusive on both ends, so each iteration
    terminates the loop with probability 1/11 (about 11 tries on average).
    """
    while random.randint(0, 10) != 1:
        time.sleep(1)
    # Python 3 print function; the original Python 2 statement form
    # (`print "down"`) is a SyntaxError on Python 3.
    print("down")


if __name__ == "__main__":
    # Guarded so importing this module does not start the sleep loop.
    example()
在插入新记录之前,我需要检查是否已存在具有相同field1和field2值的记录。然后丢弃该请求(如果已存在)。如果我一次插入一条记录,我可以设法做到这一点。如果我正在进行批量插入(即插入文档数组时),我该如何处理?
我需要查找{ "_id":"1", "field1":"foo","field2":"xyz", "field3":"something" ...}
{ "_id":"2", "field1":"bar","field2":"xyz", "field3":"something" ...}
{ "_id":"3", "field1":"foo","field2":"abc", "field3":"something" ...}
{ "_id":"4", "field1":"bar","field2":"lmn", "field3":"something" ...}
组合的数组
EX:
[field1, field2]
预期结果:
queryArray=[ { "field1":"foo","field2":"xyz"},
{ "field1":"bar","field2":"lmn"} ]
答案 0(得分:4)
在两个字段上创建唯一复合索引(unique compound index):
db.collection.createIndex( { "field1": 1, "field2": 1 }, { "unique": true } )
使用 insertMany()
方法执行批量插入,但将ordered
选项设置为false,因为这将确保尝试所有写入操作,即使存在错误。有序操作在发生错误后停止,而无序操作继续处理队列中的任何剩余写入操作:
// Documents to insert; any document whose (field1, field2) pair already
// exists will be rejected by the unique compound index.
var queryArray = [
    { "field1": "foo", "field2": "xyz" },
    { "field1": "bar", "field2": "lmn" }
];

// Unordered bulk insert: every write is attempted even if earlier ones
// fail with duplicate-key errors; the error is printed rather than thrown.
try {
    db.collection.insertMany(queryArray, { "ordered": false });
} catch (e) {
    print(e);
}
这将输出文件
{
"acknowledged" : true,
"insertedIds" : [
ObjectId("57443e6fa58e5654f3a6c5ae"),
ObjectId("57443e6fa58e5654f3a6c5af")
]
}
如果操作在启用 write concern(写入确认)的情况下运行,返回的文档中 acknowledged 字段为 true;如果禁用了 write concern,则为 false。文档中还包含每个成功插入文档的 _id
数组。
因为queryArray中的文档不包含_id
,所以mongod会为每个文档创建并添加_id字段,并为其指定唯一的ObjectId值。由于您在两个字段field1
和field2
上强制执行唯一性,因此上面显示了尝试写入,因为操作是无序的,因此它继续处理任何剩余的写操作。
假设您已删除了有序选项(默认情况下它已设置为true),您将从操作中获得以下输出:
// Same documents as before; this time inserted with the default
// ordered behaviour.
var queryArray = [
    { "field1": "foo", "field2": "xyz" },
    { "field1": "bar", "field2": "lmn" }
];

// Ordered insert (the default): processing stops at the first error,
// which is caught and printed here.
try {
    db.collection.insertMany(queryArray);
} catch (e) {
    print(e);
}
控制台输出:
{
"name" : "BulkWriteError",
"message" : "write error at item 0 in bulk operation",
"ok" : undefined,
"nInserted" : 0,
"nUpserted" : 0,
"nMatched" : 0,
"nModified" : 0,
"nRemoved" : 0,
"getUpsertedIds" : function () {
return bulkResult.upserted;
},
"getUpsertedIdAt" : function (index) {
return bulkResult.upserted[index];
},
"getRawResponse" : function () {
return bulkResult;
},
"hasWriteErrors" : function () {
return bulkResult.writeErrors.length > 0;
},
"getWriteErrorCount" : function () {
return bulkResult.writeErrors.length;
},
"getWriteErrorAt" : function (index) {
if(index < bulkResult.writeErrors.length) {
return bulkResult.writeErrors[index];
}
return null;
},
"getWriteErrors" : function () {
return bulkResult.writeErrors;
},
"hasWriteConcernError" : function () {
return bulkResult.writeConcernErrors.length > 0;
},
"getWriteConcernError" : function () {
if(bulkResult.writeConcernErrors.length == 0) {
return null;
} else if(bulkResult.writeConcernErrors.length == 1) {
// Return the error
return bulkResult.writeConcernErrors[0];
} else {
// Combine the errors
var errmsg = "";
for(var i = 0; i < bulkResult.writeConcernErrors.length; i++) {
var err = bulkResult.writeConcernErrors[i];
errmsg = errmsg + err.errmsg;
// TODO: Something better
if (i != bulkResult.writeConcernErrors.length - 1) {
errmsg = errmsg + " and ";
}
}
return new WriteConcernError({ errmsg : errmsg, code : WRITE_CONCERN_FAILED });
}
},
"tojson" : function (indent, nolint) {
return tojson(bulkResult, indent, nolint);
},
"toString" : function () {
return "BulkWriteError(" + this.tojson() + ")";
},
"shellPrint" : function () {
return this.toString();
},
"hasErrors" : function () {
return this.hasWriteErrors() || this.hasWriteConcernError();
},
"toSingleResult" : function () {
if(singleBatchType == null) throw Error(
"Cannot output single WriteResult from multiple batch result");
return new WriteResult(bulkResult, singleBatchType, writeConcern);
},
"stack" : "BulkWriteError({\n\t\"writeErrors\" : [\n\t\t{\n\t\t\t\"index\" : 0,\n\t\t\t\"code\" : 11000,\n\t\t\t\"errmsg\" : \"E11000 duplicate key error index: test.collection.$field1_1_field2_1 dup key: { : \\\"foo\\\", : \\\"xyz\\\" }\",\n\t\t\t\"op\" : {\n\t\t\t\t\"_id\" : ObjectId(\"574441aea58e5654f3a6c5b6\"),\n\t\t\t\t\"field1\" : \"foo\",\n\t\t\t\t\"field2\" : \"xyz\"\n\t\t\t}\n\t\t}\n\t],\n\t\"writeConcernErrors\" : [ ],\n\t\"nInserted\" : 0,\n\t\"nUpserted\" : 0,\n\t\"nMatched\" : 0,\n\t\"nModified\" : 0,\n\t\"nRemoved\" : 0,\n\t\"upserted\" : [ ]\n})\nBulkWriteError@src/mongo/shell/bulk_api.js:372:44\nBulkWriteResult/this.toError@src/mongo/shell/bulk_api.js:335:16\nBulk/this.execute@src/mongo/shell/bulk_api.js:1162:1\nDBCollection.prototype.insertMany@src/mongo/shell/crud_api.js:279:5\n@(shell):1:7\n",
"toResult" : function () {
return new BulkWriteResult(bulkResult, singleBatchType, writeConcern);
}
}
强调返回的写入错误:
"E11000 duplicate key error index: test.collection.$field1_1_field2_1 dup key: { : \\\"foo\\\", : \\\"xyz\\\" }\"
除了 insertMany()
方法之外,您还可以尝试使用 Bulk()
API方法,尤其是您需要调用创建唯一复合索引后执行无序批量插入的initializeUnorderedBulkOp()
方法。
对于上述情况,请考虑以下示例:
// Bulk-insert queryArray in unordered batches of 1000 so that
// duplicate-key errors from the unique compound index do not stop
// the remaining writes.
db.collection('collectionName', function(err, collection) {
    if (err) {
        // Fail fast if the collection handle could not be obtained.
        console.error(err);
        return;
    }
    var bulk = collection.initializeUnorderedBulkOp();
    var counter = 0; // was an implicit global in the original; declared locally
    queryArray.forEach(function (doc) {
        bulk.insert(doc);
        counter++;
        // Flush every 1000 queued operations to bound memory usage.
        if (counter % 1000 === 0) {
            // NOTE(review): execute() is asynchronous while forEach is
            // synchronous, so inserts issued before this callback runs
            // still go to the old batch — confirm batch boundaries matter
            // for your workload before relying on exact batch sizes.
            bulk.execute(function(err, result) {
                if (err) console.error(err); // e.g. E11000 duplicate-key errors
                bulk = collection.initializeUnorderedBulkOp(); // re-initialise
            });
        }
    });
    // Clean-up remaining operations in the queue (partial last batch).
    if (counter % 1000 !== 0) {
        bulk.execute(function(err, result) {
            if (err) console.error(err); // surface duplicate errors instead of ignoring them
            console.log(result);
        });
    }
});
答案 1(得分:0)
通过https://docs.mongodb.com/manual/reference/method/Bulk.find.update/#Bulk.find.update,看起来我们可以使用批量操作方法。但是,请注意,您仍然需要为查询数组中的每个文档调用 bulk.find().upsert().updateOne(),因为您要查询并更新 queryArray 中的相应文档。
尽管如此,使用bulk可能会更好,因为您可以在准备之后一次执行查询,而不是逐个执行每个查询。
例如,使用 MongoDB shell:
> //verifying db is empty
> db.items.find()
>
> //creating my bulk update function
> var myUpdate = function (rec) { this.find(rec).upsert().updateOne( {$set: rec} ); return null; }
>
> //initializing docs array with 4 documents
> var docs =
... [
... { "_id":"1", "field1":"foo","field2":"xyz", "field3":"something31"},
... { "_id":"2", "field1":"bar","field2":"xyz", "field3":"something32"},
... { "_id":"3", "field1":"foo","field2":"abc", "field3":"something33"},
... { "_id":"4", "field1":"bar","field2":"lmn", "field3":"something34", "field4": "something44" }
... ]
>
> //initializing the bulk operation object
> var bulk = db.items.initializeUnorderedBulkOp();
>
> //checking current state of bulk object
> bulk
{ "nInsertOps" : 0, "nUpdateOps" : 0, "nRemoveOps" : 0, "nBatches" : 0 }
>
> //maping each doc in the docs array to my update function
> docs.map(myUpdate, bulk);
[ null, null, null, null ]
>
> //checking current state of bulk object
> bulk
{ "nInsertOps" : 0, "nUpdateOps" : 4, "nRemoveOps" : 0, "nBatches" : 1 }
>
>
> //now actually bulk updating the db
> bulk.execute();
BulkWriteResult({
"writeErrors" : [ ],
"writeConcernErrors" : [ ],
"nInserted" : 0,
"nUpserted" : 4,
"nMatched" : 0,
"nModified" : 0,
"nRemoved" : 0,
"upserted" : [
{
"index" : 0,
"_id" : "1"
},
{
"index" : 1,
"_id" : "2"
},
{
"index" : 2,
"_id" : "3"
},
{
"index" : 3,
"_id" : "4"
}
]
})
>
>
> //checking for newly insert docs
> db.items.find();
{ "_id" : "1", "field1" : "foo", "field2" : "xyz", "field3" : "something31" }
{ "_id" : "2", "field1" : "bar", "field2" : "xyz", "field3" : "something32" }
{ "_id" : "3", "field1" : "foo", "field2" : "abc", "field3" : "something33" }
{ "_id" : "4", "field1" : "bar", "field2" : "lmn", "field3" : "something34", "field4" : "something44" }
>
>
> //now preparing to upsert new docs (might be existing docs, per your example)
> var newDocs =
... [
... { "field1":"foo","field2":"xyz"},
... { "field1":"bar","field2":"lmn"}
... ]
>
>
> //initializing the bulk object
> var bulk = db.items.initializeUnorderedBulkOp();
>
> //mapping the myUpdate function to each new document in the newDocs array
> newDocs.map(myUpdate, bulk);
[ null, null ]
>
>
> //now actually bulk updating the db
> bulk.execute();
BulkWriteResult({
"writeErrors" : [ ],
"writeConcernErrors" : [ ],
"nInserted" : 0,
"nUpserted" : 0,
"nMatched" : 2,
"nModified" : 0,
"nRemoved" : 0,
"upserted" : [ ]
})
>
> //notice how the nMatched = 2, and nModified = 0
>
> //verifying that nothing changed in the db
> db.items.find( {$or: newDocs} )
{ "_id" : "1", "field1" : "foo", "field2" : "xyz", "field3" : "something31" }
{ "_id" : "4", "field1" : "bar", "field2" : "lmn", "field3" : "something34", "field4" : "something44" }
>
>