我有一个如下所示的作者集合:
{ "_id" : ObjectId("332ddf"),
"authors" : "Mark Twain",
"publisher" : "NY",
"books" : [ "The Adventures of Tom Sawyer", "The Prince and the Pauper" ] }
{ "_id" : ObjectId("4ef342"),
"authors" : "F. Scott Fitzgerald",
"publisher" : "NY",
"books" : [ "The Adventures of Tom Sawyer",
"The Great Gatsby", "This Side of Paradise" ] }
我正在尝试使用聚合来创建新集合。为了聚合,我尝试了:
// Attempted pipeline: emit one { book, authors } document per book title.
db.author.aggregate([
// Produce one output document per element of the "books" array.
{ "$unwind": "$books"},
// Reshape each document: "book" is the unwound title and "authors" is a
// one-element array wrapping the author name.
// NOTE(review): _id is not excluded here, so documents unwound from the same
// source document presumably keep the same _id — a likely cause of the
// duplicate key error once the results are written to a collection; confirm.
{ $project:
{
book: "$books",
authors: ["$authors"]
}
}
])
但在展开(unwind)books 数组时,我得到了 duplicate key error collection 错误。
如何使用聚合获得所需的集合?
答案 0 :(得分:2)
实际上你不需要 $project 阶段。您需要做的就是使用 $unwind 运算符对 "books" 数组进行“反规范化”(denormalize),
然后使用 $group 阶段按 "book" 对文档进行分组,并使用 $push 累加器运算符返回 "authors" 数组:
// Build one result document per distinct book title: _id is the title and
// "authors" collects the author of every source document listing that title.
var cursor = db.getCollection("authors").aggregate([
  // Flatten: one document per (author, book) pair.
  { $unwind: "$books" },
  // Regroup by title, accumulating the author of each pair.
  {
    $group: {
      _id: "$books",
      authors: { $push: "$authors" }
    }
  }
]);
聚合查询产生如下内容:
{ "_id" : "This Side of Paradise", "authors" : [ "F. Scott Fitzgerald" ] }
{ "_id" : "The Great Gatsby", "authors" : [ "F. Scott Fitzgerald" ] }
{ "_id" : "The Prince and the Pauper", "authors" : [ "Mark Twain" ] }
{
"_id" : "The Adventures of Tom Sawyer",
"authors" : [
"Mark Twain",
"F. Scott Fitzgerald"
]
}
顺便说一句,预期结果中的复合 _id 字段没有多大意义,所以我删除了它;但如果您确实觉得需要它,
只需在 $group 阶段中将 "_id": "$books" 替换为 "_id": { "book": "$books" } 即可。
现在让我们看看如何插入到另一个集合中。在下面的评论中提到的一种方法是使用$out
运算符,它必须是聚合管道中的最后一个阶段。
{ "$out": "newCollection" }
如果您需要在将文档插入新集合之前先在客户端处理结果,则应使用“批量”(bulk)操作。
// Copy the aggregation results from `cursor` into `newCollection` in batches
// of at most 1000 operations using bulkWrite.
var requests = [];
cursor.forEach(function(document) {
  // Do something with the document and push a new operation to the stack.
  // bulkWrite expects each insert in the form { insertOne: { document: <doc> } }.
  requests.push({ insertOne: { document: document } });
  // Flush once 1000 operations are queued so the pending batch stays small.
  if (requests.length === 1000) {
    db.newCollection.bulkWrite(requests);
    requests = [];
  }
});
// Flush whatever is left. Skipped when nothing is queued (no results, or a
// result count that is an exact multiple of 1000) because an empty batch is
// rejected (e.g. mongosh reports "Batch cannot be empty").
if (requests.length > 0) {
  db.newCollection.bulkWrite(requests);
}
// Alternative using the legacy Bulk() API, for shells/servers that do not
// provide bulkWrite. Inserts every document from `cursor` into
// `newCollection`, executing the unordered bulk after every 1000 inserts.
// NOTE: `cursor` must be fresh; a cursor that was already iterated yields
// no documents.
var bulk = db.newCollection.initializeUnorderedBulkOp();
var count = 0;
cursor.forEach(function(document) {
  // Do something with the document and push a new operation to the stack
  bulk.insert(document);
  count++;
  if (count % 1000 === 0) {
    bulk.execute();
    // An executed bulk cannot be reused, so start a fresh one.
    bulk = db.newCollection.initializeUnorderedBulkOp();
  }
});
// Execute only when inserts are still queued. Testing `count > 0` alone would
// also fire when count is an exact multiple of 1000, where the freshly
// initialised bulk is empty and execute() fails.
if (count % 1000 !== 0) {
  bulk.execute();
}