我有两个想要合并的聚合操作。第一个操作返回,例如:
{ "_id" : "Colors", "count" : 12 }
{ "_id" : "Animals", "count" : 6 }
并且第二个操作返回,例如:
{ "_id" : "Red", "count" : 10 }
{ "_id" : "Blue", "count" : 9 }
{ "_id" : "Green", "count" : 9 }
{ "_id" : "White", "count" : 7 }
{ "_id" : "Yellow", "count" : 7 }
{ "_id" : "Orange", "count" : 7 }
{ "_id" : "Black", "count" : 5 }
{ "_id" : "Goose", "count" : 4 }
{ "_id" : "Chicken", "count" : 3 }
{ "_id" : "Grey", "count" : 3 }
{ "_id" : "Cat", "count" : 3 }
{ "_id" : "Rabbit", "count" : 3 }
{ "_id" : "Duck", "count" : 3 }
{ "_id" : "Turkey", "count" : 2 }
{ "_id" : "Elephant", "count" : 2 }
{ "_id" : "Shark", "count" : 2 }
{ "_id" : "Fish", "count" : 2 }
{ "_id" : "Tiger", "count" : 2 }
{ "_id" : "Purple", "count" : 1 }
{ "_id" : "Pink", "count" : 1 }
如何组合这两项操作以实现以下目标?
{ "_id" : "Colors", "count" : 12, "items" :
[
{ "_id" : "Red", "count" : 10 },
{ "_id" : "Blue", "count" : 9 },
{ "_id" : "Green", "count" : 9 },
{ "_id" : "White", "count" : 7 },
{ "_id" : "Yellow", "count" : 7 },
{ "_id" : "Orange", "count" : 7 },
{ "_id" : "Black", "count" : 5 },
{ "_id" : "Grey", "count" : 3 },
{ "_id" : "Purple", "count" : 1 },
{ "_id" : "Pink", "count" : 1 }
]
},
{ "_id" : "Animals", "count" : 6, "items" :
[
{ "_id" : "Goose", "count" : 4 },
{ "_id" : "Chicken", "count" : 3 },
{ "_id" : "Cat", "count" : 3 },
{ "_id" : "Rabbit", "count" : 3 },
{ "_id" : "Duck", "count" : 3 },
{ "_id" : "Turkey", "count" : 2 },
{ "_id" : "Elephant", "count" : 2 },
{ "_id" : "Shark", "count" : 2 },
{ "_id" : "Fish", "count" : 2 },
{ "_id" : "Tiger", "count" : 2 }
]
}
模式
/**
 * Mongoose schema for a titled list of string items.
 *
 * Fields:
 *   created - creation timestamp, defaults to the time the document is built.
 *   title   - trimmed list title; validation fails with 'Title cannot be blank'
 *             when it is missing or empty.
 *   items   - array of trimmed strings (mongoose gives arrays an implicit
 *             default of []).
 *   creator - ObjectId reference to a 'User' document.
 */
var ListSchema = new Schema({
  created: {
    type: Date,
    default: Date.now
  },
  title: {
    type: String,
    default: '',
    trim: true,
    required: 'Title cannot be blank'
  },
  // Declared as an array of trimmed strings. The previous definition
  // (`type: Array, default: [String]`) made every new document default to an
  // array containing the String constructor, and `trim` had no effect on an
  // untyped array.
  items: {
    type: [{ type: String, trim: true }]
  },
  creator: {
    type: Schema.ObjectId,
    ref: 'User'
  }
});
操作1
// Operation 1: count the documents per title, most frequent title first.
db.lists.aggregate([
  {
    $group: {
      _id: "$title",       // one bucket per distinct title
      count: { $sum: 1 }   // number of documents in that bucket
    }
  },
  {
    $sort: { count: -1 }
  }
])
操作2
// Operation 2: count how often each individual item occurs across all lists,
// most frequent item first.
db.lists.aggregate([
  // Emit one document per element of the "items" array.
  { $unwind: "$items" },
  {
    $group: {
      _id: "$items",       // one bucket per distinct item value
      count: { $sum: 1 }   // occurrences of that item
    }
  },
  {
    $sort: { count: -1 }
  }
])
答案 0 :(得分:3)
这实际上取决于您希望在响应中得到什么样的结果。您所问的内容似乎表明您想要的是"分面计数(facet counts)"式的结果,这一点我稍后再谈。
就基本结果而言,下面这种做法没有任何问题:
// Two-pass grouping: count every (type, name) pair first, then roll those
// counts up per type while collecting the per-name counts into an array.
// `callback` is supplied by the surrounding code (not shown here).
Thing.aggregate(
  [
    // Pass 1: finest granularity, one bucket per (type, name) combination.
    {
      "$group": {
        "_id": { "type": "$type", "name": "$name" },
        "count": { "$sum": 1 }
      }
    },
    // Pass 2: one bucket per type; sum the pair counts and keep each
    // name with its own count in "names".
    {
      "$group": {
        "_id": "$_id.type",
        "count": { "$sum": "$count" },
        "names": {
          "$push": { "name": "$_id.name", "count": "$count" }
        }
      }
    }
  ],
  function(err, results) {
    // Pretty-print the grouped documents, then hand any error to the caller.
    console.log(JSON.stringify(results, null, 2));
    callback(err);
  }
)
这应该会给出如下结果:
[
{
"_id": "colours",
"count": 50102,
"names": [
{ "name": "Green", "count": 9906 },
{ "name": "Yellow", "count": 10093 },
{ "name": "Red", "count": 10083 },
{ "name": "Orange", "count": 9997 },
{ "name": "Blue", "count": 10023 }
]
},
{
"_id": "animals",
"count": 49898,
"names": [
{ "name": "Tiger", "count": 9710 },
{ "name": "Lion", "count": 10058 },
{ "name": "Elephant", "count": 10069 },
{ "name": "Monkey", "count": 9963 },
{ "name": "Bear", "count": 10098 }
]
}
]
这里最基本的方法就是简单地分两个阶段执行 $group:第一阶段按键的组合聚合到最低(最细粒度)的分组级别,然后再次执行 $group,在最高(最粗粒度)的分组级别上把总数"加起来",同时把较低级别的结果添加到一个项目数组中。
但这并不像"分面计数(facet counts)"那样把各项结果"分开"。要做到那一点会稍微复杂一些,也更疯狂一些。先来看例子,下面的管道会产生后文所示的输出:
// Builds a single "facets" document: the per-type counts and the per-name
// counts end up in two independent arrays, next to an empty "data" array.
// `callback` is supplied by the surrounding code (not shown here).
Thing.aggregate(
  [
    // Count every (type, name) combination.
    { "$group": {
      "_id": {
        "type": "$type",
        "name": "$name"
      },
      "count": { "$sum": 1 }
    }},
    // Roll up per type, keeping the per-name counts in an array.
    { "$group": {
      "_id": "$_id.type",
      "count": { "$sum": "$count" },
      "names": {
        "$push": { "name": "$_id.name", "count": "$count" }
      }
    }},
    // Collapse everything into one document: "types" holds the type counts,
    // "names" is an array of the per-type name arrays.
    { "$group": {
      "_id": null,
      "types": {
        "$push": {
          "type": "$_id", "count": "$count"
        }
      },
      "names": { "$push": "$names" }
    }},
    // "names" is an array of arrays, so it is unwound twice to reach the
    // individual { name, count } entries.
    { "$unwind": "$names" },
    { "$unwind": "$names" },
    // Re-group on the (constant) types array to gather the flattened names.
    { "$group": {
      "_id": "$types",
      "names": { "$push": "$names" }
    }},
    // Shape the final response document.
    { "$project": {
      "_id": 0,
      "facets": {
        "types": "$_id",
        "names": "$names"
      },
      "data": { "$literal": [] }
    }}
  ],
  function(err,results) {
    // On failure `results` is not an array; report the error instead of
    // throwing a TypeError while indexing it.
    if (err) return callback(err);
    console.log(JSON.stringify(results[0], undefined, 2));
    callback(err);
  }
);
虽然这是"可能"的,但为了生成这种输出格式而在管道中进行的这种"杂耍"并不高效。与第一个示例相比,这里有大量开销仅仅是为了把结果拆分到各自的数组中,并使其独立于分组键。要生成的"分面"越多,这显然会变得越复杂。
正如输出中所暗示的,人们在要求"分面计数"时,通常还希望响应中除了聚合得到的分面之外,也包含结果"数据"本身(很可能是分页的)。因此,进一步的复杂性在这里应该很明显:
{
"facets": {
"types": [
{ "type": "colours", "count": 50102 },
{ "type": "animals", "count": 49898 }
],
"names": [
{ "name": "Green", "count": 9906 },
{ "name": "Yellow", "count": 10093 },
{ "name": "Red", "count": 10083 },
{ "name": "Orange", "count": 9997 },
{ "name": "Blue", "count": 10023 },
{ "name": "Tiger", "count": 9710 },
{ "name": "Lion", "count": 10058 },
{ "name": "Elephant", "count": 10069 },
{ "name": "Monkey", "count": 9963 },
{ "name": "Bear", "count": 10098 }
]
},
"data": []
}
这种操作基本上要求把每一条数据都"塞进"一个单独的对象中。在大多数情况下,尤其是当您希望结果中包含实际数据时(本示例使用了 100,000 条),这种方法完全不切实际,并且几乎肯定会超过 16MB 的 BSON 文档大小限制。
在这种情况下,如果您希望在响应中同时返回结果以及这些数据的"分面",那么最好的方法是把每个聚合以及输出页面都作为单独的查询操作来运行,并以"流(stream)"的方式把输出的 JSON(或其他格式)返回给接收端客户端。
作为一个自包含的例子:
{ "$group": {
"_id": null,
(...)
输出如:
/*
 * Self-contained demonstration: fills the "things" database with random
 * { type, name } documents, then writes one JSON response to stdout that
 * contains the "names" facet, the "types" facet and the first page of data.
 * Each part comes from its own query and is streamed out as it arrives, so
 * the whole response is never held in a single document.
 * Progress messages go to stderr so they do not mix with the JSON.
 */
var async = require('async'),
    mongoose = require('mongoose'),
    Schema = mongoose.Schema;

mongoose.connect('mongodb://localhost/things');

// Sample vocabulary: every key is a "type", its array holds the possible "name" values.
var data = {
      "colours": [
        "Red","Blue","Green","Yellow","Orange"
      ],
      "animals": [
        "Lion","Tiger","Bear","Elephant","Monkey"
      ]
    },
    dataKeys = Object.keys(data);

var TOTAL_DOCS = 100000,    // number of sample documents to insert
    INSERT_BATCH = 1000;    // bulk operations sent to the server per execute()

var thingSchema = new Schema({
  "name": String,
  "type": String
});

var Thing = mongoose.model( 'Thing', thingSchema );

var writer = process.stdout;

// Returns a uniformly chosen element of `list`.
function pickRandom(list) {
  return list[Math.floor(Math.random() * list.length)];
}

// Passes a streamed document through unchanged.
function identity(doc) {
  return doc;
}

// Reshapes a { _id, count } group result into { name, count }.
function toNameCount(doc) {
  return { "name": doc._id, "count": doc.count };
}

// Starts a cursor-based aggregation that counts documents per value of `field`.
function aggregateCounts(field) {
  return Thing.collection.aggregate(
    [
      { "$group": {
        "_id": field,
        "count": { "$sum": 1 }
      }}
    ],
    {
      "cursor": {
        "batchSize": 1000
      }
    }
  );
}

/*
 * Writes every document emitted by `stream` as one element of the JSON array
 * property `key`, then writes `closing` and calls `done`.
 *
 *   stream    - readable stream of documents (emits "data", "end", "error")
 *   key       - property name of the array in the response
 *   transform - maps each streamed document to the value that is serialized
 *   closing   - text that terminates the array (and any enclosing object)
 *   done      - called exactly once, with the stream error if one occurred
 */
function writeJsonArray(stream, key, transform, closing, done) {
  var opening = " \"" + key + "\": [",
      written = 0,
      finished = false;

  function finish(err) {
    if (finished) return;
    finished = true;
    done(err);
  }

  stream.on("data",function(doc) {
    stream.pause();
    writer.write( written === 0 ? opening + "\n" : ",\n" );
    writer.write(" " + JSON.stringify(transform(doc)));
    written++;
    stream.resume();
  });

  // Without this listener a failing query would never reach `done` and the
  // script would hang.
  stream.on("error",finish);

  stream.on("end",function() {
    // An empty result still has to open the array, otherwise the closing
    // bracket alone would produce invalid JSON.
    if ( written === 0 ) writer.write(opening);
    writer.write(closing);
    finish();
  });
}

mongoose.connection.on("open",function(err) {
  if (err) throw err;

  async.series(
    [
      // Start from an empty collection.
      function(callback) {
        process.stderr.write("removing\n");
        Thing.remove({},callback);
      },

      // Insert the random sample documents in unordered bulk batches.
      function(callback) {
        process.stderr.write("inserting\n");
        var bulk = Thing.collection.initializeUnorderedBulkOp(),
            count = 0,
            pending = 0;

        async.whilst(
          function() { return count < TOTAL_DOCS; },
          function(next) {
            var type = pickRandom(dataKeys),
                name = pickRandom(data[type]);

            bulk.insert({ "type": type, "name": name });
            count++;
            pending++;

            if ( pending < INSERT_BATCH ) return next();

            process.stderr.write('insert count: ' + count + "\n");
            bulk.execute(function(err) {
              bulk = Thing.collection.initializeUnorderedBulkOp();
              pending = 0;
              next(err);
            });
          },
          function(err) {
            // Flush a trailing partial batch so TOTAL_DOCS does not have to
            // be a multiple of INSERT_BATCH.
            if ( err || pending === 0 ) return callback(err);
            bulk.execute(function(err) {
              callback(err);
            });
          }
        );
      },

      // Stream the response: both facets first, then the first page of data.
      function(callback) {
        writer.write("{ \n \"page\": 1,\n \"pageSize\": 25,\n");
        writer.write(" \"facets\": {\n"); // open object response

        async.series(
          [
            function(next) {
              writeJsonArray(
                aggregateCounts("$name"), "names", toNameCount, "\n ],\n", next
              );
            },
            function(next) {
              writeJsonArray(
                aggregateCounts("$type"), "types", toNameCount, "\n ]\n },\n", next
              );
            },
            function(next) {
              writeJsonArray(
                Thing.find({}).limit(25).stream(), "data", identity, "\n ]\n}\n", next
              );
            }
          ],
          function(err) {
            callback(err);
          }
        );
      }
    ],
    function(err) {
      if (err) throw err;
      process.exit();
    }
  );
});
这里有一些注意事项,特别是 mongoose 的 .aggregate()
本身并不真正直接支持标准的 Node 流(stream)接口。 .cursor()
可以在聚合方法上使用{
"page": 1,
"pageSize": 25,
"facets": {
"names": [
{"name":"Red","count":10007},
{"name":"Tiger","count":10012},
{"name":"Yellow","count":10119},
{"name":"Monkey","count":9970},
{"name":"Elephant","count":10046},
{"name":"Bear","count":10082},
{"name":"Orange","count":9982},
{"name":"Green","count":10005},
{"name":"Blue","count":9884},
{"name":"Lion","count":9893}
],
"types": [
{"name":"colours","count":49997},
{"name":"animals","count":50003}
]
},
"data": [
{"_id":"55bf141f3edc150b6abdcc02","type":"animals","name":"Lion"},
{"_id":"55bf141f3edc150b6abdc81b","type":"colours","name":"Blue"},
{"_id":"55bf141f3edc150b6abdc81c","type":"colours","name":"Orange"},
{"_id":"55bf141f3edc150b6abdc81d","type":"animals","name":"Bear"},
{"_id":"55bf141f3edc150b6abdc81e","type":"animals","name":"Elephant"},
{"_id":"55bf141f3edc150b6abdc81f","type":"colours","name":"Orange"},
{"_id":"55bf141f3edc150b6abdc820","type":"colours","name":"Green"},
{"_id":"55bf141f3edc150b6abdc821","type":"animals","name":"Lion"},
{"_id":"55bf141f3edc150b6abdc822","type":"animals","name":"Monkey"},
{"_id":"55bf141f3edc150b6abdc823","type":"colours","name":"Yellow"},
{"_id":"55bf141f3edc150b6abdc824","type":"colours","name":"Yellow"},
{"_id":"55bf141f3edc150b6abdc825","type":"colours","name":"Orange"},
{"_id":"55bf141f3edc150b6abdc826","type":"animals","name":"Monkey"},
{"_id":"55bf141f3edc150b6abdc827","type":"colours","name":"Blue"},
{"_id":"55bf141f3edc150b6abdc828","type":"animals","name":"Tiger"},
{"_id":"55bf141f3edc150b6abdc829","type":"colours","name":"Red"},
{"_id":"55bf141f3edc150b6abdc82a","type":"animals","name":"Monkey"},
{"_id":"55bf141f3edc150b6abdc82b","type":"animals","name":"Elephant"},
{"_id":"55bf141f3edc150b6abdc82c","type":"animals","name":"Tiger"},
{"_id":"55bf141f3edc150b6abdc82d","type":"animals","name":"Bear"},
{"_id":"55bf141f3edc150b6abdc82e","type":"colours","name":"Yellow"},
{"_id":"55bf141f3edc150b6abdc82f","type":"animals","name":"Lion"},
{"_id":"55bf141f3edc150b6abdc830","type":"animals","name":"Elephant"},
{"_id":"55bf141f3edc150b6abdc831","type":"colours","name":"Orange"},
{"_id":"55bf141f3edc150b6abdc832","type":"animals","name":"Elephant"}
]
}
.each() 方法,但核心 API 方法所隐含的"流(stream)"在这里提供了更多的控制权,因此这里通过 .collection
获取底层驱动(driver)对象的做法更为可取。希望未来的 mongoose 版本会考虑这一点。
所以,如果您的最终目标是像这里演示的那样,在结果之外同时得到这种"分面计数",那么最合理的做法就是按所示方式分别运行每个聚合和结果查询,并以"流(stream)"的方式输出。否则,聚合管道会变得过于复杂,并且很可能超出 BSON 大小限制。