这个问题源于我仔细阅读 SO 上别人提出的问题(我经常这样做),并由此给自己提出了另一个问题。所以,除了在努力寻找解决方案的过程中得到练习之外,我发现还会冒出新的问题,比如这一个。
原帖作者(OP)尚未接受原问题(original question)的任何答案,实际上也没有澄清“他们”想要实现什么。不过我还是给出了自己的理解,包括简短形式和长形式两种解答。
最后,这个过程让我开始思考:鉴于该解决方案的长形式如此冗长,下一个 MongoDB 版本(目前预期为 2.6)是否会引入一些新功能,可以借助新增的聚合运算符来解决这个问题。
所以情况如下(示例文档):
{
"tracked_item_type" : "Software",
"tracked_item_name" : "Word",
"duration" : 9540
}
{
"tracked_item_type" : "Software",
"tracked_item_name" : "Excel",
"duration" : 4000
}
{
"tracked_item_type" : "Software",
"tracked_item_name" : "Notepad",
"duration" : 4000
}
{
"tracked_item_type" : "Site",
"tracked_item_name" : "Facebook",
"duration" : 7920
}
{
"tracked_item_type" : "Site",
"tracked_item_name" : "Twitter",
"duration" : 5555
}
{
"tracked_item_type" : "Site",
"tracked_item_name" : "Digital Blasphemy",
"duration" : 8000
}
期望的结果:每种类型的前两名,按总持续时间排序。尽管这只是一个小样本,这里的持续时间应视为对许多条目执行 $sum 得到的总和。
{
"tracked_item_type": "Site",
"tracked_item_name": "Digital Blasphemy",
"duration" : 8000
}
{
"tracked_item_type": "Site",
"tracked_item_name": "Facebook",
"duration" : 7920
}
{
"tracked_item_type": "Software",
"tracked_item_name": "Word",
"duration" : 9540
}
{
"tracked_item_type": "Software",
"tracked_item_name": "Notepad",
"duration" : 4000
}
这是解决问题的冗长方法
// Long-form pipeline: top two items per tracked_item_type, ranked by summed
// duration. The two type values ("Site", "Software") are hard-coded below.
db.collection.aggregate([

    // Group on type + name and "sum" the duration
    {"$group": {
        "_id": {
            "tracked_item_type": "$tracked_item_type",
            "tracked_item_name": "$tracked_item_name"
        },
        "duration": {"$sum": "$duration"}
    }},

    // Sort by type, then by duration descending
    {"$sort": { "_id.tracked_item_type": 1, "duration": -1 }},

    /* The fun part */

    // Collapse everything into one document holding a "sites" array and a
    // "software" array. $cond pushes null as a placeholder for every entry
    // of the other type; those nulls are stripped out by the stages below.
    {"$group": {
        "_id": null,
        "sites": {"$push":
            {"$cond": [
                {"$eq": ["$_id.tracked_item_type", "Site" ]},
                { "_id": "$_id", "duration": "$duration" },
                null
            ]}
        },
        "software": {"$push":
            {"$cond": [
                {"$eq": ["$_id.tracked_item_type", "Software" ]},
                { "_id": "$_id", "duration": "$duration" },
                null
            ]}
        }
    }},

    // Remove the null values for "software"
    {"$unwind": "$software"},
    {"$match": { "software": {"$ne": null} }},
    {"$group": {
        "_id": "$_id",
        "software": {"$push": "$software"},
        "sites": {"$first": "$sites"}
    }},

    // Remove the null values for "sites"
    {"$unwind": "$sites"},
    {"$match": { "sites": {"$ne": null} }},
    {"$group": {
        "_id": "$_id",
        "software": {"$first": "$software"},
        "sites": {"$push": "$sites"}
    }},

    // Project out software and limit to the *top* 2 results.
    // "_id" is redefined to carry the software entry. It must be listed only
    // once: duplicate keys in an object literal silently overwrite each
    // other, so an additional `"_id": 0` entry would have no effect.
    {"$unwind": "$software"},
    {"$project": {
        "_id": { "_id": "$software._id", "duration": "$software.duration" },
        "sites": "$sites"
    }},
    {"$limit" : 2},

    // Project sites, grouping multiple software per key, requires a sort
    // then limit the *top* 2 results
    {"$unwind": "$sites"},
    {"$group": {
        "_id": { "_id": "$sites._id", "duration": "$sites.duration" },
        "software": {"$push": "$_id" }
    }},
    {"$sort": { "_id.duration": -1 }},
    {"$limit": 2}
])
这是聚合停下来的地方,尚未达到最终结果——至少以我目前的理解是这样。
{
"result" : [
{
"_id" : {
"_id" : {
"tracked_item_type" : "Site",
"tracked_item_name" : "Digital Blasphemy"
},
"duration" : 8000
},
"software" : [
{
"_id" : {
"tracked_item_type" : "Software",
"tracked_item_name" : "Word"
},
"duration" : 9540
},
{
"_id" : {
"tracked_item_type" : "Software",
"tracked_item_name" : "Notepad"
},
"duration" : 4000
}
]
},
{
"_id" : {
"_id" : {
"tracked_item_type" : "Site",
"tracked_item_name" : "Facebook"
},
"duration" : 7920
},
"software" : [
{
"_id" : {
"tracked_item_type" : "Software",
"tracked_item_name" : "Word"
},
"duration" : 9540
},
{
"_id" : {
"tracked_item_type" : "Software",
"tracked_item_name" : "Notepad"
},
"duration" : 4000
}
]
}
],
"ok" : 1
}
这一切(对我来说)似乎都相当合理,结果虽然并不完整,但可以在代码中进行后处理,把它整理成所需的形式。
但这实际上更像是一个练习:我想知道能否利用聚合框架即将推出的功能(或者某种我尚未找到的其他技术)直接得到想要的结果形式。
所以,欢迎就如何实现这一目标提出任何建议或指点。
答案 0 :(得分:3)
这是一个聚合,它在每个类别中按持续时间找出前两名(遇到持续时间相同的“并列”情况时,它会任选其一,这似乎与您的示例输出一致):
// Aggregation stages that pick the top two items per tracked_item_type by
// total duration. Each stage is its own variable so the pipeline can be
// assembled in this order:
//   pregroup, sort, group1, unwind, project, match,
//   sort2, group2, unwind2, project2, match2
// Every stage is declared with `var`: a bare assignment would create an
// implicit global and throws a ReferenceError in strict-mode code.

// Total the duration for each (type, name) pair.
var pregroup = { "$group" : {
        "_id" : {
            "type" : "$tracked_item_type",
            "name" : "$tracked_item_name"
        },
        "duration" : {
            "$sum" : "$duration"
        }
    }
};

// Order by type, longest duration first within each type.
var sort = { "$sort" : { "_id.type" : 1, "duration" : -1 } };

// Per type: "num1" is the first entry seen (the longest duration when this
// stage runs after `sort`); "other" and "all" each collect every entry.
var group1 = { "$group" : {
        "_id" : "$_id.type",
        "num1" : {
            "$first" : {
                "name" : "$_id.name",
                "dur" : "$duration"
            }
        },
        "other" : {
            "$push" : {
                "name" : "$_id.name",
                "dur" : "$duration"
            }
        },
        "all" : {
            "$push" : {
                "name" : "$_id.name",
                "dur" : "$duration"
            }
        }
    }
};

// One document per candidate in "other".
var unwind = { "$unwind" : "$other" };

// Flag the candidates that are not the number-one entry itself.
var project = {
    "$project" : {
        "keep" : {
            "$ne" : [
                "$num1.name",
                "$other.name"
            ]
        },
        "num1" : 1,
        "all" : 1,
        "other" : 1
    }
};

// Drop the document where "other" is the number-one entry.
var match = { "$match" : { "keep" : true } };

// Re-sort so the best remaining candidate comes first within each type.
var sort2 = { "$sort" : { "_id" : 1, "other.dur" : -1 } };

// Per type: keep number one, take the first remaining candidate as number
// two, and carry the full "all" list along.
var group2 = { "$group" : {
        "_id" : "$_id",
        "numberOne" : {
            "$first" : "$num1"
        },
        "numberTwo" : {
            "$first" : "$other"
        },
        "all" : {
            "$first" : "$all"
        }
    }
};

// One document per entry of the type.
var unwind2 = { "$unwind" : "$all" };

// True when the current "all" entry is the number-one or number-two entry.
// Shared by both output fields of project2 so the condition is written once.
var isTopTwo = {
    "$or" : [
        { "$eq" : [ "$all.name", "$numberOne.name" ] },
        { "$eq" : [ "$all.name", "$numberTwo.name" ] }
    ]
};

// Shape the output; entries outside the top two get null name/duration.
var project2 = { "$project" : {
        "_id" : 0,
        "tracked_item_type" : "$_id",
        "tracked_item_name" : {
            "$cond" : [ isTopTwo, "$all.name", null ]
        },
        "duration" : {
            "$cond" : [ isTopTwo, "$all.dur", null ]
        }
    }
};

// Remove the entries that were nulled out by project2.
var match2 = { "$match" : { "tracked_item_name" : { "$ne" : null } } };
使用您的示例数据运行它:
// Run the stages, in this order, against the "top2" collection (each stage is
// passed as a separate argument) and call toArray() on the result.
db.top2.aggregate(pregroup, sort, group1, unwind, project, match, sort2, group2, unwind2, project2, match2).toArray()
[
{
"tracked_item_type" : "Software",
"tracked_item_name" : "Word",
"duration" : 9540
},
{
"tracked_item_type" : "Software",
"tracked_item_name" : "Notepad",
"duration" : 4000
},
{
"tracked_item_type" : "Site",
"tracked_item_name" : "Digital Blasphemy",
"duration" : 8000
},
{
"tracked_item_type" : "Site",
"tracked_item_name" : "Facebook",
"duration" : 7920
}
]
这适用于任意数量的类别(不同的 tracked_item_type 值),您无需事先知道所有类别的名称。然而,要将它推广到前三、前四、前五等,每多取一个“前 N”的名次就要再增加四个阶段——既不实用也不美观。
请为这个 jira ticket 投票(vote up this jira ticket),以便在聚合框架中获得更原生的“前 N”功能。
答案 1 :(得分:0)
我原本没想到自己会来回答,但在 2.6 的实现中确实可以找到答案,其中包含一些很棒的新集合运算符(Set Operators)。
所以我(最终)想到的是:问题归结为两个单独的列表需要变成一个,也就是如何合并这些项目,使它们都位于同一个字段下。$setUnion 运算符正是做这件事的。首先是新的片段,我将分部分解释(代码见下文): 结果文档: 因此,与我之前停下来的地方相比,该文档可以说是一种更好的结果形式,
考虑到项目不再重复,基本上只有两个列表,而我们希望把它们合并为一个。因此,现在要做的就是使用促成这种合并的运算符: 这就把我们带到了这里: 基本上就是这样。现在我们只有四个项目,所以只需要一点“展开”、投影和排序,就能得到我想要的确切结果。 以下是完整的流程,仅供记录: 显然存在一个临界点,超过它之后整个方法就变得不切实际,因为其总体前提是能够用 $push 将所有文档按顺序放入各自的数组中,以便取出排在最前面的结果,并最终对这些结果调用 $limit。 因此,当每个“类别”的结果数量很大时,更实用的方法可能是逐个处理“类别”,然后只把每个类别的结果限制为所需的前两项。 但作为一个练习,至少我现在知道这是可以做到的。希望这对某些人有用。 我仍然很想看看是否有人有其他方法。
// Pipeline fragment (a stage to append, not a standalone statement).
// "Normalizes" the previous output into a single record holding two arrays:
// the grouping key embeds the "software" array, and each incoming "_id"
// (a site entry) is pushed onto "sites".
// NOTE(review): yields one record only if every incoming document carries
// the same "software" array, as in the sample result shown earlier.
{"$group": {
_id: { _id: null, software: "$software" },
sites: {$push:"$_id" }
}},
{
"_id" : {
"_id" : null,
"software" : [
{
"_id" : {
"tracked_item_type" : "Software",
"tracked_item_name" : "Word"
},
"duration" : 9540
},
{
"_id" : {
"tracked_item_type" : "Software",
"tracked_item_name" : "Notepad"
},
"duration" : 4000
}
]
},
"sites" : [
{
"_id" : {
"tracked_item_type" : "Site",
"tracked_item_name" : "Digital Blasphemy"
},
"duration" : 8000
},
{
"_id" : {
"tracked_item_type" : "Site",
"tracked_item_name" : "Facebook"
},
"duration" : 7920
}
]
}
// Pipeline fragment (a stage to append, not a standalone statement).
// Project a single new field "records" as the "$setUnion" of the two arrays
// ("_id.software" and "sites"); "_id" itself is excluded from the output.
// $setUnion treats its inputs as sets, so the element order of "records" is
// not guaranteed; a later sort is needed for ordered output.
{"$project": {
"_id": 0,
"records": {"$setUnion": ["$_id.software", "$sites"]}
}},
{
"records" : [
{
"_id" : {
"tracked_item_type" : "Site",
"tracked_item_name" : "Facebook"
},
"duration" : 7920
},
{
"_id" : {
"tracked_item_type" : "Software",
"tracked_item_name" : "Word"
},
"duration" : 9540
},
{
"_id" : {
"tracked_item_type" : "Site",
"tracked_item_name" : "Digital Blasphemy"
},
"duration" : 8000
},
{
"_id" : {
"tracked_item_type" : "Software",
"tracked_item_name" : "Notepad"
},
"duration" : 4000
}
]
}
// Complete MongoDB 2.6 pipeline: top two items per tracked_item_type, ranked
// by summed duration, emitted as flat documents. The two type values
// ("Site", "Software") are hard-coded below.
db.collection.aggregate([

    // Group on type + name and "sum" the duration
    {"$group": {
        "_id": {
            "tracked_item_type": "$tracked_item_type",
            "tracked_item_name": "$tracked_item_name"
        },
        "duration": {"$sum": "$duration"}
    }},

    // Sort by type, then by duration descending
    {"$sort": { "_id.tracked_item_type": 1, "duration": -1 }},

    /* The fun part */

    // Collapse everything into one document holding a "sites" array and a
    // "software" array. $cond pushes null as a placeholder for every entry
    // of the other type; those nulls are stripped out by the stages below.
    {"$group": {
        "_id": null,
        "sites": {"$push":
            {"$cond": [
                {"$eq": ["$_id.tracked_item_type", "Site" ]},
                { "_id": "$_id", "duration": "$duration" },
                null
            ]}
        },
        "software": {"$push":
            {"$cond": [
                {"$eq": ["$_id.tracked_item_type", "Software" ]},
                { "_id": "$_id", "duration": "$duration" },
                null
            ]}
        }
    }},

    // Remove the null values for "software"
    {"$unwind": "$software"},
    {"$match": { "software": {"$ne": null} }},
    {"$group": {
        "_id": "$_id",
        "software": {"$push": "$software"},
        "sites": {"$first": "$sites"}
    }},

    // Remove the null values for "sites"
    {"$unwind": "$sites"},
    {"$match": { "sites": {"$ne": null} }},
    {"$group": {
        "_id": "$_id",
        "software": {"$first": "$software"},
        "sites": {"$push": "$sites"}
    }},

    // Project out software and limit to the *top* 2 results.
    // "_id" is redefined to carry the software entry. It must be listed only
    // once: duplicate keys in an object literal silently overwrite each
    // other, so an additional `"_id": 0` entry would have no effect.
    {"$unwind": "$software"},
    {"$project": {
        "_id": { "_id": "$software._id", "duration": "$software.duration" },
        "sites": "$sites"
    }},
    {"$limit" : 2},

    // Project sites, grouping multiple software per key, requires a sort
    // then limit the *top* 2 results
    {"$unwind": "$sites"},
    {"$group": {
        "_id": { "_id": "$sites._id", "duration": "$sites.duration" },
        "software": {"$push": "$_id" }
    }},
    {"$sort": { "_id.duration": -1 }},
    {"$limit": 2},

    // So this part just "normalizes" a little so we get one record that
    // essentially has two arrays in it: the grouping key embeds the
    // "software" array and each site entry is pushed onto "sites"
    {"$group": {
        "_id": { "_id": null, "software": "$software" },
        "sites": {"$push": "$_id" }
    }},

    // Then we just project with a new field, and the "$setUnion" of the two
    // arrays. $setUnion does not guarantee element order, hence the final sort.
    {"$project": {
        "_id": 0,
        "records": {"$setUnion": ["$_id.software", "$sites"]}
    }},

    // Unwind the array to documents
    {"$unwind": "$records"},

    // Shape the final output
    {"$project": {
        "tracked_item_type": "$records._id.tracked_item_type",
        "tracked_item_name": "$records._id.tracked_item_name",
        "duration": "$records.duration"
    }},

    // Final sort on the result
    {"$sort": { "tracked_item_type": 1, "duration": -1 }}
])
(上文所说的总体前提是:用 $push 将所有文档按顺序放入各自的数组中,以便取出排在最前面的结果,最终再对这些结果调用 $limit。)