我最近提了一个问题,说明了我尝试过的做法,但它又引出了另一个问题。在原问题(original question)中,@NeilLunn 帮我解决了当时的问题。
我把他写的脚本按自己的需要做了修改,如下所示:
// Asker's adapted pipeline, kept verbatim: it is meant to average "graph_data.value"
// per "displayname" and per array position.
// NOTE(review): in the sample documents every "graph_data" element is an object whose
// "value" field is itself an array, so after the single $unwind below
// "$graph_data.value" is still an array. Inside $group, $avg treats an array operand as
// non-numeric and returns null when no operand is numeric, which matches the null
// results reported for this pipeline.
db.getCollection('widget_documents').aggregate([
// One output document per "graph_data" element; "index" holds that element's position
// in "graph_data" (two elements per sample document, hence two entries per group).
{ "$unwind": { "path": "$graph_data", "includeArrayIndex": "index" } },
// Group by display name and element position and average "value" for that key.
{ "$group": {
"_id": {
"group": "$displayname",
"index": "$index"
},
"graph_data": { "$avg": "$graph_data.value" }
}},
// Sort by the compound key (group, then index) before re-assembling the arrays.
{ "$sort": { "_id": 1 } },
// Collect the per-position results into one array per display name.
{ "$group": {
"_id": "$_id.group",
"graph_data": { "$push": "$graph_data" }
}},
// Order the final groups by display name.
{ "$sort": { "_id": 1 } }
])
这个脚本本应展开数组,然后对数组中每个位置上的值(即以逗号分隔的各个值)分别求平均,并保持它们原来的位置。但是,得到的值全是 null,我不知道该如何修复。结果如下:
/* 1 */
{
"_id" : "Accommodation & Functions",
"graph_data" : [
null,
null
]
},
/* 2 */
{
"_id" : "Agriculture & Forestry",
"graph_data" : [
null,
null
]
},
/* 3 */
{
"_id" : "Business & Professional Services",
"graph_data" : [
null,
null
]
}
下面附上用于此聚合的数据集的节选(excerpt)。我想要的结果是两个平均值。我还想在每个分组中加入一个字段,记录该组包含的文档数量。
/* 1 createdAt:20/04/2018, 16:12:27*/
{
"_id" : ObjectId("5ad968ab72f71f12a8298435"),
"object_class" : "De-normalised Datapoint",
"object_type" : "website-traffic",
"object_creation_date" : ISODate("2016-10-25T13:37:33.173+13:00"),
"party_uuid" : "b92ffd39-4382-4c48-86a5-3fe5f36aaa70",
"subscription_uuid" : "4f6731ca-0e1e-4808-91f8-8aa46f2f27ec",
"profile_id" : "8198633",
"extras" : [
{
"label_key" : "d.3",
"value_1" : 43,
"value_2" : 519743
},
{
"label_key" : "d.4",
"value_1" : 25,
"value_2" : 236700
},
{
"label_key" : "d.5",
"value_1" : 33,
"value_2" : 134790
},
{
"label_key" : "d.6",
"value_1" : 12,
"value_2" : 0
},
{
"label_key" : "d.7",
"value_1" : 10,
"value_2" : 2407250
},
{
"label_key" : "d.1",
"value_1" : 32,
"value_2" : 54143
},
{
"label_key" : "d.2",
"value_1" : 35,
"value_2" : 224333
},
{
"label_key" : "d.3",
"value_1" : 33,
"value_2" : 70071
},
{
"label_key" : "d.4",
"value_1" : 28,
"value_2" : 505857
},
{
"label_key" : "d.5",
"value_1" : 19,
"value_2" : 11941
},
{
"label_key" : "d.6",
"value_1" : 9,
"value_2" : 205000
},
{
"label_key" : "d.7",
"value_1" : 12,
"value_2" : 21400
},
{
"label_key" : "d.1",
"value_1" : 25,
"value_2" : 4600
},
{
"label_key" : "d.2",
"value_1" : 1,
"value_2" : 10000
}
],
"graph_data" : [
{
"data_set_name" : "unique.visits",
"value" : [
35,
20,
31,
11,
8,
28,
30,
26,
21,
17,
8,
7,
20,
0
]
},
{
"data_set_name" : "repeat.visits",
"value" : [
8,
5,
2,
1,
2,
4,
5,
7,
7,
2,
1,
5,
5,
1
]
}
],
"displayname" : "Accommodation & Functions"
},
/* 2 createdAt:20/04/2018, 16:12:27*/
{
"_id" : ObjectId("5ad968ab72f71f12a8298436"),
"object_class" : "De-normalised Datapoint",
"object_type" : "website-traffic",
"object_creation_date" : ISODate("2016-10-06T11:53:58.960+13:00"),
"party_uuid" : "f5b3ca48-52c3-4f3d-b84f-8240e0a4b844",
"subscription_uuid" : "fbfe4f05-3eba-4db5-822c-6996cec71683",
"profile_id" : "71567572",
"extras" : [
{
"label_key" : "d.4",
"value_1" : 212,
"value_2" : 534000
},
{
"label_key" : "d.5",
"value_1" : 246,
"value_2" : 220000
},
{
"label_key" : "d.6",
"value_1" : 60,
"value_2" : 179000
},
{
"label_key" : "d.7",
"value_1" : 36,
"value_2" : 344000
},
{
"label_key" : "d.1",
"value_1" : 152,
"value_2" : 332000
},
{
"label_key" : "d.2",
"value_1" : 227,
"value_2" : 426000
},
{
"label_key" : "d.3",
"value_1" : 314,
"value_2" : 434000
},
{
"label_key" : "d.4",
"value_1" : 223,
"value_2" : 389000
},
{
"label_key" : "d.5",
"value_1" : 268,
"value_2" : 269000
},
{
"label_key" : "d.6",
"value_1" : 145,
"value_2" : 261000
},
{
"label_key" : "d.7",
"value_1" : 39,
"value_2" : 202000
},
{
"label_key" : "d.1",
"value_1" : 245,
"value_2" : 336000
},
{
"label_key" : "d.2",
"value_1" : 203,
"value_2" : 180000
},
{
"label_key" : "d.3",
"value_1" : 174,
"value_2" : 223000
}
],
"graph_data" : [
{
"data_set_name" : "unique.visits",
"value" : [
93,
184,
27,
23,
92,
95,
187,
125,
174,
110,
24,
137,
110,
111
]
},
{
"data_set_name" : "repeat.visits",
"value" : [
119,
62,
33,
13,
60,
132,
127,
98,
94,
35,
15,
108,
93,
63
]
}
],
"displayname" : "Retail & Shopping"
},
/* 3 createdAt:20/04/2018, 16:12:27*/
{
"_id" : ObjectId("5ad968ab72f71f12a8298437"),
"object_class" : "De-normalised Datapoint",
"object_type" : "website-traffic",
"object_creation_date" : ISODate("2016-11-14T17:29:30.155+13:00"),
"party_uuid" : "b79eebdb-0bab-45c7-b6ef-1faec1c1c3bb",
"subscription_uuid" : "865768db-49de-4cc9-86f5-960de932e589",
"profile_id" : "71567572",
"extras" : [
{
"label_key" : "d.2",
"value_1" : 163,
"value_2" : 219024
},
{
"label_key" : "d.3",
"value_1" : 261,
"value_2" : 335845
},
{
"label_key" : "d.4",
"value_1" : 224,
"value_2" : 506752
},
{
"label_key" : "d.5",
"value_1" : 292,
"value_2" : 459927
},
{
"label_key" : "d.6",
"value_1" : 222,
"value_2" : 100621
},
{
"label_key" : "d.7",
"value_1" : 127,
"value_2" : 141699
},
{
"label_key" : "d.1",
"value_1" : 256,
"value_2" : 568735
},
{
"label_key" : "d.2",
"value_1" : 396,
"value_2" : 354892
},
{
"label_key" : "d.3",
"value_1" : 388,
"value_2" : 481027
},
{
"label_key" : "d.4",
"value_1" : 375,
"value_2" : 612040
},
{
"label_key" : "d.5",
"value_1" : 247,
"value_2" : 186809
},
{
"label_key" : "d.6",
"value_1" : 372,
"value_2" : 91135
},
{
"label_key" : "d.7",
"value_1" : 272,
"value_2" : 123998
},
{
"label_key" : "d.1",
"value_1" : 284,
"value_2" : 525792
}
],
"graph_data" : [
{
"data_set_name" : "unique.visits",
"value" : [
108,
206,
146,
199,
190,
110,
189,
323,
309,
298,
184,
350,
259,
233
]
},
{
"data_set_name" : "repeat.visits",
"value" : [
55,
55,
78,
93,
32,
17,
67,
73,
79,
77,
63,
22,
13,
51
]
}
],
"displayname" : "Cafes, Restaurants, Hotels & Food"
}
这不是完整的集合,但足以说明问题。非常感谢您提供的任何帮助。
答案 0 :(得分:1)
如上所述,您修改后的管道中似乎少了一个 $unwind,因为这里是数组中又嵌套了"数组"("graph_data" 数组的每个元素里还有一个 "value" 数组)。因此,应该写成:
// Per-position averages for each "displayname".
// "graph_data" is an array of objects that each carry their own "value" array, so two
// $unwind stages are needed before the individual numbers can be averaged.
// The grouping key does not include "data_set_name", so numbers from every data set
// that share a position are averaged together.
db.getCollection('widget_documents').aggregate([
  // Stage 1: one document per "graph_data" entry.
  {
    "$unwind": { "path": "$graph_data" }
  },

  // Stage 2: one document per number inside "value"; "index" records the number's
  // position within that "value" array.
  {
    "$unwind": {
      "path": "$graph_data.value",
      "includeArrayIndex": "index"
    }
  },

  // Stage 3: average the numbers that share a display name and a position.
  {
    "$group": {
      "_id": {
        "group": "$displayname",
        "index": "$index"
      },
      "graph_data": { "$avg": "$graph_data.value" }
    }
  },

  // Stage 4: sort on the compound key (group, then index) so the next stage receives
  // the averages in positional order.
  {
    "$sort": { "_id": 1 }
  },

  // Stage 5: gather the averages back into one array per display name.
  {
    "$group": {
      "_id": "$_id.group",
      "graph_data": { "$push": "$graph_data" }
    }
  },

  // Stage 6: order the resulting groups by display name.
  {
    "$sort": { "_id": 1 }
  }
])
或者,如果您其实想按每个数组条目"内部"的 "data_set_name" 分别计算,那么需要类似下面的写法:
// Per-position averages for each ("displayname", "data_set_name") pair.
// Same shape as a plain per-"displayname" average, except the grouping key also carries
// the data set name, so each data set keeps its own array of averages.
db.getCollection('widget_documents').aggregate([
  // Stage 1: one document per "graph_data" entry.
  {
    "$unwind": { "path": "$graph_data" }
  },

  // Stage 2: one document per number inside "value"; "index" records the number's
  // position within that "value" array.
  {
    "$unwind": {
      "path": "$graph_data.value",
      "includeArrayIndex": "index"
    }
  },

  // Stage 3: average the numbers that share display name, data set and position.
  {
    "$group": {
      "_id": {
        "group": {
          "displayname": "$displayname",
          "data_set": "$graph_data.data_set_name"
        },
        "index": "$index"
      },
      "graph_data": { "$avg": "$graph_data.value" }
    }
  },

  // Stage 4: sort on the compound key (group, then index) so the next stage receives
  // the averages in positional order.
  {
    "$sort": { "_id": 1 }
  },

  // Stage 5: gather the averages into one array per (displayname, data_set) pair.
  {
    "$group": {
      "_id": "$_id.group",
      "graph_data": { "$push": "$graph_data" }
    }
  },

  // Stage 6: order the resulting groups by their key.
  {
    "$sort": { "_id": 1 }
  }
])
这两种写法都会在同一"分组"内的各个文档之间(如果同组有多个文档),按数组元素实际所在的位置保留各自的"平均值"。在您的示例中,每个分组只对应一个文档,因为每个文档的 "displayname" 值都是唯一的。