一组文档中按位置的数组值的平均值

时间:2018-04-22 03:49:26

标签: mongodb aggregation-framework

我最近提了一个问题并尝试了其中的做法,但这又引出了一个新的问题。在原问题(original question)中,@NeilLunn 帮我解决了当时的问题。

我把他写的脚本按自己的需要做了修改,如下所示:

// Attempt to average the "graph_data" values per array position for each
// "displayname".
// NOTE(review): as posted, this yields null at every position (see the results
// that follow). In the sample documents each "graph_data" element is an object
// whose "value" field is itself an array, so after this single $unwind the
// $avg below is handed an array instead of a number.
db.getCollection('widget_documents').aggregate([
// One output document per "graph_data" element; "index" is that element's
// position inside "graph_data" (not a position inside "value").
{ "$unwind": { "path": "$graph_data", "includeArrayIndex": "index" } },
// Group by displayname + index and average "graph_data.value".
{ "$group": {
"_id": {
  "group": "$displayname",
  "index": "$index"
},
"graph_data": { "$avg": "$graph_data.value" }
}},
// Sort on the compound _id before the averages are pushed into arrays.
{ "$sort": { "_id": 1 } },
// Collect the per-index averages into one array per displayname.
{ "$group": {
  "_id": "$_id.group",
  "graph_data": { "$push": "$graph_data" }
}},
{ "$sort": { "_id": 1 } }  
])

该脚本本应先展开数组,然后对每个以逗号分隔的值按位置求平均值,并保持它们原来的位置。但是,得到的值全是 null,我不知道该如何修复。结果见下:

/* 1 */
{
"_id" : "Accommodation & Functions",
"graph_data" : [
    null,
    null
]
},

/* 2 */
{
"_id" : "Agriculture & Forestry",
"graph_data" : [
    null,
    null
]
},

/* 3 */
{
"_id" : "Business & Professional Services",
"graph_data" : [
    null,
    null
]
}

下面附上用于此聚合的数据集节选(excerpt)。我想要的结果是两个平均值。我还希望在每个分组中加入一个表示文档数量的字段。

/* 1 createdAt:20/04/2018, 16:12:27*/
{
"_id" : ObjectId("5ad968ab72f71f12a8298435"),
"object_class" : "De-normalised Datapoint",
"object_type" : "website-traffic",
"object_creation_date" : ISODate("2016-10-25T13:37:33.173+13:00"),
"party_uuid" : "b92ffd39-4382-4c48-86a5-3fe5f36aaa70",
"subscription_uuid" : "4f6731ca-0e1e-4808-91f8-8aa46f2f27ec",
"profile_id" : "8198633",
"extras" : [
    {
        "label_key" : "d.3",
        "value_1" : 43,
        "value_2" : 519743
    },
    {
        "label_key" : "d.4",
        "value_1" : 25,
        "value_2" : 236700
    },
    {
        "label_key" : "d.5",
        "value_1" : 33,
        "value_2" : 134790
    },
    {
        "label_key" : "d.6",
        "value_1" : 12,
        "value_2" : 0
    },
    {
        "label_key" : "d.7",
        "value_1" : 10,
        "value_2" : 2407250
    },
    {
        "label_key" : "d.1",
        "value_1" : 32,
        "value_2" : 54143
    },
    {
        "label_key" : "d.2",
        "value_1" : 35,
        "value_2" : 224333
    },
    {
        "label_key" : "d.3",
        "value_1" : 33,
        "value_2" : 70071
    },
    {
        "label_key" : "d.4",
        "value_1" : 28,
        "value_2" : 505857
    },
    {
        "label_key" : "d.5",
        "value_1" : 19,
        "value_2" : 11941
    },
    {
        "label_key" : "d.6",
        "value_1" : 9,
        "value_2" : 205000
    },
    {
        "label_key" : "d.7",
        "value_1" : 12,
        "value_2" : 21400
    },
    {
        "label_key" : "d.1",
        "value_1" : 25,
        "value_2" : 4600
    },
    {
        "label_key" : "d.2",
        "value_1" : 1,
        "value_2" : 10000
    }
],
"graph_data" : [
    {
        "data_set_name" : "unique.visits",
        "value" : [
            35,
            20,
            31,
            11,
            8,
            28,
            30,
            26,
            21,
            17,
            8,
            7,
            20,
            0
        ]
    },
    {
        "data_set_name" : "repeat.visits",
        "value" : [
            8,
            5,
            2,
            1,
            2,
            4,
            5,
            7,
            7,
            2,
            1,
            5,
            5,
            1
        ]
    }
],
"displayname" : "Accommodation & Functions"
},

/* 2 createdAt:20/04/2018, 16:12:27*/
{
"_id" : ObjectId("5ad968ab72f71f12a8298436"),
"object_class" : "De-normalised Datapoint",
"object_type" : "website-traffic",
"object_creation_date" : ISODate("2016-10-06T11:53:58.960+13:00"),
"party_uuid" : "f5b3ca48-52c3-4f3d-b84f-8240e0a4b844",
"subscription_uuid" : "fbfe4f05-3eba-4db5-822c-6996cec71683",
"profile_id" : "71567572",
"extras" : [
    {
        "label_key" : "d.4",
        "value_1" : 212,
        "value_2" : 534000
    },
    {
        "label_key" : "d.5",
        "value_1" : 246,
        "value_2" : 220000
    },
    {
        "label_key" : "d.6",
        "value_1" : 60,
        "value_2" : 179000
    },
    {
        "label_key" : "d.7",
        "value_1" : 36,
        "value_2" : 344000
    },
    {
        "label_key" : "d.1",
        "value_1" : 152,
        "value_2" : 332000
    },
    {
        "label_key" : "d.2",
        "value_1" : 227,
        "value_2" : 426000
    },
    {
        "label_key" : "d.3",
        "value_1" : 314,
        "value_2" : 434000
    },
    {
        "label_key" : "d.4",
        "value_1" : 223,
        "value_2" : 389000
    },
    {
        "label_key" : "d.5",
        "value_1" : 268,
        "value_2" : 269000
    },
    {
        "label_key" : "d.6",
        "value_1" : 145,
        "value_2" : 261000
    },
    {
        "label_key" : "d.7",
        "value_1" : 39,
        "value_2" : 202000
    },
    {
        "label_key" : "d.1",
        "value_1" : 245,
        "value_2" : 336000
    },
    {
        "label_key" : "d.2",
        "value_1" : 203,
        "value_2" : 180000
    },
    {
        "label_key" : "d.3",
        "value_1" : 174,
        "value_2" : 223000
    }
],
"graph_data" : [
    {
        "data_set_name" : "unique.visits",
        "value" : [
            93,
            184,
            27,
            23,
            92,
            95,
            187,
            125,
            174,
            110,
            24,
            137,
            110,
            111
        ]
    },
    {
        "data_set_name" : "repeat.visits",
        "value" : [
            119,
            62,
            33,
            13,
            60,
            132,
            127,
            98,
            94,
            35,
            15,
            108,
            93,
            63
        ]
    }
],
"displayname" : "Retail & Shopping"
},

/* 3 createdAt:20/04/2018, 16:12:27*/
{
"_id" : ObjectId("5ad968ab72f71f12a8298437"),
"object_class" : "De-normalised Datapoint",
"object_type" : "website-traffic",
"object_creation_date" : ISODate("2016-11-14T17:29:30.155+13:00"),
"party_uuid" : "b79eebdb-0bab-45c7-b6ef-1faec1c1c3bb",
"subscription_uuid" : "865768db-49de-4cc9-86f5-960de932e589",
"profile_id" : "71567572",
"extras" : [
    {
        "label_key" : "d.2",
        "value_1" : 163,
        "value_2" : 219024
    },
    {
        "label_key" : "d.3",
        "value_1" : 261,
        "value_2" : 335845
    },
    {
        "label_key" : "d.4",
        "value_1" : 224,
        "value_2" : 506752
    },
    {
        "label_key" : "d.5",
        "value_1" : 292,
        "value_2" : 459927
    },
    {
        "label_key" : "d.6",
        "value_1" : 222,
        "value_2" : 100621
    },
    {
        "label_key" : "d.7",
        "value_1" : 127,
        "value_2" : 141699
    },
    {
        "label_key" : "d.1",
        "value_1" : 256,
        "value_2" : 568735
    },
    {
        "label_key" : "d.2",
        "value_1" : 396,
        "value_2" : 354892
    },
    {
        "label_key" : "d.3",
        "value_1" : 388,
        "value_2" : 481027
    },
    {
        "label_key" : "d.4",
        "value_1" : 375,
        "value_2" : 612040
    },
    {
        "label_key" : "d.5",
        "value_1" : 247,
        "value_2" : 186809
    },
    {
        "label_key" : "d.6",
        "value_1" : 372,
        "value_2" : 91135
    },
    {
        "label_key" : "d.7",
        "value_1" : 272,
        "value_2" : 123998
    },
    {
        "label_key" : "d.1",
        "value_1" : 284,
        "value_2" : 525792
    }
],
"graph_data" : [
    {
        "data_set_name" : "unique.visits",
        "value" : [
            108,
            206,
            146,
            199,
            190,
            110,
            189,
            323,
            309,
            298,
            184,
            350,
            259,
            233
        ]
    },
    {
        "data_set_name" : "repeat.visits",
        "value" : [
            55,
            55,
            78,
            93,
            32,
            17,
            67,
            73,
            79,
            77,
            63,
            22,
            13,
            51
        ]
    }
],
"displayname" : "Cafes, Restaurants, Hotels & Food"
}

这不是完整的集合,但足以说明问题。非常感谢您在这个问题上提供的所有帮助。

1 个答案:

答案 0 :(得分:1)

如上所述,您在改写脚本时似乎漏掉了一个 $unwind,因为这里是数组中还嵌套着"数组"。因此,应该写成:

// Average the "value" entries position-by-position for every "displayname".
db.getCollection("widget_documents").aggregate([
  // One document per "graph_data" element.
  { $unwind: "$graph_data" },

  // One document per entry of "graph_data.value"; "index" records the
  // entry's position inside that array.
  { $unwind: { path: "$graph_data.value", includeArrayIndex: "index" } },

  // Average the entries that share a displayname and an array position.
  {
    $group: {
      _id: { group: "$displayname", index: "$index" },
      graph_data: { $avg: "$graph_data.value" }
    }
  },

  // Sort on the compound _id (group, then index) before rebuilding arrays.
  { $sort: { _id: 1 } },

  // Push the per-position averages back into one array per displayname.
  {
    $group: {
      _id: "$_id.group",
      graph_data: { $push: "$graph_data" }
    }
  },

  { $sort: { _id: 1 } }
])

或者,如果您其实想按"内层"每个数组条目的 "data_set_name" 分别计算,那么应该使用类似下面的写法:

// Average the "value" entries position-by-position, keeping each
// "data_set_name" separate within every "displayname".
db.getCollection("widget_documents").aggregate([
  // One document per "graph_data" element.
  { $unwind: "$graph_data" },

  // One document per entry of "graph_data.value"; "index" records the
  // entry's position inside that array.
  { $unwind: { path: "$graph_data.value", includeArrayIndex: "index" } },

  // Average the entries that share displayname, data set name and position.
  {
    $group: {
      _id: {
        group: {
          displayname: "$displayname",
          data_set: "$graph_data.data_set_name"
        },
        index: "$index"
      },
      graph_data: { $avg: "$graph_data.value" }
    }
  },

  // Sort on the compound _id (group, then index) before rebuilding arrays.
  { $sort: { _id: 1 } },

  // Push the per-position averages back into one array per
  // (displayname, data_set) pair.
  {
    $group: {
      _id: "$_id.group",
      graph_data: { $push: "$graph_data" }
    }
  },

  { $sort: { _id: 1 } }
])

以上两种写法都会按数组元素在文档中的实际位置,对每个"分组"(如果存在分组的话)分别求"平均值"。在您的示例中,每个分组其实只对应一个文档,因为每个文档的 "displayname" 值都是唯一的。