在展开元素后对多维数组进行分组

时间:2015-07-02 15:22:59

标签: mongodb multidimensional-array mongodb-query aggregation-framework

又是一个关于MongoDB的问题。我真的很喜欢聚合,但仍然没能真正“弄懂”它。

所以这是我的数组:

{
    "_id" : ObjectId("55951b2bf41edfc80b00002a"),
    "orders" : [ 
    {
        "id" : "55929142f41edfdc0f00002f",
        "name" : "XYZ",
        "id_basket" : 1,
        "card" : [ 
            {
                "id" : "250",
                "serial" : "B",
                "type" : "9cf4161002b9eda349bb9c5ae64b9f4a",
                "name" : "Eco",
                "ticket" : [ 
                    {
                        "id" : "55927d41f41edfd00f000030",
                        "name" : "ZZZ",
                        "price" : {
                            "name" : "Normal",
                            "price" : "10",
                            "price_disp" : "10 €",
                        }
                    },
                    {
                        "id" : "55927d41f41edfd00f000030",
                        "name" : "ZZZ",
                        "price" : {
                            "name" : "Normal",
                            "price" : "10",
                            "price_disp" : "10 €",
                        }
                    }
                ]
            },
            {
                "id" : "250",
                "serial" : "B",
                "type" : "9cf4161002b9eda349bb9c5ae64b9f4a",
                "name" : "Eco",
                "ticket" : [ 
                    {
                        "id" : "55927d41f41edfd00f000030",
                        "name" : "ZZZ",
                        "price" : {
                            "name" : "Normal",
                            "price" : "10",
                            "price_disp" : "10 €",
                        }
                    },
                    {
                        "id" : "55927d41f41edfd00f000030",
                        "name" : "ZZZ",
                        "price" : {
                            "name" : "Normal",
                            "price" : "10",
                            "price_disp" : "10 €",
                        }
                    }
                ]
            }
        ],
        "full_amount" : "40",
    },
    {
        "id" : "55929142f41edfdc0f00002f",
        "name" : "XYZ",
        "id_basket" : 1,
        "card" : [ 
            {
                "id" : "250",
                "serial" : "B",
                "type" : "9cf4161002b9eda349bb9c5ae64b9f4a",
                "name" : "Eco",
                "ticket" : [ 
                    {
                        "id" : "55927d41f41edfd00f000030",
                        "name" : "ZZZ",
                        "price" : {
                            "name" : "Normal",
                            "price" : "10",
                            "price_disp" : "10 €",
                        }
                    },
                    {
                        "id" : "55927d41f41edfd00f000030",
                        "name" : "ZZZ",
                        "price" : {
                            "name" : "Normal",
                            "price" : "10",
                            "price_disp" : "10 €",
                        }
                    }
                ]
            },
            {
                "id" : "250",
                "serial" : "B",
                "type" : "9cf4161002b9eda349bb9c5ae64b9f4a",
                "name" : "Eco",
                "ticket" : [ 
                    {
                        "id" : "55927d41f41edfd00f000030",
                        "name" : "ZZZ",
                        "price" : {
                            "name" : "Normal",
                            "price" : "10",
                            "price_disp" : "10 €",
                        }
                    },
                    {
                        "id" : "55927d41f41edfd00f000030",
                        "name" : "ZZZ",
                        "price" : {
                            "name" : "Normal",
                            "price" : "10",
                            "price_disp" : "10 €",
                        }
                    }
                ]
            }
        ],
        "full_amount" : "40",
    },
],
"rate" : "0.23",
"date" : "2015-07-02 13:04:34",
"id_user" : 97,
}

我想输出这样的东西:

{
    "_id" : ObjectId("55951b2bf41edfc80b00002a"),
    "orders" : [ 
    {
        "id" : "55929142f41edfdc0f00002f",
        "name" : "XYZ",
        "card" : [ 
            {
                "id" : "250",
                "serial" : "B",
                "name" : "Eco",
                "ticket" : [ 
                    {
                        "id" : "55927d41f41edfd00f000030",
                        "name" : "ZZZ",
                        "price" : "10 €"
                    },
                    {
                        "id" : "55927d41f41edfd00f000030",
                        "name" : "ZZZ",
                        "price" : "10 €"
                    }
                ]
            },
            {
                "id" : "250",
                "serial" : "B",
                "name" : "Eco",
                "ticket" : [ 
                    {
                        "id" : "55927d41f41edfd00f000030",
                        "name" : "ZZZ",
                        "price" : "10 €"
                    },
                    {
                        "id" : "55927d41f41edfd00f000030",
                        "name" : "ZZZ",
                        "price" : "10 €"
                    }
                ]
            }
        ],
        "full_amount" : "40",
    },
    {
        "id" : "55929142f41edfdc0f00002f",
        "name" : "XYZ",
        "card" : [ 
            {
                "id" : "250",
                "serial" : "B",
                "name" : "Eco",
                "ticket" : [ 
                    {
                        "id" : "55927d41f41edfd00f000030",
                        "name" : "ZZZ",
                        "price" : "10 €"
                    },
                    {
                        "id" : "55927d41f41edfd00f000030",
                        "name" : "ZZZ",
                        "price" : "10 €"
                    }
                ]
            },
            {
                "id" : "250",
                "serial" : "B",
                "name" : "Eco",
                "ticket" : [ 
                    {
                        "id" : "55927d41f41edfd00f000030",
                        "name" : "ZZZ",
                        "price" : "10 €"
                    },
                    {
                        "id" : "55927d41f41edfd00f000030",
                        "name" : "ZZZ",
                        "price" : "10 €"
                    }
                ]
            }
        ],
        "full_amount" : "40",
    },
],
"rate" : "0.23",
"date" : "2015-07-02 13:04:34",
}

我尝试了许多组合,包括展开,投射和分组,但未能得到我想要的东西。有人可以帮我这个吗?

1 个答案:

答案 0 :(得分:2)

您可能不应该将聚合框架用于此类任务,而这些任务实际上并未在文档之间“聚合”任何内容。这实际上是一个“投影”任务,因为你所要求的只是“改变”文档的结构,这个任务可能更适合在检索文档后在客户端编码。

这样做的一个很好的理由是,$unwind等操作在性能方面非常昂贵。$unwind所做的是为每个存在的数组成员生成一份文档内容的“副本”,这会导致需要处理的文档数量大大增加。

可以将其视为具有“一对多”关系的“SQL Join”,唯一的区别是数据本身包含在同一个文档中。$unwind的处理模拟了“连接(Join)”的结果,即为每个“子”(多)文档复制一份“主”(一)文档的内容。

为了避免人们用这种方式滥用$unwind,MongoDB 2.6引入了$map运算符,该运算符可以在文档内部直接处理数组元素。

因此,您无需执行多个(或任何)$unwind操作,而只需在$project阶段使用$map处理文档内的数组:

// Reshape each document in place with nested $map expressions (MongoDB 2.6+),
// so that no $unwind / $group round trip is required. Each array level gets
// its own "as" variable (o, c, t) and simply lists the fields to keep.

// Fields kept for every element of a card's "ticket" array; the nested
// price sub-document is reduced to its display string.
var ticketShape = {
  "id": "$$t.id",
  "name": "$$t.name",
  "price": "$$t.price.price_disp"
};

// Fields kept for every element of an order's "card" array.
var cardShape = {
  "id": "$$c.id",
  "serial": "$$c.serial",
  "name": "$$c.name",
  "ticket": { "$map": { "input": "$$c.ticket", "as": "t", "in": ticketShape } }
};

// Fields kept for every element of the top-level "orders" array.
var orderShape = {
  "id": "$$o.id",
  "name": "$$o.name",
  "card": { "$map": { "input": "$$o.card", "as": "c", "in": cardShape } },
  "full_amount": "$$o.full_amount"
};

db.collection.aggregate([
  { "$project": {
    "orders": { "$map": { "input": "$orders", "as": "o", "in": orderShape } },
    "rate": 1,
    "date": 1
  }}
])

这里的操作相当简单,因为每个“数组”都被分配了它自己的变量名,对于这样的简单投影操作,所有真正剩下的就是选择你想要的字段。

在早期版本中,使用$unwind进行处理要困难得多:

// Pre-2.6 approach: flatten all three array levels with $unwind, then
// rebuild them innermost-first with one $group per $unwind.

// Produces one pipeline document per ticket.
var flattenStages = [
  { "$unwind": "$orders" },
  { "$unwind": "$orders.card" },
  { "$unwind": "$orders.card.ticket" }
];

// Rebuild the "ticket" array: group on everything above ticket level
// (document, order and card fields) and push the reshaped tickets.
var regroupTickets = { "$group": {
  "_id": {
    "_id": "$_id",
    "orders": {
      "id": "$orders.id",
      "name": "$orders.name",
      "card": {
        "id": "$orders.card.id",
        "serial": "$orders.card.serial",
        "name": "$orders.card.name"
      },
      "full_amount": "$orders.full_amount"
    },
    "rate": "$rate",
    "date": "$date"
  },
  "ticket": {
    "$push": {
      "id": "$orders.card.ticket.id",
      "name": "$orders.card.ticket.name",
      "price": "$orders.card.ticket.price.price_disp"
    }
  }
}};

// Rebuild the "card" array: drop the card fields from the grouping key
// and push each card together with its rebuilt "ticket" array.
var regroupCards = { "$group": {
  "_id": {
    "_id": "$_id._id",
    "orders": {
      "id": "$_id.orders.id",
      "name": "$_id.orders.name",
      "full_amount": "$_id.orders.full_amount"
    },
    "rate": "$_id.rate",
    "date": "$_id.date"
  },
  "card": {
    "$push": {
      "id": "$_id.orders.card.id",
      "serial": "$_id.orders.card.serial",
      "name": "$_id.orders.card.name",
      "ticket": "$ticket"
    }
  }
}};

// Rebuild the "orders" array: group back to the original document _id,
// push each order with its rebuilt "card" array, and take the
// document-level fields from the first member of each group.
var regroupOrders = { "$group": {
  "_id": "$_id._id",
  "orders": {
    "$push": {
      "id": "$_id.orders.id",
      "name": "$_id.orders.name",
      "card": "$card",
      "full_amount": "$_id.orders.full_amount"
    }
  },
  "rate": { "$first": "$_id.rate" },
  "date": { "$first": "$_id.date" }
}};

db.collection.aggregate(
  flattenStages.concat([ regroupTickets, regroupCards, regroupOrders ])
)

因此,仔细观察,您应该看到,因为您$unwind三次,所以$group “三次”也是必要的,同时仔细分组所有每个“级别”的不同值,并通过$push重新构建数组。

如前所述,完全不建议这样做:

  • 您实际上并没有“分组/汇总”任何内容,而且每个子文档“必须”包含一个“唯一”的标识符(identifier),因为重建数组需要用到“分组”操作。(参见下文的“注意”)

  • 此处的$unwind操作非常昂贵。所有文档信息都会按“n”个数组元素 × “n”个数组元素……的倍数被重复生成。因此,聚合管道中的数据远远多于您的集合或查询选择本身实际包含的数据。

因此,总而言之,对于“重新格式化数据”这类一般性处理,您应该在代码中处理每个文档,而不是把它“扔进”聚合管道。

如果您的文档数据需要“足够”多的处理,使得返回结果的大小与拉取整个文档相比产生“实质性差异”,从而让您认为在服务器端处理比在客户端处理更高效,那么“只有”在这种情况下,您才应该使用带有$map操作的$project形式。

边栏

你原来的“标签”在这里提到“PHP”。

包括聚合在内的所有MongoDB查询都与具体语言无关,它们只是“数据结构”:在PHP、JavaScript、Python等语言中大多以“原生形式”表示,而在没有这种“原生”自由结构表达格式的语言(C、C#、Java)中,则通过“构建器方法”来表示。

在所有情况下,JSON都有简单的解析器,并且是一种通用的“lingua franca”,因为MongoDB Shell本身是基于JavaScript的,能够原生地理解JSON结构(作为实际的JavaScript对象)。

因此,在使用此类示例时,请使用以下工具:

  • json_decode:更深入地了解您的原生数据结构是如何构建的。

  • json_encode:为了根据任何代表JSON的示例检查您的本机数据结构。

这里的所有内容都只是简单的“键/值”array()表示法,尽管是嵌套的。但是,了解这些工具并定期使用它们可能是一种好习惯。

注意:

您提供的数据样本非常类似于您“剪切并粘贴”数据以创建多个项目,因为各种“子项”都共享相同的“id”值。

您的“真实”数据不应该这样做!所以我希望它不会,但如果是这样,那就解决它。

为了使第二个例子可行(第一个例子本来就完全可行),需要修改数据,使每个子元素都包含“唯一”的“id”值。

正如我在这里使用的那样:

{
    "_id" : ObjectId("55951b2bf41edfc80b00002a"),
    "orders" : [ 
    {
        "id" : "55929142f41edfdc0f00002a",
        "name" : "XYZ",
        "card" : [ 
            {
                "id" : "250",
                "serial" : "B",
                "name" : "Eco",
                "ticket" : [ 
                    {
                        "id" : "55927d41f41edfd00f000031",
                        "name" : "ZZZ",
                        "price" : "10 €"
                    },
                    {
                        "id" : "55927d41f41edfd00f000032",
                        "name" : "ZZZ",
                        "price" : "10 €"
                    }
                ]
            },
            {
                "id" : "251",
                "serial" : "B",
                "name" : "Eco",
                "ticket" : [ 
                    {
                        "id" : "55927d41f41edfd00f000033",
                        "name" : "ZZZ",
                        "price" : "10 €"
                    },
                    {
                        "id" : "55927d41f41edfd00f000034",
                        "name" : "ZZZ",
                        "price" : "10 €"
                    }
                ]
            }
        ],
        "full_amount" : "40",
    },
    {
        "id" : "55929142f41edfdc0f00002b",
        "name" : "XYZ",
        "card" : [ 
            {
                "id" : "252",
                "serial" : "B",
                "name" : "Eco",
                "ticket" : [ 
                    {
                        "id" : "55927d41f41edfd00f000035",
                        "name" : "ZZZ",
                        "price" : "10 €"
                    },
                    {
                        "id" : "55927d41f41edfd00f000036",
                        "name" : "ZZZ",
                        "price" : "10 €"
                    }
                ]
            },
            {
                "id" : "253",
                "serial" : "B",
                "name" : "Eco",
                "ticket" : [ 
                    {
                        "id" : "55927d41f41edfd00f000037",
                        "name" : "ZZZ",
                        "price" : "10 €"
                    },
                    {
                        "id" : "55927d41f41edfd00f000038",
                        "name" : "ZZZ",
                        "price" : "10 €"
                    }
                ]
            }
        ],
        "full_amount" : "40",
    }
    ],
    "rate" : "0.23",
    "date" : "2015-07-02 13:04:34",
}