对数组中的不同对象值进行分组?

时间:2016-04-19 14:16:06

标签: mongodb mongodb-query aggregation-framework

需要MongoDB查询/聚合:如果我有一个数组,我如何根据数组中的不同值对文档进行分组?例如:

如果我有这些文档:

// Sample data: one document per respondent; each entry in `responses` pairs a question with that person's answer.
> db.respondents.insert({person: 1, responses: [{question: 'How old are you?', response: '18-40 yrs'}, {question: 'What is the brand of your car?', response: 'Fiat'} ] } )
> db.respondents.insert({person: 2, responses: [{question: 'How old are you?', response: '18-40 yrs'}, {question: 'What is the brand of your car?', response: 'Volvo'} ] } )
> db.respondents.insert({person: 3, responses: [{question: 'How old are you?', response: '41-65 yrs'}, {question: 'What is the brand of your car?', response: 'Volvo'} ] } )
> db.respondents.insert({person: 4, responses: [{question: 'How old are you?', response: '41-65 yrs'}, {question: 'What is the brand of your car?', response: 'Volvo'} ] } )

并且想写一个查询,告诉我每个年龄组(即细分问题)中的受访者分别拥有哪些品牌的汽车(即基础问题)?

所以答案应该告诉我:

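年龄组 '18-40 yrs' 中有 1 人对问题 'What is the brand of your car?' 回答了 'Fiat'

年龄组 '18-40 yrs' 中有 1 人对问题 'What is the brand of your car?' 回答了 'Volvo'

年龄组 '41-65 yrs' 中有 2 人对问题 'What is the brand of your car?' 回答了 'Volvo'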

而在实际环境中:

  • 有超过100,000名受访者
  • 每位受访者约有30个“回复”
  • 使用MongoDb 3.0.9

我尝试了很多方法,但就不拿那些失败的尝试来烦大家了......

2 个答案:

答案 0 :(得分:3)

可惜您使用的不是 MongoDB 3.2,因为借助 $arrayElemAt 和 $filter 等运算符,只需一个 $group 阶段就能完成这一过程:

// Question texts used both to select respondents and to pick their answers.
var AGE_QUESTION = "How old are you?";
var CAR_QUESTION = "What is the brand of your car?";

// Builds an aggregation expression that filters `responses` down to the
// entries whose `question` equals `questionText`, maps them to their
// `response` value, and takes the element at index 0.
function firstResponseTo(questionText) {
  var matchingEntries = {
    "$filter": {
      "input": "$responses",
      "as": "res",
      "cond": {
        "$eq": [ "$$res.question", questionText ]
      }
    }
  };
  var matchingAnswers = {
    "$map": {
      "input": matchingEntries,
      "as": "res",
      "in": "$$res.response"
    }
  };
  return { "$arrayElemAt": [ matchingAnswers, 0 ] };
}

db.respondents.aggregate([
  // Select documents whose responses contain both questions.
  { "$match": {
    "responses.question": { "$all": [ AGE_QUESTION, CAR_QUESTION ] }
  }},
  // Group on the (age answer, car answer) pair and count the documents in each group.
  { "$group": {
    "_id": {
      "age": firstResponseTo(AGE_QUESTION),
      "car": firstResponseTo(CAR_QUESTION)
    },
    "count": { "$sum": 1 }
  }}
]);

在早期版本中,您需要先用 $unwind 展开数组内容,然后通过 $cond 有条件地选择所需的响应值:

// The two survey questions this report is built from.
var surveyQuestions = {
  age: "How old are you?",
  car: "What is the brand of your car?"
};
var bothQuestions = [ surveyQuestions.age, surveyQuestions.car ];

// Builds a $group accumulator: for each unwound response it yields the
// response value when the question matches `questionText` and null
// otherwise, then takes the $max over the group.
function responseFor(questionText) {
  return {
    "$max": {
      "$cond": [
        { "$eq": [ "$responses.question", questionText ] },
        "$responses.response",
        null
      ]
    }
  };
}

db.respondents.aggregate([
  // Select documents whose responses contain both questions.
  { "$match": {
    "responses.question": { "$all": bothQuestions }
  }},
  // One output document per element of `responses`.
  { "$unwind": "$responses" },
  // Drop the unwound responses that belong to any other question.
  { "$match": {
    "responses.question": { "$in": bothQuestions }
  }},
  // Regroup by original document, collecting its age and car answers.
  { "$group": {
    "_id": "$_id",
    "age": responseFor(surveyQuestions.age),
    "car": responseFor(surveyQuestions.car)
  }},
  // Count documents per (age, car) combination.
  { "$group": {
    "_id": {
      "age": "$age",
      "car": "$car"
    },
    "count": { "$sum": 1 }
  }}
]);

但这当然仍是完全可行的,两种方式得到的结果相同:

{ "_id" : { "age" : "41-65 yrs", "car" : "Volvo" }, "count" : 2 }
{ "_id" : { "age" : "18-40 yrs", "car" : "Volvo" }, "count" : 1 }
{ "_id" : { "age" : "18-40 yrs", "car" : "Fiat" }, "count" : 1 }

答案 1 :(得分:1)

我认为没有直截了当的方法。但! 你可以这样做:

// Copies each respondent's answer to 'How old are you?' into a top-level
// `ageGroup` field, so that later queries can group on that field directly.
db.respondents.aggregate([
  { $unwind: '$responses' },
  { $match: { 'responses.question': 'How old are you?' } }
]).forEach(function (resp) {
  // The cursor method is `forEach` (case-sensitive); `foreach` is undefined.
  // Update the same collection the documents were read from (`respondents`);
  // `resp._id` is still the original document's _id after $unwind.
  db.respondents.update(
    { _id: resp._id },
    { $set: { ageGroup: resp.responses.response } }
  );
});

它可能需要运行一段时间,但之后您就会有一个方便的 ageGroup 字段,可以直接用它进行分组。