在MongoDb中对多个字段进行分组

时间:2015-05-05 14:38:02

标签: mongodb

我如何按多个字段进行分组?我需要统计多个独立文档中,各字段值为 true(不区分大小写)的数量。我已经查看了 map/reduce 和 aggregation,但不太确定哪种方法最好。

假设我的集合中有以下数据:

/* 0 */
{
  "_id" : ObjectId("****"),
  "IsPartOfBatch" : false,
  "Data" : {
    "isMail" : "true",
    "A" : "true",
    "B" : "true",   
    "C" : "",
  }
}

/* 1 */
{
  "_id" : ObjectId("****"),
  "IsPartOfBatch" : false,
  "Data" : {
    "isMail" : "true",
    "A" : "true",
    "B" : "true",   
    "C" : "",
    "D" : "TRUE"
  }
}

/* 2 */
{
  "_id" : ObjectId("****"),
  "IsPartOfBatch" : false,
  "Data" : {
    "isMail" : "true",
    "A" : "true",
    "B" : "TRUE",   
    "C" : "",
    "D" : "false"
  }
}

/* 3 */
{
  "_id" : ObjectId("****"),
  "IsPartOfBatch" : false,
  "Data" : {
    "isMail" : "false",
    "A" : "true",
    "B" : "false",   
    "D" : "true"
  }
}

我想输出以下数据,格式化并不重要:

isMail : 3
A : 4
B : 3
C : 0
D : 2
Total : 4

2 个答案:

答案 0 :(得分:1)

使用条件运算符 $cond 将 "true" 映射为 1、其他值映射为 0,就可以得到期望的结果。唯一需要注意的是,你的“布尔”值实际上是字符串,并且 "true" 存在大小写不同的写法——这就是我在下面的代码中使用 $toLower 的原因:

// Count, per field, how many documents hold the string "true" in any letter case.
// All documents fall into a single group (_id: null); $toLower lower-cases the
// stored string before the comparison and $cond yields 1 on a match, 0 otherwise.
db.test.sample.aggregate([
    {
        "$group": {
            "_id": null,
            "isMail": {
                "$sum": { "$cond": [ { "$eq": [ { "$toLower": "$Data.isMail" }, "true" ] }, 1, 0 ] }
            },
            "A": {
                "$sum": { "$cond": [ { "$eq": [ { "$toLower": "$Data.A" }, "true" ] }, 1, 0 ] }
            },
            "B": {
                "$sum": { "$cond": [ { "$eq": [ { "$toLower": "$Data.B" }, "true" ] }, 1, 0 ] }
            },
            "C": {
                "$sum": { "$cond": [ { "$eq": [ { "$toLower": "$Data.C" }, "true" ] }, 1, 0 ] }
            },
            "D": {
                "$sum": { "$cond": [ { "$eq": [ { "$toLower": "$Data.D" }, "true" ] }, 1, 0 ] }
            },
            // Adds 1 for every grouped document.
            "total": { "$sum": 1 }
        }
    },
    {
        // Hide the null _id and keep only the counters.
        "$project": {
            "_id": 0,
            "A": 1,
            "B": 1,
            "C": 1,
            "D": 1,
            "total": 1,
            "isMail": 1
        }
    }
]);

输出结果:

{ "isMail" : 3, "A" : 4, "B" : 3, "C" : 0, "D" : 2, "total" : 4 }

答案 1 :(得分:0)

如果您可以更改架构设计,把数据的键变成值,那么对数据执行某些聚合操作会容易得多。一个更好的 schema 看起来像这样:

{
    "_id" : ObjectId("5548de01180e84997293903f"),
    "IsPartOfBatch" : false,
    "Data" : [ 
        {
            "key" : "isMail",
            "value" : true
        }, 
        {
            "key" : "A",
            "value" : true
        }, 
        {
            "key" : "B",
            "value" : true
        }, 
        {
            "key" : "C",
            "value" : false
        }, 
        {
            "key" : "D",
            "value" : false
        }
    ]
}

让我们使用您在问题中提供的示例数据集:

// Seed the `test` collection with the four sample documents from the question.
db.test.insert([
    { IsPartOfBatch: false, Data: { isMail: "true", A: "true", B: "true", C: "" } },
    { IsPartOfBatch: false, Data: { isMail: "true", A: "true", B: "true", C: "", D: "TRUE" } },
    { IsPartOfBatch: false, Data: { isMail: "true", A: "true", B: "TRUE", C: "", D: "false" } },
    { IsPartOfBatch: false, Data: { isMail: "false", A: "true", B: "false", D: "true" } }
]);

要更改架构以使其遵循上述建议的结构,请使用以下代码段(对于非常大的数据集,性能可能会很慢):

// One-off migration: rewrite each document's `Data` sub-document as an array of
// { key, value } entries, converting the string flags into real booleans.
// The filter selects documents whose Data.isMail is a string (BSON type 2).
db.test.find({ "Data.isMail": { $type : 2 } }).forEach(function (doc) {
    var data = [];
    var key;    // declared here so the for-in loop does not create a global `key`
    var value;
    if (doc.Data) {
        for (key in doc.Data) {
            value = doc.Data[key];
            data.push({
                "key": key,
                // Case-insensitive match: "true", "TRUE", "True", ... become true;
                // every other value (including "" and non-strings) becomes false.
                "value": typeof value === "string" && value.toLowerCase() === "true"
            });
        }
    }
    // Replace the sub-document with the array and write the document back.
    doc.Data = data;
    db.test.save(doc);
});

一个简单的 db.test.findOne() 查询会给出结果:

{
    "_id" : ObjectId("5548de01180e84997293903f"),
    "IsPartOfBatch" : false,
    "Data" : [ 
        {
            "key" : "isMail",
            "value" : true
        }, 
        {
            "key" : "A",
            "value" : true
        }, 
        {
            "key" : "B",
            "value" : true
        }, 
        {
            "key" : "C",
            "value" : false
        }, 
        {
            "key" : "D",
            "value" : false
        }
    ]
}

现在,您可以使用聚合框架来获取具有真值的键的计数:

// Count, per key, how many Data entries carry the boolean value true.
db.test.aggregate([
    // Emit one document per element of the Data array.
    { $unwind: "$Data" },
    // Keep the entry's key and map its value to 1 (equal to true) or 0 (otherwise).
    {
        $project: {
            _id: 0,
            key: "$Data.key",
            isTrue: { $cond: [ { $eq: [ "$Data.value", true ] }, 1, 0 ] }
        }
    },
    // Add up the 1/0 markers for each key.
    { $group: { _id: "$key", count: { $sum: "$isTrue" } } }
]);

输出:

/* 0 */
{
    "result" : [ 
        {
            "_id" : "D",
            "count" : 2
        }, 
        {
            "_id" : "C",
            "count" : 0
        }, 
        {
            "_id" : "B",
            "count" : 3
        }, 
        {
            "_id" : "A",
            "count" : 4
        }, 
        {
            "_id" : "isMail",
            "count" : 3
        }
    ],
    "ok" : 1
}

然后,您可以使用本机JavaScript函数进一步修改结果,因为MongoDB的聚合框架无法将字段值投影为键,因此您必须依赖JS来执行此操作:

// Aggregation pipeline: unwind the Data array, map each entry to a 1/0 marker
// (1 when its value equals true), then sum the markers per key.
var pipeline = [
    {
        "$unwind": "$Data"
    },
    {
        "$project": {
            "_id": 0,
            "key": "$Data.key",
            "isTrue": {
                "$cond": [{ "$eq": [ "$Data.value", true ] }, 1, 0]
            }
        }
    },
    {
        "$group": {
            "_id": "$key",
            "count": {
                "$sum": "$isTrue"
            }
        }
    }
];
var agg = db.test.aggregate(pipeline);
var obj = {};       // collects { <key>: <count>, ... }
var result = [];

// Pivot the { _id: key, count: n } rows into properties of a single object.
agg.forEach(function (doc) {
    obj[doc._id] = doc.count;
});

// Push the finished object exactly once; pushing inside the loop would add the
// same object reference to `result` once per key.
result.push(obj);