我如何按多个字段分组?我需要在多个独立文档中,统计每个字段取值为 "true"(不区分大小写)的文档数量。
我已经查看了map / reduce和aggregation,我不太清楚什么是最好的方法。
假设我的集合(collection)中有以下数据:
/* 0 */
{
"_id" : ObjectId("****"),
"IsPartOfBatch" : false,
"Data" : {
"isMail" : "true",
"A" : "true",
"B" : "true",
"C" : "",
}
}
/* 1 */
{
"_id" : ObjectId("****"),
"IsPartOfBatch" : false,
"Data" : {
"isMail" : "true",
"A" : "true",
"B" : "true",
"C" : "",
"D" : "TRUE"
}
}
/* 2 */
{
"_id" : ObjectId("****"),
"IsPartOfBatch" : false,
"Data" : {
"isMail" : "true",
"A" : "true",
"B" : "TRUE",
"C" : "",
"D" : "false"
}
}
/* 3 */
{
"_id" : ObjectId("****"),
"IsPartOfBatch" : false,
"Data" : {
"isMail" : "false",
"A" : "true",
"B" : "false",
"D" : "true"
}
}
我想输出以下数据,格式化并不重要:
isMail : 3
A : 4
B : 3
C : 0
D : 2
Total : 4
答案 0 :(得分:1)
使用条件运算符 $cond 将 "true" 映射为 1、其他值映射为 0,就可以得到预期的结果。这里之所以稍显复杂,只是因为你的"布尔"值实际上是字符串,并且 "true" 这个值存在大小写不同的写法——这就是我在下面的代码中使用 $toLower 的原因:
// Count, per flag, how many documents hold the string "true" in any letter case.
// The flags are stored as strings, so each one is lower-cased with $toLower and
// compared with "true"; $cond turns the comparison into 1 or 0 and $sum adds them up.
var flagNames = ["isMail", "A", "B", "C", "D"];
var groupStage = { _id: null }; // a null _id folds every document into a single group
flagNames.forEach(function (flag) {
    groupStage[flag] = {
        $sum: { $cond: [{ $eq: [{ $toLower: "$Data." + flag }, "true"] }, 1, 0] }
    };
});
groupStage.total = { $sum: 1 }; // adds 1 per document, i.e. the number of documents

db.test.sample.aggregate([
    { $group: groupStage },
    // Keep the counters and hide the constant null _id.
    { $project: { _id: 0, A: 1, B: 1, C: 1, D: 1, total: 1, isMail: 1 } }
])
输出结果:
{ "isMail" : 3, "A" : 4, "B" : 3, "C" : 0, "D" : 2, "total" : 4 }
答案 1 :(得分:0)
如果您可以更改架构设计,把 Data 中的键变成值,那么对数据执行某些聚合操作会容易得多。一个更好的架构(schema)大致如下:
{
"_id" : ObjectId("5548de01180e84997293903f"),
"IsPartOfBatch" : false,
"Data" : [
{
"key" : "isMail",
"value" : true
},
{
"key" : "A",
"value" : true
},
{
"key" : "B",
"value" : true
},
{
"key" : "C",
"value" : false
},
{
"key" : "D",
"value" : false
}
]
}
让我们使用您在问题中提供的示例数据集:
// Seed the `test` collection with the four sample documents from the question.
// Every flag under Data is stored as a string ("true", "TRUE", "false" or ""),
// and some documents omit keys entirely (no "D" in the first, no "C" in the last).
db.test.insert([
{
"IsPartOfBatch" : false,
"Data" : {
"isMail" : "true",
"A" : "true",
"B" : "true",
"C" : ""
}
},
{
"IsPartOfBatch" : false,
"Data" : {
"isMail" : "true",
"A" : "true",
"B" : "true",
"C" : "",
"D" : "TRUE"
}
},
{
"IsPartOfBatch" : false,
"Data" : {
"isMail" : "true",
"A" : "true",
"B" : "TRUE",
"C" : "",
"D" : "false"
}
},
{
"IsPartOfBatch" : false,
"Data" : {
"isMail" : "false",
"A" : "true",
"B" : "false",
"D" : "true"
}
}
]);
要更改架构以使其遵循上述建议的结构,请使用以下代码段(对于非常大的数据集,性能可能会很慢):
// One-off migration: replace each document's Data object of string flags with an
// array of { key, value } entries whose value is a real boolean.
// The $type: 2 (string) filter on Data.isMail selects documents that still carry
// the old string-based layout.
db.test.find({ "Data.isMail": { $type: 2 } }).forEach(function (doc) {
    var flags = doc.Data || {};
    // Object.keys + map keeps `key` local to the callback; the previous
    // `for (key in ...)` loop created an implicit global variable.
    doc.Data = Object.keys(flags).map(function (key) {
        return {
            key: key,
            // Case-insensitive comparison: "true", "TRUE" and "True" all become true.
            // String() also lets a value that is already boolean true stay true.
            value: String(flags[key]).toLowerCase() === "true"
        };
    });
    // Write the whole converted document back over the stored one.
    db.test.save(doc);
});
执行一个简单的 db.test.findOne() 查询,会得到类似如下的结果:
{
"_id" : ObjectId("5548de01180e84997293903f"),
"IsPartOfBatch" : false,
"Data" : [
{
"key" : "isMail",
"value" : true
},
{
"key" : "A",
"value" : true
},
{
"key" : "B",
"value" : true
},
{
"key" : "C",
"value" : false
},
{
"key" : "D",
"value" : false
}
]
}
现在,您可以使用聚合框架来获取具有真值的键的计数:
// Count, for every flag name, how many Data entries have the boolean value true.
// Expects Data to be an array of { key, value } entries.
var explodeFlags = { "$unwind": "$Data" }; // one output document per Data entry
var flagToNumber = {
    "$project": {
        "_id": 0,
        "key": "$Data.key",
        // 1 when the entry's value equals boolean true, otherwise 0
        "isTrue": { "$cond": [{ "$eq": ["$Data.value", true] }, 1, 0] }
    }
};
var sumPerKey = {
    // Group by flag name and add up the 0/1 indicators.
    "$group": { "_id": "$key", "count": { "$sum": "$isTrue" } }
};
db.test.aggregate([explodeFlags, flagToNumber, sumPerKey])
输出:
/* 0 */
{
"result" : [
{
"_id" : "D",
"count" : 2
},
{
"_id" : "C",
"count" : 0
},
{
"_id" : "B",
"count" : 3
},
{
"_id" : "A",
"count" : 4
},
{
"_id" : "isMail",
"count" : 3
}
],
"ok" : 1
}
然后,您可以使用原生 JavaScript 进一步处理结果:MongoDB 的聚合框架无法将字段值投影为键,因此这一步必须依靠 JS 来完成:
// Count the true flags per key with the aggregation framework, then reshape the
// per-key rows into a single { <key>: <count>, ... } object in plain JavaScript.
var pipeline = [
        // One output document per entry of the Data array.
        { "$unwind": "$Data" },
        {
            "$project": {
                "_id": 0,
                "key": "$Data.key",
                // 1 when the entry's value equals boolean true, otherwise 0
                "isTrue": {
                    "$cond": [{ "$eq": ["$Data.value", true] }, 1, 0]
                }
            }
        },
        {
            // Group by flag name and add up the 0/1 indicators.
            "$group": {
                "_id": "$key",
                "count": { "$sum": "$isTrue" }
            }
        }
    ],
    agg = db.test.aggregate(pipeline),
    obj = {},
    result = [];
// Each aggregated row is { _id: <key>, count: <n> }; turn the key into a property name.
agg.forEach(function (doc) {
    obj[doc._id] = doc.count;
});
// Push the finished summary once. Pushing inside the loop stored the same object
// reference once per key, leaving `result` full of identical duplicates.
result.push(obj);