我在 MongoDB 中有一个集合 users,我对它执行了下面这个 map/reduce,我认为它相当于 COUNT(*) GROUP BY origin:
> m = function() { for (i in this.membership) {
... emit( this.membership[i].platform_profile.origin, 1 );
... } }
function () {
for (i in this.membership) {
emit(this.membership[i].platform_profile.origin, 1);
}
}
> r = function( id, values ) { var result = 0;
... for ( var i = 0; i < values.length; i ++ ) { result += values[i]; }
... return result; }
function (id, values) {
var result = 0;
for (var i = 0; i < values.length; i++) {
result += values[i];
}
return result;
}
> db.users.mapReduce(m, r, {out : { inline: 1}});
{
"results" : [
{
"_id" : 0,
"value" : 15
},
{
"_id" : 1,
"value" : 449
},
...
}
但是,如果我去统计该字段等于某个特定值(例如 1)的文档数量,得到的结果却更少:
// Count the documents that have a membership entry whose
// platform_profile.origin equals 1. The field path must be wrapped in plain
// ASCII quotes; typographic quotes are a syntax error in JavaScript.
db.users.count({"membership.platform_profile.origin": 1});
424
我错过了什么?
答案 0 :(得分:2)
你的计数查询有没有可能用到了稀疏索引?我唯一的猜测是:如果其他某些查询条件使查询走了稀疏索引,那么索引中缺失的文档就会被排除在计数之外。
我用一些测试数据(fixture)重建了你的数据结构,map/reduce 与简单计数查询得到的结果是一致的:
// Drop the `users` collection so the script does not build on data left over
// from an earlier run.
db.users.drop();
/**
 * Map step: emits (origin, 1) for every entry of the document's `membership`
 * array, so that summing the emitted values per key counts membership entries
 * grouped by origin.
 *
 * One emit happens per array entry: a document holding several entries with
 * the same origin contributes more than once to that key.
 *
 * Invoked with `this` bound to the document being mapped; `emit` is supplied
 * by the environment that runs the map function.
 */
var map = function() {
    // A document without `membership` emits nothing.
    var memberships = this.membership || [];
    // Indexed loop with a declared counter: no global `i` is leaked, and only
    // the array elements themselves are visited.
    for (var idx = 0; idx < memberships.length; idx++) {
        emit(memberships[idx].platform_profile.origin, 1);
    }
};
/**
 * Reduce step: adds up all the values emitted for one key.
 *
 * @param id     the key being reduced (not used in the computation)
 * @param values array of numbers to add together
 * @returns the sum of `values`; 0 when the array is empty
 */
var reduce = function(id, values) {
    var total = 0;
    var pos = 0;
    while (pos < values.length) {
        total += values[pos];
        pos += 1;
    }
    return total;
};
// Fixture labels keyed by divisor: document number n receives the label of
// every key that divides n evenly.
var origins = {1: "a", 2: "b", 3: "c", 4: "d"};

// Insert 1000 documents, each with one membership entry per matching divisor.
for (var docNumber = 0; docNumber < 1000; ++docNumber) {
    var entries = [];
    for (var divisor in origins) {
        // `divisor` is a string key; `%` coerces it to a number.
        if (docNumber % divisor !== 0) {
            continue;
        }
        entries.push({ platform_profile: { origin: origins[divisor] }});
    }
    db.users.save({ membership: entries });
}

// Per-origin totals as computed by map/reduce.
var mapReduceResults = db.users.mapReduce(map, reduce, {out: {inline: 1}}).results;
mapReduceResults.forEach(function(row) {
    print(row._id + ": " + row.value);
});

// Per-origin totals as computed by a plain count query, for comparison.
for (var key in origins) {
    var label = origins[key];
    print(label + ": " + db.users.count({"membership.platform_profile.origin": label}));
}
这是输出:
$ mongo --quiet mr_count.js
a: 1000
b: 500
c: 334
d: 250
a: 1000
b: 500
c: 334
d: 250
答案 1 :(得分:1)
你可以用下面的 map/reduce 来实现 COUNT(*) GROUP BY origin。
map/reduce 函数:
/**
 * Map step for a COUNT(*) GROUP BY origin style aggregation: emits
 * (origin, 1) for each `membership` entry that carries a
 * `platform_profile.origin` value.
 *
 * Entries without a `platform_profile`, or whose origin is null/undefined,
 * are skipped one by one; the remaining entries are still processed.
 * A present-but-falsy origin such as 0 is a valid key and is emitted.
 *
 * Invoked with `this` bound to the document being mapped; `emit` is supplied
 * by the environment that runs the map function.
 */
var map = function() {
    if (!this.membership) return;
    for (var idx = 0; idx < this.membership.length; idx++) {
        var entry = this.membership[idx];
        var profile = entry && entry.platform_profile;
        // `continue`, not `return`: one incomplete entry must not discard the
        // entries that come after it in the same document.
        if (!profile || profile.origin === undefined || profile.origin === null) continue;
        emit(profile.origin, 1);
    }
};
/**
 * Reduce step: adds up all the values emitted for one key.
 *
 * Summing the values (rather than counting how many there are) gives the same
 * total when some of the inputs are themselves partial sums.
 *
 * @param key    the key being reduced (not used in the computation)
 * @param values array of numbers to add together
 * @returns the sum of `values`; 0 when the array is empty
 */
var reduce = function(key, values) {
    var count = 0;
    // Declared index loop: no global `v` is leaked, and only the array
    // elements themselves are visited.
    for (var idx = 0; idx < values.length; idx++) {
        count += values[idx];
    }
    return count;
};
// Run the map/reduce over the `users` collection through the raw command
// form, naming `users_count` as the output target; the command's reply is
// kept in `result`.
result = db.runCommand({
    mapreduce: "users",
    map: map,
    reduce: reduce,
    out: "users_count"
});
答案 2 :(得分:1)
我遇到了同样的问题。我在 reduce 函数中把 x.length 换成了 Array.sum(x)(假设你在 map 函数中 emit 的是 1),然后结果就正确了。我原本也认为 x.length 应该可行,但我无法解释它为什么不行。