Question

使用此代码：

var map = function(){

    emit(1, { read: this.read, important: this.important, count: 1 })
}

var reduce = function(key, vals) {

    var i = 0, r = 0, c = 0;

    vals.forEach(function(d) {
       r += !d.read ? 0 : 1;
       i += d.important ? 0 : 1
       c += d.count;
    });

    return { read: r, important: i, count: c }
}

db.ipdr.mapReduce(map, reduce, { out: "__mr_test", verbose: true, query: { liid: "40001" } });
db.getCollection("__mr_test").find();

我的计数不完整：

{
    "_id" : 1,
    "value" : {
        "read" : 1,
        "important" : 7,
        "count" : 7607
    }
}

“计数”没问题，但“阅读”和“重要”应该高很多。我错过了什么吗？

Answer 1

而不是

r += !d.read ? 0 : 1;
i += d.important ? 0 : 1
c += d.count;

你不应该

r += !d.read ? 0 : d.read;
i += d.important ? 0 : 1
c += d.count;

Answer 2

这不是mapReduce的好用法。聚合框架使用本机代码，它比JavaScript执行速度快得多，并且可以完成相同的工作。假设“read”和“important”是逻辑值：

db.posts.aggregate([
    { "$sort": { "read": 1, "important": 1 } },
    { "$group": {
        "_id": null,
        "read": { "$sum": { "$cond": [ "$read", 1, 0 ] } },
        "important": { "$sum": { "$cond": [ "$important", 1, 0 ] } },
        "count": { "$sum": 1 }
    }}
])

因此，不仅速度更快，而且编码更简单。

鉴于样本文件：

{ "read" : true,  "important" : false }
{ "read" : true,  "important" : false }
{ "read" : false, "important" : false }
{ "read" : false, "important" : true  }
{ "read" : true,  "important" : true  }

结果将是：

{ "_id" : null, "read" : 3, "important" : 2, "count" : 5 }

Mongodb和MapReduce - 结果丢失

2 个答案: