MongoDB聚合:获取键值对的计数

时间:2013-05-11 01:31:47

标签: mongodb

我有一个包含如此结构的文档的集合:

{
  responses: {
    key1: bar,
    key2: quux
    ...
  },...
}

有没有办法获取responses对象中每个唯一键值对的计数?例如,我希望看到如下结果(确切的结构无关紧要):

{
  key1: {
    foo: 7 // aka 7 objects are similar to { responses: [{key: foo}] }
    bar: 30
  },
  key2: {
    baz: 24,
    quux: 13
  }
}

2 个答案:

答案 0 :(得分:2)

有几种方法可以做到这一点。聚合框架目前还无法实现,因为它无法将键名映射为值。但使用map reduce则相对简单:

/**
 * Map step: emits one count for every key/value pair in `responses`.
 *
 * Called with `this` bound to the document being processed. For each key
 * in `this.responses` it emits the compound key `{ key, response }` with
 * the value 1, so that the reduce step can sum the occurrences of each
 * distinct pair. A document without a `responses` field emits nothing.
 *
 * Relies on the `emit(key, value)` function supplied by the map-reduce
 * runtime.
 */
var map = function () {
    // `k` is declared locally; left undeclared it would become an implicit
    // global (and a ReferenceError under strict mode).
    for (var k in this.responses) {
        emit({ key: k, response: this.responses[k] }, 1);
    }
};

/**
 * Reduce step: sums the counts emitted for one key/value pair.
 *
 * @param {Object} k - The compound `{ key, response }` key (unused here).
 * @param {number[]} values - Counts emitted by the map step, or partial
 *     sums returned by earlier reduce calls for the same key.
 * @returns {number} The total of all entries in `values`.
 */
var reduce = function (k, values) {
    // Local accumulator; an undeclared `result` would be an implicit global.
    var result = 0;
    values.forEach(function (v) {
        result += v;
    });
    return result;
};

在此样本数据集上:

> db.responses.find({},{_id:0,responses:1}).pretty()
{ "responses" : { "key1" : "foo", "key2" : "bar" } }
{ "responses" : { "key1" : "foo", "key3" : "bar" } }
{ "responses" : { "key2" : "foo", "key3" : "bar" } }
{ "responses" : { "key3" : "baz" } }

运行MR可以获得:

> db.responses.mapReduce(map, reduce, {out:{inline:1}})
{
    "results" : [
        {
            "_id" : {
                "key" : "key1",
                "response" : "foo"
            },
            "value" : 2
        },
        {
            "_id" : {
                "key" : "key2",
                "response" : "bar"
            },
            "value" : 1
        },
        {
            "_id" : {
                "key" : "key2",
                "response" : "foo"
            },
            "value" : 1
        },
        {
            "_id" : {
                "key" : "key3",
                "response" : "bar"
            },
            "value" : 2
        },
        {
            "_id" : {
                "key" : "key3",
                "response" : "baz"
            },
            "value" : 1
        }
    ],
    "timeMillis" : 65,
    "counts" : {
        "input" : 4,
        "emit" : 7,
        "reduce" : 2,
        "output" : 5
    },
    "ok" : 1,
}

以下是通过mapreduce实现此目的的第二种方法 - 这样可以使输出更像你所说的那样:

/**
 * Map step (variant 2): groups counts by key name.
 *
 * Called with `this` bound to the document being processed. For each key
 * in `this.responses` it emits the key name together with a single-entry
 * object `{ <response value>: 1 }`, so the reduce step can merge these
 * objects into one per-key histogram of response values.
 *
 * Note: the response value is used as a property name, so non-string
 * values are converted to strings.
 *
 * Relies on the `emit(key, value)` function supplied by the map-reduce
 * runtime.
 */
var m2 = function () {
    // All working variables are declared locally to avoid implicit globals.
    for (var k in this.responses) {
        var response = this.responses[k];
        var val = {};
        val[response] = 1;
        emit(k, val);
    }
};
/**
 * Reduce step (variant 2): merges per-response count objects for one key.
 *
 * @param {string} k - The key name emitted by the map step (unused here).
 * @param {Object[]} values - Objects mapping response value to count;
 *     either the single-entry objects emitted by the map step or merged
 *     objects returned by earlier reduce calls for the same key.
 * @returns {Object} One object mapping each response value to the sum of
 *     its counts across `values`.
 */
var r2 = function (k, values) {
    // Local accumulator; an undeclared `result` would be an implicit global.
    var result = {};
    values.forEach(function (v) {
        // A dedicated loop variable is used so the `k` parameter is not
        // overwritten while iterating.
        for (var response in v) {
            // `> 0` is false both for a missing entry (undefined) and for
            // inherited Object.prototype members, so the first sighting
            // always initializes the count.
            if (result[response] > 0) {
                result[response] += v[response];
            } else {
                result[response] = v[response];
            }
        }
    });
    return result;
};

结果是:

> db.responses.mapReduce(m2, r2, {out:{inline:1}})
{
    "results" : [
        {
            "_id" : "key1",
            "value" : {
                "foo" : 2
            }
        },
        {
            "_id" : "key2",
            "value" : {
                "bar" : 1,
                "foo" : 1
            }
        },
        {
            "_id" : "key3",
            "value" : {
                "bar" : 2,
                "baz" : 1
            }
        }
    ],
    "timeMillis" : 3,
    "counts" : {
        "input" : 4,
        "emit" : 7,
        "reduce" : 3,
        "output" : 3
    },
    "ok" : 1,
}

答案 1 :(得分:1)

这可以通过map reduce完成。您可以在map函数中找出所有想要统计的键并发出(emit)它们,然后在reduce函数中将发出的结果归约为计数。

如果您以前没有使用过MongoDB中的map reduce,这里有一个很好的视频讲解:

http://www.youtube.com/watch?v=WovfjprPD_I