Question

在MySQL中

select a,b,count(1) as cnt from list group by a, b having cnt > 2;

我需要在 MongoDB 中执行带 having 条件的 group by 操作，但遇到了以下错误。请分享您的意见。

在MongoDB中

> res = db.list.group({key:{a:true,b:true},
...                      reduce: function(obj,prev) {prev.count++;},
...                      initial: {count:0}}).limit(10);

Sat Jan  7 16:36:30 uncaught exception: group command failed: {
        "errmsg" : "exception: group() can't handle more than 20000 unique keys",
        "code" : 10043,
        "ok" : 0

执行之后，我们需要接着运行以下代码。

// Print every group whose count is greater than 2 (the SQL "having cnt > 2").
// Assumes res is the array of group documents returned by group() — TODO confirm.
// Iterating the elements directly avoids for...in, which enumerates keys
// (including any enumerable properties added to Array.prototype) and leaked
// the loop variable i as an implicit global.
res.forEach(function (group) {
    if (group.count > 2) {
        printjson(group);
    }
});

此致，库马兰

Answer 1

MongoDB group by在大多数情况下非常有限，例如

- the result set must contain fewer than 10000 unique keys (the exact limit depends on the server version; the error above reports 20000).
- it will not work in sharded environments

所以最好使用map reduce。所以查询就像这样

~~map = function () { emit({a:true, b:true}, {count:1}); }~~

/**
 * Reduce function for mapReduce: adds up the partial counts emitted for one key.
 *
 * The returned object has the same shape as each input value ({count: N}),
 * so the output of one reduce call can be fed back in as an input value.
 *
 * @param {*} k - The group key (unused here).
 * @param {Array<{count: number}>} values - Partial counts for that key.
 * @returns {{count: number}} The summed count.
 */
// Declared with var instead of assigning to an undeclared name, so the
// snippet also runs in strict mode.
var reduce = function (k, values) {
    var result = {count: 0};
    values.forEach(function (value) {
        result.count += value.count;
    });
    return result;
};

然后

// Run the map/reduce job using the map and reduce functions defined above.
// With out: { inline : 1 } the grouped results are returned in the command's
// response (under "results") instead of being stored in a collection.
db.list.mapReduce(map,reduce,{out: { inline : 1}})

它是一个未经测试的版本。让我知道它是否有效

修改

之前的 map 函数有问题，这就是您没有得到结果的原因。应该是

/**
 * Map function for mapReduce: emits one {count: 1} per document, keyed by the
 * document's own a and b values (not the constant `true`), so documents with
 * the same (a, b) pair end up in the same group.
 * Runs with `this` bound to the current document.
 */
// Declared with var instead of assigning to an undeclared name, so the
// snippet also runs in strict mode.
var map = function () {
    emit({a: this.a, b: this.b}, {count: 1});
};

测试数据：

> db.multi_group.insert({a:1,b:2})
> db.multi_group.insert({a:2,b:2})
> db.multi_group.insert({a:3,b:2})
> db.multi_group.insert({a:1,b:2})
> db.multi_group.insert({a:3,b:2})
> db.multi_group.insert({a:7,b:2})
> db.multi_group.mapReduce(map,reduce,{out: { inline : 1}})
{
    "results" : [
        { "_id" : { "a" : 1, "b" : 2 }, "value" : { "count" : 2 } },
        { "_id" : { "a" : 2, "b" : 2 }, "value" : { "count" : 1 } },
        { "_id" : { "a" : 3, "b" : 2 }, "value" : { "count" : 2 } },
        { "_id" : { "a" : 7, "b" : 2 }, "value" : { "count" : 1 } }
    ],
    "timeMillis" : 1,
    "counts" : { "input" : 6, "emit" : 6, "reduce" : 2, "output" : 4 },
    "ok" : 1,
}

**EDIT2：**

完整的解决方案，包括应用计数> = 2

/**
 * Map function: emits a count of 1 and the document's _id for each (a, b) pair.
 * The _id is wrapped in an array so the emitted value has exactly the same
 * shape as the value returned by reduce.
 * Runs with `this` bound to the current document.
 */
var map = function () {
    emit({a: this.a, b: this.b}, {count: 1, _id: [this._id]});
};

/**
 * Reduce function: sums the counts and collects the _ids for one (a, b) key.
 *
 * Input values may be fresh map outputs or results of an earlier reduce call
 * for the same key; in both cases value._id is an array, so the lists are
 * concatenated instead of pushed (pushing would nest arrays on re-reduce).
 *
 * @param {*} k - The group key (unused here).
 * @param {Array<{count: number, _id: Array}>} values - Partial results.
 * @returns {{count: number, _id: Array}} Combined count and flat list of _ids.
 */
var reduce = function (k, values) {
    var result = {count: 0, _id: []};
    values.forEach(function (value) {
        result.count += value.count;
        result._id = result._id.concat(value._id);
    });
    return result;
};

// Shell session: store the output in the multi_result collection, then keep
// only the groups with count >= 2.
//
// > db.multi_group.mapReduce(map,reduce,{out: { replace : "multi_result"}})
// > db.multi_result.find({'value.count' : {$gte : 2}})
// { "_id" : { "a" : 1, "b" : 2 }, "value" : { "_id" : [ ObjectId("4f0adf2884025491024f994c"), ObjectId("4f0adf3284025491024f994f") ], "count" : 2 } }
// { "_id" : { "a" : 3, "b" : 2 }, "value" : { "_id" : [ ObjectId("4f0adf3084025491024f994e"), ObjectId("4f0adf3584025491024f9950") ], "count" : 2 } }

Answer 2

您应该改用 MapReduce。group 有其局限性。

将来您可以使用Aggregation Framework。但是现在，请使用map / reduce。

Answer 3

根据您的分组数量，使用 distinct 可能会得到比 group 或 MapReduce 更简单、更快速的解决方案：

// Collect every (a, b) combination that occurs more than twice
// (the SQL "group by a, b having cnt > 2").
//
// distinct() returns a plain array of values, not a cursor, so the values are
// iterated with forEach rather than hasNext()/next().
var res = [];
db.list.distinct('a').forEach(function (a) {
  // Only look at b values that actually occur together with this a;
  // other combinations would have a count of 0 and be filtered out anyway.
  db.list.distinct('b', {'a': a}).forEach(function (b) {
    var cnt = db.list.count({'a': a, 'b': b});
    if (cnt > 2) {
      res.push({'a': a, 'b': b, 'cnt': cnt});
    }
  });
});

如果你在 a 和 b 上有索引会更快：

// Compound index on a, then b (both ascending), matching the {a, b} filter
// used by the count() queries.
db.list.ensureIndex({'a':1,'b':1})

MongoDB group by Functionalities

3 个答案: