我是 MongoDB 的新手,想请教一些建议。我有以下集合:
{ "_id" : "u1", "item" : [ "a", "b", "c" ] }
{ "_id" : "u2", "item" : [ "b", "d", "e" ] }
{ "_id" : "u3", "item" : [ "a", "c", "f" ] }
{ "_id" : "u4", "item" : [ "c" ] }
我想创建一个新的集合,为每一对用户计算其项目的并集和交集。例如,对用户 u1 与 u2、u1 与 u4,最终结果应为:
{ "_id" : "u12", "intersect_count":1,"union_count":6 }
{ "_id" : "u14", "intersect_count":1,"union_count":4}
逐对进行成对运算效率很低,我不想这样做。有没有什么技巧可以更高效地完成?
答案 0 :(得分:2)
我的解决方案是:
/**
 * Map step: for every other document id, emit this document's items under a
 * key that identifies the unordered pair of ids.
 *
 * Runs with `this` bound to the current document. Reads `ids` (an array of
 * `_id` values) and calls `emit`; neither is defined in this block. `ids` is
 * expected to be supplied through the mapReduce `scope` option and `emit` by
 * the mapReduce runtime.
 */
function map_func() {
  // Kept local so the current document does not leak into a global `self`.
  var doc = this;
  ids.forEach(function (id) {
    // A document is never paired with itself.
    if (id === doc._id) return;
    // Sorting the two ids gives (a, b) and (b, a) the same key.
    emit([id, doc._id].sort().join('_'), doc.item);
  });
}
/**
 * Reduce step: turns the item arrays emitted for one pair key into counts.
 *
 * Relies on `intersect_func` and `union_func`, which are not defined in this
 * block and are expected to be supplied through the mapReduce `scope` option.
 *
 * @param {string} key - Pair key produced by the map step (not used here).
 * @param {Array<Array>} vals - Item arrays emitted for that key; they are
 *   passed positionally to the helpers via `apply`.
 * @returns {{intersect_count: number, union_count: number}} Lengths of the
 *   arrays returned by the two helpers.
 *
 * NOTE(review): the returned object has a different shape from the emitted
 * arrays. MongoDB's mapReduce documentation says reduce may be invoked more
 * than once for the same key with its own output as input; confirm that
 * cannot happen here, or move the counting into a `finalize` function.
 */
function reduce_func(key, vals) {
  return {
    intersect_count: intersect_func.apply(null, vals).length,
    union_count: union_func.apply(null, vals).length
  };
}
// mapReduce options: results are written to the "redused_items" collection,
// and `scope` exposes the list of all `_id` values plus both helper functions
// as globals to the map and reduce functions.
// Declared with `var` so `opts` is not created as an implicit global.
var opts = {
  out: "redused_items",
  scope: {
    ids: db.items.distinct('_id'),
    union_func: union_func,
    intersect_func: intersect_func
  }
};
// Run the job over the `items` collection.
db.items.mapReduce(map_func, reduce_func, opts);
如果集合中有 N 个文档,那么 `map_func` 会发出 N*(N-1) 个键值对供后续归约,随后 `reduce_func` 会将它们归约为 N*(N-1)/2 个新文档。
我使用 `scope` 将全局变量(`ids`)和辅助函数(`union_func`、`intersect_func`)传递给 `map_func` 和 `reduce_func`;否则 MapReduce 会报错失败,因为 `map_func` 和 `reduce_func` 是在一个独立的特殊环境中执行的。
调用MapReduce的结果:
> db.redused_items.find()
{ "_id" : "u1_u2", "value" : { "intersect_count" : 1, "union_count" : 6 } }
{ "_id" : "u1_u3", "value" : { "intersect_count" : 2, "union_count" : 6 } }
{ "_id" : "u1_u4", "value" : { "intersect_count" : 1, "union_count" : 4 } }
{ "_id" : "u2_u3", "value" : { "intersect_count" : 0, "union_count" : 6 } }
{ "_id" : "u2_u4", "value" : { "intersect_count" : 0, "union_count" : 4 } }
{ "_id" : "u3_u4", "value" : { "intersect_count" : 1, "union_count" : 4 } }
我使用以下辅助函数进行测试:
/**
 * Concatenates two item arrays into a new array.
 *
 * Duplicates are kept, so the result's length is always
 * `a1.length + a2.length`; neither input is modified.
 *
 * @param {Array} a1 - First item array.
 * @param {Array} a2 - Second item array.
 * @returns {Array} The elements of `a1` followed by the elements of `a2`.
 */
function union_func(a1, a2) {
  return a1.concat(a2);
}
/**
 * Returns the elements of `a1` that also occur in `a2`.
 *
 * Order follows `a1`, and membership is tested with `indexOf` (strict
 * equality). Neither input is modified.
 *
 * @param {Array} a1 - Array whose elements are filtered.
 * @param {Array} a2 - Array used for the membership test.
 * @returns {Array} New array with the shared elements.
 */
function intersect_func(a1, a2) {
  return a1.filter(function (x) {
    return a2.indexOf(x) >= 0;
  });
}
另一种方法是在 `map_func` 中使用 mongo 游标,而不是全局的 `ids` 数组:
/**
 * Map step (cursor variant): for every other document in `db.items`, emit
 * this document's items under a key that identifies the unordered pair of ids.
 *
 * Runs with `this` bound to the current document. `db` and `emit` are not
 * defined in this block.
 *
 * NOTE(review): MongoDB's mapReduce documentation states that the map
 * function should not access the database; confirm this variant is supported
 * on the target server version before relying on it.
 */
function map_func() {
  // Kept local so the current document does not leak into a global `self`.
  var doc = this;
  // Only `_id` is needed from the other documents, so project just that field.
  db.items.find({}, { _id: 1 }).forEach(function (elem) {
    // A document is never paired with itself.
    if (elem._id === doc._id) return;
    // Sorting the two ids gives (a, b) and (b, a) the same key.
    emit([elem._id, doc._id].sort().join('_'), doc.item);
  });
}
// mapReduce options for the cursor variant: results are written to the
// "redused_items" collection, and `scope` exposes only the two helper
// functions as globals to the map and reduce functions.
// Declared with `var` so `opts` is not created as an implicit global.
var opts = {
  out: "redused_items",
  scope: {
    union_func: union_func,
    intersect_func: intersect_func
  }
};
// Run the job over the `items` collection.
db.items.mapReduce(map_func, reduce_func, opts);
结果将是相同的。