在嵌套文档上使用mongodb进行Mapreduce

时间:2013-05-01 13:45:55

标签: mongodb mapreduce

我有以下文档结构:

{
  "country_id" : 328,
  "country_name" : "Australien",
  "cities" : [{
      "city_id" : 19398,
      "city_name" : "Bondi Beach (Sydney)"
    }, {
      "city_id" : 31102,
      "city_name" : "Double Bay (Sydney)"
    }, {
      "city_id" : 31101,
      "city_name" : "Rushcutters Bay (Sydney)"
    }, {
      "city_id" : 817,
      "city_name" : "Sydney"
    }, {
      "city_id" : 31022,
      "city_name" : "Wolly Creek (Sydney)"
    }, {
      "city_id" : 18851,
      "city_name" : "Woollahra"
    }],
  "regions" : {
    "region_id" : 796,
    "region_name" : "Australien: New South Wales (Sydney)"
  }
}

对于分面导航,我想统计属性 country_id、cities.city_id 和 regions.region_id 的数量,我想可以用 map/reduce 来实现。

这是否可以使用给定的结构?

也许有人可以为我指出使用 map/reduce 的正确方向。

2 个答案:

答案 0 :(得分:2)

可以在此处找到Mongo map-reduce示例:http://docs.mongodb.org/manual/tutorial/map-reduce-examples/

统计每个唯一的 (country_id, city_id, region_id) 元组对应的文档数量很简单:

/**
 * Map function passed to db.loc.mapReduce.
 *
 * Called with `this` bound to one country document. For every entry of
 * `this.cities` it emits the key {country_id, city_id, region_id} with the
 * value 1. A document without a `cities` field emits nothing.
 */
function m() {
    // Walk the array by numeric index instead of for...in: for...in also
    // visits enumerable properties added to Array.prototype and would emit
    // a bogus key with an undefined city_id for each of them.
    var cities = this.cities || [];
    for (var i = 0; i < cities.length; i++) {
        emit({country_id: this.country_id,
              city_id: cities[i].city_id,
              region_id: this.regions.region_id},
             1);
    }
}



/**
 * Reduce function passed to db.loc.mapReduce.
 *
 * @param key    the key the values were emitted under (not used here)
 * @param counts array of the values emitted for that key
 * @returns the result of Array.sum over `counts`
 *
 * NOTE(review): Array.sum is not standard JavaScript; it is presumably
 * supplied by the MongoDB JavaScript environment this runs in.
 */
function r(key, counts) {
    var total = Array.sum(counts);
    return total;
}
> db.loc.mapReduce(m,r,{out:"map_reduce_out"})
{
    "result" : "map_reduce_out",
    "timeMillis" : 5,
    "counts" : {
        "input" : 1,
        "emit" : 6,
        "reduce" : 0,
        "output" : 6
    },
    "ok" : 1,
}
> db.map_reduce_out.find()
{ "_id" : { "country_id" : 328, "city_id" : 817, "region_id" : 796 }, "value" : 1 }
{ "_id" : { "country_id" : 328, "city_id" : 18851, "region_id" : 796 }, "value" : 1 }
{ "_id" : { "country_id" : 328, "city_id" : 19398, "region_id" : 796 }, "value" : 1 }
{ "_id" : { "country_id" : 328, "city_id" : 31022, "region_id" : 796 }, "value" : 1 }
{ "_id" : { "country_id" : 328, "city_id" : 31101, "region_id" : 796 }, "value" : 1 }
{ "_id" : { "country_id" : 328, "city_id" : 31102, "region_id" : 796 }, "value" : 1 }

答案 1 :(得分:0)

似乎区域应该是一个数组

 "regions" : [{
    "region_id" : 796,
    "region_name" : "Australien: New South Wales (Sydney)"
  }]
  

“我想计算属性country_id,...”

似乎你想要这个输出。

...
{_id:  328, cities: 6, regions: 1},
{_id:  329, cities: 10, regions: 4},
...

请尝试以下操作,注意它只会对城市数组求和。

// Count the entries of each country's `cities` array.
// $unwind yields one document per array element, so grouping by country_id
// and adding 1 per unwound document gives the number of cities. The field
// being unwound must be `cities` (not `regions`) for the count to match the
// `cities` output field.
// The stages are passed as a single pipeline array rather than as separate
// arguments.
db.Country.aggregate([
  { $unwind: "$cities" },
  { $group: { _id: "$country_id", cities: { $sum: 1 } } }
])

以下将提供类似于已接受答案的输出。

// Group the documents by country_id and, for each group, collect the values
// of "$cities.city_id" and "$regions.region_id" with $push.
db.Country.aggregate({
  $group: {
    _id: "$country_id",
    cities: { $push: "$cities.city_id" },
    regions: { $push: "$regions.region_id" }
  }
})