mongoDB mapreduce运行3m文档需要很长时间

时间:2017-06-27 14:55:10

标签: mongodb mapreduce aggregate

我有一个包含300万份文档的集合。每个文档有40个字段,部分字段如下。

{
    "b_date" : "2016-04-05",
    "d_date" : "2016-06-25",
    "pos" : "MISC",
    "origin" : "DXB",
    "destination" : "HGA",
    "pax" : 1,
    "pax_1" : 2
 },   
{
    "b_date" : "2016-04-05",
    "d_date" : "2016-06-25",
    "pos" : "MISC",
    "origin" : "DXB",
    "destination" : "HGA",
    "pax" : 4,
    "pax_1" : 5
 },   
{
    "b_date" : "2016-04-05",
    "d_date" : "2016-06-26",
    "pos" : "MISC",
    "origin" : "DXB",
    "destination" : "HGA",
    "pax" : 3,
    "pax_1" : 3
 }

现在,我希望通过对 b_date、d_date、pos、origin、destination 字段进行分组来获得 pax 和 pax_1 的总和。累积 pax 和累积 pax_1 则按 pos、origin、destination 字段分组,但累积 pax 和 pax_1 应根据 b_date、d_date 的升序递增。

  

预期结果如下(我的 mapReduce 代码见下文):

{
    "_id.dep_date" : "2016-04-05",
    "_id.sale_date" : "2016-06-25",
    "_id.pos" : "MISC",
    "_id.origin" : "DXB",
    "_id.destination" : "HGA",
    "value.pax" : 5,
    "value.cumulative_pax":5,
    "value.pax_1" : 7,
    "value.cumulative_pax_1":7,

 },   
{
    "_id.dep_date" : "2016-04-05",
    "_id.sale_date" : "2016-06-26",
    "_id.pos" : "MISC",
    "_id.origin" : "DXB",
    "_id.destination" : "HGA",
    "value.pax" : 3,
    "value.cumulative_pax":8,
    "value.pax_1" : 3,
    "value.cumulative_pax_1":10,
 }

这个 mapReduce 返回了预期的结果,但耗时很长,大约需要 3 小时。这是因为 'b_date' 和 'd_date' 是字符串格式的日期吗?应该如何优化?聚合(aggregate)在 3 分钟内就能返回结果,但我无法通过聚合得到累积 pax。

1 个答案:

答案 0 :(得分:1)

Map Reduce code,

/*
 * Sums pax / pax_1 for every distinct
 * (pos, origin, destination, dep_date, sale_date) key, then adds a running
 * (cumulative) total of both counters per (pos, origin, destination) group
 * in the finalize step. Results are written to 'some_collection'.
 *
 * NOTE(review): the running totals assume finalize is called once per key,
 * in ascending key order, inside a single JavaScript context that shares
 * `scope` -- confirm this holds for your deployment (e.g. sharded clusters).
 * NOTE(review): dep_date is read from d_date and sale_date from b_date;
 * confirm this mapping matches the intended meaning of the two fields.
 */
db.collection.mapReduce(
    // map: one emit per document; the key field order is kept unchanged
    // because it determines the sort order of the emitted keys.
    function () {
        emit(
            {
                'pos': this.pos,
                'origin': this.origin,
                'destination': this.destination,
                'dep_date': this.d_date,
                'sale_date': this.b_date
            },
            {
                'pax': this.pax,
                'pax_1': this.pax_1
            }
        );
    },
    // reduce: add up the counters of all values emitted for the same key.
    // The returned object has the same shape as the emitted values.
    function (key, values) {
        // Declared with var so they do not leak into the shared JS scope.
        var paxTotal = 0;
        var pax1Total = 0;
        // Index loop instead of for...in: values is an array.
        for (var i = 0; i < values.length; i++) {
            paxTotal += values[i].pax;
            pax1Total += values[i].pax_1;
        }
        return {
            'pax': paxTotal,
            'pax_1': pax1Total
        };
    },
    {
        // State carried across finalize calls: the group seen last and
        // its running totals.
        'scope': {
            'pos': '',
            'origin': '',
            'destination': '',
            'running': { 'pax': 0, 'pax_1': 0 }
        },
        // finalize: attach the running totals of the current
        // (pos, origin, destination) group to the reduced value.
        'finalize': function (key, value) {
            // A different group starts here: restart the running totals.
            if (pos !== key.pos ||
                origin !== key.origin ||
                destination !== key.destination) {
                running.pax = 0;
                running.pax_1 = 0;
                pos = key.pos;
                origin = key.origin;
                destination = key.destination;
            }

            running.pax += value.pax;
            running.pax_1 += value.pax_1;

            // pax / pax_1 are this key's own sums; cumulative_* are the
            // group totals up to and including this key. A fresh object is
            // returned so the shared state is never handed out.
            return {
                'pax': value.pax,
                'cumulative_pax': running.pax,
                'pax_1': value.pax_1,
                'cumulative_pax_1': running.pax_1
            };
        },
        'out': 'some_collection'
    }
);