MongoDB聚合填充缺失天数

时间:2014-10-16 14:29:47

标签: mongodb mongodb-query aggregation-framework

我有一个 product 集合,其中包含以下文档:

{ "_id" : 1, "item" : "abc", created: ISODate("2014-10-01T08:12:00Z") }
{ "_id" : 2, "item" : "jkl", created: ISODate("2014-10-02T09:13:00Z") }
{ "_id" : 3, "item" : "hjk", created: ISODate("2014-10-02T09:18:00Z") }
{ "_id" : 4, "item" : "sdf", created: ISODate("2014-10-07T09:14:00Z") }
{ "_id" : 5, "item" : "xyz", created: ISODate("2014-10-15T09:15:00Z") }
{ "_id" : 6, "item" : "iop", created: ISODate("2014-10-16T09:15:00Z") }

我想绘制一张展示产品数量的图表,所以我使用 MongoDB 聚合框架按天分组统计产品数量:

  // $project stage: keeps `created`, suppresses `_id`, and extracts the
  // hour / minute / second / millisecond parts of `created` into the helper
  // fields h, m, s and ml.
  var proj1 = {
      $project: {
          created: 1,
          _id: 0,
          h: { $hour: "$created" },
          m: { $minute: "$created" },
          s: { $second: "$created" },
          ml: { $millisecond: "$created" }
      }
  };

  // $project stage: replaces `created` with
  //   created - (ml + s*1000 + m*60*1000 + h*60*60*1000)
  // i.e. the time-of-day (in milliseconds) is subtracted so that only the
  // day part of the date remains.
  var proj2 = {
      $project: {
          created: {
              $subtract: [
                  "$created",
                  {
                      $add: [
                          "$ml",
                          { $multiply: ["$s", 1000] },
                          { $multiply: ["$m", 60, 1000] },
                          { $multiply: ["$h", 60, 60, 1000] }
                      ]
                  }
              ]
          }
      }
  };

  // Run the pipeline: strip the time-of-day from `created` (proj1, proj2),
  // count the documents per resulting day, and sort the days ascending.
  db.product.aggregate([
      proj1,
      proj2,
      { $group: { _id: "$created", count: { $sum: 1 } } },
      { $sort: { _id: 1 } }
  ]);

mongo shell的结果是:

{
    "result" : [ 
        {
            "_id" : ISODate("2014-10-01T00:00:00.000Z"),
            "count" : 1
        }, 
        {
            "_id" : ISODate("2014-10-02T00:00:00.000Z"),
            "count" : 2
        }, 
        {
            "_id" : ISODate("2014-10-07T00:00:00.000Z"),
            "count" : 1
        }, 
        {
            "_id" : ISODate("2014-10-15T00:00:00.000Z"),
            "count" : 1
        }, 
        {
            "_id" : ISODate("2014-10-16T00:00:00.000Z"),
            "count" : 1
        }
    ],
    "ok" : 1
}

当然,有些日子没有产品,使用上面结果集的图表如下所示:

enter image description here

但是所需的图表应如下所示:

desired output

所以问题是:如何把缺失的日期(例如最近 30 天内没有产品的日期)以 count = 0 的形式补充到结果集中?也就是说,所需的结果集应该如下所示:

{
    "result" : [
        {
            "_id" : ISODate("2014-09-16T00:00:00.000Z"),
            "count" : 0
        }, 
        {
            "_id" : ISODate("2014-09-17T00:00:00.000Z"),
            "count" : 0
        }, 
        ...            
        {
            "_id" : ISODate("2014-10-01T00:00:00.000Z"),
            "count" : 1
        }, 
        {
            "_id" : ISODate("2014-10-02T00:00:00.000Z"),
            "count" : 2
        }, 
        {
            "_id" : ISODate("2014-10-03T00:00:00.000Z"),
            "count" : 0
        },
        ...
        {
            "_id" : ISODate("2014-10-07T00:00:00.000Z"),
            "count" : 1
        }, 
        {
            "_id" : ISODate("2014-10-08T00:00:00.000Z"),
            "count" : 0
        },
        ...
        {
            "_id" : ISODate("2014-10-15T00:00:00.000Z"),
            "count" : 1
        }, 
        {
            "_id" : ISODate("2014-10-16T00:00:00.000Z"),
            "count" : 1
        },
        // also, add some extra days 
        {
            "_id" : ISODate("2014-10-17T00:00:00.000Z"),
            "count" : 0
        },
        {
            "_id" : ISODate("2014-10-18T00:00:00.000Z"),
            "count" : 0
        }
    ],
    "ok" : 1
}

2 个答案:

答案 0 :(得分:7)

完全用聚合框架来处理这个问题相当麻烦,但确实可以做到。
(需要MongoDB V2.6 +)

// Stage 1 ($project): keep `created`, drop `_id`, and split the time-of-day
// of `created` into the helper fields h, m, s and ml.
var proj1 = {
    $project: {
        created: 1,
        _id: 0,
        h: { $hour: "$created" },
        m: { $minute: "$created" },
        s: { $second: "$created" },
        ml: { $millisecond: "$created" }
    }
};

// Stage 2 ($project): subtract the time-of-day, expressed in milliseconds
// (ml + s*1000 + m*60*1000 + h*60*60*1000), from `created` so that only the
// day part of the date is left.
var proj2 = {
    $project: {
        created: {
            $subtract: [
                "$created",
                {
                    $add: [
                        "$ml",
                        { $multiply: ["$s", 1000] },
                        { $multiply: ["$m", 60, 1000] },
                        { $multiply: ["$h", 60, 60, 1000] }
                    ]
                }
            ]
        }
    }
};

// Stage 3 ($group): one row per truncated `created` value, with the number
// of documents that share it.
var group1 = {
    $group: {
        _id: "$created",
        count: { $sum: 1 }
    }
};

// Stage 4 ($group): fold every per-day row into a single document.
// `origin` collects the rows themselves ($$ROOT) and `maxDate` is the largest
// `_id` among them.
var group2 = {
    $group: {
        _id: 0,
        origin: { $push: "$$ROOT" },
        maxDate: { $max: "$_id" }
    }
};

var step = 24 * 60 * 60 * 1000; // milliseconds of one day

// Length of the charted window in days, counting the latest day itself.
// Change this single value to widen or narrow the window.
var windowDays = 30;

// Day offsets 1 .. windowDays-1 counted back from the latest day. Generated
// rather than written out by hand so it always agrees with `windowDays`.
var dayOffsets = [];
for (var offset = 1; offset < windowDays; offset++) {
    dayOffsets.push(offset);
}

// Stage 5 ($project): keep `origin` and build `extents`, one placeholder row
// { _id: maxDate - offset * step, count: 0 } for every offset in dayOffsets.
var project3 = {
    $project : {
        origin : 1,
        extents : {
            $map : {
                "input" : dayOffsets,
                "as" : "e",
                "in" : {
                    _id : {
                        $subtract : [ "$maxDate", {
                            $multiply : [ step, "$$e" ]
                        } ]
                    },
                    // NOTE(review): `$add: [0]` presumably exists only to
                    // produce a constant 0 as an expression; `$literal` is
                    // the documented alternative. Confirm before changing.
                    count : {
                        $add : [ 0 ]
                    }
                }
            }
        }
    }
};

// Stage 6 ($project): merge the real per-day rows with the zero-count
// placeholder rows into one array, `values`.
var project4 = {
    $project: {
        _id: 0,
        values: { $setUnion: ["$origin", "$extents"] }
    }
};

// Stage 7 ($unwind): emit one document per element of `values`.
var unwind1 = { $unwind: "$values" };

// Stage 8 ($group): regroup by day. Taking the maximum `count` lets a real
// row win over the count-0 placeholder generated for the same day.
var group3 = {
    $group: {
        _id: "$values._id",
        count: { $max: "$values.count" }
    }
};

// Run all stages in order and return the days in ascending order.
db.product.aggregate([
    proj1,
    proj2,
    group1,
    group2,
    project3,
    project4,
    unwind1,
    group3,
    { $sort: { _id: 1 } }
]);

我想在应用程序端填补缺失的部分,代码如下,供您参考:

/**
 * Comparator for Array.prototype.sort: orders rows by the timestamp of
 * their `_id` date, ascending.
 *
 * @param {{_id: Date}} x - left-hand row
 * @param {{_id: Date}} y - right-hand row
 * @returns {number} -1 if x is earlier, 0 if both timestamps are equal,
 *     1 otherwise
 */
function sortResult(x, y) {
    var left = x._id.getTime();
    var right = y._id.getTime();
    if (left < right) {
        return -1;
    }
    return left === right ? 0 : 1;
}

// Application-side alternative: fetch the plain per-day counts and add the
// missing days (count 0) in JavaScript instead of inside the pipeline.

// NOTE(review): the original snippet called aggregate() without a pipeline.
// The per-day counting stages proj1, proj2 and group1 defined earlier in this
// file are assumed to be the intended ones. No $sort stage is needed because
// the rows are sorted with sortResult at the end.
var raw = db.product.aggregate([ proj1, proj2, group1 ]);

// The question's shell output has the shape { result: [...], ok: 1 }, while
// newer shells return a cursor. Normalise both to a plain array.
var result;
if (raw && typeof raw.toArray === "function") {
    result = raw.toArray();
} else {
    result = (raw && raw.result) || [];
}

var step = 24 * 60 * 60 * 1000; // milliseconds of one day

// Index the rows by timestamp and track the latest day while doing so, so
// the script does not depend on the order of `result`.
var map = {};
var endDateMilliseconds = -Infinity;
for (var i = 0; i < result.length; i++) {
    var time = result[i]._id.getTime();
    map[time] = result[i];
    if (time > endDateMilliseconds) {
        endDateMilliseconds = time;
    }
}

var finalResult = [];
// With no rows there is no latest day to count back from, so the output
// stays empty instead of throwing.
if (result.length > 0) {
    // Walk back 29 days from the latest day and add a zero-count row for
    // every day that has no row yet.
    for (var ms = endDateMilliseconds, x = 1; x < 30; x++) {
        ms -= step;
        if ( ! ( ms in map ) ) {
            map[ms] = {_id : new Date(ms), count : 0};
        }
    }

    finalResult = Object.keys(map).map(function (key) {
        return map[key];
    });
    finalResult.sort(sortResult);
}
printjson(finalResult);

答案 1 :(得分:2)

好的,首先:不存在的值会被评估为 null(大致可以理解为"nada"、"什么都没有"、"不存在"),它不等于 0,而 0 是一个有明确定义的值。

举例来说,MongoDB 对 0 和 42 之间的差别并没有语义上的理解。那么,对于某一天,MongoDB 同样没有任何语义上的理解,它又该如何决定应当为这一天假定什么值呢?

基本上,您有两种选择:要么在没有值可记录的日子里也为每一天保存一个 0;要么在应用程序中遍历您想要绘制图表的那些日期,对每个没有值的日期输出 0 作为替代。我建议采用前者,因为这样就可以使用聚合框架了。