Mongodb聚合按天计算

时间:2014-06-19 13:05:18

标签: mongodb mongodb-query aggregation-framework

我正在使用MongoDB的聚合框架来聚合一个数据集。我的情况有点复杂。我的集合中的文档如下:

{
  startTime: ISODate("2014-12-31T10:20:30Z"),
  customerId: 123,
  ping: "2",
  link: "3"
}

现在我想将数据聚合到另一个集合,如下所示:

{
_id: {
 day: ISODate("2014-12-31T00:00:00Z"),
 customerId: 123
 },
hours: [
  {
   hour: ISODate("2014-12-31T10:00:00Z"),
   pings: 2,
   links: 3
  },
  {
   hour: ISODate("2014-12-31T11:00:00Z"),
   pings: 5,
   links: 6
  }
 ]
}

正如您所看到的那样,数据首先是按天分组,然后是按小时分组。我有以下聚合查询按天分组,但是如何按小时将它们分组?有什么想法吗?

// Aggregation pipeline: total pings and links per customer per day.
//
// Stages:
//   1. $match   - keep only documents whose startTime is inside the range.
//                 Done first so the later stages process fewer documents and
//                 the filter can use an index on startTime if one exists.
//   2. $project - split startTime into hour/minute/second/millisecond parts.
//   3. $project - subtract those parts (converted to milliseconds) from the
//                 original date, leaving the date truncated to midnight.
//   4. $group   - sum ping/link per (day, customerId).
//
// NOTE: the sample document stores `ping` and `link` as strings; $sum ignores
// non-numeric values, so convert them to numbers before relying on the totals.
var pipeline = [
    {
        $match: {
            "startTime": {
                $gte: new ISODate("2013-12-01T07:00:00Z"),
                $lte: new ISODate("2014-01-01T08:00:00Z")
            }
        }
    },
    {
        $project: {
            startTime: 1,
            customerId: 1,
            ping: 1,
            link: 1,
            date: "$startTime",
            h: { "$hour": "$startTime" },
            m: { "$minute": "$startTime" },
            s: { "$second": "$startTime" },
            ml: { "$millisecond": "$startTime" }
        }
    },
    {
        $project: {
            startTime: 1,
            customerId: 1,
            ping: 1,
            link: 1,
            // date minus its time-of-day (in ms) == start of that day
            date: {
                "$subtract": [
                    "$date",
                    {
                        "$add": [
                            "$ml",
                            { "$multiply": ["$s", 1000] },
                            { "$multiply": ["$m", 60, 1000] },
                            { "$multiply": ["$h", 60, 60, 1000] }
                        ]
                    }
                ]
            }
        }
    },
    // Aggregate the data
    {
        $group: {
            _id: { day: "$date", customerId: "$customerId" },
            pings: { $sum: "$ping" },
            // The document field is `link`; "$links" does not exist and
            // would always sum to 0.
            links: { $sum: "$link" }
        }
    }
];

1 个答案:

答案 0 :(得分:8)

你基本上需要的是两次分组,不过使用日期聚合运算符(date aggregation operators)得到的并不是完整的日期对象,而只是其中相关的部分:

// Two-pass grouping: first total per (customer, day, hour), then collect the
// hourly totals of each (customer, day) into a "hours" array.
db.collection.aggregate([
    // Pass 1: one result per customer / day-of-year / hour-of-day, with the
    // summed ping and link values for that hour.
    // NOTE: "$dayOfYear" yields only the day number (e.g. 365), with no year,
    // so this presumably assumes the data does not span several years.
    { "$group": {
        "_id": {
            "customerId": "$customerId",
            "day": { "$dayOfYear": "$startTime" },
            "hour": { "$hour": "$startTime" }
        },
        "pings": { "$sum": "$ping" },
        "links": { "$sum": "$link" }
    }},
    // Pass 2: regroup by customer and day only, pushing each hourly total
    // from pass 1 into the "hours" array.
    { "$group": {
       "_id": {
           "customerId": "$_id.customerId",
           "day": "$_id.day"
       },
       "hours": { 
           "$push": { 
               "hour": "$_id.hour",
               "pings": "$pings",
               "links": "$links"
           }
       }
    }}
])

两次$group会把每天的结果放入一个数组,从而得到您想要的格式。示例中只有一个文档,但得到的结果基本如下:

{
    "_id" : {
            "customerId" : 123,
            "day" : 365
    },
    "hours" : [
            {
                    "hour" : 10,
                    "pings" : 2,
                    "links" : 3
            }
    ]
}

如果您觉得日期运算符的结果难以处理,或者希望得到更接近日期对象的、可以直接“透传(pass-through)”使用的结果,那么可以改为转换成纪元(epoch)时间戳:

// Same two-pass grouping, but the day and hour keys are epoch timestamps in
// milliseconds instead of day-of-year / hour-of-day numbers.
db.collection.aggregate([
    // Pass 1: group per customer / day / hour.
    // For both keys: (startTime - epoch) gives milliseconds since 1970-01-01;
    // subtracting that value modulo the interval length rounds it down to the
    // start of the interval.
    { "$group": {
        "_id": {
            "customerId": "$customerId",
            // Start of the day: interval is 1000*60*60*24 ms (one day).
            "day": {
               "$subtract": [
                   { "$subtract": [ "$startTime", new Date("1970-01-01") ] },
                   {
                       "$mod": [
                           { "$subtract": [ "$startTime", new Date("1970-01-01") ] },
                           1000*60*60*24   
                       ]
                   }
               ]
            },
            // Start of the hour: interval is 1000*60*60 ms (one hour).
            "hour": {
               "$subtract": [
                   { "$subtract": [ "$startTime", new Date("1970-01-01") ] },
                   {
                       "$mod": [
                           { "$subtract": [ "$startTime", new Date("1970-01-01") ] },
                           1000*60*60   
                       ]
                   }
               ]
            }
        },
        "pings": { "$sum": "$ping" },
        "links": { "$sum": "$link" }
    }},
    // Pass 2: regroup by customer and day, pushing each hourly total into
    // the "hours" array.
    { "$group": {
       "_id": {
           "customerId": "$_id.customerId",
           "day": "$_id.day"
       },
       "hours": { 
           "$push": { 
               "hour": "$_id.hour",
               "pings": "$pings",
               "links": "$links"
           }
       }
    }}
])

这里的诀窍在于:用$subtract把一个日期对象减去另一个日期对象时,得到的是一个“epoch”数值(毫秒数)。在这里,我们减去“epoch”起始日期(1970-01-01)以得到完整的时间戳值,再通过“日期运算”(减去$mod取余的结果)把时间向下取整到所需的时间间隔。结果如下:

{
    "_id" : {
            "customerId" : 123,
            "day" : NumberLong("1419984000000")
    },
    "hours" : [
            {
                    "hour" : NumberLong("1420020000000"),
                    "pings" : 2,
                    "links" : 3
            }
    ]
}

根据您的需要,这种结果可能比日期运算符返回的结果更便于处理。

在MongoDB 2.6中,您还可以借助$let运算符做一些简写,它允许您声明仅在其作用域内有效的“变量”:

// Same epoch-timestamp grouping, written with $let (MongoDB 2.6+) so the
// "milliseconds since epoch" value and the interval lengths are declared once
// and reused. Targets `db.collection` like the other pipelines on this page.
db.collection.aggregate([
    // Pass 1: group per customer / day / hour.
    { "$group": {
        "_id": {
            "$let": {
                "vars": {
                    // startTime as milliseconds since 1970-01-01
                    "date": { "$subtract": [ "$startTime", new Date("1970-01-01") ] },
                    // interval lengths in milliseconds
                    "day": 1000*60*60*24,
                    "hour": 1000*60*60
                },
                "in": {
                    "customerId": "$customerId",
                    // round down to the start of the day
                    "day": {
                        "$subtract": [
                            "$$date",
                            { "$mod": [ "$$date", "$$day" ] }
                        ]
                    },
                    // round down to the start of the hour
                    "hour": {
                        "$subtract": [
                            "$$date",
                            { "$mod": [ "$$date", "$$hour" ] }
                        ]
                    }
                }
            }
        },
        "pings": { "$sum": "$ping" },
        "links": { "$sum": "$link" }
    }},
    // Pass 2: regroup by customer and day, pushing each hourly total into
    // the "hours" array.
    { "$group": {
        "_id": {
            "customerId": "$_id.customerId",
            "day": "$_id.day"
        },
        "hours": {
            "$push": {
                "hour": "$_id.hour",
                "pings": "$pings",
                "links": "$links"
            }
        }
    }}
])

此外,我差点忘了提:示例中“ping”和“link”的值实际上是字符串,除非那只是笔误。如果不是笔误,请务必先把它们转换为数字。