是否可以在MongoDB查询中,利用ObjectId中的时间戳计算出一个“时间跨度”?

时间:2014-06-24 10:45:21

标签: mongodb mapreduce timestamp aggregation-framework

我们有一个基本的客户咨询(enquiry)管理工具,用它在管理后台中跟踪来自网站的咨询;在 enquiries 集合中,我们利用每个文档的ObjectId,按添加日期对咨询进行排序。

{
    "_id" :      ObjectId("53a007db144ff47be1000003"),
    "comments" : "This is a test enquiry. Please ignore. We'll delete it shortly.",
    "customer" : {
        "name" :      "Test Enquiry",
        "email" :     "test@test.com",
        "telephone" : "07890123456",
        "mobile" :    "07890123456",
        "quote" :     false,
        "valuation" : false
    },
    "site" : [],
    "test" : true,
    "updates" : [ 
        {
            "_id" :          ObjectId("53a007db144ff47be1000001"),
            "status" :       "New",
            "status_id" :    ObjectId("537de7c3a5e6e668ffc2335c"),
            "status_index" : 100,
            "substatus" :    "New Web Enquiry",
            "substatus_id" : ObjectId("5396bb9fa5e6e668ffc23388"),
            "notes" :        "New enquiry received from website.",
        }, 
        {
            "_id" :          ObjectId("53a80c977d299cfe91bacf81"),
            "status" :       "New",
            "status_id" :    ObjectId("537de7c3a5e6e668ffc2335c"),
            "status_index" : 100,
            "substatus" :    "Attempted Contact",
            "substatus_id" : ObjectId("53a80e06a5e6e668ffc2339e"),
            "notes" :        "In this test, we pretend that we've not managed to get hold of the customer on the first attempt.",
        }, 
        {
            "_id" :          ObjectId("53a80e539b966b8da5c40c36"),
            "status" :       "Approved",
            "status_id" :    ObjectId("52e77a49d85e95f00ebf6c72"),
            "status_index" : 200,
            "substatus" :    "Enquiry Confirmed",
            "substatus_id" : ObjectId("53901f1ba5e6e668ffc23372"),
            "notes" :        "In this test, we pretend that we've got hold of the customer after failing to contact them on the first attempt.",
        }
    ]
}

每条咨询中都有一个由更新对象组成的 updates 数组,每个更新对象同样以ObjectId作为主要标识字段。我们使用 $unwind 和 $group 聚合来提取第一个和最新的更新以及更新计数,并确保只选取包含多个更新的咨询(因为创建咨询时会自动插入一条更新):

// For every test enquiry, report the ids of its first and latest update and
// the number of updates, keeping only enquiries that have more than one update.
db.enquiries.aggregate([
    {
        // Restrict the pipeline to enquiries flagged as tests.
        $match: {
            "test": true
        }
    },
    {
        // Flatten the "updates" array so each update can be grouped/counted.
        $unwind: "$updates"
    },
    {
        // Regroup per enquiry, collecting the first/last update ids and a count.
        // NOTE(review): $first/$last depend on the order in which documents
        // reach this stage; there is no explicit $sort, so this presumably
        // relies on the unwound elements arriving in array order - confirm.
        $group: {
            "_id": "$_id",
            "latest_update_id": {
                $last: "$updates._id"
            },
            "first_update_id": {
                $first: "$updates._id"
            },
            "update_count": {
                $sum: 1
            }
        }
    },
    {
        // Drop enquiries that only have a single update.
        $match: {
            "update_count": {
                $gt: 1
            }
        }
    }
])

这导致以下输出:

{
    "result" : [ 
        {
            "_id" : ObjectId("53a295ad122ea80200000005"),
            "latest_update_id" : ObjectId("53a80bdc7d299cfe91bacf7e"),
            "first_update_id" : ObjectId("53a295ad122ea80200000003"),
            "update_count" : 2
        }, 
        {
            "_id" : ObjectId("53a007db144ff47be1000003"),
            "latest_update_id" : ObjectId("53a80e539b966b8da5c40c36"),
            "first_update_id" : ObjectId("53a007db144ff47be1000001"),
            "update_count" : 3
        }
    ],
    "ok" : 1
}

然后将其传递给我们的代码(在本例中为node.js),我们对其执行一些操作,然后在仪表板上显示一些信息。

理想情况下,我想在管道中再添加一个$group阶段,用latest_update_id的时间戳减去first_update_id的时间戳,得到一个时间跨度,然后就可以对其使用$avg求平均值。

有人能告诉我这是否可行? (谢谢!)

1 个答案:

答案 0 :(得分:2)

正如Neil已经指出的那样,你无法从聚合框架中的ObjectId获取时间戳。

你说速度并不重要,所以使用MapReduce你可以得到你想要的东西:

/**
 * mapReduce map function: for each enquiry that has more than one update,
 * emits the ids of its first and last update, the update count, and the time
 * span between the two updates.
 *
 * Called with `this` bound to the document being processed. Documents whose
 * `updates` field is missing, null, or holds fewer than two entries emit
 * nothing (instead of throwing and aborting the whole job).
 *
 * Emits: key `this._id`, value
 *   { latest_update_id, first_update_id, update_count, diff }
 * where `diff` is `last._id.getTimestamp() - first._id.getTimestamp()`
 * (a millisecond count when both timestamps are Dates).
 */
var map  = function() {
    var updates = this.updates;

    // Guard before touching `.length`: a document without an `updates` array
    // would otherwise raise a TypeError inside the map phase.
    if (!updates || !(updates.length > 1)) {
        return;
    }

    var first = updates[0];
    var last = updates[updates.length - 1];

    // The creation time is embedded in each ObjectId, so subtracting the two
    // timestamps yields the span between the first and the latest update.
    var diff = last._id.getTimestamp() - first._id.getTimestamp();

    emit(this._id, {
        latest_update_id : last._id,
        first_update_id : first._id,
        update_count : updates.length,
        diff: diff
    });
};

/**
 * mapReduce reduce function for the per-enquiry job.
 *
 * Every key emitted by the map step is a distinct enquiry `_id`, so there is
 * nothing to combine. This is an identity reduce: if it is ever invoked it
 * hands back the first emitted value rather than `undefined`, so the result
 * for that key is never wiped out.
 *
 * @param {*} key - the emitted key (an enquiry `_id`).
 * @param {Array} values - values emitted for that key.
 * @returns {*} the first value, or undefined when no values are supplied.
 */
var reduce = function(key, values) {
    return values && values.length ? values[0] : undefined;
};

// Run the map-reduce job using the `map` and `reduce` functions defined above.
db.runCommand(
    {
        // Command name and source collection; kept as the first key.
        mapReduce: "enquiries",
        map: map,
        reduce: reduce,
        // Name of the collection that receives the results.
        out: "mrresults",
        // Only documents flagged as test enquiries are passed to `map`.
        query: { test : true}
    }
);

结果如下:

{
    "_id" : ObjectId("53a007db144ff47be1000003"),
    "value" : {
        "latest_update_id" : ObjectId("53a80e539b966b8da5c40c36"),
        "first_update_id" : ObjectId("53a007db144ff47be1000001"),
        "update_count" : 3,
        "diff" : 525944000
    }
}

修改

如果你想获得所有文档的平均时间差,你可以这样做:

/**
 * mapReduce map function for the averaging job: for each enquiry with more
 * than one update, emits the time span between its first and last update
 * under the single shared key "1", so all spans end up in one group.
 *
 * Called with `this` bound to the document being processed. Documents whose
 * `updates` field is missing, null, or holds fewer than two entries emit
 * nothing (instead of throwing and aborting the whole job).
 *
 * Emits: key "1", value { diff } where `diff` is
 * `last._id.getTimestamp() - first._id.getTimestamp()`.
 */
var map  = function() {
    var updates = this.updates;

    // Guard before touching `.length`: a document without an `updates` array
    // would otherwise raise a TypeError inside the map phase.
    if (!updates || !(updates.length > 1)) {
        return;
    }

    var first = updates[0];
    var last = updates[updates.length - 1];

    var diff = last._id.getTimestamp() - first._id.getTimestamp();

    // Constant key: every span is grouped together for a global average.
    emit("1", {diff : diff});
};

/**
 * mapReduce reduce function for the averaging job: folds the values emitted
 * for a key into a running `{ count, sum }` of time spans.
 *
 * MongoDB may call reduce more than once for the same key and feed the
 * output of an earlier call back in as one of `values`. Each entry is
 * therefore accepted in either shape:
 *   - `{ diff }`        - a raw value emitted by the map step (counts as 1);
 *   - `{ count, sum }`  - a partial result returned by a previous reduce.
 * Treating a partial result as a raw value would add `undefined` to the sum
 * (giving NaN) and undercount the documents it represents.
 *
 * @param {*} key - the emitted key.
 * @param {Array<Object>} values - raw emissions and/or partial results.
 * @returns {{count: number, sum: number}} combined count and sum of spans.
 */
var reduce = function(key, values) {
    var reducedVal = { count: 0, sum: 0 };

    for (var idx = 0; idx < values.length; idx++) {
        var value = values[idx];

        if (typeof value.count === "number") {
            // Partial result from an earlier reduce pass: merge it.
            reducedVal.count += value.count;
            reducedVal.sum += value.sum;
        } else {
            // Raw value straight from the map step.
            reducedVal.count += 1;
            reducedVal.sum += value.diff;
        }
    }

    return reducedVal;
};

/**
 * mapReduce finalize function for the averaging job: adds the average span
 * (`avg = sum / count`) to the reduced value.
 *
 * When only one value was emitted for a key, MongoDB skips the reduce step
 * and passes the raw `{ diff }` emission here. That case is normalised to
 * `{ count: 1, sum: diff }` first, so `avg` is the single span instead of NaN.
 *
 * @param {*} key - the emitted key.
 * @param {Object} reducedVal - `{ count, sum }` from reduce, or a raw `{ diff }`.
 * @returns {{count: number, sum: number, avg: number}} totals plus the average.
 */
var finalize = function (key, reducedVal) {
    if (typeof reducedVal.count !== "number") {
        // Never reduced: a lone map emission.
        reducedVal = { count: 1, sum: reducedVal.diff };
    }

    reducedVal.avg = reducedVal.sum / reducedVal.count;

    return reducedVal;
};

// Run the averaging map-reduce job using the `map`, `reduce` and `finalize`
// functions defined above.
db.runCommand(
    {
        // Source collection: the enquiries collection from the question
        // (previously "y", which does not match the collection being asked
        // about or the one used by the per-enquiry job).
        mapReduce: "enquiries",
        map: map,
        reduce: reduce,
        finalize : finalize,
        // Name of the collection that receives the result.
        out: "mrtest",
        // Only documents flagged as test enquiries are passed to `map`.
        query: { test : true}
    }
);

示例输出:

> db.mrtest.find().pretty()
{
    "_id" : "1",
    "value" : {
        "count" : 2,
        "sum" : 1051888000,
        "avg" : 525944000
    }
}