如何在 MongoDB 中对连续的文档进行分组并计数?

时间:2019-06-17 06:40:58

标签: mongodb mongoose mongodb-query aggregation-framework

我有一个集合,保存着从用户那里收到的消息。我想在按时间戳排序之后,把 userName 相同的连续消息分为一组。集合中的消息文档如下:

{
    "_id":ObjectId("AAAA"),
    "userName":"Krunal",
    "message":"Krunal has types some text",
    "timestamp":ISODate("2019-06-17T11:57:00.000")
}
{
    "_id":ObjectId("AAAB"),
    "userName":"Krunal",
    "message":"Krunal has types some text again",
    "timestamp":ISODate("2019-06-17T11:59:00.000")
}
{
    "_id":ObjectId("AAAC"),
    "userName":"Krunal",
    "message":"Krunal has types some one more time",
    "timestamp":ISODate("2019-06-17T12:05:00.000")
}
{
    "_id":ObjectId("AAAD"),
    "userName":"Karan",
    "message":"Karan has type some text",
    "timestamp":ISODate("2019-06-17T12:07:00.000")
}
{
    "_id":ObjectId("AAAE"),
    "userName":"Karan",
    "message":"Karan has type some more text",
    "timestamp":ISODate("2019-06-17T12:10:00.000")
}
{
    "_id":ObjectId("AAAF"),
    "userName":"Krunal",
    "message":"Krunal has types some one more time",
    "timestamp":ISODate("2019-06-17T12:12:00.000")
}

为了便于阅读,这里使用了4个字节的对象ID;在实际情况下,它将是 MongoDB 生成的真实对象ID。对于上面的集合,我希望得到如下输出:

{
    "userName":"Krunal",
    "count":3,
    "timestamp":ISODate("2019-06-17T12:05:00.000")
}
{
    "userName":"Karan",
    "count":2,
    "timestamp":ISODate("2019-06-17T12:10:00.000")
}
{
    "userName":"Krunal",
    "count":1,
    "timestamp":ISODate("2019-06-17T12:12:00.000")
}

我想按 userName 统计每个用户连续发送的消息数。在 MongoDB 中能否用一条查询实现,还是需要在简单的 find 查询之后自己编写单独的算法?

编辑: 我不想仅按用户名分组,而是要按用户名对连续的文档进行分组。例如,考虑上述集合:Krunal 连续发送了 3 条消息,因此得到 Krunal:3;接着 Karan 连续发送了 2 条消息,因此得到 Karan:2;之后 Krunal 又发送了一条消息,但它出现在 Karan 之后,所以会作为一个新对象 Krunal:1,而不会累加到之前 Krunal 的计数上。

3 个答案:

答案 0 :(得分:0)

使用它。

// Groups all messages by userName (one output document per distinct user)
// and counts the messages in each group.
db.collection.aggregate([
    {
        $group: {
            _id: "$userName",
            count: { $sum: 1 },
            // Timestamp of the first document that reaches each group.
            timestamp: { $first: "$$ROOT.timestamp" }
        }
    }
])

答案 1 :(得分:0)

// Two-stage pipeline: count messages per userName, then reshape each result.
db.collection.aggregate([
    // Stage 1: one group per distinct userName, carrying the message count
    // and the timestamp of the last document seen in that group.
    { $group: { _id: '$userName', count: { $sum: 1 }, timestamp: { $last: '$timestamp' } } },

    // Stage 2: expose the grouping key as userName and hide _id.
    { $project: { userName: '$_id', count: 1, timestamp: 1, _id: 0 } }
]);

答案 2 :(得分:0)

这是我想出的解决方案。

这不是一个聚合查询,而是我在mongo脚本(可以用mongo执行的Javascript文件)中使用forEach循环遍历每个条目。

/**
 * Collapse an ordered sequence of message documents into runs of
 * consecutive messages sent by the same user.
 *
 * @param {{forEach: Function}} docs - Anything exposing forEach (an array or
 *     a cursor) that yields documents with "userName" and "timestamp" fields,
 *     already in the order the runs should be computed in.
 * @returns {Array<{userName: *, count: number, timestamp: *}>} One entry per
 *     run, in input order: the user's name, how many consecutive documents
 *     the run contains, and the timestamp of the run's last document.
 *     An empty input yields an empty array.
 */
function groupConsecutiveByUser(docs) {
    var runs = [];
    // The run currently being extended; null until the first document is seen.
    // Using null (not "") as the sentinel keeps an empty-string userName valid.
    var current = null;

    docs.forEach(function (entry) {
        var name = entry["userName"];
        if (current !== null && current["userName"] === name) {
            // Same user as the previous document: extend the open run.
            current["count"] += 1;
            current["timestamp"] = entry["timestamp"];
        } else {
            // First document, or the user changed: start a new run.
            current = {
                "userName": name,
                "count": 1,
                "timestamp": entry["timestamp"]
            };
            runs.push(current);
        }
    });

    return runs;
}

// Only query when the shell global `db` is available, so the helper above can
// also be loaded outside the shell.
if (typeof db !== "undefined") {
    // Sort explicitly: without it find() returns documents in an unspecified
    // order, and the runs are only meaningful in timestamp order. _id is a
    // tie-breaker so equal timestamps still give a stable result.
    var entries = groupConsecutiveByUser(
        db.wikidata.find().sort({ timestamp: 1, _id: 1 })
    );

    print(JSON.stringify(entries));
}

输出:

[{"userName":"Krunal","count":3,"timestamp":"2019-06-17T12:05:00.000Z"},{"userName":"Karan","count":2,"timestamp":"2019-06-17T12:10:00.000Z"},{"userName":"Krunal","count":1,"timestamp":"2019-06-17T12:12:00.000Z"}]

希望有帮助。