我有一个集合,保存了从用户那里收到的消息。我想在按时间戳排序之后,按userName的连续序列对这些消息进行分组。我的消息集合如下所示:
{
"_id":ObjectId("AAAA")
"userName":"Krunal"
"message":"Krunal has types some text",
"timestamp":ISODate("2019-06-17T11:57:00.000")
}
{
"_id":ObjectId("AAAB")
"userName":"Krunal"
"message":"Krunal has types some text again",
"timestamp":ISODate("2019-06-17T11:59:00.000")
}
{
"_id":ObjectId("AAAC")
"userName":"Krunal"
"message":"Krunal has types some one more time",
"timestamp":ISODate("2019-06-17T12:05:00.000")
}
{
"_id":ObjectId("AAAD")
"userName":"Karan"
"message":"Karan has type some text",
"timestamp":ISODate("2019-06-17T12:07:00.000")
}
{
"_id":ObjectId("AAAE")
"userName":"Karan"
"message":"Karan has type some more text",
"timestamp":ISODate("2019-06-17T12:10:00.000")
}
{
"_id":ObjectId("AAAC")
"userName":"Krunal"
"message":"Krunal has types some one more time",
"timestamp":ISODate("2019-06-17T12:12:00.000")
}
为了便于阅读,我在这里使用了4个字节的对象ID;在实际情况下,它将是mongodb生成的实际对象ID。对于上面的集合,我希望得到如下所述的输出:
{
"userName":"Krunal",
"count":3,
"timestamp":ISODate("2019-06-17T12:05:00.000")
}
{
"userName":"Karan",
"count":2,
"timestamp":ISODate("2019-06-17T12:10:00.000")
}
{
"userName":"Krunal",
"count":1,
"timestamp":ISODate("2019-06-17T12:12:00.000")
}
我要统计同一个userName的用户连续发送的消息数量。在mongodb中是否有查询可以做到这一点,还是需要在简单的find查询之后自己编写单独的算法?
编辑: 我不想仅按用户名分组。我要按用户名并结合文档的连续性来分组。例如,考虑上述集合:Krunal已连续发送了3条消息,因此Krunal:3;然后Karan连续发送了2条消息,因此Karan:2;之后Krunal又发送了一条消息,但它位于Karan之后,因此应作为一个新的对象Krunal:1,而不会增加之前Krunal的计数。
答案 0 :(得分:0)
使用它。
// Counts messages per userName over the whole collection: one result per
// user, so separate consecutive runs of the same user are merged together.
// The $sort stage gives $first a defined document order, which makes
// "timestamp" each user's earliest message timestamp.
db.collection.aggregate([
  {"$sort": {"timestamp": 1}},
  {"$group": {"_id": "$userName", "count": {"$sum": 1}, "timestamp": {"$first": "$timestamp"}}}
])
答案 1 :(得分:0)
// Counts messages per userName over the whole collection and reports each
// user's latest timestamp. Results are one per user: separate consecutive
// runs of the same user are merged into a single count.
db.collection.aggregate(
// Pipeline
[
// Stage 1: order by timestamp so that $last below is well defined
{
$sort: {
timestamp: 1
}
},
// Stage 2: one group per userName
{
$group: {
_id: '$userName',
count: {
$sum: 1
},
timestamp: {
$last: '$timestamp'
}
}
},
// Stage 3: expose the group key as userName and drop _id
{
$project: {
userName: '$_id',
count: 1,
timestamp: 1,
_id: 0
}
},
]
);
答案 2 :(得分:0)
这是我想出的解决方案。
这不是一个聚合查询,而是我在mongo脚本(可以用mongo执行的Javascript文件)中使用forEach循环遍历每个条目。
// Collapses the messages into runs of consecutive documents that share the
// same userName. Each run becomes { userName, count, timestamp }, where
// timestamp is taken from the last message of the run. The result is printed
// as a JSON array; an empty collection prints [].
var entries = [];
// Run currently being accumulated; null until the first document is seen.
// Using null (rather than an empty-string name) keeps documents whose
// userName is "" from being merged into the following user's run.
var current_run = null;
// Sort explicitly: "consecutive" is defined by timestamp order, and find()
// without a sort makes no promise about the order of returned documents.
db.wikidata.find().sort({ timestamp: 1 }).forEach(function(entry) {
    var name = entry["userName"];
    if (current_run !== null && current_run["userName"] === name) {
        // Same user as the previous document: extend the current run.
        current_run["count"] += 1;
        current_run["timestamp"] = entry["timestamp"];
        return;
    }
    // First document, or the user changed: start a new run.
    current_run = {};
    current_run["userName"] = name;
    current_run["count"] = 1;
    current_run["timestamp"] = entry["timestamp"];
    entries.push(current_run);
})
print(JSON.stringify(entries));
输出:
[{"userName":"Krunal","count":3,"timestamp":"2019-06-17T12:05:00.000Z"},{"userName":"Karan","count":2,"timestamp":"2019-06-17T12:10:00.000Z"},{"userName":"Krunal","count":1,"timestamp":"2019-06-17T12:12:00.000Z"}]
希望有帮助。