我正在执行增量 mapreduce。我的数据集是网络日志,每天会获取一份日志。因此,我们为每天的日志数据创建一个集合,数据库中不同日期对应不同的集合。第一天,我们对当天的集合运行 mapreduce,并将输出存储在某个集合中。mapreduce 的输入数据集是:
{
"_id": "1.1.145.219_1341627848904",
"client_ip": "1.1.145.219",
"timestamp": "1341627848904",
"bytes_transfered": 0
}
{
"_id": "1.1.145.219_1341627848926",
"client_ip": "1.1.145.219",
"timestamp": "1341627848926",
"bytes_transfered": 3
}
{
"_id": "1.1.145.219_1341627849227",
"client_ip": "1.1.145.219",
"timestamp": "1341627849227",
"bytes_transfered": 5
}
{
"_id": "1.1.145.219_1341627849250",
"client_ip": "1.1.145.219",
"timestamp": "1341627849250",
"bytes_transfered": 8
}
{
"_id": "1.1.145.219_1341627849277",
"client_ip": "2.3.45.89",
"timestamp": "1341627849277",
"bytes_transfered": 11
}
{
"_id": "1.1.145.219_1341627849298",
"client_ip": "2.3.45.89",
"timestamp": "1341627849298",
"bytes_transfered": 14
}
{
"_id": "1.1.145.219_1341627849529",
"client_ip": "2.3.45.89",
"timestamp": "1341627849529",
"bytes_transfered": 17
}
我们基于 "client_ip" 对每个 client_ip 的 "bytes_transfered" 求和。运行 mapreduce 之后得到如下输出:
{"_id":"1.1.145.219", "value":16}
{"_id":"2.3.45.89", "value":42}
第二天我们再次运行 mapreduce,并将输出存储到同一个集合中。现在我的问题是:如果某个 "client_ip" 已经存在于该输出集合中,我想基于 "client_ip" 字段更新对应的文档,把新的 "bytes_transfered" 值累加到已有的值上;如果该 "client_ip" 不存在,则在同一个集合中创建新文档。
// mongojs is the MongoDB client used by the rest of this script.
var mongojs = require('mongojs');
// Handle to the local "upload" database; 'twotimes' is read by the map-reduce
// below and 'data' is named as its output collection.
var db = mongojs('mongodb://localhost:27017/upload', ['twotimes', 'data']);
// Map step: for the log document bound to `this`, emit one
// (client_ip, bytes_transfered) pair.
// Must stay a plain `function` (not an arrow function) because it reads the
// current document through `this`.
var mapper = function() {
  var clientIp = this.client_ip;
  var transferred = this.bytes_transfered;
  emit(clientIp, transferred);
};
// Reduce step: add up every value emitted for one key.
//   key    - the grouping key (unused by the computation itself)
//   values - array of values emitted for that key
// Returns the sum of `values`, or 0 for an empty array.
var reduce = function(key, values) {
  var total = 0;
  var idx = 0;
  // Walk front to back so the additions happen in the original order.
  while (idx < values.length) {
    total += values[idx];
    idx += 1;
  }
  return total;
};
// Incremental map-reduce: sum bytes_transfered per client_ip for the log
// entries inside the timestamp window and fold the result into 'data'.
//
// `out: { reduce: 'data' }` is what makes the run incremental. When a key
// (client_ip) already exists in 'data', MongoDB re-applies `reduce` to the
// stored value and the new value, so the totals are added together; keys that
// do not exist yet are inserted as new documents. A plain `out: 'data'` would
// replace the whole collection on every run and lose earlier totals.
//
// NOTE: each run must use a timestamp window that does not overlap a previous
// run, otherwise the same log entries are counted twice.
db.twotimes.mapReduce(
  mapper,
  reduce,
  {
    // The sample documents store "timestamp" as a string, so the bounds are
    // strings as well and the comparison is a string comparison.
    query: {
      "timestamp": {
        $gt: "1341599400000",
        $lt: "1341682200000"
      }
    },
    out: { reduce: 'data' }
  },
  function (err) {
    // Surface failures instead of dropping them silently.
    if (err) {
      console.error('incremental mapReduce into "data" failed:', err);
    }
  }
);
请问我该如何解决这个问题?希望能得到一些建议。
此致
Naresh Balija。