我有一个文档集合,其模式大致如下(部分成员经过删改):
{
"_id" : ObjectId("539f41a95d1887b57ab78bea"),
"answers" : {
"ratings" : {
"positivity" : [
2,
3,
5
],
"activity" : [
4,
4,
3
],
},
"media" : [
ObjectId("537ea185df872bb71e4df270"),
ObjectId("537ea185df872bb71e4df275"),
ObjectId("537ea185df872bb71e4df272")
]
}
在此架构中,第一个,第二个和第三个positivity
评级分别对应media
数组中的第一个,第二个和第三个条目。 activity
评级也是如此。我需要针对集合中的所有文档,按各自关联的media
对象计算positivity
和activity
评级的统计信息。目前,我只针对第一个条目进行计算,使用的MapReduce设置如下:
/**
 * Map step of the ratings map-reduce job.
 *
 * Runs with `this` bound to one document. Emits exactly one pair: the key is
 * the `str` property of the document's first `media` entry, and the value
 * holds a single-observation accumulator for the first `activity` rating and
 * the first `positivity` rating.
 *
 * Each accumulator has the shape {sum, min, max, count, diff}, where `diff`
 * is the running sum of squared deviations from the mean (0 for a single
 * observation).
 */
var mapFunction = function() {
    // Accumulator describing a sample that contains only `rating`.
    var seed = function(rating) {
        return {
            sum: rating,
            min: rating,
            max: rating,
            count: 1,
            diff: 0
        };
    };

    var ratings = this.answers.ratings;
    var activitySeed = seed(ratings.activity[0]);
    var positivitySeed = seed(ratings.positivity[0]);

    emit(this.media[0].str, {'activity': activitySeed, 'positivity': positivitySeed});
};
/**
 * Reduce step of the ratings map-reduce job.
 *
 * Merges every accumulator in `values` into the first one, separately for the
 * 'activity' and 'positivity' fields. The first element's accumulators are
 * updated in place and returned.
 *
 * @param key    the emitted key (unused here)
 * @param values array of {activity, positivity} accumulator pairs, each
 *               accumulator shaped {sum, min, max, count, diff}
 * @returns {activity, positivity} holding the merged accumulators
 */
var reduceFunction = function(key, values) {
    // Fold accumulator `other` into accumulator `target`.
    var absorb = function(target, other) {
        // Difference of the two means and the pairwise weight used to
        // combine the two sums of squared deviations.
        var meanGap = target.sum / target.count - other.sum / other.count;
        var pairWeight = (target.count * other.count) / (target.count + other.count);

        // diff must be updated before sum/count, which it was derived from.
        target.diff += other.diff + meanGap * meanGap * pairWeight;
        target.sum += other.sum;
        target.count += other.count;
        target.min = Math.min(target.min, other.min);
        target.max = Math.max(target.max, other.max);
    };

    // Merge values[1..] into values[0] for the given field name.
    var collapse = function(field) {
        var total = values[0][field];
        for (var idx = 1; idx < values.length; idx++) {
            absorb(total, values[idx][field]);
        }
        return total;
    };

    var mergedActivity = collapse('activity');
    var mergedPositivity = collapse('positivity');
    return {'activity': mergedActivity, 'positivity': mergedPositivity};
};
/**
 * Finalize step of the ratings map-reduce job.
 *
 * Adds derived statistics to both accumulators of `value` in place and
 * returns `value`: mean, population variance/std (divides by count) and
 * sample variance/std (divides by count - 1).
 *
 * When count is 1 the sample variance divides by zero, so sample_variance
 * and sample_std are not finite numbers.
 *
 * @param key   the reduced key (unused here)
 * @param value {activity, positivity}, each shaped {sum, min, max, count, diff}
 * @returns the same `value` object with the derived fields attached
 */
var finalizeFunction = function(key, value) {
    // Attach the derived statistics to one accumulator.
    var describe = function(stats) {
        var n = stats.count;
        stats.mean = stats.sum / n;
        stats.population_variance = stats.diff / n;
        stats.population_std = Math.sqrt(stats.population_variance);
        stats.sample_variance = stats.diff / (n - 1);
        stats.sample_std = Math.sqrt(stats.sample_variance);
    };

    describe(value.activity);
    describe(value.positivity);
    return value;
};
// Restrict the job to trials in which both rating fields exist.
var limitingQuery = {
    'answers.ratings.activity': {$exists: true},
    'answers.ratings.positivity': {$exists: true}
};

// Run the job over db.trials; the output replaces the 'base_ratings'
// collection in the 'tmp' database.
db.trials.mapReduce(mapFunction, reduceFunction, {
    query: limitingQuery,
    finalize: finalizeFunction,
    out: {replace: 'base_ratings', db: 'tmp'}
});
在文档数量较少时,这一切都符合我的预期。但当我针对整个集合运行它时,会发生一些奇怪的事情。首先,当我运行db.currentOp()
时,我得到以下输出:
{
"inprog" : [
{
"opid" : 2337,
"active" : true,
"secs_running" : 2787,
"microsecs_running" : NumberLong("2787597940"),
"op" : "query",
"ns" : "eim.trials",
"query" : {
"$msg" : "query not recording (too large)"
},
"planSummary" : "COLLSCAN",
"client" : "109.201.154.152:59939",
"desc" : "conn17",
"threadId" : "0x7ef89b022700",
"connectionId" : 17,
"locks" : {
"^" : "r",
"^eim" : "R"
},
"waitingForLock" : false,
"msg" : "m/r: (1/3) emit phase M/R: (1/3) Emit Progress: 8300/1 830000%",
"progress" : {
"done" : 8300,
"total" : 1
},
"numYields" : 1133,
"lockStats" : {
"timeLockedMicros" : {
"r" : NumberLong("5075753298"),
"w" : NumberLong(2274)
},
"timeAcquiringMicros" : {
"r" : NumberLong(243155328),
"w" : NumberLong(131)
}
}
},
{
"opid" : 2480,
"active" : true,
"secs_running" : 2111,
"microsecs_running" : NumberLong(2111502538),
"op" : "query",
"ns" : "eim.trials",
"query" : {
"$msg" : "query not recording (too large)"
},
"planSummary" : "COLLSCAN",
"client" : "109.201.154.192:61609",
"desc" : "conn23",
"threadId" : "0x7ef89ac1e700",
"connectionId" : 23,
"locks" : {
"^" : "r",
"^eim" : "R"
},
"waitingForLock" : false,
"msg" : "m/r: (1/3) emit phase M/R: (1/3) Emit Progress: 7952/1 795200%",
"progress" : {
"done" : 7952,
"total" : 1
},
"numYields" : 819,
"lockStats" : {
"timeLockedMicros" : {
"r" : NumberLong("3399905661"),
"w" : NumberLong(73184)
},
"timeAcquiringMicros" : {
"r" : NumberLong(406543723),
"w" : NumberLong(145)
}
}
}
]
}
Emit Progress
超过100%是怎么回事?我可以确定,在这一切运行期间,没有其他文档被添加到集合中。而inprog.progress.done
为8300
,而inprog.progress.total
为1
。咦?
更糟糕的是,此操作最终会因错误而失败:
Error: error doing query: failed src/mongo/shell/query.js:78
即使在报告错误之后, db.currentOp()
仍继续返回与上述类似的结果。日志根本没有显示任何错误(仅指示此命令仍在运行的行):
2014-06-19T13:24:15.378-0400 [conn23] M/R: (1/3) Emit Progress: 8400
有人知道这里可能发生了什么吗?我的环境是在Ubuntu 13.10上运行MongoDB 2.6.2。
答案 0 :(得分:0)
这不算答案(显然我的声望不足以发表评论),但我遇到了类似的错误,并已向MongoDB提交了一个问题:https://jira.mongodb.org/browse/SERVER-15334 。所以,如果你也像我一样是通过Google搜索找到这个页面的,请关注该链接,(希望)那里会有更多细节(MongoDB通常对这类问题回复得很快)。
答案 1 :(得分:-1)
有时在emit之前做一些验证检查会有帮助,但不敢说总是有效。
我有几次遇到这个错误,问题就出在所emit的值的质量上。
可以尝试分小批运行,检查每一批是否能正常执行。