我有一个 Node.js 应用程序，它在 kue 作业中使用 mongoose 游标遍历大型集合。
kue job configuration:
/**
 * Queues a kue 'child' job for a dataset and reports its outcome through a promise.
 *
 * Counts the documents in `dataset` whose `processed` field is not 1, stores that
 * count as `total` in the job data, saves the job, and settles the returned promise
 * when the job emits 'complete' or 'failed'.
 *
 * @param {*} cid - Stored in the job data as `cid`.
 * @param {*} name - Stored in the job data as `title`.
 * @param {string} dataset - Mongoose model name; also stored in the job data.
 * @param {*} type - Stored in the job data as `kind`.
 * @param {*} link - Stored in the job data as `link`.
 * @param {*} line - Stored in the job data as `line`.
 * @param {*} market - Stored in the job data as `market`.
 * @param {Function} [done] - Not used by this function; kept for interface compatibility.
 * @returns {Promise} Resolves with `{done: true, job: <job data>, success: <boolean>}`;
 *   `success` is true on 'complete' and false on 'failed'. Rejects with the error
 *   if counting the documents fails.
 */
function childJob(cid, name, dataset, type, link, line, market, done) {
  var deferred = Q.defer();

  mongoose.model(dataset).count({processed: {$ne: 1}}, function(err, total) {
    // Without this guard a failed count would create a job whose total is undefined.
    if (err) {
      deferred.reject(err);
      return;
    }

    console.log('total', total);

    var job = jobs.create('child', {
      type: 'CHILD',
      cid: cid,
      title: name,
      dataset: dataset,
      kind: type,
      link: link,
      line: line,
      market: market,
      current: 0,
      total: total
    });

    job
      .on('complete', function() {
        deferred.resolve({
          done: true,
          job: job.data,
          success: true
        });
      })
      .on('failed', function() {
        deferred.resolve({
          done: true,
          job: job.data,
          success: false
        });
      });

    job.save();
  });

  // Must be returned from childJob itself: a return inside the count callback
  // is discarded and the caller would receive undefined.
  return deferred.promise;
}
每当子作业（child job）开始时：
// Worker for 'child' jobs (registered with a concurrency of 10).
// Streams every document of job.data.dataset whose `processed` field is not 1
// through functions.workRecord, reporting progress on the kue job after each record.
jobs.process('child', 10, function(job, done) {
  var count = 0;
  var statsKey = job.data.line + job.data.market;
  var lastStats;
  var hasStats = false;

  mongoose.model(job.data.dataset)
    .find({processed: {$ne: 1}})
    .lean()
    .batchSize(1000)
    .cursor()
    .eachAsync(function(record) {
      // Returning a promise makes eachAsync wait for this record before pulling
      // the next one. Without it the cursor is drained without backpressure and
      // pending workRecord/job.update calls pile up.
      return new Promise(function(resolve) {
        functions.workRecord(record, job.data.link, job.data.line, job.data.market, myStats[statsKey], function(stats) {
          count++;
          job.data.current = count;
          job.update();
          job.progress(count, job.data.total, record);
          lastStats = stats;
          hasStats = true;
          resolve();
        });
      });
    })
    .then(function() {
      // Finish when the cursor is exhausted rather than when count reaches the
      // total captured at job creation: the collection may have changed since
      // then, in which case the counts never match and the job would hang.
      if (hasStats) {
        myStats[statsKey] = lastStats;
      }
      done();
    }, function(err) {
      // Surface cursor errors (e.g. "cursor not found") as a failed job instead
      // of leaving an unhandled promise rejection.
      done(err);
    });
});
随着时间的推移，cursor().eachAsync() 处理的记录数量显著减少：从几秒钟处理数百条记录，降到大约 1 到 2 条记录，然后完全停止。
这是配置问题吗？如何设置查询，才能让游标随着时间的推移保持稳定的记录流？
EDIT1:最终我收到以下错误:
(node:61193) UnhandledPromiseRejectionWarning: MongoError: Cursor not found, cursor id: 90600322391 at Function.MongoError.create (/home/sigma/SigmaCWCDataAnalysis/node_modules/mongoose/node_modules/mongodb-core/lib/error.js:31:11)
答案 0 :(得分:0)
看起来是游标超时了。
解决方案是将find()查询设置为{timeout:false},如下所示:
// Mongoose's Model.find(filter, projection, options) treats a second argument as the
// projection, so {timeout: false} placed there is not applied as a cursor option.
// Set the noCursorTimeout flag on the query cursor instead.
mongoose.model(job.data.dataset).find({}).lean().cursor().addCursorFlag('noCursorTimeout', true)