request: https://www.npmjs.com/package/request
I'm having a problem with the request module when requesting a large number of sites (10,000 at a time) through a request queue. It works fine for a list of around 250. However, when I feed it a CSV with anything close to 300 entries or more, it just hangs and q.drain never actually fires.
Code:
program.command('sanitizedata <file> <outfile>').description('Sanitize Data').action(( file, outfile ) => {
  if(file !== '' && outfile != '') {
    var request = require("request");
    var parse = require('url-parse');
    csv({noheader:false, trim:true})
      .fromFile(file)
      .on('end_parsed', function(SitesArray) {
        var possibleUrls = [];
        var q = async.queue(function (task, done) {
          var parsed = parse(task.url);
          if(parsed.protocol == '') {
            task.url = 'http://' + task.url;
            task.host = parsed.pathname;
          }
          var options = {
            url: `${task.url}`,
            headers: {
              'User-Agent': 'request',
              'Host': `${task.host}`
            }
          };
          request(options , function(err, res, body) {
            if (err) return done(err);
            if (res.statusCode != 200) return done(res.statusCode);
            done(res);
          });
        }, 5);
        SitesArray.map(function( site, index ) {
          q.push(site, function( result ) {
            if( result.statusCode == 200 ) {
              delete site['host'];
              console.log('\x1b[42m\x1b[37m%s\x1b[0m \x1b[46m\x1b[37m%s\x1b[0m', `Assert Success:${site.url}`, `${index}`);
              possibleUrls.insert( site.index, site );
            } else {
              console.log( '\n\r' + result )
              return false;
            }
          });
        });
        q.drain = function() {
          var csvOutput = toCSV( possibleUrls );
          console.log('draining')
          fs.outputFile(`./data/sanitizedata/${outfile}`, csvOutput, function(err) {
            if(err) {
              return console.log(err);
            }
            console.log(`The file ${outfile} was saved!`);
            process.exit();
          });
          console.log('all items have been processed');
        };
      });
  }
});
Near the last requests it shows ETIMEDOUT (connection timed out) errors. My data is in CSV format:
index,url
...
...
9993,supercircusspectacular.com
9994,theleadershipnetwork.com
9995,wizardofozthemusical.com
9996,allnews365.com
9997,blog.vendhq.com
9998,businesspropertynetwork.co.uk
9999,dashboardjunkie.com
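A minimal sketch related to the ETIMEDOUT errors mentioned above (not part of the code in the question): the request module supports a timeout option in milliseconds, so unresponsive hosts fail quickly instead of holding one of the five queue workers. The 10000 ms value here is an arbitrary example.

          // Sketch only, not from the original question: add an explicit
          // per-request timeout so slow hosts return ETIMEDOUT/ESOCKETTIMEDOUT
          // promptly instead of occupying a worker slot indefinitely.
          var options = {
            url: `${task.url}`,
            timeout: 10000, // milliseconds; a documented request option
            headers: {
              'User-Agent': 'request',
              'Host': `${task.host}`
            }
          };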
Answer 0 (score: 0)
I was able to handle the error case by putting return in front of done(res);
request(options , function(err, res, body) {
  if (err) return done(err);
  if (res.statusCode != 200) return done(res.statusCode);
  return done(res);
});