我正在用 Node.js 以及 request 和 async 这两个 npm 模块编写一个网页抓取客户端。
我希望在 async.times() 中针对每个 URL 调用的 request() 能够并发地向服务器发出请求。
但是,尽管 request() 在 async.times() 中被一次性全部调用,实际的请求却是一个接一个地发出的。
代码如下:
// request.defaults() does not modify `request` in place: it returns a new
// wrapper that applies the given options to every call. The original code
// discarded that return value, so the pool setting was never used and the
// requests fell back to the default HTTP agent (limited to 5 sockets per
// host on Node.js 0.10). Rebinding `request` makes every later request(...)
// call share one pool that allows up to 50 concurrent sockets per host.
// NOTE(review): assumes `request` is declared with `var`/`let` so it can be
// reassigned — confirm against the require() line, which is not visible here.
request = request.defaults({
  pool: {
    maxSockets: 50
  }
});
...
...
// Invokes the iterator once per list page (scrapeTarget.totalPage times).
// Each invocation builds the request options for one page, issues the HTTP
// request, and pipes the response into an archive file on disk.
async.times(scrapeTarget.totalPage, function(index, next) {
// Counted when the request is issued, not when the download has completed.
scrapeTarget.downloadedListPages++;
// `index` is zero-based; page numbers in the URL and file name are one-based.
var reqPageNum = index+1;
var listReqOpt = requestUtil.makeOption('url:listPage', requestOption);
listReqOpt.url += '?page=' + reqPageNum;
// Every request is given the same cookieJar object.
listReqOpt.jar = cookieJar;
console.log('['+reqPageNum+'] : request page');
// Timer label is the zero-based index, so the timing output reads "<index>: <ms>".
console.time(index);
request(listReqOpt, function(err, response, body) {
// Response handling goes here.
// NOTE(review): `err` is never inspected, and next() is always called with a
// null error, so a failed request is reported to async.times as a success.
console.log('['+reqPageNum+'] : got response');
console.timeEnd(index);
// Signals completion of this iteration with the one-based page number.
// NOTE(review): this fires from the request callback, not from the write
// stream, so the file below may not be fully written yet — confirm whether
// later steps rely on the file being complete.
next(null,{'done':reqPageNum});
// The same response is also streamed to <archiveDir>/listPage<N>.html.
}).pipe(fs.createWriteStream(archiveDir + '/listPage' + reqPageNum + '.html'));
},
...
...);
运行结果如下:
[1] : request page
[2] : request page
[3] : request page
[4] : request page
[5] : request page
[6] : request page
[7] : request page
...
[212] : request page
[213] : request page
[214] : request page
[5] : got response
4: 3607ms
[3] : got response
2: 5844ms
[4] : got response
3: 8171ms
[1] : got response
0: 10221ms
[2] : got response
1: 12689ms
[6] : got response
5: 14313ms
[7] : got response
6: 16630ms
[8] : got response
...
[16] : got response
15: 36488ms
[17] : got response
16: 38807ms
[18] : got response
17: 41000ms
[19] : got response
18: 43193ms
...
...
I stopped the run because it was taking too long.
运行环境:Node.js 0.10.x、request 2.36.0,在 Ubuntu 14.04 上的 Cloud9 bash 中运行。
怎样才能让这些请求真正并行地发出?