我正在使用Node.js运行一台服务器,需要从我运行的另一台服务器(localhost:3001)请求数据。我需要向数据服务器发出许多请求(约200个)并收集数据(响应大小从约20KB到约20MB不等)。每个请求都是独立的,我想将所有响应保存为一个巨大的数组:
[{"urlAAA": responseAAA}, {"urlCCC": responseCCC}, {"urlBBB": responseBBB}, etc ]
请注意,项目的顺序并不重要,理想情况下应该按照数据可用的顺序填充数组。
var express = require('express');
var router = express.Router();
var async = require("async");
var papa = require("papaparse");
var sync_request = require('sync-request');
var request = require("request");
// Shared accumulator object for the parsed responses.
var pinnacle_data = {};
// Work list: the integers 0 through 19, one entry per request to issue.
var lookup_list = [];
var i = 0;
while (i < 20) {
    lookup_list[lookup_list.length] = i;
    i += 1;
}
/**
 * Stores `value` on `object` under the property name `key`.
 *
 * The object is mutated in place; nothing is returned.
 *
 * @param {Object} object - Target object to write to.
 * @param {string} key - Property name to set.
 * @param {*} value - Value to store under `key`.
 */
function write_delayed_files(object, key, value) {
    object[key] = value;
}
/**
 * Fetches one randomly chosen file from the data server and stores the parsed
 * JSON in the shared pinnacle_data object under the key "file_<file_number>".
 *
 * NOTE: the fetch goes through sync_request, a synchronous call, so this
 * function only returns after the response has been read and parsed. It
 * declares no completion-callback parameter and never invokes one.
 *
 * @param {number} file_number - Entry from the work list; used for the storage key and the log line.
 */
var show_file = function (file_number) {
// Random file id: Math.round(Math.random() * 495) + 1 yields an integer from 1 to 496.
var file_index = Math.round(Math.random() * 495) + 1;
var pinnacle_file_index = 'http://localhost:3001/generate?file=' + file_index.toString();
// Synchronous GET: execution stays on this line until the response is available.
var response_json = sync_request('GET', pinnacle_file_index);
var pinnacle_json = JSON.parse(response_json.getBody('utf8'));
// The key is derived from the work-list entry, not from the random file id.
var object_key = "file_" + file_number.toString();
pinnacle_data[object_key] = pinnacle_json;
console.log("We've handled file: " + file_number);
return;
};
// Run show_file for each entry of lookup_list. The completion handler is an
// empty function, so an error passed to it would be ignored.
async.each(lookup_list, show_file, function (err) {});
// Print whatever pinnacle_data holds at the moment this statement is reached.
console.log(pinnacle_data);
/* Handler for GET '/': renders the 'predictionsWtaLinks' view with its page title. */
function render_async_trial(req, res, next) {
    var view_locals = {title: 'Async Trial'};
    res.render('predictionsWtaLinks', view_locals);
}
router.get('/', render_async_trial);
module.exports = router;
现在运行此程序时,它会显示:
We've handled file: 0
We've handled file: 1
We've handled file: 2
We've handled file: 3
We've handled file: 4
We've handled file: 5
etc
现在,由于文件大小各不相同,我原本期望这些请求会“并行”执行,但它们似乎是按顺序执行的,而这正是我想通过使用async.each()来避免的。目前连接数据服务器大约需要1-2秒,因此对许多文件执行此操作需要很长时间。
我意识到我正在使用同步请求,因此想要理想地替换:
var response_json = sync_request('GET', pinnacle_file_index);
改为类似下面的代码:
request(pinnacle_file_index, function (error, response, body) {
if (!error && response.statusCode == 200) {
pinnacle_data[object_key] = JSON.parse(body);
}
});
非常感谢任何帮助。
此外,我看过尝试:
async.parallel(function_list, function (err, results) { //add results to pinnacle_data[]});
(我在尝试为数组中的每个元素定义各自独立的函数时遇到了问题。)同样,我也查看了其他相关主题:
我尝试模仿“Asynchronous http calls with nodeJS”一帖中建议的解决方案,但没有任何进展。
以下代码现在可以完成这项任务(每个请求大约需要80毫秒,其中包括使用npm的requestretry包重试请求所花的时间)。同样,它的扩展性也很好:总请求数从5个到1000个,平均每个请求的耗时都约为80毫秒。
var performance = require("performance-now");
var time_start = performance();
var async = require("async");
var request_retry = require('requestretry');
// Number of requests to issue in this run.
var total_requests = 50;
// Work list holding the integers 0 .. total_requests - 1.
var lookup_list = [];
var i;
for (i = 0; i !== total_requests; i += 1) {
    lookup_list[i] = i;
}
// Accumulator object for the collected responses.
var pinnacle_data = {};
// Issue one request per entry of lookup_list. The iterator reports each outcome
// through `callback`; the final handler receives either the first error or the
// array of per-request result objects.
async.map(lookup_list, function (item, callback) {
    // Random file id: an integer from 1 to 496.
    var file_index = Math.round(Math.random() * 495) + 1;
    var pinnacle_file_index = 'http://localhost:3001/generate?file=' + file_index;
    request_retry({
        url: pinnacle_file_index,
        maxAttempts: 20,
        retryDelay: 20,
        retryStrategy: request_retry.RetryStrategies.HTTPOrNetworkError
    },
    function (error, response, body) {
        if (error || response.statusCode !== 200) {
            console.log(error);
            return callback(error || response.statusCode);
        }
        var data_array = {};
        try {
            data_array[file_index.toString()] = JSON.parse(body);
        } catch (parse_error) {
            // A malformed body must be reported through the callback; throwing
            // from inside this handler would escape async.map entirely.
            console.log(parse_error);
            return callback(parse_error);
        }
        callback(null, data_array);
    });
},
function (err, results) {
    var time_finish = performance();
    console.log("It took " + (time_finish - time_start).toFixed(3) + "ms to complete " + total_requests + " requests.");
    console.log("This gives an average rate of " + ((time_finish - time_start) / total_requests).toFixed(3) + " ms/request");
    if (!err) {
        // Merge every single-entry result object into pinnacle_data. Two requests
        // that drew the same random file id share a key, so the later one wins.
        results.forEach(function (entry) {
            Object.keys(entry).forEach(function (key) {
                pinnacle_data[key] = entry[key];
            });
        });
        var length_array = Object.keys(pinnacle_data).length.toString();
        console.log("We've got all the data, totalling " + length_array + " unique entries.");
    } else {
        console.log("We had an error somewhere.");
    }
});
感谢您的帮助。
答案 0 :(得分:5)
正如您所发现的,async.parallel()只能并行化本身就是异步的操作。如果操作是同步的,那么由于node.js的单线程特性,这些操作会一个接一个地运行,而不是并行运行。但是,如果操作本身是异步的,那么async.parallel()(或其他async方法)会立即把它们全部启动,并为您协调结果。
下面是使用async.map()的大致思路。我使用async.map(),是因为它接收一个数组作为输入,并生成一个与原始数组顺序相同的结果数组,同时并行运行所有请求,这似乎符合您的要求:
var async = require("async");
var request = require("request");
// Build 20 request URLs, each pointing at a randomly chosen file number (1 to 496).
var lookup_list = [];
var i = 0;
var index;
var url;
while (lookup_list.length < 20) {
    index = 1 + Math.round(495 * Math.random());
    url = 'http://localhost:3001/generate?file='.concat(index);
    lookup_list.push(url);
    i += 1;
}
// Fetch every URL in lookup_list. The iterator reports each outcome through
// `callback`; the completion function receives the first error, or the array
// of parsed bodies.
async.map(lookup_list, function(url, callback) {
    // iterator function: fetch one URL and report its parsed JSON body
    request(url, function (error, response, body) {
        if (error || response.statusCode !== 200) {
            // report the transport error, or the unexpected HTTP status code
            return callback(error || response.statusCode);
        }
        var parsed;
        try {
            parsed = JSON.parse(body);
        } catch (parse_error) {
            // a malformed body is reported through the callback instead of
            // being thrown from inside the request callback
            return callback(parse_error);
        }
        // do any further processing of the data here
        callback(null, parsed);
    });
}, function(err, results) {
    // completion function
    if (!err) {
        // process all results in the array here
        console.log(results);
        for (var i = 0; i < results.length; i++) {
            // do something with results[i]
        }
    } else {
        // handle error here; at minimum make the failure visible
        console.error(err);
    }
});
另外,下面是一个使用Bluebird promise的版本,它以类似的方式使用Promise.map()来遍历初始数组:
var Promise = require("bluebird");
var request = Promise.promisifyAll(require("request"), {multiArgs: true});
// Prepare the list of 20 URLs to fetch; each targets a random file number from 1 to 496.
var lookup_list = [];
var i;
var index;
var url;
for (i = 0; i !== 20; i += 1) {
    index = Math.round(Math.random() * 495) + 1;
    url = ['http://localhost:3001/generate?file=', index].join('');
    lookup_list[i] = url;
}
// Fetch every URL in lookup_list and collect the parsed JSON bodies.
Promise.map(lookup_list, function(url) {
    // getAsync is the promisified request.get; .spread() unpacks the
    // fulfilment value into (response, body)
    return request.getAsync(url).spread(function(response, body) {
        if (response.statusCode !== 200) {
            // reject with a real Error (keeps a stack trace) and attach the
            // status code for handlers that need it
            var status_error = new Error('Unexpected HTTP status ' + response.statusCode + ' for ' + url);
            status_error.statusCode = response.statusCode;
            throw status_error;
        }
        // a JSON.parse failure here rejects the promise for this URL
        return JSON.parse(body);
    });
}).then(function(results) {
    console.log(results);
    for (var i = 0; i < results.length; i++) {
        // process results[i] here
    }
}, function(err) {
    // process error here; at minimum make the failure visible
    console.error(err);
});
答案 1 :(得分:4)
听起来你只是想并行下载一堆网址。这样就可以了:
var request = require('request');
var async = require('async');
// Sites to download.
var urls = ['http://microsoft.com', 'http://yahoo.com', 'http://google.com', 'http://amazon.com'];

// Builds a task function for one URL: when run, it starts the request and
// hands the task callback straight to `request`.
function make_loader(url) {
    return function (callback) {
        request(url, callback);
    };
}

// One task per URL, in the same order as 'urls'.
var loaders = urls.map(function (url) {
    return make_loader(url);
});

// Final handler: rethrows a reported error, otherwise prints how many results arrived.
function on_all_loaded(err, results) {
    if (err) {
        throw err; // ... handle appropriately
    }
    // results will be an array of the results, in the same order as 'urls',
    // even though the operation was done in parallel
    console.log(results.length); // == urls.length
}

async.parallel(loaders, on_all_loaded);
或者更简单一些,使用async.map:
var request = require('request');
var async = require('async');
// Sites to download.
var urls = ['http://microsoft.com', 'http://yahoo.com', 'http://google.com', 'http://amazon.com'];
// `request` is passed directly as the iterator, so async.map calls it once per URL.
async.map(urls, request, function on_mapped(err, results) {
    if (err) {
        throw err; // handle error
    }
    console.log(results.length); // == urls.length
});
答案 2 :(得分:0)
试试这个:
var async = require("async");
var request = require("request");
/**
 * Requests one randomly chosen file from the data server and stores the parsed
 * JSON in pinnacle_data under the key "file_<file_number>".
 *
 * The outcome is reported through `cb` only: a request error, a non-200
 * status, or an unparsable body is passed to `cb` as its first argument, and
 * `cb` is called with no arguments on success.
 *
 * @param {number} file_number - Entry from the work list; used to build the storage key.
 * @param {function} cb - Completion callback, invoked once per call.
 */
var show_file = function (file_number, cb) {
    //..Sync ops
    // Random file id: an integer from 1 to 496.
    var file_index = Math.round(Math.random() * 495) + 1;
    var pinnacle_file_index = 'http://localhost:3001/generate?file=' + file_index.toString();
    //request instance from Request npm Module
    //..Async op --> this should make async.each asynchronous
    request(pinnacle_file_index, function (error, response, body) {
        if (error) {
            return cb(error);
        }
        if (!response || response.statusCode !== 200) {
            // Same 200-only rule the other snippets apply before parsing.
            return cb(new Error('Unexpected HTTP status ' + (response && response.statusCode) + ' for ' + pinnacle_file_index));
        }
        var parsed;
        try {
            parsed = JSON.parse(body);
        } catch (parse_error) {
            // Throwing here would skip cb, so async.each would never be told
            // that this item finished.
            return cb(parse_error);
        }
        var object_key = "file_" + file_number.toString();
        pinnacle_data[object_key] = parsed;
        return cb();
    });
};
// Completion handler for the whole batch: logs the error if one was reported,
// otherwise logs a success line followed by the collected data.
function report_outcome(err) {
    if (err) {
        console.log("Error", err);
        return;
    }
    console.log("Its ok");
    console.log(pinnacle_data);
}

// Run show_file for every entry of lookup_list, then report the outcome.
async.each(lookup_list, show_file, report_outcome);