我使用节点js,我在数组列表中有100个或更多URL。 我正在尝试获取所有网址的元数据。所以我用了#34; request"和" cheerio"包。
var request = require('request');
var cheerio = require('cheerio');
var resultData = ["http://www.realsimple.com/food-recipes/9-healthy-predinner-snacks", "http://www.womenshealthmag.com/weight-loss/100-calorie-snacks", "https://www.pinterest.com/explore/healthy-snacks/", "http://www.rd.com/slideshows/healthy-snacks-for-adults/", "http://greatist.com/snacking", "http://www.bodybuilding.com/fun/26-best-healthy-snacks.html"];
var keyObject = new Array();
for (var index in resultData) {
var resourceUrl = resultData[index];
request(resourceUrl, function(error, response, html) {
if (!error && response.statusCode == 200) {
var $ = cheerio.load(html);
$('meta').each(function(i, element) {
if ((element.attribs.name == 'Keywords') || (element.attribs.name == 'keywords')) {
var keyObject = new Object();
keyObject.url = response.request.uri.href;
keyObject.key = element.attribs.content;
resourceDetails.push(keyObject);
}
});
}
});
}
一旦我运行代码,我得到最多5到10个结果。然后我收到错误:
(node) warning: possible EventEmitter memory leak detected. 11 listeners added. Use emitter.setMaxListeners() to increase limit.
Trace at Request.EventEmitter.addListener (events.js:160:15)
然后我在谷歌搜索需要添加循环self.setMaxListeners(0);
var keyObject = new Array();
for (var index in resultData) {
var resourceUrl = resultData[index];
request(resourceUrl, function(error, response, html) {
if (!error && response.statusCode == 200) {
self.setMaxListeners(0);
var $ = cheerio.load(html);
$('meta').each(function(i, element) {
if ((element.attribs.name == 'Keywords') || (element.attribs.name == 'keywords')) {
var keyObject = new Object();
keyObject.url = response.request.uri.href;
keyObject.key = element.attribs.content;
resourceDetails.push(keyObject);
}
});
}
});
}
但我没有得到任何输出。我不知道现在正确的方式。
例外输出:
[{"url" : "http:example.com", "key" : "metadata list"},{"url" : "http:example.com", "key" : "metadata list"},{"url" : "http:example.com", "key" : "metadata list"},{"url" : "http:example.com", "key" : "metadata list"},{"url" : "http:example.com", "key" : "metadata list"},{"url" : "http:example.com", "key" : "metadata list"}] etc