(节点)警告:检测到可能的EventEmitter内存泄漏

时间:2013-09-16 01:33:08

标签: javascript node.js memory-leaks web-scraping cheerio

我写了一个基于cheerio.js和request.js的小刮刀脚本来获取预订机构网站的联系信息(URL和电子邮件)。虽然它确实运行并返回我需要的所有信息,但是在运行它时连续5次收到以下警告:

(node) warning: possible EventEmitter memory leak detected. 11 listeners added. Use emitter.setMaxListeners() to increase limit.
Trace
    at Socket.EventEmitter.addListener (events.js:160:15)
    at Socket.Readable.on (_stream_readable.js:689:33)
    at Socket.EventEmitter.once (events.js:179:8)
    at Request.onResponse (/home/max/Desktop/scraping/node_modules/request/request.js:625:25)
    at ClientRequest.g (events.js:175:14)
    at ClientRequest.EventEmitter.emit (events.js:95:17)
    at HTTPParser.parserOnIncomingClient [as onIncoming] (http.js:1689:21)
    at HTTPParser.parserOnHeadersComplete [as onHeadersComplete] (http.js:120:23)
    at Socket.socketOnData [as ondata] (http.js:1584:20)
    at TCP.onread (net.js:525:27)

我的直觉告诉我,我可能会收到此警告,因为我在另一个请求中嵌套了请求。我不确定,但我知道的是,当调用位于getArtistInfo()函数的cheerio.js .each()循环中的嵌套请求时,警告会立即出现。 (看下面的代码,看看我的意思)

以下是我的刮刀的代码:

var request = require('request');
var cheerio = require('cheerio');

var url = 'http://www.primarytalent.com/';

var getManyArtistsInfo = function(url){
  request(url, (function(){
    return function(err, resp, body) {
      if(err)
        throw err;
      $ = cheerio.load(body);

      // TODO: scraping goes here
      $('#rosterlists div li a').each(function(){
          var urlCap = this[0]['attribs']['href'].slice(1);
          var artistURL = url.concat(urlCap);
          console.log(artistURL);

          getArtistInfo(artistURL);
      });
    }
  })());
}

var getArtistInfo = function(artistURL){
  request(artistURL, (function(){
    return function(err, resp, body) {
      if(err)
        throw err;
      $ = cheerio.load(body);

      console.log("NOW SCRAPING artist's PAGE")

      var artistName = "";
      $('#content #col3-1 h1').each(function(){
        artistName = this.text();
        console.log(artistName);
      });
      $('#content #col3-1 #links li a').each(function(){
        var socialURL = this.attr('href');
        var siteURL = "";
        var facebookURL = "";
        var twitterURL = "";
        var soundcloudURL = "";
        var bandcampURL = "";
        var myspaceURL = "";

        switch(socialURL) {
          case socialURL.indexOf("facebook"):
            facebookURL = socialURL;
            console.log(facebookURL);
            break;
          case socialURL.indexOf("twitter"):
            twitterURL = socialURL;
            console.log(twitterURL);
            break;
          case socialURL.indexOf("soundcloud"):
            soundcloudURL = socialURL;
            console.log(soundcloudURL);
            break;
          case socialURL.indexOf("bandcamp"):
            bandcampURL = socialURL;
            console.log(bandcampURL);
            break;
          case socialURL.indexOf("myspace"):
            myspaceURL = socialURL;
            console.log(myspaceURL);
            break;
          default: 
            siteURL = socialURL;
            console.log(siteURL)
        }
      });

      // get agentURL
      $('#content #col3-1 .contacts li a').each(function(){
        var agentURL = url + this.attr('href').slice(1);
        console.log("Agent url is : " + agentURL);

        request(agentURL, artistName, (function(){
          return function(err, resp, body) {
            if(err)
              throw err;
            $ = cheerio.load(body);

            console.log("NOW SCRAPING AGENT'S PAGE")

            var agentName = $('#content #col3-1 #details li h1').text();
            console.log(agentName + ' reps ' + artistName);

            var agentEmail = $('#content #col3-1 #details li a').attr("href").slice(7);
            console.log(agentEmail);

            var agentPhone = $('#content #col3-1 #details li').last().text();
            console.log(agentPhone);

            var agentArtistList = []; 
            $('#content #col3-1 #artists li a').each(function(){
              agentArtistList.push(this.text());
            }); 
            console.log(agentName + ' represents ' + agentArtistList.length + ' artists!');

          }
        })(agentURL, artistName));
      });
    }
  })(artistURL));
}

getManyArtistsInfo(url);

我在这里制作了意大利面吗?

如何阻止此EventEmitter内存泄漏问题发生?

1 个答案:

答案 0 :(得分:0)

没有理由在请求中使用IIFE。我想知道是否可能导致此错误:

  request(artistURL, function(err, resp, body) {
      if(err)
        throw err;
      $ = cheerio.load(body);