错误:读取ECONNRESET,ENOTFOUND和套接字挂起

时间:2016-08-25 12:02:17

标签: node.js sockets express

我在 Node.js 中编写爬虫时遇到了一些问题。事实上,当我对 Vinted 网站启动爬虫时,就会出现错误,而且很多时候会出现多种错误。首先是这个:

Error: getaddrinfo ENOTFOUND www.vinted.fr www.vinted.fr:443

然后

... Error: read ECONNRESET Error: read ECONNRESET Error: read ECONNRESET Error: read ECONNRESET Error: read ECONNRESET ...

有时候

Error: socket hang up
 Error: socket hang up
 Error: socket hang up

但是我的爬虫工作并返回一些产品的好结果,并在10分钟后停止。 我想是因为我发送了太多的请求,但我需要它......所以,它可能是一个网络问题

我完全被这些错误困住了,有办法解决吗?

感谢您的帮助。

这是我的代码:

// Reads link.json next to this script, then requests every product page listed
// in its `link` array, scrapes the product fields with cheerio and inserts one
// row per product into the `vinted` table.
// NOTE: the snippet is truncated in the source; the closing braces of the
// request callback, the loop and the readFile callback are not shown.
fs.readFile(__dirname +'/link.json', 'utf8', function (err, data) {
        var obj;
        if (err) throw err;
        obj = JSON.parse(data);
        // `urlp` (and `i`, `url` below) are not declared in this block.
        urlp = obj.link;
        console.log(colors.yellow("Products:"+urlp.length));
          // Every iteration issues its request immediately; nothing here waits
          // for a response before the next iteration starts.
          // The loop visits urlp[0] .. urlp[urlp.length-3], so the last two
          // entries are never requested.
          for(i = 1; i < urlp.length-1; i++){

            url = 'https://www.vinted.fr'+urlp[i-1];
            request(url, function(error, response, html){

            if(!error){
                  var $ = cheerio.load(html);
                  var link = [];
                  var json = { link : ""};
                  // Price text with newlines, carriage returns and spaces removed.
                  var price = $('span[itemprop=price]').text();
                  var format_price = price.replace(/\n|\r/g,""); 
                  var format_price2 = format_price.replace(/ /g,"");
                  var res1 = $('.details-list--details');
                  var meta = $("link[rel='canonical']").attr('href');
                  var images = []; // image URL array


                  // .filter() is used only to iterate: the callback returns
                  // nothing and just collects each image's data-src attribute.
                  $('img[itemprop=image]').filter(function(){
                      var img = $(this).attr('data-src');;
                      images.push(img);
                  })
                //  var imageshow = console.log(colors.rainbow(images .join(", ")));

                  var brand = $('.inverse > [itemprop=name]').text();
                  var state = $('div[itemprop=itemCondition]').text(); 
                  var color =  $('div[itemprop=color]').text(); 
                  // `i` is the shared loop counter, so when this callback runs it
                  // may no longer match the URL that was requested (presumably
                  // the loop has already finished by then).
                  console.log(urlp[i]);
                  // A non-empty .state-bar text is treated as "product sold".
                  var token_vendu = $('.state-bar').text();
                  if(token_vendu != ""){ 
                    console.log(colors.red('PRODUIT VENDU'));
                    var vendu = 1; 
                  }else{
                    vendu = 0;
                  }
                  console.log(colors.blue("CallBack Vente "+vendu));

                  var discount_price = $('.old-price').text(); 
                  console.log("Discount: " + discount_price);
                // Extract the size by splitting the details text on runs of
                // spaces; which piece is used depends on whether a brand was found.
                try{
                  if(brand == ""){ 

                    var size = res1.children().parent().text();
                    var format_size = size.replace(/ /g,"");
                    var format_size2 = format_size.replace(/[\n]/gi, " " );
                    var split_size1 = format_size2.split("    ");
                    var split_size2 = split_size1[0].split(" ");
                    var split4 = split_size2[4];
                    var formatsize = split4;

                  }else{

                    var size = res1.children().parent().children().text(); 
                    var format_size = size.replace(/ /g,"");
                    var format_size2 = format_size.replace(/[\n]/gi, " " );
                    var split_size = format_size2.split("         ");
                    console.log("split: "+split_size[1] )
                    var split3 = split_size[1].split(" ");
                    formatsize = split3[1];


                  }

                } catch (e) {
                    // NOTE(review): `split_size` is only assigned in the else
                    // branch above; if the exception happened before that
                    // assignment it is still undefined and this line throws too.
                    // `formatsize` is not set here either.
                    split_size[1] = "N/A";
                    console.log(e.message);
                    }
                  console.log("Size : " + formatsize);
                  console.log("Brand : "+brand);
                  console.log(meta);
                  console.log("color : " + color);
                  console.log("state : " + state);

                  //Save to database
                  // Nine placeholders, bound in order to the nine values below.
                 connection.query('INSERT INTO `vinted` VALUES ( NULL , ?, ?, ?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP())',
                      [brand,
                      color,
                      format_price2,
                      discount_price,
                      state,
                      formatsize,
                      vendu,
                      images.join(", "),
                      meta
                      ]
                  , function (err, result) {
                      if (err) {console.error('error inserting into database : ' + err.stack); return;}
                  });
                }else{console.log(colors.red(error));} // the ECONNRESET/ENOTFOUND/socket hang up errors are logged here

1 个答案:

答案 0 :(得分:0)

我怀疑您的抓取工具可能会压倒您尝试抓取的网站,因此ECONNRESET。我有自己编写node.js爬虫的类似经历。我不得不通过定时发送请求来自我节制,给服务器一些喘息空间。

示例:

var request = require('request');
var EventEmitter = require('events').EventEmitter;

// Event bus used to chain requests: emitting 'fetchNext' schedules the next one.
var emitter = new EventEmitter();

// Pause before each request so the target server gets some breathing room.
var CRAWL_DELAY_MS = 60000;

/**
 * Schedule a single throttled request. When the request finishes, whether it
 * succeeded or failed, 'fetchNext' is emitted so the next request is scheduled.
 *
 * `url` is a placeholder in this example: the surrounding code must set it to
 * the page that should be fetched next.
 */
function doCrawl() {
  setTimeout(function () {
    //do crawling operation, e.g.
    request(url, function (err, resp, html) {
      if (err) {
        // Report the failure instead of silently dropping it.
        console.error(err);
      } else {
        //do all you want with the response
      }
      // Trigger the next request even after an error; otherwise one
      // ECONNRESET would stop the whole crawl.
      emitter.emit('fetchNext');
    });
  }, CRAWL_DELAY_MS);
}

emitter.on('fetchNext', doCrawl);

您可能还想考虑这样做

修改 >> 使用示例代码

var urlStack = []; //an array that holds the list of urls you want to visit
var emitter = new EventEmitter();
emitter.on('fetchNext', delayedCrawl); //this is triggered after any item is saved

// Load the product links from link.json (next to this script), queue the full
// product URLs in urlStack, then kick off the first throttled crawl.
fs.readFile(__dirname + '/link.json', 'utf8', function(err, data) {
  if (err) throw err;
  var obj = JSON.parse(data);
  var urlp = obj.link;
  console.log(colors.yellow("Products:" + urlp.length));

  // NOTE(review): the range is kept from the original code; it queues
  // urlp[0] .. urlp[urlp.length - 3], i.e. the last two links are skipped.
  // Confirm whether that is intended.
  for (var i = 1; i < urlp.length - 1; i++) {
    urlStack.push('https://www.vinted.fr' + urlp[i - 1]);
  }
  emitter.emit('fetchNext');
});

/**
 * Wait five seconds, then crawl the next queued URL via doCrawl.
 * Registered above as the 'fetchNext' handler.
 */
function delayedCrawl() {
  setTimeout(doCrawl, 5000); //5-second delay
}

    function doCrawl() {
      var url = urlStack.pop();
      if(!url) return;
      request(url, function(error, response, html) {

            if (!error) {
              var $ = cheerio.load(html);
              var link = [];
              var json = {
                link: ""
              };
              var price = $('span[itemprop=price]').text();
              var format_price = price.replace(/\n|\r/g, "");
              var format_price2 = format_price.replace(/ /g, "");
              var res1 = $('.details-list--details');
              var meta = $("link[rel='canonical']").attr('href');
              var images = []; // tableau img


              $('img[itemprop=image]').filter(function() {
                  var img = $(this).attr('data-src');;
                  images.push(img);
                })
                //  var imageshow = console.log(colors.rainbow(images .join(", ")));

              var brand = $('.inverse > [itemprop=name]').text();
              var state = $('div[itemprop=itemCondition]').text();
              var color = $('div[itemprop=color]').text();
              console.log(url);
              var token_vendu = $('.state-bar').text();
              if (token_vendu != "") {
                console.log(colors.red('PRODUIT VENDU'));
                var vendu = 1;
              } else {
                vendu = 0;
              }
              console.log(colors.blue("CallBack Vente " + vendu));

              var discount_price = $('.old-price').text();
              console.log("Discount: " + discount_price);
              try {
                if (brand == "") {

                  var size = res1.children().parent().text();
                  var format_size = size.replace(/ /g, "");
                  var format_size2 = format_size.replace(/[\n]/gi, " ");
                  var split_size1 = format_size2.split("    ");
                  var split_size2 = split_size1[0].split(" ");
                  var split4 = split_size2[4];
                  var formatsize = split4;

                } else {

                  var size = res1.children().parent().children().text();
                  var format_size = size.replace(/ /g, "");
                  var format_size2 = format_size.replace(/[\n]/gi, " ");
                  var split_size = format_size2.split("         ");
                  console.log("split: " + split_size[1])
                  var split3 = split_size[1].split(" ");
                  formatsize = split3[1];


                }

              } catch (e) {
                split_size[1] = "N/A";
                console.log(e.message);
              }
              console.log("Size : " + formatsize);
              console.log("Brand : " + brand);
              console.log(meta);
              console.log("color : " + color);
              console.log("state : " + state);

              //Save to database
              connection.query('INSERT INTO `vinted` VALUES ( NULL , ?, ?, ?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP())', [brand,
                color,
                format_price2,
                discount_price,
                state,
                formatsize,
                vendu,
                images.join(", "),
                meta
              ], function(err, result) {
                emitter.emit('fetchNext');
                if (err) {
                  console.error('error inserting into database : ' + err.stack);
                  return;
                }
              });
            } else {
              console.log(colors.red(error));
            } // here the error ECONNRESET/ENOTFOUND ...