错误:getaddrinfo ENOTFOUND parishackers.org parishackers.org:80

时间:2018-06-12 08:19:10

标签: node.js

发生了「getaddrinfo ENOTFOUND parishackers.org parishackers.org:80」错误。我写的代码如下:

// Question's reproduction snippet: crawls several URLs with node-webcrawler
// and prints each crawled page's <title>. The parishackers.org entry below
// is the one that triggers the getaddrinfo ENOTFOUND error in the question.
var Crawler = require("node-webcrawler");
var url = require('url');

var c = new Crawler({
    maxConnections : 10,
    // This will be called for each crawled page
    callback : function (error, result, $) {
        // $ is Cheerio by default
        //a lean implementation of core jQuery designed specifically for the server
        if(error){
            console.log(error);
        }else{
            console.log($("title").text());
        }
    }
});

// Queue just one URL, with default callback
c.queue('http://www.amazon.com');

// Queue a list of URLs
c.queue(['http://www.google.com/','http://www.yahoo.com']);

// Queue URLs with custom callbacks & parameters
c.queue([{
    // NOTE(review): this domain does not resolve — the DNS lookup fails with
    // ENOTFOUND, which is exactly the error quoted later in the question.
    uri: 'http://parishackers.org/',
    jQuery: false,

    // The global callback won't be called
    callback: function (error, result) {
        if(error){
            console.log(error);
        }else{
            console.log('Grabbed', result.body.length, 'bytes');
        }
    }
}]);

// Queue some HTML code directly without grabbing (mostly for tests)
c.queue([{
    html: '<p>This is a <strong>test</strong></p>'
}]);

但是当我运行代码时,程序先输出了前几个页面的标题(谷歌、雅虎、Amazon.com:电子产品,服装,电脑,书籍,DVD和电子产品的在线购物),随后出现了以下错误:

{ Error: getaddrinfo ENOTFOUND parishackers.org parishackers.org:80
    at errnoException (dns.js:50:10)
    at GetAddrInfoReqWrap.onlookup [as oncomplete] (dns.js:92:26)
  code: 'ENOTFOUND',
  errno: 'ENOTFOUND',
  syscall: 'getaddrinfo',
  hostname: 'parishackers.org',
  host: 'parishackers.org',
  port: 80 }

错误就发生了。我认为程序只抓取到了一部分数据,但不明白为什么不能全部完成。我是用 npm install node-webcrawler 安装这个库的。我在别的网站上读到,这个错误可能是因为链接无效而引起的,对吗?我该怎么解决这个问题?我的代码哪里出错了?

1 个答案:

答案 0 :(得分:1)

该错误(getaddrinfo ENOTFOUND parishackers.org parishackers.org:80)是因为域名 http://parishackers.org 无法解析(DNS 查询失败)而引发的。换成有效的网址链接后,你的 node-webcrawler 就能正常工作。下面是修改后的代码段,供参考:

// Answer's corrected snippet: identical to the question's code, except the
// dead parishackers.org URI is replaced with a resolvable host so the DNS
// lookup (getaddrinfo) no longer fails with ENOTFOUND.
const Crawler = require("node-webcrawler");
const url = require('url'); // NOTE(review): unused in this snippet; kept from the original

const c = new Crawler({
    maxConnections: 10,
    // Global callback, invoked once per crawled page.
    callback: function (error, result, $) {
        // $ is Cheerio by default —
        // a lean implementation of core jQuery designed specifically for the server.
        if (error) {
            console.log(error);
        } else {
            console.log($("title").text());
        }
    }
});

// Queue just one URL, with the default (global) callback.
c.queue('http://www.amazon.com');

// Queue a list of URLs.
c.queue(['http://www.google.com/','http://www.yahoo.com']);

// Queue a URL with a custom callback & parameters.
// Fix: use a host that actually resolves instead of parishackers.org.
c.queue([{
    uri: 'http://www.amazon.com',
    jQuery: false,

    // The global callback won't be called for this entry.
    callback: function (error, result) {
        if (error) {
            console.log(error);
        } else {
            console.log('Grabbed', result.body.length, 'bytes');
        }
    }
}]);

// Queue some HTML code directly without grabbing (mostly for tests).
c.queue([{
    html: '<p>This is a <strong>test</strong></p>'
}]);