运行时出现了错误:getaddrinfo ENOTFOUND parishackers.org parishackers.org:80。我写的代码如下:
// Crawl a handful of pages with node-webcrawler and log what was fetched.
const Crawler = require("node-webcrawler");
const url = require('url');

// Default handler: runs for each crawled page that has no callback of its own.
// `$` is Cheerio by default, a lean implementation of core jQuery designed
// specifically for the server.
function logPageTitle(error, result, $) {
  if (error) {
    console.log(error);
    return;
  }
  console.log($("title").text());
}

// Per-request handler: reports the size of the response body instead of the title.
function logBodySize(error, result) {
  if (error) {
    console.log(error);
    return;
  }
  console.log('Grabbed', result.body.length, 'bytes');
}

const c = new Crawler({
  maxConnections: 10,
  callback: logPageTitle,
});

// A single URL, handled by the default callback.
c.queue('http://www.amazon.com');

// Several URLs at once, also handled by the default callback.
c.queue(['http://www.google.com/', 'http://www.yahoo.com']);

// A URL with its own options; because it supplies a callback,
// the global callback won't be called for it.
c.queue([
  {
    uri: 'http://parishackers.org/',
    jQuery: false,
    callback: logBodySize,
  },
]);

// Raw HTML queued directly, with nothing fetched (mostly for tests).
c.queue([
  {
    html: '<p>This is a <strong>test</strong></p>',
  },
]);
但是当我运行代码时,程序先输出了各页面的标题(谷歌、雅虎、"Amazon.com:电子产品、服装、电脑、书籍、DVD 等更多商品的在线购物"),随后输出了以下内容:
{ Error: getaddrinfo ENOTFOUND parishackers.org parishackers.org:80
at errnoException (dns.js:50:10)
at GetAddrInfoReqWrap.onlookup [as oncomplete] (dns.js:92:26)
code: 'ENOTFOUND',
errno: 'ENOTFOUND',
syscall: 'getaddrinfo',
hostname: 'parishackers.org',
host: 'parishackers.org',
port: 80 }
出现了上面的错误。我认为程序已经抓取到了一部分数据,但我不明白为什么它不能完整地完成抓取。我是通过 npm install node-webcrawler 安装这个库的。我查阅了其他网站,所以我认为这个错误是由于链接有误而发生的,对吗?我该怎么解决这个问题?我的代码出了什么问题?
答案 0 :(得分:1)
引发该错误(getaddrinfo ENOTFOUND parishackers.org parishackers.org:80)的原因是域名 http://parishackers.org 无效,DNS 无法解析它。
只要换成有效的网址,您的 node-webcrawler 程序就能正常工作。下面是修改后的代码段,供参考:
// Same crawl as in the question, with the unresolvable host replaced by a valid URL.
const Crawler = require("node-webcrawler");
const url = require('url');

const c = new Crawler({
  maxConnections: 10,
  // Invoked for each crawled page unless the queued item brings its own callback.
  // `$` is Cheerio by default, a lean implementation of core jQuery designed
  // specifically for the server.
  callback: (error, result, $) => {
    if (error) {
      console.log(error);
      return;
    }
    console.log($("title").text());
  },
});

// One URL, using the default callback.
c.queue('http://www.amazon.com');

// A list of URLs, using the default callback.
c.queue(['http://www.google.com/', 'http://www.yahoo.com']);

// A URL with custom parameters and its own callback;
// the global callback won't be called for this entry.
c.queue([
  {
    uri: 'http://www.amazon.com',
    jQuery: false,
    callback: (error, result) => {
      if (error) {
        console.log(error);
        return;
      }
      console.log('Grabbed', result.body.length, 'bytes');
    },
  },
]);

// HTML handed over directly without any fetch (mostly for tests).
c.queue([
  {
    html: '<p>This is a <strong>test</strong></p>',
  },
]);