在 JavaScript 中进行网络爬取时出错:Error: connect ETIMEDOUT

时间:2018-06-22 03:30:46

标签: javascript node.js npm web-crawler

您好,我正在按照一篇关于如何使用 JavaScript 进行网络抓取的教程进行操作,但在运行脚本时遇到了下面的错误:

   Visiting page https://arstechnica.com/
                                                             testcrawl.js:6
Error: Error: connect ETIMEDOUT 50.31.169.131:443
                                                             testcrawl.js:9
TypeError: Cannot read property 'statusCode' of undefined
                                                             testcrawl.js:12

    at Request._callback (c:\Users\nab\practise\testcrawl.js:12:43)
    at self.callback (c:\Users\nab\node_modules\request\request.js:185:22)
    at emitOne (events.js:116:13)
    at Request.emit (events.js:211:7)
    at Request.onRequestError (c:\Users\nab\node_modules\request\request.js:877:8)
    at emitOne (events.js:116:13)
    at ClientRequest.emit (events.js:211:7)
    at TLSSocket.socketErrorListener (_http_client.js:387:9)
    at emitOne (events.js:116:13)
    at TLSSocket.emit (events.js:211:7)

这是我正在运行的脚本:

var request = require('request');   // called below as request(url, callback) to fetch the page
var cheerio = require('cheerio');   // used below via cheerio.load(body) to query the HTML
var URL = require('url-parse');     // not referenced anywhere else in this script

var pageToVisit = "https://arstechnica.com/";
console.log("Visiting page " + pageToVisit);
request(pageToVisit, function(error, response, body) {
   if(error) {
     console.log("Error: " + error);   // only logs; there is no return, so execution falls through
   }
   // Check status code (200 is HTTP OK)
   console.log("Status code: " + response.statusCode);   // reported trace: TypeError here (testcrawl.js:12) because response is undefined after the error
   if(response.statusCode === 200) {                      
     // Parse the document body
     var $ = cheerio.load(body);
     console.log("Page title:  " + $('title').text());   // prints the text of the page's <title> element
   }
});

为什么会出现此错误以及如何解决此问题?

1 个答案:

答案 0 :(得分:0)

请尝试以下操作,并让我知道结果。

// Fetch one page and print its HTTP status code and <title> text.
// Assumes `request` and `cheerio` have already been required, as in the
// question's script.
var pageToVisit = "https://arstechnica.com/";
console.log("Visiting page " + pageToVisit);
// timeout: 20000 is passed as a request option (milliseconds, per the
// request library's documentation) so a slow connection gets longer to respond.
request({url:pageToVisit,timeout:20000}, function(error, response, body) {
   if(error) {
     console.log("Error: " + error);
     // When the request fails (e.g. connect ETIMEDOUT) `response` is undefined,
     // so stop here; falling through would throw
     // "TypeError: Cannot read property 'statusCode' of undefined".
     return;
   }
   // Check status code (200 is HTTP OK)
   console.log("Status code: " + response.statusCode);
   if(response.statusCode === 200) {
     // Parse the document body
     var $ = cheerio.load(body);
     console.log("Page title:  " + $('title').text());
   }
});

注意,我在请求选项中添加了 timeout(20000 毫秒),以查看其运行情况。