在 Node.js 中抓取网页时,有时会出现 ETIMEDOUT 错误(使用 localhost 作为服务器)。我正在使用 request-promise 发出请求。我找不到处理它的方法:这个错误并不是每次都出现,而是随机出现。我尝试过降低并发数,但没有用。而且我的抓取量并不大。有什么建议吗(例如改用其他库等)?
这是错误信息:
{ RequestError: Error: connect ETIMEDOUT 52.232.0.90:443
at new RequestError (C:\Users\HHS\NodeJs
Projects\News_Scraper\node_modules\request-promise-core\lib\errors.js:14:15)
at Request.plumbing.callback (C:\Users\HHS\NodeJs
Projects\News_Scraper\node_modules\request-promise-core\lib\plumbing.js:87:29)
at Request.RP$callback [as _callback] (C:\Users\HHS\NodeJs
Projects\News_Scraper\node_modules\request-promise-core\lib\plumbing.js:46:31)
at self.callback (C:\Users\HHS\NodeJs
Projects\News_Scraper\node_modules\request\request.js:185:22)
at emitOne (events.js:116:13)
at Request.emit (events.js:211:7)
at Request.onRequestError (C:\Users\HHS\NodeJs
Projects\News_Scraper\node_modules\request\request.js:881:8)
at emitOne (events.js:116:13)
at ClientRequest.emit (events.js:211:7)
at TLSSocket.socketErrorListener (_http_client.js:387:9)
at emitOne (events.js:116:13)
at TLSSocket.emit (events.js:211:7)
at emitErrorNT (internal/streams/destroy.js:66:8)
at _combinedTickCallback (internal/process/next_tick.js:139:11)
at process._tickCallback (internal/process/next_tick.js:181:9)
name: 'RequestError',
message: 'Error: connect ETIMEDOUT 52.232.0.90:443',
cause:
{ Error: connect ETIMEDOUT 52.232.0.90:443
at TCPConnectWrap.afterConnect [as oncomplete] (net.js:1191:14)
errno: 'ETIMEDOUT',
code: 'ETIMEDOUT',
syscall: 'connect',
address: '52.232.0.90',
port: 443 },
error:
{ Error: connect ETIMEDOUT 52.232.0.90:443
at TCPConnectWrap.afterConnect [as oncomplete] (net.js:1191:14)
errno: 'ETIMEDOUT',
code: 'ETIMEDOUT',
syscall: 'connect',
address: '52.232.0.90',
port: 443 },
options:
{ uri: 'https://www.ntv.com.tr/ekonomi/hazine-ve-maliye-bakani-albayrakenflasyondaki-asagiya-dogru-trendi-cok-daha-guc,400oDcsHMUq1nXl6-52C9w',
callback: [Function: RP$callback],
transform: undefined,
simple: true,
resolveWithFullResponse: false,
transform2xxOnly: false },
response: undefined }
用于抓取的代码:
const rp = require('request-promise');
const cheerio = require('cheerio');
const fs = require("fs");
const Promise = require("bluebird");
const moment = require('moment');
/**
 * Fetches a URL with request-promise, retrying when the failure looks like a
 * transient network problem (e.g. the intermittent `connect ETIMEDOUT`).
 *
 * @param {string} url - Absolute URL to download.
 * @param {number} [retries=3] - Extra attempts after the first failure.
 * @param {number} [timeoutMs=15000] - Value passed as request's `timeout` option.
 * @param {number} [baseDelayMs=500] - First back-off delay; doubled on every retry.
 * @returns {Promise<string>} The response body.
 * @throws The last request error once retries are exhausted, or immediately
 *         for errors that are not considered transient (e.g. HTTP status errors).
 */
async function fetchWithRetry (url, retries = 3, timeoutMs = 15000, baseDelayMs = 500) {
  // Socket-level failures that usually succeed when tried again.
  const retryableCodes = ['ETIMEDOUT', 'ESOCKETTIMEDOUT', 'ECONNRESET'];
  for (let attempt = 0; ; attempt++) {
    try {
      return await rp({ uri: url, timeout: timeoutMs });
    } catch (e) {
      // request-promise wraps the low-level socket error in `cause`.
      const code = e && e.cause && e.cause.code;
      if (attempt >= retries || retryableCodes.indexOf(code) === -1) {
        throw e;
      }
      // Exponential back-off so a struggling server is not hit again at once.
      await new Promise((resolve) => setTimeout(resolve, baseDelayMs * Math.pow(2, attempt)));
    }
  }
}

/**
 * Scrapes the headline slider on the ntv.com.tr front page, downloads every
 * linked article and extracts its metadata. The result is also written to
 * `./ntv.json`.
 *
 * Articles that still fail after retries are logged and left out of the
 * result, so the returned array never contains `undefined` holes.
 *
 * @returns {Promise<Array<{title: *, newsUrl: string, imageUrl: string, time: number}>>}
 *          The scraped posts; an empty array if the front page itself could
 *          not be fetched.
 */
async function Gundem () {
  let posts = [];
  try {
    const baseUrl = 'https://www.ntv.com.tr';
    const mainHtml = await fetchWithRetry(baseUrl);
    const $ = cheerio.load(mainHtml);
    const links = $(".swiper-slide.color-white").map((i, el) => {
      const href = $(el).children("a").first().attr("href");
      // Returning null makes cheerio's map() skip slides without a link.
      return href ? baseUrl + href : null;
    }).get();
    const results = await Promise.map(links, async (link) => {
      try {
        const newsHtml = await fetchWithRetry(link);
        const $ = cheerio.load(newsHtml);
        return {
          title: getTitle($),
          newsUrl: $("meta[property='og:url']").attr("content"),
          imageUrl: $("meta[property='og:image']").attr("content"),
          time: moment($("time").attr("datetime")).valueOf()
        };
      } catch (e) {
        // Best effort: one broken article must not abort the whole run.
        console.log('error scraping ' + link + '\n', e);
        return undefined;
      }
    },
    {concurrency: 10});
    // Drop the placeholders left behind by articles that failed.
    posts = results.filter((post) => post !== undefined);
  } catch (e) {
    console.log('error scraping ntv' +'\n', e);
  }
  // Not awaited: the function returns before the write has finished.
  fs.writeFile('./ntv.json', JSON.stringify(posts, null, 3), (err) => {
    if (err) throw err;
  });
  return posts;
}