错误:抓取数据时出现 connect ETIMEDOUT

时间:2017-05-24 20:01:07

标签: node.js mongodb express rate-limiting npm-request

我有一个函数,它会:
1. 从 MongoDB 集合 foo 的文档中取出 3000 多个 'id' 属性值;2. 为每个 id 发起一个 GET 请求,以获取该 id 对应的 'resp' 对象,并将其存储在另一个数据库中。

// Route handler for GET '/': reads the distinct 'id' values of collection 'foo',
// issues one HTTP request per id, and inserts the parsed result into collection 'test'.
// NOTE(review): the snippet as posted is truncated — the closing `});` of the
// distinct() callback and of router.get() are missing.
router.get('/', (req, res) => {

    var collection = db.get().collection('foo');
    var collection2 = db.get().collection('test');
    // NOTE(review): `err` from distinct() is never checked before `idArr` is used.
    collection.distinct('id',  (err, idArr) => { // count: 3000+
    // forEach calls request() for every id in one synchronous pass; nothing here
    // waits for a response before the next call is made.
    idArr.forEach(id => {
    let url = 'https://externalapi.io/id=' + id
    request(url, (error, response, body) => {
           if (error) { 
             console.log(error) 
           } else {
             // NOTE(review): `resp` is not declared anywhere in this snippet, while the
             // callback's `body` parameter is unused — presumably JSON.parse(body) was
             // intended; confirm with the author.
             resp = JSON.parse(resp);
             // The result of insert() is neither awaited nor checked here.
             collection2.insert(resp);
           }
    });
});

Node 错误日志:

[0] events.js:163
[0]       throw er; // Unhandled 'error' event
[0]       ^
[0]
[0] Error: connect ETIMEDOUT [EXT URL REDACTED]
[0]     at Object.exports._errnoException (util.js:1050:11)
[0]     at exports._exceptionWithHostPort (util.js:1073:20)
[0]     at TCPConnectWrap.afterConnect [as oncomplete] (net.js:1093:14)

我使用 simple-rate-limiter 来避免触发对方的速率限制(25 cps):

// Wrap the `request` function with simple-rate-limiter so calls are throttled.
// NOTE(review): .to(20).per(1000) presumably means at most 20 calls per 1000 ms —
// confirm against the simple-rate-limiter documentation.
const limit = require("simple-rate-limiter");
const request = limit(require("request")).to(20).per(1000);

但是在发出 300 到 1700 个请求之间的某个时刻,我总会收到此错误,它会导致命令行中的 Node 进程崩溃。如何处理此错误以防止我的应用崩溃?

我尝试了多种错误处理方式,但没有一种能够处理 connect ETIMEDOUT。

1 个答案:

答案 0 :(得分:2)

正如评论中所讨论的,如果您想控制同时处于进行中的最大请求数,可以像下面这样使用 Bluebird 轻松实现:

const Promise = require('bluebird');
const rp = require('request-promise');

/**
 * GET '/' — for every distinct `id` in collection 'foo', fetch the matching
 * document from the external API and insert it into collection 'test'.
 *
 * At most 10 requests are in flight at any time (Bluebird `Promise.map` with
 * the `concurrency` option). A failure of a single id (network error such as
 * ETIMEDOUT, invalid JSON, or a failed insert) is logged and counted, but does
 * not abort the remaining ids.
 *
 * Responds with:
 *   - 500 and `{ error }` if the ids cannot be read or the run fails unexpectedly;
 *   - 200 and `{ total, failed }` once every id has been processed.
 */
router.get('/', (req, res) => {
    const collection = db.get().collection('foo');
    const collection2 = db.get().collection('test');

    collection.distinct('id', (err, idArr) => { // count: 3000+
        if (err) {
            // Reading the ids failed: this is a server-side error, so answer 500
            // (501 would mean "Not Implemented").
            console.error(err);
            res.status(500).send({ error: 'Failed to read ids from collection foo' });
            return;
        }

        let failed = 0;

        Promise.map(idArr, id => {
            const url = `https://externalapi.io/id=${id}`;
            return rp(url).then(body => {
                const resp = JSON.parse(body);
                // Return the insert result so that, if the driver hands back a
                // promise, the concurrency slot is held until the write finishes
                // and a failed write reaches the catch handler below.
                return collection2.insert(resp);
            }).catch(error => {
                // Deliberately not rethrown: one failed id must not stop the others.
                failed += 1;
                console.error(`Request for id ${id} failed:`, error);
            });
            // Pick a concurrency value that the remote API tolerates without timing out.
        }, { concurrency: 10 }).then(() => {
            // All ids have been attempted; report the outcome.
            res.send({ total: idArr.length, failed });
        }).catch(error => {
            // Only reachable for unexpected failures, because per-id errors are
            // handled above. Avoid a second response if one was already started.
            console.error(error);
            if (!res.headersSent) {
                res.status(500).send({ error: 'Unexpected failure while processing ids' });
            }
        });
    });
});