如何使用promises在nodejs中创建异步html解析器?

时间:2017-03-04 16:31:06

标签: javascript node.js multithreading

玩JS中的promises并尝试制作简单的xpath网站解析器,但我在完成整体解析过程时遇到了逻辑,我的代码是:

var request = require('request');
var xpath = require('xpath');
var dom = require('xmldom').DOMParser;

var olxMain = 'https://www.some.site/';

var xpathRoot = '//a[contains(@href, "https://www.some.site/mask/")]';
var linksXpath = '//a';

var allGlobalLinks = [];


var getLink = function (node) {
    for (key in node['attributes']) {
        if (node['attributes'][key]['name'] === 'href') {
            return node['attributes'][key]['value'];
        }
    }
}


var getData = function (url, xpathPattern) {
    return new Promise(function (resolve, reject) {
            console.log("Opening " + url);
            var processResponse = function (error, response, body) {
                var doc = new dom().parseFromString(body);
                var childNodes = xpath.select(xpathPattern, doc);
                var links = childNodes.map(function (n) {
                    return getLink(n);
                });
                resolve(links);
            };
            request({url: url}, processResponse);
        }
    );
}


var arrayUnique = function (x, i, a) {
    return a.indexOf(x) == i;
};

var main = function () {
    getData(olxMain, xpathRoot).then(function (links) {
        links = links.filter(arrayUnique);
        var maxThreads = 10, n = 0;
        var chunks = [];
        for (k in links) {
            var url = links[k];
            n++;
            if (n <= maxThreads)
                chunks.push(url);
            else {
                n = 0;
                // console.log(chunks);
                Promise.all(chunks.map(function (url) {
                    return getData(url, linksXpath);
                })).then(function (links) {
                    // add these links to global scope list here
                    });
                    console.log("Finished mappings iteration");
                });

                chunks = [];
            }
        }
        ;
    });
}




main();

所以我想要的基本上是某种带有承诺的threadPool,如何管理这10个承诺,当它们全部完成时,我应该再产生10个,直到列表完成并且所有Promise都已完成?

0 个答案:

没有答案