Node.js中的Webscraper返回带有异步和Promise的空数组

时间:2018-08-26 14:58:05

标签: javascript node.js asynchronous promise request

我在使Node.js异步化方面遇到问题,并答应使用forloop与webscraper一起使用来访问网站。看了几篇文章并在stackoverflow上测试了不同的解决方案后,我无法使我的异步功能正常工作。谢谢!

代码:

var data = {};

async function run() {
    console.log("Setup links..");
    var links = ['https://example1.com', 'https://example2.com'];

    await Promise.all(links.map(async (element) => {
        const contents = await scrape(element);
        console.log("After call in Promise: " + JSON.stringify(data));
    }));

    console.log("------------");
    console.log(JSON.stringify(data));
    return JSON.stringify(data);
}

async function scrape(element) {
    request(element, function (error, response, html) {
        console.log("Scrape website...");
        if (!error && response.statusCode == 200) {
            var $ = cheerio.load(html);
            var rowCounter = 0;
            var columnCounter = 0;
            var dates = [];
            var item = [];
            var mainTitle = false;
            var title;

            $('tr td').each(function(i, elem) {
                var txt = $(elem).text().trim();
                if (rowCounter == 0) {
                    if (columnCounter != 0) {
                        dates.push(txt.substring(txt.length - 4, txt.length));
                    }
                } else {
                    if (txt == "Current Assets" || txt == "Current Liabilities" || txt == "Stockholders' Equity" || txt == "Revenue" || txt == "Operating Expenses" || txt == "Income from Continuing Operations" || txt == "Non-recurring Events" || txt == "Net Income") {
                        mainTitle = true;
                    } else {
                        if (columnCounter == 0) {
                            title = txt.split(' ').join('');
                            data[title] = {};
                        } else {
                            item.push(txt);
                        }
                    }
                }

                columnCounter++;

                if (mainTitle) {
                    columnCounter = 0;
                    mainTitle = false;
                }

                if (columnCounter == 5) {
                    columnCounter = 0;
                    if (rowCounter != 0) {
                        data[title][0] = item[0];
                        data[title][1] = item[1];
                        data[title][2] = item[2];
                        data[title][3] = item[3];
                        item = [];
                    } 
                    rowCounter++;
                }
            });
        }
    });   
}

module.exports.run = run;

上面的控制台中的代码:

Server started!
Route called
Setup links..
After call in Promise: {}
After call in Promise: {}
------------
{}
Scrape website...
Scrape website...

因此,使用循环时,promise存在问题。

1 个答案:

答案 0 :(得分:0)

我相信这就是您想要的(未经测试,只是被黑):

async function scrape(element) {
return new Promise( (resolve, reject ) => {
    request(element, function (error, response, html) {
        if( error ) return reject( error );
        if (response.statusCode != 200) return reject( "Got HTTP code: " + response.statusCode);

        console.log("Scrape website...");
        var $ = cheerio.load(html);
        var rowCounter = 0;
        var columnCounter = 0;
        var dates = [];
        var item = [];
        var mainTitle = false;
        var title;

        $('tr td').each(function(i, elem) {
            var txt = $(elem).text().trim();
            if (rowCounter == 0) {
                if (columnCounter != 0) {
                    dates.push(txt.substring(txt.length - 4, txt.length));
                }
            } else {
                if (txt == "Current Assets" || txt == "Current Liabilities" || txt == "Stockholders' Equity" || txt == "Revenue" || txt == "Operating Expenses" || txt == "Income from Continuing Operations" || txt == "Non-recurring Events" || txt == "Net Income") {
                    mainTitle = true;
                } else {
                    if (columnCounter == 0) {
                        title = txt.split(' ').join('');
                        data[title] = {};
                    } else {
                        item.push(txt);
                    }
                }
            }

            columnCounter++;

            if (mainTitle) {
                columnCounter = 0;
                mainTitle = false;
            }

            if (columnCounter == 5) {
                columnCounter = 0;
                if (rowCounter != 0) {
                    data[title][0] = item[0];
                    data[title][1] = item[1];
                    data[title][2] = item[2];
                    data[title][3] = item[3];
                    item = [];
                } 
                rowCounter++;
            }
        });
        resolve();
    });   
} );

}

将代码包装在名为Promise的{​​{1}}中,并使用resolve处理错误-但您最了解如何处理错误。