Question

我有一个简单的函数，用来统计一批网址对应页面的字数。网址数量较少时，脚本可以正常运行。我把并发数限制为一次 4 个。我观察过内存和 CPU 的占用，都远没有达到机器的上限。但在处理了大约 70 个网址之后，脚本就停在那里不动了，而且没有任何报错。我写了 try/catch 块，但它从未捕获到任何异常。任何帮助都将不胜感激。

我也尝试过用 lodash 的 forEach 代替 async 库，结果遇到了同样的问题。

const async = require('async')
const wordcount = require('wordcount')
const afterLoad = require('after-load')
const htmlToText = require('html-to-text')

/**
 * Logs the word count of every page in `urls`, handling at most four URLs
 * at a time via `async.eachLimit`.
 *
 * Each page is loaded with `afterLoad`, converted to plain text with
 * `htmlToText.fromString` and counted with `wordcount`. A URL whose
 * processing throws is logged with a count of 0 together with the error,
 * and does not abort the remaining URLs.
 *
 * @param {string[]} urls - Page addresses to process.
 * @param {function(?Error): void} cb - Called once after every URL has been
 *     handled; receives the error reported by `async.eachLimit`, or `null`.
 */
function getWordCount(urls, cb) {
    async.eachLimit(urls, 4, function(url, cbe) {
        // Declared outside the try block so the failure path can use it too.
        // Assigning it from inside `catch` while it was scoped to `try`
        // created an implicit global (a ReferenceError in strict mode).
        let urlWordCount = 0
        let failure = null

        try {
            const html = afterLoad(url) // https://www.npmjs.com/package/after-load
            const text = htmlToText.fromString(html)
            urlWordCount = wordcount(text) // https://www.npmjs.com/package/wordcount
        } catch (err) {
            failure = err
            urlWordCount = 0
        }

        if (failure) {
            console.log(url, urlWordCount, failure)
        } else {
            console.log(url, urlWordCount)
        }

        // Signal completion exactly once, outside the try/catch, so an
        // exception raised while continuing the iteration can never lead to
        // this callback being invoked a second time from the catch branch.
        cbe(null)
    }, function(err) {
        console.log("finished getting wordcount", err)
        cb(err || null)
    })
}

// Example run: count the words on two sample pages, then print the final status.
const sampleUrls = [
    "https://stackoverflow.com/",
    "https://caolan.github.io/async/docs.html#eachLimit"
]

getWordCount(sampleUrls, (err) => {
    console.log(err)
})

Answer 1

我认为问题出在 after-load 模块的同步实现上，不过在没有拿到实际的错误信息之前很难下定论（你可以在每一行之后加上 console.log，看看代码究竟卡在哪里——或者使用调试器达到同样的目的）。

我建议改用真正的异步代码——我用一组 1000 个网址运行了下面的示例，它没有卡住；借助 [scramjet]，代码的可读性也更好：

const {StringStream} = require('scramjet');
const wordcount = require('wordcount');
const fetch = require('node-fetch');
const htmlToText = require('html-to-text');
const {promisify} = require('util');

// Fetch each URL and print its word count; the stream is configured with
// `maxParallel: 4`.
StringStream.fromArray(["https://stackoverflow.com/", "https://caolan.github.io/async/docs.html#eachLimit"])
    .setOptions({maxParallel: 4})
    // Turn every URL string into an object carrying the URL and its HTTP response.
    .parse(async url => ({
        url,
        response: await fetch(url)
    }))
    .map(async ({url, response}) => {
        const html = await response.text();
        // The downloaded markup must be passed in: calling fromString() with
        // no argument ignores the page entirely, so the count would never
        // reflect its content.
        const text = htmlToText.fromString(html);
        const count = wordcount(text);

        return {
            url,
            count
        };
    })
    .each(console.log)
;

实际上，我是从一个存有 URL 的文件中读取网址来运行它的，只需把开头几行改为：

// Variant of the pipeline's opening lines that reads the URLs from
// './urls-list.txt' instead of an inline array.
// NOTE(review): `fs` is not among the modules required by the example above —
// add `const fs = require('fs')` before using this.
StringStream.from(fs.createReadStream('./urls-list.txt'), 'utf-8')
    .lines() // presumably yields one URL per line of the file — confirm against the scramjet docs
    .setOptions({maxParallel: 4})
    // and so on.

async.each 没有执行完成，也没有报错

1 个答案: