嵌套的async.eachSeries导致迭代次数不断累加

时间:2016-03-29 00:06:08

标签: javascript node.js

以下函数接收一个链接数组并扫描远程网站,每页收集10篇博文,然后使用async.eachSeries获取每篇博文的所有评论。

async.waterfall

它提供如下输出:

/**
 * Walks the page links in `url`, builds a post object for every `.blogpost`
 * element found on each page, then requests each post's link and appends one
 * comment object per `.comment` element. When the outer loop finishes,
 * `cb` is called with the accumulated array of posts.
 *
 * This is the snippet the question is about: it contains the behaviour being
 * asked about (see the notes on `arrayOfPosts` below).
 *
 * @param {Array} url - page links to scan
 * @param {Function} cb - called as `cb(arrayOfPosts)` when the outer loop completes
 */
export default function getData(url, cb) {
  // Shared by every page: posts from all pages pushed so far live in this one array.
  const arrayOfPosts = [];
  // Outer loop
  async.eachSeries(url, (link, topLVLcb) => {

    // Waterfall
    async.waterfall([

      // Collects links to posts
      callback => {
        // `err` from the request is not inspected; `body` is parsed regardless.
        request(link, (err, response, body) => {
          console.log(`working on ${link}`);

          const $ = cheerio.load(body);

          // OVERALL 10 LINKS PER ONE BLOGPOST
          $('.blogpost').each((i, element) => {

            // build post object
            // (`content` is not defined in this snippet — the code that derives it
            // appears to have been elided; as written, `link` is the page link
            // from the enclosing callback)

            const post =  {
              content,
              link,
              comments: []
            }
            arrayOfPosts.push(post);
          });
          // Hands the WHOLE shared array to the next step, not just this page's
          // posts, so from the second page on the inner loop below also walks
          // the posts collected from earlier pages.
          callback(null, arrayOfPosts);
        });
      },

      // Looks for details in given post
      (arrOfPosts, postDetailsCallback) => {
        // Per-page counter, reset for every page; logged once per inner request.
        let counter = 1;

        // Inner loop through 10 links
        // (it iterates over everything in `arrOfPosts`, which here is the shared array)
        async.eachSeries(arrOfPosts, (post, eachSeriesCallback) => {
          // `err` from the request is not inspected here either.
          request(post.link, (err, response, body) => {
            console.log(counter++);
            const $ = cheerio.load(body);
            $('.comment').each((i, element) => {

              // build comment
              // (`author` and `content` are not defined in this snippet — elided)

              const comment = {
                author,
                content
              };

              post.comments.push(comment);
            });
            eachSeriesCallback(null);
          });
        }, postDetailsCallback);
      }
    ], err => {
      console.log('DONE PAGE');
      console.log('*************************');
      // Forward any waterfall error to the outer loop.
      topLVLcb(err);
    });
  }, (result, err) => {
    // NOTE(review): the async library documents this completion callback as
    // receiving the error as its FIRST argument; with this parameter order the
    // error would arrive in `result` and `err` would stay undefined — confirm.
    if (err) {
      throw err;
    } else {
      console.log('DONE ALL');
      cb(arrayOfPosts);
    }
  });
}

计数器的输出每处理一页就多累加10条,并且每次迭代都从头开始;而它本应每页只扫描10条。我想我是把某些回调搞乱了,但花了好几个小时也没能弄明白。这是我第一次写nodejs异步代码,实在让人不知所措。

2 个答案:

答案 0 :(得分:1)

看起来您把所有帖子都存进了顶层的arrayOfPosts,这意味着在waterfall的第二个函数中,您每次都会从头开始处理全部帖子,因为您传给回调的正是这个数组。因此,在收集帖子链接的第一个函数里,您应该使用一个局部的帖子数组,并把它传给下一个回调。

答案 1 :(得分:1)

  

arrayOfPosts[]对每一次async.waterfall调用来说都是全局的;对url中的每个元素,你都应该创建一个新的数组,如下所示:

/**
 * Walks the page links in `url` one at a time. For each page it builds a post
 * object per `.blogpost` element, then requests each of THAT page's posts and
 * appends one comment object per `.comment` element. When every page has been
 * processed, `cb` receives all posts from all pages.
 *
 * @param {Array} url - page links to scan
 * @param {Function} cb - called as `cb(arrayOfPosts)` once all pages are done
 * @throws rethrows the first error reported by a request or by the async
 *   helpers; the throw happens inside the outer loop's completion callback
 */
export default function getData(url, cb) {
  // Every post from every page; this is what `cb` finally receives.
  const arrayOfPosts = [];

  // Outer loop: one page at a time.
  async.eachSeries(url, (link, topLVLcb) => {
    // Posts found on the current page only. Handing this page-local array
    // (rather than the shared one) to the next step keeps the inner loop from
    // re-visiting posts collected from earlier pages.
    const pagePosts = [];

    async.waterfall([

      // Step 1: collect the posts listed on this page.
      callback => {
        request(link, (err, response, body) => {
          console.log(`working on ${link}`);

          // Stop this page early instead of parsing a missing body.
          if (err) {
            callback(err);
            return;
          }

          const $ = cheerio.load(body);

          $('.blogpost').each((i, element) => {
            // build post object
            // (the code deriving `content` — and presumably a per-post `link` —
            // is elided in this snippet; as written, `link` is the page link)
            const post = {
              content,
              link,
              comments: [],
            };
            pagePosts.push(post);
            arrayOfPosts.push(post);
          });

          callback(null, pagePosts);
        });
      },

      // Step 2: fetch each post of this page and collect its comments.
      (arrOfPosts, postDetailsCallback) => {
        // Restarts at 1 for every page; logged once per fetched post.
        let counter = 1;

        async.eachSeries(arrOfPosts, (post, eachSeriesCallback) => {
          request(post.link, (err, response, body) => {
            if (err) {
              eachSeriesCallback(err);
              return;
            }

            console.log(counter++);
            const $ = cheerio.load(body);

            $('.comment').each((i, element) => {
              // build comment (derivation of `author`/`content` elided in this snippet)
              const comment = {
                author,
                content,
              };

              post.comments.push(comment);
            });

            eachSeriesCallback(null);
          });
        }, postDetailsCallback);
      },
    ], err => {
      console.log('DONE PAGE');
      console.log('*************************');
      // Forward any error so the outer loop stops and reports it.
      topLVLcb(err);
    });
  }, err => {
    // The completion callback receives the error as its first argument.
    if (err) {
      throw err;
    }

    console.log('DONE ALL');
    cb(arrayOfPosts);
  });
}