以下函数接收一组链接并扫描远程网站,每页收集 10 篇博文,然后使用 async.waterfall 逐一获取每篇博文的所有评论。
它提供如下输出:
// Walks the page links in `url` one at a time. For each page it builds one post
// object per `.blogpost` element, then issues one request per post and appends
// one comment object per `.comment` element. Finishes by calling cb(arrayOfPosts).
// @param {Array} url - page links, processed in series
// @param {Function} cb - invoked with the accumulated array of posts
export default function getData(url, cb) {
// Shared by every page: posts from all pages accumulate in this one array.
const arrayOfPosts = [];
// Outer loop: one page link at a time
async.eachSeries(url, (link, topLVLcb) => {
// Waterfall: step 1 collects posts, step 2 collects their comments
async.waterfall([
// Step 1: collects the posts found on the current page
callback => {
// NOTE(review): `err` is never checked, so `body` may be undefined on failure.
request(link, (err, response, body) => {
console.log(`working on ${link}`);
const $ = cheerio.load(body);
// Overall 10 links per page, according to the author
$('.blogpost').each((i, element) => {
// Build the post object.
// NOTE(review): `content` is not defined in the visible code, and `link` here
// is the page link from the enclosing scope; the per-element extraction
// appears to have been elided from the snippet, confirm against the real code.
const post = {
content,
link,
comments: []
}
arrayOfPosts.push(post);
});
// Passes the WHOLE accumulated array on, not just this page's posts, so the
// next step also re-processes every post gathered from earlier pages.
callback(null, arrayOfPosts);
});
},
// Step 2: fetches details (comments) for each post it was handed
(arrOfPosts, postDetailsCallback) => {
// Restarts at 1 for every page; only used for progress logging
let counter = 1;
// Inner loop over the posts received from step 1
async.eachSeries(arrOfPosts, (post, eachSeriesCallback) => {
// NOTE(review): `err` is ignored here as well.
request(post.link, (err, response, body) => {
console.log(counter++);
const $ = cheerio.load(body);
$('.comment').each((i, element) => {
// Build the comment object.
// NOTE(review): `author` and `content` are not defined in the visible code.
const comment = {
author,
content
};
post.comments.push(comment);
});
eachSeriesCallback(null);
});
}, postDetailsCallback);
}
], err => {
// Runs once per page, after both waterfall steps finish or one reports an error
console.log('DONE PAGE');
console.log('*************************');
topLVLcb(err);
});
// NOTE(review): async's final callback receives the error as its FIRST argument,
// so with this parameter order `err` is presumably always undefined and the
// throw below is never reached, confirm against the async documentation.
}, (result, err) => {
if (err) {
throw err;
} else {
console.log('DONE ALL');
cb(arrayOfPosts);
}
});
}
计数器每处理一页就多数 10 次,并且每次迭代都从头开始计数,而它本应对每一页只扫描 10 次。我想我是把某个回调弄错了,但已经花了好几个小时仍然找不出原因。这是我第一次写 Node.js 异步代码,感觉有些难以招架。
答案 0 :(得分:1)
看起来您把所有帖子都存进了顶层的 arrayOfPosts 中,这意味着在瀑布流的第二个函数里,您每次都会从头处理所有帖子,因为您传给回调的正是这个数组。因此,在收集帖子链接的第一个函数中,您应该使用一个局部的帖子数组,并把它传给下一个回调。
答案 1 :(得分:1)
arrayOfPosts 对每一次 async.waterfall 调用来说都是全局(共享)的。对于 url 中的每个元素,您都应该创建一个新的数组,如下所示:
/**
 * Scans each page link in series, builds one post object per `.blogpost`
 * element, then fetches every post of that page in series and attaches one
 * comment object per `.comment` element.
 *
 * @param {Array} url - Page links to scan; processed one at a time, in order.
 * @param {Function} cb - Called once with the array of all collected posts
 *   after every page has been processed successfully.
 * @throws Rethrows the first request/processing error from the final callback.
 *   That callback may run asynchronously, in which case a try/catch around
 *   the getData() call will not intercept it.
 */
export default function getData(url, cb) {
  // Accumulates the posts of every page; this is what the caller receives.
  const arrayOfPosts = [];

  // Outer loop: one page at a time.
  async.eachSeries(url, (link, topLVLcb) => {
    // Posts found on THIS page only. Handing this array (rather than the
    // global accumulator) to the next waterfall step keeps the inner loop
    // from re-processing posts that belong to earlier pages.
    const pagePosts = [];

    async.waterfall([
      // Step 1: collect the posts listed on the page.
      callback => {
        request(link, (err, response, body) => {
          console.log(`working on ${link}`);
          if (err) {
            // Abort this page instead of parsing an undefined body.
            callback(err);
            return;
          }
          const $ = cheerio.load(body);
          $('.blogpost').each((i, element) => {
            // NOTE(review): `content` is not defined in this snippet and
            // `link` resolves to the page link; the per-element extraction
            // looks elided, confirm against the real code.
            const post = {
              content,
              link,
              comments: [],
            };
            pagePosts.push(post);
            arrayOfPosts.push(post);
          });
          callback(null, pagePosts);
        });
      },
      // Step 2: fetch each post of this page and collect its comments.
      (arrOfPosts, postDetailsCallback) => {
        // Progress counter; restarts at 1 for every page.
        let counter = 1;
        async.eachSeries(arrOfPosts, (post, eachSeriesCallback) => {
          request(post.link, (err, response, body) => {
            console.log(counter++);
            if (err) {
              eachSeriesCallback(err);
              return;
            }
            const $ = cheerio.load(body);
            $('.comment').each((i, element) => {
              // NOTE(review): `author` and `content` are not defined in this
              // snippet; presumably extracted from `element`, confirm.
              const comment = {
                author,
                content,
              };
              post.comments.push(comment);
            });
            eachSeriesCallback(null);
          });
        }, postDetailsCallback);
      },
    ], err => {
      console.log('DONE PAGE');
      console.log('*************************');
      topLVLcb(err);
    });
  }, err => {
    // async passes the error as the FIRST argument of the final callback;
    // declaring it second meant failures were silently reported as success.
    if (err) {
      throw err;
    }
    console.log('DONE ALL');
    cb(arrayOfPosts);
  });
}