我正在使用 Express 和 Request 编写一个网页抓取器(scraper)。我用 for 循环遍历一个 URL 数组,并把每个 URL 传给 request。当所有请求都已完成、数据全部解析完毕后,回调函数会调用 res.send。
我想把这段代码改写成使用 Promise 的形式,但我认为是我使用的 for 循环妨碍了这一点。如果问题确实出在循环上,是否有其他写法可以得到相同的结果?
回调方法
/**
 * Requests every URL in the outer `urls` array and scrapes product data from
 * each page that comes back with HTTP status 200.
 *
 * For every such page a record `{ MPN, Name, PriceList }` is appended to the
 * outer `list` array. The outer `count` is incremented and `callback` is
 * invoked once per finished request (successful or not), so the caller can
 * compare `count` with `urls.length` to detect completion.
 *
 * Relies on `urls`, `list`, `count`, `request` and `cheerio` from the
 * enclosing scope.
 *
 * @param {Function} callback - Called with no arguments after each request finishes.
 */
function scrape(callback) {
  for (let i = 0; i < urls.length; i++) {
    request(urls[i], function (error, response, html) {
      if (!error && response.statusCode === 200) {
        // Load cheerio (jQuery-like API) on the web page
        const $ = cheerio.load(html);
        // Create the record before scraping so that a failure below still
        // pushes a well-formed (partially filled) object instead of `undefined`.
        const jsontemp = {"MPN": "", "Name": "", "PriceList": {}};
        try {
          jsontemp.Name = $(".name").text();
          // Throws when the node at index 6 of `.specs.block` is missing
          jsontemp.MPN = $(".specs.block").contents().get(6).nodeValue.trim();
          // Traverse the DOM to get tr tags and extract info
          $(".wide-table tbody tr").each(function (rowIndex, row) {
            const $row = $(row);
            const merchant = $row.attr("class").trim();
            jsontemp.PriceList[merchant] = $row.children(".total").text();
          });
        } catch (err) {
          // Include the error itself so the failure can be diagnosed
          console.log('Error occured during data scraping:', err);
        }
        list.push(jsontemp);
      } else {
        // `error` is null for a non-200 response, so report the status instead
        console.log(error || `Unexpected status code: ${response.statusCode}`);
      }
      count++;
      callback();
    });
  }
}
});
// Runs after every finished request: log the progress counter and send the
// collected list once every URL has been accounted for.
scrape(function () {
  console.log(count);
  if (count != urls.length) {
    return;
  }
  res.send(list);
});
使用 Promise 的实现尝试
// Pages to scrape.
const urls = [
  "http://test.com/",
  "http://test.com/2"
];
// Accumulates the scraped results; the binding itself is never reassigned.
const list = [];
// Counter of finished requests; incremented elsewhere, hence `let`.
let count = 0;
// Store whatever scrape() resolves with, reply once `count` matches the number
// of URLs, and log any rejection.
scrape()
  .then(function (data) {
    list.push(data);
    if (count == urls.length) {
      res.send(list);
    }
  })
  .catch(function (error) {
    console.log(error);
  });
// Attempted Promise-based version of scrape().
// NOTE(review): the `return` inside the for loop leaves scrape() during the
// first iteration, so only urls[0] is ever requested and a single promise
// (not one per URL) is returned.
function scrape(){
for(var i = 0; i < urls.length; i++){
return new Promise(function (resolve, reject) {
request(urls[i], function(error, response, html){
if(!error && response.statusCode == 200){
// Load the page into cheerio (jQuery-like API)
var $ = cheerio.load(html);
try{
var name = $(".name").text();
var mpn = $(".specs.block").contents().get(6).nodeValue.trim();
var jsontemp = {"MPN": "", "Name": "", "PriceList": {}};
jsontemp.MPN = mpn;
jsontemp.Name = name;
// Traverse the table rows and record each merchant's total
$(".wide-table tbody tr").each(function (i, row) {
var $row = $(row),
merchant = $row. attr("class").trim(),
total = $row.children(".total").text();
// The first assignment is immediately overwritten by the second one
jsontemp.PriceList[merchant] = merchant;
jsontemp.PriceList[merchant] = total;
});
}
catch(err){
// The caught error itself is not logged here
console.log('Error occured during data scraping:');
}
// `jsontemp` is assigned inside the try block, so this resolves with
// `undefined` if an exception was thrown before that assignment.
resolve(jsontemp);
}
else{
console.log(error);
// Returning here skips the `count++` below on the failure path
return reject(error);
}
// Runs after resolve() has already been called
count++;
});
// NOTE(review): the `new Promise(` call opened above is never closed with `);`
}
}
// NOTE(review): the closing brace of scrape() is missing from this snippet
答案 0 :(得分:2)
您需要把这些 Promise 存入一个数组,然后调用 Promise.all,
以获得一个在所有 Promise 都完成后才完成的单一 Promise:
/**
 * Creates one promise per entry of the outer `urls` array and combines them
 * into a single promise.
 *
 * @returns {Promise<Array>} Fulfills with an array of the individual results
 *   once every promise has fulfilled; rejects as soon as any of them rejects.
 */
function scrape() {
  const promises = []; // array of promises
  for (let i = 0; i < urls.length; i++) {
    // Block-scoped, so callbacks created in this iteration keep their own URL
    const url = urls[i];
    const promise = new Promise(function (resolve, reject) {
      // ... (request `url` here, then call resolve(result) or reject(error))
    });
    // add to array
    promises.push(promise);
  }
  // return a single promise with an array of the results
  // by using Promise.all
  return Promise.all(promises);
}
此外,在使用 var 时,不要在循环内部的函数里使用循环变量(如 i)。相反,您应该在 Promise 回调函数之外声明 url 变量,或者将 var 替换为较新的 let。