我正在尝试重写我的代码以正确使用promises。
整个程序应该从T恤网站上抓取数据。第一个代码块应该进入站点的首页,获取可立即使用的产品页面,然后将URL存储在一个数组中。剩余的URL将存储在'remainder'中,以便稍后执行secondScrape。
目前手动对每个部分进行单元测试:
//TASK: Create a command line application that goes to an ecommerce site to get the latest prices.
//Save the scraped data in a spreadsheet (CSV format).
//Modules being used:
var cheerio = require('cheerio');
var request = require('request');
//harcoded url
var url = 'http://shirts4mike.com/';
//url for tshirt pages
var urlSet = new Set();
var remainder;
const requestPromise = function(url) {
return new Promise(function(resolve, reject) {
request(url, function(error, response, html) {
if(error) return reject(error);
if(!error && response.statusCode == 200){
return resolve(html);
}
});
});
}
function firstScrape (url) {
return requestPromise(url)
.then(function(html) {
var $ = cheerio.load(html);
var links = [];
//get all the links
$('a[href*=shirt]').each(function(){
var a = $(this).attr('href');
//add into link array
links.push(url + a);
});
return links;
// return this array you've made
});
}
function nextStep (arrayOfLinks) {
var promiseArray = [];
for(var link in arrayOfLinks){
promiseArray.push(requestPromise(link));
return Promise.all(promiseArray);
}
}
function lastStep (arrayOfHTMLresults){
for(var html in arrayOfHTMLresults){
var $ = cheerio.load(html);
//if page has a submit it must be a product page
if($('[type=submit]').length !== 0){
//add page to set
urlSet.add(scrapeLink);
} else if(remainder == undefined) {
//if not a product page, add it to remainder so it another scrape can be performed.
remainder = scrapeLink;
}
}
console.log(urlSet);
console.log(remainder);
}
firstScrape(url)
.then(nextStep)
.then(lastStep);
我目前收到以下错误:
(node:71094) UnhandledPromiseRejectionWarning: Unhandled promise rejection (rejection id: 3): Error: Invalid URI "0"
这是我试图宣传的代码:
// Load front page of shirts4mike
function firstScrape(){
request(url, function(error, response, html) {
if(!error && response.statusCode == 200){
var $ = cheerio.load(html);
//iterate over links with 'shirt'
$('a[href*=shirt]').each(function(){
var a = $(this).attr('href');
//create new link
var scrapeLink = url + a;
//for each new link, go in and find out if there is a submit button.
//If there, add it to the set
request(scrapeLink, function(error,response, html){
if(!error && response.statusCode == 200) {
var $ = cheerio.load(html);
//if page has a submit it must be a product page
if($('[type=submit]').length !== 0){
//add page to set
urlSet.add(scrapeLink);
} else if(remainder == undefined) {
//if not a product page, add it to remainder so it another scrape can be performed.
remainder = scrapeLink;
}
}
});
});
}
});
}
我无法解决的问题是,当urlSet.add(scrapeLink);
不知道lastStep()
是什么时,我如何使用scrapeLink
?
知道为什么吗?谢谢
答案 0 :(得分:2)
.add()
不是Array.prototype
方法,您还return
promiseArray
循环for
而不是将Promise
推送到{{1}并使用promiseArray
Promise.all()
答案 1 :(得分:1)
由于问题更改而更新:
所以从firstScrape()
你可以返回一个结果对象,而不仅仅是一个链接数组:
return { scrapeLink: link, result: links }
然后,您可以在nextStep()
中获得该作为承诺的结果,您可以再次返回具有相同形状的内容:
return { scrapeLink: firstStepResult.scrapLink, result: Promise.all(promiseArray) }
然后在lastStep()
而不是arrayOfHTMLresults
传入,你会得到一个看似如下的对象:
{ scrapeLink: "http://someurl.com", result: arrayOfHTMLresults }
上一个回答:
您需要在for ... in循环中初始化变量。例如使用const
,var
或let
取决于您的使用案例和JS版本。
for(var link in arrayOfLinks){
promiseArray.add(requestPromise(link));
return promiseArray;
}