我在 Node.js 中使用 async 和 Promise 时遇到了问题：我想在 for 循环中配合网页抓取器（web scraper）依次访问多个网站。看了几篇文章并在 Stack Overflow 上测试了不同的解决方案后，我仍然无法让我的异步函数正常工作。谢谢！
代码:
// Module-level result store: scrape() adds one entry per parsed table row and
// run() serializes the whole object.
var data = {};
/**
 * Scrapes every configured link concurrently and returns the collected data.
 *
 * Each link is handed to `scrape`; once all of those calls have settled the
 * shared `data` object is serialized, logged and returned.
 *
 * @returns {Promise<string>} JSON text of the shared `data` object.
 */
async function run() {
  console.log("Setup links..");
  const links = ['https://example1.com', 'https://example2.com'];

  // Start one job per link up front so they all run concurrently.
  const jobs = links.map(async (element) => {
    await scrape(element);
    console.log(`After call in Promise: ${JSON.stringify(data)}`);
  });
  await Promise.all(jobs);

  console.log("------------");
  const serialized = JSON.stringify(data);
  console.log(serialized);
  return serialized;
}
/**
 * Requests `element` and copies the rows of its HTML table into the shared
 * `data` object, keyed by the row label with spaces removed.
 *
 * Nothing in here waits for `request` to finish: the function body only starts
 * the request, so the promise returned by this async function resolves (with
 * undefined) before the callback below has run.
 *
 * @param {string} element - URL handed to `request`.
 * @returns {Promise<undefined>} Resolves as soon as the request has been started.
 */
async function scrape(element) {
  // Cells with one of these texts open a new section and hold no values.
  const sectionHeadings = [
    "Current Assets",
    "Current Liabilities",
    "Stockholders' Equity",
    "Revenue",
    "Operating Expenses",
    "Income from Continuing Operations",
    "Non-recurring Events",
    "Net Income",
  ];

  request(element, (error, response, html) => {
    console.log("Scrape website...");
    // Failed requests and non-200 responses are ignored without any report.
    if (error || response.statusCode != 200) {
      return;
    }

    const $ = cheerio.load(html);
    const years = [];
    let pendingValues = [];
    let row = 0;
    let column = 0;
    let rowLabel;

    $('tr td').each((index, cell) => {
      const text = $(cell).text().trim();
      let sawHeading = false;

      if (row == 0) {
        // Header row: keep the last four characters of every cell but the first.
        if (column != 0) {
          years.push(text.slice(-4));
        }
      } else if (sectionHeadings.includes(text)) {
        sawHeading = true;
      } else if (column == 0) {
        rowLabel = text.split(' ').join('');
        data[rowLabel] = {};
      } else {
        pendingValues.push(text);
      }

      // A section heading restarts the column count instead of advancing it.
      column = sawHeading ? 0 : column + 1;
      if (column != 5) {
        return;
      }

      // Five cells seen: the row is complete.
      column = 0;
      if (row != 0) {
        for (let k = 0; k < 4; k++) {
          data[rowLabel][k] = pendingValues[k];
        }
        pendingValues = [];
      }
      row++;
    });
  });
}
// Export run so that other modules can call it.
module.exports.run = run;
上面代码的控制台输出：
Server started!
Route called
Setup links..
After call in Promise: {}
After call in Promise: {}
------------
{}
Scrape website...
Scrape website...
可见，在循环中使用 Promise 时存在问题：在抓取回调执行之前，Promise.all 就已经完成了。
答案 0 :(得分:0)
我相信这就是您想要的（未经测试，只是随手修改的）：
/** Cell texts that open a new section of the table and carry no values themselves. */
const SECTION_HEADINGS = new Set([
  "Current Assets",
  "Current Liabilities",
  "Stockholders' Equity",
  "Revenue",
  "Operating Expenses",
  "Income from Continuing Operations",
  "Non-recurring Events",
  "Net Income",
]);

/** Cells per table row: one label cell followed by the value cells. */
const CELLS_PER_ROW = 5;

/** Number of value cells stored for each row label. */
const VALUES_PER_ROW = 4;

/**
 * Walks every `tr td` cell of a loaded page and copies each labelled row into
 * the shared `data` object as `data[label][0..3]`.
 *
 * @param {Function} $ - Cheerio instance returned by `cheerio.load`.
 */
function collectTableRows($) {
  let rowCounter = 0;
  let columnCounter = 0;
  let item = [];
  let title;

  $('tr td').each((i, elem) => {
    const txt = $(elem).text().trim();
    let isSectionHeading = false;

    // Row 0 is the header row: its cells are only counted, never stored.
    if (rowCounter !== 0) {
      if (SECTION_HEADINGS.has(txt)) {
        isSectionHeading = true;
      } else if (columnCounter === 0) {
        title = txt.split(' ').join('');
        data[title] = {};
        // Drop values left over from an incomplete previous row so they
        // cannot be attributed to this label.
        item = [];
      } else {
        item.push(txt);
      }
    }

    // A section heading occupies a line of its own, so it restarts the
    // column count instead of advancing it.
    columnCounter = isSectionHeading ? 0 : columnCounter + 1;
    if (columnCounter !== CELLS_PER_ROW) {
      return;
    }

    columnCounter = 0;
    if (rowCounter !== 0) {
      for (let k = 0; k < VALUES_PER_ROW; k++) {
        data[title][k] = item[k];
      }
      item = [];
    }
    rowCounter++;
  });
}

/**
 * Fetches one page and copies its table rows into the shared `data` object.
 *
 * The callback-based `request` call is adapted to a promise so that callers
 * can `await` the moment the page has actually been parsed.
 *
 * @param {string} element - URL of the page to scrape.
 * @returns {Promise<void>} Resolves once the page has been fetched and parsed.
 *   Rejects with the transport error reported by `request`, with an `Error`
 *   naming the status code when the response is not 200, or with whatever
 *   error parsing the page throws.
 */
function scrape(element) {
  return new Promise((resolve, reject) => {
    request(element, (error, response, html) => {
      if (error) {
        reject(error);
        return;
      }
      if (response.statusCode !== 200) {
        reject(new Error(`Got HTTP code: ${response.statusCode}`));
        return;
      }
      console.log("Scrape website...");
      // This callback runs outside the promise executor, so a throw here would
      // otherwise be an uncaught exception and leave the promise pending.
      try {
        collectTableRows(cheerio.load(html));
        resolve();
      } catch (parseError) {
        reject(parseError);
      }
    });
  });
}
将代码包装在一个 `Promise` 中，完成时调用 `resolve`，并使用 `reject` 处理错误——不过如何处理这些错误，您自己最清楚。