我知道问题出在异步等待中。基本上在下面的代码中,行80在行76之前运行,因此返回空值。我该如何纠正?
我基本上是在使用cheerio及其它构建的刮板,当从刮板加载数据需要时间时,就会发生此问题。
https://github.com/galdiatorocks/scraper/blob/master/server.js
const request = require('request');
const cheerio = require('cheerio');
const mongoose = require("mongoose");
mongoose.connect(process.env.DB, {
useNewUrlParser: true,
useFindAndModify: false,
useCreateIndex: true,
useUnifiedTopology: true
});
mongoose.Promise = global.Promise;
let db = mongoose.connection;
db.on("error", console.error.bind(console, "MongoDB connection error:"));
scrapeItems();
function scrapeItems(){
const LeaderModel = require("./leader_model");
LeaderModel.find({}, (err,doc) => {
if(err) {console.error(err)};
doc.forEach(async (data) => {
data.booksReco = await readPage(data.storyLink, (error, dat) => {
if(err) console.error(error);
console.log(dat);
});
//console.log(data.booksReco);
})
})
}
function readPage(URL){
request(URL, async function(err, response, body){
if (err) console.error(err);
//console.log(err, response, body);
let $ = cheerio.load(body);
let booksReco = [];
await $('#page tr').each(async (roll, data) => {
let bookdetail = {};
const bookImgPath = await $(data) .find('.kniga')
.children('a')
.children('img')
.attr('src');
if(bookImgPath){
bookdetail.bookImgPath = bookImgPath;
const detail = $(data).find('.opisanie')
bookdetail.bookName = detail.children('a').text().split(/\sby\s/);
bookdetail.bookAuthor = bookdetail.bookName[1];
bookdetail.bookName = bookdetail.bookName[0];
bookdetail.amazonLink = detail.children('a').attr('href');
bookdetail.leaderComment = detail.children('.quote2').text();
if(!bookdetail.leaderComment){
bookdetail.leaderCommentImg = detail.children('.quote2').children('img').attr('src');
bookdetail.whereRecommended = detail.children('.pho').text();
} else {
bookdetail.whereRecommended = detail.children('.recom').text();
}
bookdetail.bookDesc = detail.children('.description').text();
console.log(bookdetail.bookName, bookdetail.bookAuthor);
booksReco.push(bookdetail);
console.log(booksReco);
}
});
console.log(booksReco);
return(booksReco);
})
}
答案 0 :(得分:0)
Cheerio的.each()
同步运行,无需使用async/await
。您可以轻松做到:
$('#page tr').each((roll, data) => {
let bookdetail = {};
const bookImgPath = $(data).find('.kniga')
.children('a')
.children('img')
.attr('src');
...
});
做request(URL, async function(err, response, body){
也行不通,因为这里没有返回您可以等待的承诺。您可以通过将请求包装在Promise中来解决此问题:
function readPage(URL){
return new Promise((resolve,reject) => {
request(URL, (err, response, body) => {
if(err) reject(err);
// do your cheerio logic here and resolve the result
resolve(booksReco);
});
});
答案 1 :(得分:0)
您应该使用map
而不是each
:
let booksReco = $('#page tr').map((roll, data) => {
let bookdetail = {};
// more stuff
return bookdetail
}).get()
console.log(booksReco);
答案 2 :(得分:0)
所以调用函数readPage时必须使用async / await / promise。
最后使用它来解决问题
if (roll == elements.length - 1) {
resolve(booksReco);
}
感谢大家的帮助:)