在nodejs上异步等待的小问题

时间:2020-06-09 18:05:30

标签: node.js asynchronous async-await cheerio

我知道问题出在异步等待中。基本上在下面的代码中,行80在行76之前运行,因此返回空值。我该如何纠正?

我基本上是在使用cheerio及其它构建的刮板,当从刮板加载数据需要时间时,就会发生此问题。

https://github.com/galdiatorocks/scraper/blob/master/server.js

const request = require('request');
const cheerio = require('cheerio');
const mongoose = require("mongoose");

mongoose.connect(process.env.DB, {
    useNewUrlParser: true,
    useFindAndModify: false,
    useCreateIndex: true,
    useUnifiedTopology: true
  });
  mongoose.Promise = global.Promise;
  let db = mongoose.connection;
  db.on("error", console.error.bind(console, "MongoDB connection error:"));


scrapeItems();

function scrapeItems(){

    const LeaderModel = require("./leader_model");

    LeaderModel.find({}, (err,doc) => {
        if(err) {console.error(err)};

        doc.forEach(async (data) => {

            data.booksReco = await readPage(data.storyLink, (error, dat) => {
                if(err)  console.error(error);
                console.log(dat);
            });
            //console.log(data.booksReco);
        })
    })


}

function readPage(URL){
    request(URL, async function(err, response, body){

        if (err) console.error(err);

        //console.log(err, response, body);
        let $ = cheerio.load(body);
        let booksReco = [];


        await $('#page tr').each(async (roll, data) => {

            let bookdetail = {};
            const bookImgPath = await $(data)  .find('.kniga')
                                                    .children('a')
                                                    .children('img')
                                                    .attr('src');
            if(bookImgPath){
                bookdetail.bookImgPath = bookImgPath;

                const detail = $(data).find('.opisanie')

                bookdetail.bookName = detail.children('a').text().split(/\sby\s/);
                bookdetail.bookAuthor = bookdetail.bookName[1];
                bookdetail.bookName = bookdetail.bookName[0];
                bookdetail.amazonLink = detail.children('a').attr('href');
                bookdetail.leaderComment = detail.children('.quote2').text();

                if(!bookdetail.leaderComment){
                    bookdetail.leaderCommentImg = detail.children('.quote2').children('img').attr('src');
                    bookdetail.whereRecommended = detail.children('.pho').text();
                } else {
                    bookdetail.whereRecommended = detail.children('.recom').text();
                }

                bookdetail.bookDesc = detail.children('.description').text();

                console.log(bookdetail.bookName, bookdetail.bookAuthor);
                booksReco.push(bookdetail);
                console.log(booksReco);
            }
        });
        console.log(booksReco);
        return(booksReco);

    })
}

3 个答案:

答案 0 :(得分:0)

Cheerio的.each()同步运行,无需使用async/await。您可以轻松做到:

$('#page tr').each((roll, data) => {

    let bookdetail = {};
    const bookImgPath = $(data).find('.kniga')
        .children('a')
        .children('img')
        .attr('src');
...
});

request(URL, async function(err, response, body){也行不通,因为这里没有返回您可以等待的承诺。您可以通过将请求包装在Promise中来解决此问题:

function readPage(URL){
    return new Promise((resolve,reject) => {
        request(URL, (err, response, body) => {
            if(err) reject(err);
            // do your cheerio logic here and resolve the result
            resolve(booksReco);
        });
});

答案 1 :(得分:0)

您应该使用map而不是each

let booksReco = $('#page tr').map((roll, data) => {
  let bookdetail = {};
  // more stuff
  return bookdetail
}).get()
console.log(booksReco);

答案 2 :(得分:0)

所以调用函数readPage时必须使用async / await / promise。

最后使用它来解决问题


    if (roll == elements.length - 1) {
                 resolve(booksReco);
                }

感谢大家的帮助:)