puppeteer:如何调用动作等待page.goto(`$ {url}`);在.map((index,element)=> {

时间:2019-12-11 11:17:03

标签: puppeteer

我正在从网页上抓取数据,分页也有效。 我有一张桌子。 对于每个tr,都有位置,标题和URL。

在函数.map((index,element)=> {我想为每个tr调用“ async url => {await page.goto(${url});”转到详细信息,从详细信息中抓取数据页面并返回列表。

我该怎么做? (在.map()中调用异步)

      const puppeteer = require("puppeteer");
      const cheerio = require("cheerio");


      async function scrapeListings(page) {
     await page.goto(
       "https://www.example.com/podcast-detail/nth28-2ef41/99%25-Invisible- 
      Podcast",
        { waitUntil: ["load", "domcontentloaded", "networkidle0"] }
    );

     let moreResults = true;

     while (moreResults) {
     console.log(moreResults);
      try {
      const html = await page.content();
      const $ = cheerio.load(html);
      const listings = $("#yw0 > table > tbody > tr")
        .map((index, element) => {
          const position = $(element)
            .find("td.id.tc")
            .text();
          const title = $(element)
            .find("a.title.listen-now")
            .text();
          const url = $(element)
            .find("a")
            .attr("href");
          const datetime = $(element)
            .find("span.datetime")
            .text();
          async url => {
            await page.goto(`${url}`);
            //Open URL for details
            //SCRAPE Details
            //return value and store to listing
          };
          return { position, title, datetime, url };
        })
        .get();

      await sleep(1000);
      element = await page.$x(`//a[contains(text(),'→')]`);
      await element[0].click();
      console.log(listings);
      //await page.waitForNavigation();
    } catch (error) {
      moreResults = false;
      }
      }
     }

     async function sleep(miliseconds) {
     return new Promise(resolve => setTimeout(resolve, miliseconds));
      }

    async function main() {
     //await connectToMongoDb();

    const browser = await puppeteer.launch({ headless: false });
    const page = await browser.newPage();

     await page.setViewport({ width: 1920, height: 1057 });

     listings = await scrapeListings(page);

    console.log(listings);
     //await browser.close();
   }

    main();

2 个答案:

答案 0 :(得分:0)

我尝试将您的脚本重新编码为仅使用Puppeteer。无需加油。

spring.transaction.default-timeout

答案 1 :(得分:0)

如果有人需要单击链接并打开新页面而不是调用url,则此代码为: 也许有些人有更好的解决方案,可以自由分享,谢谢

const getPodcast = async url => {
    const pageTarget = page.target();
    await page.click(".table > .items > tr > td > .title");
    const newTarget = await browser.waitForTarget(
    target => target.opener() === pageTarget
    );
    const pagePod = await newTarget.page();
    console.log("current page count ", (await browser.pages()).length);
    //const pagePod = await browser.newPage();
    //const openPod = await pagePod.goto(url, {  waitUntil: `networkidle0`,   
    timeout: 0 });