木偶:setDefaultNavigationTimeout设置为0仍然超时

时间:2019-03-12 01:11:24

标签: javascript puppeteer

我每次运行此脚本都超时。 setDefaultNavigationTimeout真的可以防止超时吗?

我正在浏览大约26个URL,每个页面上都有大量图像。无法想象Puppeteer不能仅仅因为图像重而无法处理这些页面?

const url = 'test.com';
const jsonReturn = [];


async function runScraper() {

  const browser = await puppeteer.launch(prodConfig);

  const page = await browser.newPage({
    timeout: 0
  });

  page.setDefaultNavigationTimeout(0);
  await page.goto(url, { waitUntil: 'domcontentloaded' });
  await page.waitForSelector('.featured-shows-featured-show');

  let featuredShowsURLs = await page.$$eval('.featured-shows-featured-show > a', (links) => {
    return links.map(link => {
      return link.href;
    });
  });


  featuredShowsURLs = _.uniq(featuredShowsURLs)

  for (const featuredShowsURL of featuredShowsURLs) {
    const page = await browser.newPage({
      timeout: 0
    });
    try {
      await page.goto(featuredShowsURL);
      await page.waitForSelector('.show-title');
    } catch (e) {
      featuredShowsURL;
      debugger;
    }

    const showTitle = await findAndReturnSelectorText('.show-title', page);
    const showDates = await findAndReturnSelectorText('.show-dates', page);
    const showLocation = await findAndReturnSelectorText('.show-location', page);
    const showGallery = await findAndReturnSelectorText('.entity-link', page);
    const showDetail = await findAndReturnSelectorText('.show-press-release', page);

    const newItem = {
      showTitle,
      showDates,
      showLocation,
      showGallery,
      showDetail,
    };

    const id = hash(newItem);

    jsonReturn.push({
      ...newItem,
      id
    });
  }

  await browser.close();
}

runScraper();

0 个答案:

没有答案