如何在puppeteer的page.evaluate中使用url模块

时间:2019-02-24 18:18:56

标签: javascript node.js url puppeteer

我已经尝试了“Error: Evaluation Failed: ReferenceError: util is not defined”和“How to pass required module object to puppeteer page.evaluate”这两个问题中提到的所有方法。具体来说,我尝试使用browserify转换url.js(也尝试过将url.js和punycode.js一起转换),并已将生成的脚本(bundle.js)添加到页面环境中。

我试图在puppeteer的page.evaluate()内部使用url模块。这是显示错误的非常简单的示例:

const puppeteer = require('puppeteer');

// Minimal reproduction of the problem: open a page, inject the browserify
// output (bundle.js) with addScriptTag, then try to call url.format() from
// inside the page.evaluate() callback.
puppeteer.launch({dumpio: true}).then(async browser => {
  const page = await browser.newPage();
  const response = await page.goto('https://www.google.com');
  await page.waitFor(5000);
  // Captured on the Node side and handed to the callback as its argument.
  const pageUrl = page.url();
  // bundle.js is added to the page before the callback is evaluated.
  await page.addScriptTag({path: 'bundle.js'});
  // NOTE: comments inside this callback are kept trailing so that the
  // line/column reported in the quoted stack trace (11:19) still maps to the
  // url.format() call below.
  await page.evaluate(pageUrl => {
    const anchors = Array.from(document.querySelectorAll('a'));
    for (let anchor of anchors) {
      const href = anchor.getAttribute('href');
      let hrefUrl;
      try {
        hrefUrl = new URL(href); // first attempt: treat href as an absolute URL
      } catch (e) {
        hrefUrl = new URL(href, pageUrl); // fallback: resolve href against the page URL
      }
      console.log(url.format(hrefUrl, {fragment: false})); // line 11, column 19 of the callback: `url` is the identifier in question
    }
  }, pageUrl);
  await page.close();
  await browser.close();
});

此示例生成以下错误:

  

(node:23667) UnhandledPromiseRejectionWarning: Error: Evaluation failed: ReferenceError: url is not defined
    at pageUrl (__puppeteer_evaluation_script__:11:19)
    at ExecutionContext.evaluateHandle (/home/webb/node_modules/puppeteer/lib/ExecutionContext.js:97:13)
    at <anonymous>
    at process._tickCallback (internal/process/next_tick.js:188:7)

我还需要做些什么才能使url模块被识别?

2 个答案:

答案 0 :(得分:2)

带有page.exposeFunction()的变体形式:

'use strict';

const url = require('url');
const puppeteer = require('puppeteer');

// Launch a browser, expose the Node-side formatURL() helper to the page under
// the name `formatURL`, then log the formatted URL of every anchor on the page.
puppeteer.launch({ dumpio: true }).then(async (browser) => {
  // try/finally guarantees browser.close() runs even when navigation or the
  // in-page evaluation rejects; the rejection itself still propagates.
  try {
    const page = await browser.newPage();
    // Registered before the evaluate() call below, which invokes it from the page.
    await page.exposeFunction('formatURL', formatURL);

    await page.goto('https://www.google.com');
    await page.waitFor(5000);
    const pageUrl = page.url();

    await page.evaluate(async (pageUrl) => {
      const anchors = Array.from(document.querySelectorAll('a'));
      for (const anchor of anchors) {
        const href = anchor.getAttribute('href');
        // The exposed helper is awaited from the page; the URL formatting
        // itself is done by the Node-side formatURL().
        const hrefUrl = await formatURL(href, pageUrl);
        console.log(hrefUrl);
      }
    }, pageUrl);

    await page.close();
  } finally {
    await browser.close();
  }
});

/**
 * Turn an anchor's `href` into an absolute URL string with the fragment removed.
 *
 * @param {?string} href - Raw `href` attribute value, absolute or relative.
 * @param {string} base - URL that `href` is resolved against when it is not
 *   absolute on its own.
 * @returns {?string} The formatted URL without its `#fragment`, or `null` when
 *   `href` is `null`/`undefined` (an anchor that has no `href` attribute).
 * @throws {TypeError} When `href` cannot be parsed even relative to `base`.
 */
function formatURL(href, base) {
  // A missing href would otherwise be stringified to "null"/"undefined" by the
  // URL constructor and resolved as a relative path such as "<base>/null".
  if (href == null) {
    return null;
  }

  let parsed;
  try {
    // Absolute hrefs parse without needing the base.
    parsed = new URL(href);
  } catch (err) {
    // Not absolute: resolve against the base instead.
    parsed = new URL(href, base);
  }
  return url.format(parsed, { fragment: false });
}

答案 1 :(得分:1)

使用page.exposeFunction公开url包中的所有函数。

遍历模块的导出并添加每个函数以公开

var url = require('url');

// Collect every function-valued export of the url module, pairing each one
// with the name it will be exposed under: the export key prefixed with "url".
const functionsToExpose = Object.keys(url)
    .filter((exportName) => typeof url[exportName] === 'function')
    .map((exportName) => ({name: `url${exportName}`, func: url[exportName]}));

将其暴露在页面上

// Register each collected function on the page under its prefixed name,
// awaiting one registration before starting the next.
for (const {name, func} of functionsToExpose) {
    await page.exposeFunction(name, func);
}

url包中的每个函数都会以新名称公开(在原名称前加上“url”前缀),例如url.parse可通过urlparse访问。