如何使用chrome-remote-interface获取iframe内容?

时间:2017-08-10 15:52:41

标签: javascript html google-chrome iframe chrome-remote-debugging

我正在构建一个抓取工具,需要抓取 iframe 中的内容,但 chrome-remote-interface 不会转储 iframe 的内容。有什么办法可以做到吗?

代码

     // Asker's attempt: open a new target (tab) for `url`, attach a DevTools
     // client to it, wait for the load event, then evaluate an expression in the
     // page and hand the result to `cb`. `url` and `cb` are not defined in this
     // excerpt - presumably they come from an enclosing function (TODO confirm).
     CDP.New({'url':url},(err,target) => {
                // Only proceed when the target was created; there is no else branch in this excerpt.
                if(!err){
                    // Attach a client to the freshly created target.
                    CDP({target},(client) => {
                        const {Network, Page, Runtime} = client;
                        Network.setUserAgentOverride({'userAgent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36'});
                        // The calls below are fired without awaiting their results.
                        Network.enable();
                        Page.enable();
                        Runtime.enable();
                        Page.navigate({url});
                            // Runs when the page's load event is reported.
                            Page.loadEventFired(() => {
                                Runtime.evaluate({
//I have no idea what to do ..
                                    // NOTE(review): this evaluates to a DOM node, not a string;
                                    // an HTML string would need something like `.outerHTML` - confirm.
                                    expression:'document.documentElement',
                                    // NOTE(review): the second answer on this page uses `returnByValue`;
                                    // `returnValue` is presumably not a Runtime.evaluate option - confirm
                                    // against the protocol reference.
                                    returnValue:true})
                                    .then(({result}) => {
                                        // Report the evaluated value, then close the target opened above.
                                        cb(null,{html:result.value})
                                        CDP.Close({id:target.id})
                                    })
                            })


                    })

2 个答案:

答案 0 :(得分:1)

我认为这是不被允许的。你可以改用 DOM.getDocument 来获取整棵 DOM 树,不过无法直接把它转换成 HTML 字符串:

// Fetch the document's root node with these options: per the DevTools Protocol
// reference, `depth: -1` requests the entire subtree and `pierce: true` also
// traverses iframes and shadow roots. `DOM` is presumably destructured from the
// connected client; `await` requires an async function or an ES module.
const {root} = await DOM.getDocument({depth: -1, pierce: true});

答案 1 :(得分:0)

使用Chrome启动器

https://www.npmjs.com/package/chrome-launcher

如果你使用chromeLauncher并传入'--disable-web-security'标志,这实际上很容易做到。以下是如何进行设置的示例。

const chromeLauncher = require('chrome-launcher');
const CDP = require('chrome-remote-interface');


/**
 * Launch a Chrome instance through chrome-launcher.
 *
 * Chrome is started with `--disable-web-security` so that scripts evaluated
 * in the top-level page may reach into cross-origin iframes. That flag turns
 * off the same-origin policy, so only use it for trusted, throw-away sessions.
 *
 * @returns {Promise<object>} Resolves with whatever `chromeLauncher.launch`
 *   resolves with (the caller reads its `port` and calls its `kill()`).
 * @throws Re-throws the launch error after logging it. Previously the error
 *   was swallowed and `undefined` was returned, which made callers fail later
 *   with an unrelated TypeError on `chrome.port`.
 */
const launchChrome = async () => {
  console.log('launchChrome..');
  try {
    return await chromeLauncher.launch({
      chromeFlags: [
        '--disable-web-security', // Query within iframes
      ],
      logLevel: 'error'
    });
  } catch (e) {
    console.log('Error launching chrome: ' + e);
    // Propagate: a missing browser is not something callers can continue from.
    throw e;
  }
};

/**
 * Launch Chrome, attach a DevTools client to it and enable the domains the
 * scraper needs.
 *
 * @returns {Promise<{chrome: object, protocol: object, Page: object, Runtime: object}>}
 *   `chrome` is the launcher handle, `protocol` the connected client, and
 *   `Page` / `Runtime` are the client's domains of the same name.
 * @throws {Error} If Chrome did not launch, or if connecting / enabling the
 *   domains fails. In the latter case the client (when connected) is closed
 *   and Chrome is killed before the error is re-thrown, so no browser process
 *   is left behind.
 */
const initChrome = async () => {
  console.log('initChrome..');
  const chrome = await launchChrome();
  if (!chrome) {
    // Guard against a launcher that resolved without a handle.
    throw new Error('Chrome failed to launch');
  }

  let protocol;
  try {
    protocol = await CDP({port: chrome.port});

    const {Page, Runtime, Network} = protocol;
    const userAgent = 'Mozilla/5.0 (X11; Linux x86_64)AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.39 Safari/537.36';
    await Promise.all([Page.enable(), Runtime.enable(), Network.setUserAgentOverride({userAgent})]);

    return {chrome, protocol, Page, Runtime};
  } catch (err) {
    // Setup failed half-way: release what was acquired, but never let a
    // cleanup failure hide the original error.
    if (protocol) {
      try {
        await protocol.close();
      } catch (closeErr) {
        console.log(closeErr);
      }
    }
    try {
      await chrome.kill();
    } catch (killErr) {
      console.log(killErr);
    }
    throw err;
  }
};

/**
 * Demo entry point: start Chrome, load a page, evaluate a query that targets
 * an iframe in the page, print the result, then shut everything down.
 *
 * Errors raised while navigating or evaluating are logged, not re-thrown.
 * A failure in `initChrome()` itself still rejects the returned promise.
 *
 * @returns {Promise<void>} Resolves once the client is closed and Chrome is killed.
 */
const run = async () => {

  const {chrome, protocol, Page, Runtime} = await initChrome();

  try {

    // Subscribe to the load event BEFORE navigating: if the page loads before
    // the listener is registered, the event is missed and the await never settles.
    const loaded = Page.loadEventFired();
    await Page.navigate({url: 'https://www.example.com/'});
    await loaded;

    // With web security disabled at launch, page scripts can reach into iframes.
    // NOTE(review): a DOM node is presumably not serialisable by value; to get
    // markup, evaluate a string expression such as
    // `document.querySelector('iframe').contentDocument.documentElement.outerHTML` - confirm.
    console.log(await Runtime.evaluate({expression: `document.querySelector('iframe')`, returnByValue: true}));

    console.log('..Finished');
  } catch (err) {
    console.log(err);
  } finally {
    // Always release the socket and the browser, and wait for both, so the
    // process does not exit (or continue) with a half-closed session.
    try {
      await protocol.close();
    } catch (closeErr) {
      console.log(closeErr);
    }
    try {
      await chrome.kill();
    } catch (killErr) {
      console.log(killErr);
    }
  }
};