puppeteer在点击事件后识别元素内容

时间:2018-06-07 00:44:24

标签: selenium-chromedriver puppeteer

我正在尝试在输入查询并单击按钮后从页面中提取特定元素。该页面不会导航到新的URL:它只返回我需要提取的新HTML内容。

以下是我目前的进展:

const puppeteer = require('puppeteer');

/**
 * Build a promise that settles once the given delay has elapsed.
 *
 * @param {number} ms - Delay in milliseconds, handed straight to setTimeout.
 * @returns {Promise<undefined>} Resolves (with no value) after the delay.
 */
function timeout(ms) {
    return new Promise((done) => {
        setTimeout(done, ms);
    });
}

const input_val = 'some query text';

// Main flow of the question's attempt: launch a browser, open the target
// page, type the query, click the button, then try to print the text of
// every <strong> element and finally take a screenshot.
(async() => {
    const browser = await puppeteer.launch()
    const page = await browser.newPage()
    await page.goto('http://target.com', { waitUntil: 'networkidle2' })
    // Wait for the query input before interacting with it.
    await page.waitFor('input[name=query]')

    // This callback is evaluated in the page: it sets the input's value and
    // clicks the button. input_val is handed over as the extra argument.
    await page.evaluate((input_val) => {
      document.querySelector('input[name=query]').value = input_val;
      document.querySelector('.Button').click();
    }, input_val)

    // Goal: console.log the innerText of each <strong> element
    // (there will be 0-3 matching elements).
    // The lines below express the intent in plain (non-puppeteer) DOM terms,
    // but as written they have no effect.
    // NOTE(review): page.$$ presumably resolves to puppeteer element handles
    // rather than DOM nodes, so `.innerText` would be undefined on them; the
    // query also runs before the 2 s pause below, so the new content may not
    // be rendered yet. Confirm against the puppeteer API docs.

    const strongs = await page.$$('strong')
    for(var i=0; i<strongs.length; i++) {
      console.log(strongs[i].innerText);
    }

    // Pause, then capture the page to a file for visual confirmation.
    await timeout(2000)
    await page.screenshot({path: 'example.png'}) // this renders results page ok

    browser.close();
})();

因此查询文本已正确输入,按钮点击也已触发,屏幕截图显示网页已按预期响应。但我不知道如何提取并输出相关内容。

我一直在努力理解整个 async/await 范式,但对它仍然很陌生。非常感谢。

编辑 - Vaviloff方法错误:

(node:67405) UnhandledPromiseRejectionWarning: Error: Protocol error (Runtime.callFunctionOn): Cannot find context with specified id undefined
    at Promise (/Users/user/node_modules/puppeteer/lib/Connection.js:200:56)
    at new Promise (<anonymous>)
    at CDPSession.send (/Users/user/node_modules/puppeteer/lib/Connection.js:199:12)
    at ExecutionContext.evaluateHandle (/Users/user/node_modules/puppeteer/lib/ExecutionContext.js:79:75)
    at ExecutionContext.evaluate (/Users/user/node_modules/puppeteer/lib/ExecutionContext.js:46:31)
    at Frame.evaluate (/Users/user/node_modules/puppeteer/lib/FrameManager.js:326:20)
    at <anonymous>
    at process._tickCallback (internal/process/next_tick.js:160:7)
(node:67405) UnhandledPromiseRejectionWarning: Unhandled promise rejection. This error originated either by throwing inside of an async function without a catch block, or by rejecting a promise which was not handled with .catch(). (rejection id: 1)
(node:67405) [DEP0018] DeprecationWarning: Unhandled promise rejections are deprecated. In the future, promise rejections that are not handled will terminate the Node.js process with a non-zero exit code.

1 个答案:

答案 0 :(得分:1)

有一个有用的辅助方法:page.$$eval。

  

此方法在页面中运行Array.from(document.querySelectorAll(selector))并将其作为第一个参数传递给pageFunction。

由于它将Array传递给求值函数,我们可以在其上使用.map()来提取所需属性:

// The callback receives the array of elements matching 'strong' and maps
// each one to its innerText.
const strongs = await page.$$eval('strong', (nodes) => nodes.map((node) => node.innerText));

**更新** 这是一个经过测试、可以运行的完整脚本:

const puppeteer = require('puppeteer');

const input_val = '[puppeteer]';
const items_selector = '.question-hyperlink';

/**
 * Open Stack Overflow, submit `input_val` through the search box and print
 * the innerText of every element matching `items_selector` on the page that
 * loads afterwards.
 *
 * The browser is always closed, even when a step fails, and any failure is
 * logged and reflected in the process exit code instead of surfacing as an
 * unhandled promise rejection.
 */
(async () => {
    const browser = await puppeteer.launch({
        headless: false,
    });

    try {
        const page = await browser.newPage();

        await page.goto('https://stackoverflow.com/', { waitUntil: 'networkidle2' });
        await page.waitForSelector('input[name=q]');

        // Register the navigation wait BEFORE sending the trailing '\r' that
        // submits the form; waiting only afterwards can miss a navigation
        // that has already finished and then hang until the timeout.
        await Promise.all([
            page.waitForNavigation(),
            page.type('input[name=q]', `${input_val}\r`),
        ]);

        // Evaluated in the page: map the matched elements to their text.
        const items = await page.$$eval(items_selector, (nodes) => nodes.map((node) => node.innerText));

        console.log(items);
    } finally {
        // Runs on success and on failure so no browser process is left behind.
        await browser.close();
    }
})().catch((err) => {
    console.error(err);
    process.exitCode = 1;
});

更新2

针对 https://diplodata.shinyapps.io/puppeteer-test/ 处沙箱的脚本修改版本:

const puppeteer = require('puppeteer');
const input_val = 'puppeteer';
// Returns a promise that resolves (with no value) after `ms` milliseconds.
const timeout = (ms) => new Promise((done) => {
    setTimeout(done, ms);
});

/**
 * Open the sandbox page, type `input_val` into #query, click #go and print
 * the innerText of every <strong> element found shortly afterwards.
 *
 * The browser is always closed, even when a step fails, and any failure is
 * logged and reflected in the process exit code instead of surfacing as an
 * unhandled promise rejection.
 */
(async () => {
    const browser = await puppeteer.launch({
        headless: false,
    });

    try {
        const page = await browser.newPage();

        await page.goto('https://diplodata.shinyapps.io/puppeteer-test/', { waitUntil: 'networkidle2' });
        await page.waitForSelector('#query');
        await page.type('#query', input_val);
        await page.click('#go');

        // Fixed pause before reading the page after the click.
        // NOTE(review): a condition-based wait would be more robust, but the
        // right condition depends on the page's markup — confirm and replace.
        await timeout(500);

        // Evaluated in the page: map the matched elements to their text.
        const items = await page.$$eval('strong', (nodes) => nodes.map((node) => node.innerText));

        console.log(items);
    } finally {
        // Runs on success and on failure so no browser process is left behind.
        await browser.close();
    }
})().catch((err) => {
    console.error(err);
    process.exitCode = 1;
});

产生以下结果:

  

[ '点击下面的内容应为:', '<query>', 'puppeteer' ]