HTML看起来像这样:
// Sample markup: three sibling sections, each made of an <h1> title and a
// nested <div> holding a single link. Every anchor is closed with </a>; the
// earlier `<a>click me<a>` form opened a second anchor instead of closing the
// first one.
const htmlStr = `
<div>
<div>
<h1>title1</h1>
<div>
<a>click me</a>
</div>
</div>
<div>
<h1>title2</h1>
<div>
<a>click me</a>
</div>
</div>
<div>
<h1>title3</h1>
<div>
<a>click me</a>
</div>
</div>
</div>
`;
我想单击第一个 click me。
使用cypress,我可以执行以下操作:
// Callback run with the matched <div> as the scope: click the <a> found there.
const clickInnerLink = () => {
  cy.get('a').click();
};

// Narrow to the <div> that contains the text 'title1', then act inside it.
cy.contains('div', 'title1').within(clickInnerLink);
在此示例中,有很多方法可以做到这一点。但思路是:找到包含文本 'title1' 的最近的 <div>,然后从它开始,在其内部查找 <a>。
在Puppeteer中,我想要以下内容:
// Wished-for API (elementContains() is the function the question asks how to
// implement): narrow the search to the <div> that contains the text 'title1'.
const element = await page.elementContains('div', 'title1') // <- narrow down
// Then click the <a> located inside that element.
await element.click('a')
如何实现 elementContains() 函数?有什么想法吗?谢谢!
-----更新-----
为了更加清楚,我希望可以这样使用 elementContains():
// For each title in turn (first, second, third section), narrow down to the
// <div> containing that title and click the `click me` link inside it.
for (const title of ['title1', 'title2', 'title3']) {
  const element = await page.elementContains('div', title);
  await element.click('a');
}
答案 0 :(得分:2)
如果我理解正确,它们是XPath和选择器等效项(https://example.org/恰好具有类似的DOM结构):
'use strict';
const puppeteer = require('puppeteer');
/**
 * Demo: open https://example.org/ and locate the <a> that lives inside the
 * <div> whose <h1> contains "Example Domain", once with XPath and once with
 * CSS selectors evaluated in the page, then print both handles.
 *
 * Any error is logged rather than rethrown.
 */
(async function main() {
  try {
    const browser = await puppeteer.launch();
    try {
      const [page] = await browser.pages();
      await page.goto('https://example.org/');

      // XPath variant: a <div> that has an <h1> child with the text, then any <a> below it.
      const [elemByXPath] = await page.$x('//div[h1[contains(., "Example Domain")]]//a');

      // Selector variant: the first <div> with a matching <h1> descendant, then its first <a>.
      const elemBySelector = await page.evaluateHandle(
        () => [...document.querySelectorAll('div')]
          .find(
            div => [...div.querySelectorAll('h1')]
              .some(h1 => h1.innerText.includes('Example Domain'))
          )
          .querySelector('a')
      );

      console.log(elemByXPath.toString());
      console.log(elemBySelector.toString());
    } finally {
      // Always release the browser, even when navigation or a lookup throws.
      await browser.close();
    }
  } catch (err) {
    console.error(err);
  }
})();
答案 1 :(得分:2)
您可以通过 prototype 轻松地为 Page 添加额外的功能,并使用 page.evaluateHandle 获取特定元素。
page.evaluate 和 page.evaluateHandle 之间的唯一区别是:page.evaluateHandle 返回页内对象(JSHandle)。
Puppeteer模块将导出此类。您可以根据需要扩展其功能。
// extract the Page class
const { Page } = require("puppeteer/lib/Page");
通常,您创建的 page 在原型方法内部就是 this,因此 page.evaluateHandle 将写作 this.evaluateHandle。
/**
 * Look up an element by selector and by the text it contains.
 *
 * Every element in the document that matches `selector` is checked, and the
 * last one (in querySelectorAll order) whose innerText includes `text` is
 * returned. When nothing matches, the in-page function returns undefined.
 *
 * @name elementContains
 * @param {String} selector selector matched against the whole document
 * @param {String} text substring looked for in each candidate's innerText
 * @returns {Promise} the handle produced by this.evaluateHandle
 */
Page.prototype.elementContains = function elementContains(...args) {
  return this.evaluateHandle((selector, text) => {
    const candidates = Array.from(document.querySelectorAll(selector));
    // Keep overwriting so that the final value is the last matching candidate.
    let lastMatch;
    for (const candidate of candidates) {
      if (candidate.innerText.includes(text)) {
        lastMatch = candidate;
      }
    }
    return lastMatch;
  }, ...args);
};
现在我们有了很棒的 elementContains,是时候实现 get 函数了。
const { JSHandle } = require("puppeteer/lib/JSHandle");

/**
 * Replicate the .get function: look up a descendant of this handle's element
 * by running querySelector on it inside the handle's execution context.
 *
 * @param {String} selector selector passed to element.querySelector
 * @returns {Promise} the handle produced by evaluateHandle
 */
JSHandle.prototype.get = function get(selector) {
  // Runs in the page: `root` is the element behind this handle.
  const findInside = (root, innerSelector) => root.querySelector(innerSelector);

  // NOTE(review): _context is an underscore-prefixed field — presumably
  // internal to the library; confirm it exists in the installed version.
  // The handle itself is passed as the first argument of the in-page function.
  return this._context.evaluateHandle(findInside, this, selector);
};
/**
 * Usage demo: load the sample markup, find the <div> containing 'title1'
 * with page.elementContains(), then read its title and click the link inside
 * it in two different ways.
 */
(async () => {
  const browser = await puppeteer.launch({
    headless: false
  });
  try {
    const page = await browser.newPage();
    await page.setContent(html); // your specified html text

    // get the element
    const elem = await page.elementContains('div', 'title1');

    // use it like any other normal element, click it, eval it, remove it etc.
    const content = await elem.$eval('h1', e => e.innerText);
    console.log(content); // prints "title1"

    // OR use the built-in scoped query. The lookup must be made on the handle:
    // page.$() takes only a selector and always searches the whole page, so
    // it would return the first <a> of the document regardless of `elem`.
    const btn = await elem.$('a');
    await btn.click();

    // OR use our .get function to get another element
    const targetBtn = await elem.get('a');
    // Await the click so a failure is reported instead of becoming a floating rejection.
    await targetBtn.click();
  } finally {
    // Release the browser whether or not the steps above succeeded.
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});