基于这个回答,有没有办法(类似casperjs / phantomjs那样)在page.evaluate()上下文中添加我们的自定义函数?
例如,引入一个包含辅助函数x
的文件,以便调用XPath函数:x('//a/@href')
答案 0 :(得分:5)
您可以在单独的page.evaluate()
调用中注册帮助函数。 page.exposeFunction()
看起来很诱人,但它注册的函数无法访问浏览器上下文(而你需要document
对象)。
以下是注册$x()
帮助函数的示例:
const puppeteer = require('puppeteer');
/**
 * Installs the `$x` XPath helper on `window`.
 *
 * After this function has run, `$x(xPath)` evaluates `xPath` against
 * `document` and returns the `singleNodeValue` of a
 * FIRST_ORDERED_NODE_TYPE result.
 */
const helperFunctions = () => {
  window.$x = (xPath) => {
    const lookup = document.evaluate(
      xPath,
      document,
      null,
      XPathResult.FIRST_ORDERED_NODE_TYPE,
      null
    );
    return lookup.singleNodeValue;
  };
};
// Entry point: launches a browser, registers the helpers in the page, and
// prints the text of the element with id "mp-tfa".
(async () => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto('https://en.wikipedia.org', { waitUntil: 'networkidle2' });

    // Run helperFunctions inside the page so that $x() exists there.
    await page.evaluate(helperFunctions);

    const text = await page.evaluate(() => {
      // $x() is now available
      const featureArticle = $x('//*[@id="mp-tfa"]');
      // $x() yields null when nothing matches; return null instead of
      // throwing on `.textContent`.
      return featureArticle ? featureArticle.textContent : null;
    });
    console.log(text);
  } finally {
    // Always close the browser, even when navigation or evaluation fails,
    // so the browser process is not left running.
    await browser.close();
  }
})().catch((err) => {
  // Report the failure instead of leaving an unhandled rejection.
  console.error(err);
  process.exitCode = 1;
});
(编辑 - 从文件添加帮助程序)
您还可以将帮助程序保存在单独的文件中,并通过page.addScriptTag()
将其注入浏览器上下文。
这是一个例子:
helperFunctions.js
/**
 * Global XPath helper: evaluates `xPath` against `document` and returns the
 * `singleNodeValue` of a FIRST_ORDERED_NODE_TYPE result.
 */
window.$x = (xPath) => {
  const lookup = document.evaluate(
    xPath,
    document,
    null,
    XPathResult.FIRST_ORDERED_NODE_TYPE,
    null
  );
  return lookup.singleNodeValue;
};
并使用它:
const puppeteer = require('puppeteer');
// Entry point: launches a browser, injects ./helperFunctions.js into the
// page, and prints the text of the element with id "mp-tfa".
(async () => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto('https://en.wikipedia.org', { waitUntil: 'networkidle2' });

    // Inject the helper file as a script tag so that $x() exists in the page.
    await page.addScriptTag({ path: './helperFunctions.js' });

    const text = await page.evaluate(() => {
      // $x() is now available
      const featureArticle = $x('//*[@id="mp-tfa"]');
      // Return null instead of throwing on `.textContent` when $x() finds
      // no matching node.
      return featureArticle ? featureArticle.textContent : null;
    });
    console.log(text);
  } finally {
    // Always close the browser, even when navigation or evaluation fails,
    // so the browser process is not left running.
    await browser.close();
  }
})().catch((err) => {
  // Report the failure instead of leaving an unhandled rejection.
  console.error(err);
  process.exitCode = 1;
});
答案 1 :(得分:0)
另一种基于casperjs getElementByXPath()
和getElementsByXPath()
的解决方案。优点是可以通过第二个参数指定上下文节点,从而只在特定节点范围内执行xpath表达式。
/**
 * Global XPath helper: evaluates `xPath` against `document` and returns the
 * `singleNodeValue` of a FIRST_ORDERED_NODE_TYPE result.
 */
window.$x = (xPath) => {
  const firstMatch = document.evaluate(
    xPath,
    document,
    null,
    XPathResult.FIRST_ORDERED_NODE_TYPE,
    null
  );
  return firstMatch.singleNodeValue;
};
/**
 * Returns the first node matching an XPath expression.
 *
 * @param {string} expression - XPath expression to evaluate.
 * @param {Node} [scope] - Context node for the evaluation; any falsy value
 *   falls back to `document`.
 * @returns {Node|undefined} The first item of the ordered snapshot, or
 *   `undefined` when the snapshot is empty.
 */
window.getElementByXPath = function getElementByXPath(expression, scope) {
  const contextNode = scope || document;
  const snapshot = document.evaluate(
    expression,
    contextNode,
    null,
    XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
    null
  );
  return snapshot.snapshotLength > 0 ? snapshot.snapshotItem(0) : undefined;
};
/**
 * Returns every node matching an XPath expression, in snapshot order.
 *
 * @param {string} expression - XPath expression to evaluate.
 * @param {Node} [scope] - Context node for the evaluation; any falsy value
 *   falls back to `document`.
 * @returns {Node[]} Array of the snapshot items; empty when nothing matches.
 */
window.getElementsByXPath = function getElementsByXPath(expression, scope) {
  const contextNode = scope || document;
  const snapshot = document.evaluate(
    expression,
    contextNode,
    null,
    XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
    null
  );
  return Array.from(
    { length: snapshot.snapshotLength },
    (_, index) => snapshot.snapshotItem(index)
  );
};
实际代码示例:
const puppeteer = require('puppeteer');
// Entry point: launches a browser, injects ./helperFunctions.js into the
// page, and prints the hash/balance pair of each row of the rich-list table
// as pretty-printed JSON.
(async () => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto('https://99bitcoins.com/bitcoin-rich-list-top100/#addresses', { waitUntil: 'networkidle2' });

    // Inject the helper file so getElementByXPath()/getElementsByXPath()
    // exist in the page.
    await page.addScriptTag({ path: './helperFunctions.js' });

    const result = await page.evaluate(() => {
      // Declared inside the callback because it runs in the page and cannot
      // see variables from the Node side.
      const MAX_ROWS = 100;
      const rows = getElementsByXPath('//table[@class="t99btc-rich-list"]//tr');
      // Never index past the rows that actually exist: with a missing row the
      // helper would fall back to `document` as scope and `.innerText` would
      // throw on the undefined result.
      const lastRow = Math.min(MAX_ROWS, rows.length - 1);
      const obj = {};
      // Row 0 is skipped, as in the original loop (presumably the header row).
      for (let i = 1; i <= lastRow; i++) {
        const hashCell = getElementByXPath('./td/a', rows[i]);
        const balanceCell = getElementByXPath('./td[3]', rows[i]);
        // Skip rows that lack either cell instead of throwing.
        if (!hashCell || !balanceCell) {
          continue;
        }
        obj[i] = {
          "hash": hashCell.innerText,
          "balance": balanceCell.innerText
        };
      }
      return obj;
    });
    console.log(JSON.stringify(result, null, 4));
  } finally {
    // Always close the browser, even when navigation or evaluation fails,
    // so the browser process is not left running.
    await browser.close();
  }
})().catch((err) => {
  // Report the failure instead of leaving an unhandled rejection.
  console.error(err);
  process.exitCode = 1;
});