我是 Puppeteer 的新手(对 JavaScript 总体上也不太熟悉),正在尝试为以下需求编写一些基本功能:
我得到的错误是:
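评估失败:DOMException:无法在“Document”上执行“querySelector”:“0”不是有效的选择器。(原始报错:Evaluation failed: DOMException: Failed to execute 'querySelector' on 'Document': '0' is not a valid selector.)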
这是我的代码:
我很有把握,除了“用 XPath 找到并单击正确的链接”这一步之外,其余代码都能正常工作。我抓取这些信息的网站是:
代码:
// Question snippet: for every link matched by the XPath, open the record page,
// save a screenshot and an HTML dump of it, navigate back, then save a
// screenshot and an HTML dump of the listing page.
// page.$x() resolves to an array of ElementHandle objects (see Puppeteer docs).
const records = await page.$x('//table[2]//tr[td[a]]//td[1]/a');
// Running counter used to build the per-record output folder name.
let int = 0;
// NOTE(review): `for...in` iterates the array's keys ("0", "1", ...), not the
// ElementHandles themselves, so `record` is the string "0" on the first pass.
for (let record in records) {
await Promise.all([
page.waitForNavigation(),
// NOTE(review): page.click() expects a selector string, so the key "0" ends up
// in document.querySelector -- this matches the reported
// '"0" is not a valid selector' error. Iterating with `for...of` and calling
// `record.click()` on the handle should avoid it -- TODO confirm; handles
// obtained before a navigation may also need to be re-queried afterwards.
page.click(record)
]);
// Create screenshots/item<N>/ plus its base/ and record/ sub-folders.
await Promise.all([makeDirectory('screenshots/item'+int), makeDirectory('screenshots/item'+int+'/base'), makeDirectory('screenshots/item'+int+'/record')]);
let recordPath = "screenshots/item"+int+"/record/record.html";
let basePath = "screenshots/item"+int+"/base/base.html";
// NOTE(review): this screenshot promise is not awaited, unlike the one further
// down, so nothing here waits for it to finish before the code moves on.
page.screenshot({path: "screenshots/item"+int+"/record/record.png", fullPage: true});
// Dump the record page's <body> markup to record.html.
let recordBody = await page.evaluate(() => document.body.innerHTML);
await saveHtml(recordPath, recordBody);
// Return to the listing page before capturing the "base" artefacts.
await Promise.all([
page.waitForNavigation(),
page.goBack()
]);
await page.screenshot({path: "screenshots/item"+int+"/base/base.png", fullPage: true});
let baseBody = await page.evaluate(() => document.body.innerHTML);
await saveHtml(basePath, baseBody);
int++;
// Logs the loop variable (the array key, given the `for...in` above).
console.log(record);
}
/**
 * Creates the directory at `path` using `mkdirp`.
 *
 * The returned promise settles only after mkdirp's callback has run, so a
 * caller that awaits it can rely on mkdirp having finished. Previously the
 * function resolved immediately and an error thrown inside the callback
 * could not be caught by the caller.
 *
 * @param {string} path - Directory path to create.
 * @returns {Promise<void>} Resolves when mkdirp reports success; rejects with
 *   the error mkdirp passes to its callback.
 */
function makeDirectory(path) {
  return new Promise((resolve, reject) => {
    mkdirp(path, (err) => {
      if (err) {
        reject(err);
        return;
      }
      resolve();
    });
  });
}
/**
 * Writes `html` to the file at `path`.
 *
 * The callback form of fs.writeFile returns nothing, so awaiting it directly
 * does not wait for the write. It is wrapped in a promise here so that the
 * caller's `await` really waits for the file to be written and receives any
 * write error as a rejection.
 *
 * @param {string} path - Destination file path.
 * @param {string} html - Content to write.
 * @returns {Promise<void>} Resolves once the write has completed; rejects with
 *   the error reported by fs.writeFile.
 */
function saveHtml(path, html) {
  return new Promise((resolve, reject) => {
    fs.writeFile(path, html, (err) => {
      if (err) {
        reject(err);
        return;
      }
      resolve();
    });
  });
}
注意:我需要使用XPath:(
更新(2018 年 6 月 25 日):下面的代码现在可以取得 XPath 选择器匹配到的所有链接。之后我遍历这些链接,直接用 page.goto 跳转到对应的页面。
// XPath matching the anchor in the first cell of each row that contains a link.
const linksXPath = '//table[2]//tr[td[a]]//td[1]/a';
// Evaluate the XPath inside the page and return the href of every matched
// node, in snapshot order.
const links = await page.evaluate((selector) => {
  const snapshot = document.evaluate(
    selector,
    document,
    null,
    XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
    null
  );
  return Array.from(
    { length: snapshot.snapshotLength },
    (_, idx) => snapshot.snapshotItem(idx).href
  );
}, linksXPath);
答案 0 :(得分:0)
我认为问题出在您的选择器上。
我相信您的表格选择器应为:
"body > table > tbody > tr:nth-child(2) > td > table > tbody > tr:nth-child(1) > td > table.bodytext > tbody"
获取页面正确选择器的最简单方法是使用Chrome开发工具。
检查(Inspect)页面,然后转到“元素”(Elements)选项卡。在那里您可以看到所有 HTML 元素。右键单击您想要的元素(我选的是 <tbody>,这样就可以遍历其中的 <tr> 元素),然后选择 Copy > Copy selector(复制选择器)。
答案 1 :(得分:0)
我的代码现在已经能完成所需的操作,不过我希望能有更简单的实现方式。另外,您会看到我在遍历链接时是用 page.goto 函数跳转过去的;我仍然不知道如何用 page.click 来做到这一点。按理说我需要用 XPath 取得所有 td 再逐个单击,但我始终没能让它工作。下面是可以正常运行的成品代码:
const puppeteer = require('puppeteer');
const fs = require('fs');
const mkdirp = require('mkdirp');
/**
 * Submits the lookup form at https://hrlb.oregon.gov/bspa/licenseelookup/
 * (search type "lastname", query "a") and archives every record linked from
 * the results page.
 *
 * For the N-th result link the following files are written:
 *   screenshots/item<N>/record/record.png  - full-page screenshot of the record
 *   screenshots/item<N>/record/record.html - the record page's body HTML
 *   screenshots/item<N>/base/base.png      - full-page screenshot of the results page
 *   screenshots/item<N>/base/base.html     - the results page's body HTML
 *
 * @returns {Promise<void>} Resolves after all records have been processed and
 *   the browser has been closed; rejects with the first error encountered.
 */
async function run() {
  const pageToClick = 'body > table > tbody > tr:nth-child(3) > td > table > tbody > tr > td > form > table > tbody > tr:nth-child(3) > td > div > input[type="submit"]';
  const select = 'body > table > tbody > tr:nth-child(3) > td > table > tbody > tr > td > form > table > tbody > tr:nth-child(1) > td:nth-child(2) > select';
  const inputField = 'body > table > tbody > tr:nth-child(3) > td > table > tbody > tr > td > form > table > tbody > tr:nth-child(2) > td:nth-child(2) > input[type="text"]:nth-child(1)';
  const linksXPath = '//table[2]//tr[td[a]]//td[1]/a';

  const browser = await puppeteer.launch({
    headless: true
  });
  // try/finally so the browser process is closed even when a step throws.
  try {
    const page = await browser.newPage();
    await page.goto('https://hrlb.oregon.gov/bspa/licenseelookup/');
    await page.select(select, 'lastname');
    await page.focus(inputField);
    await page.keyboard.type('a');
    // Clicking submit triggers a navigation; start waiting before the click so
    // the navigation cannot be missed.
    await Promise.all([
      page.waitForNavigation(),
      page.click(pageToClick)
    ]);

    // Collect the absolute URL of every record link on the results page.
    const links = await page.evaluate((selector) => {
      const results = [];
      const query = document.evaluate(selector,
        document,
        null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
      for (let i = 0, length = query.snapshotLength; i < length; ++i) {
        results.push(query.snapshotItem(i).href);
      }
      return results;
    }, linksXPath);

    // The results page is captured once, here, and copied into every item folder.
    const basePic = await page.screenshot({fullPage: true});
    const baseBody = await page.evaluate(() => document.body.innerHTML);

    for (const [index, link] of links.entries()) {
      const itemDir = `screenshots/item${index}`;

      // page.goto() itself resolves once the navigation has finished, so no
      // separate waitForNavigation() is needed. Going back to the results page
      // between records is unnecessary as well: its HTML and screenshot were
      // captured above and every record is opened by absolute URL.
      await page.goto(link);

      // mkdirp also creates missing parents, so itemDir itself needs no call.
      await Promise.all([
        makeDirectory(`${itemDir}/base`),
        makeDirectory(`${itemDir}/record`)
      ]);

      await page.screenshot({path: `${itemDir}/record/record.png`, fullPage: true});
      const recordBody = await page.evaluate(() => document.body.innerHTML);
      await saveFile(`${itemDir}/record/record.html`, recordBody);
      await saveFile(`${itemDir}/base/base.html`, baseBody);
      await saveFile(`${itemDir}/base/base.png`, basePic);
    }

    await page.close();
  } finally {
    await browser.close();
  }
}
/**
 * Creates the directory at `path` using `mkdirp`.
 *
 * The returned promise settles only after mkdirp's callback has run, so a
 * caller that awaits it can rely on mkdirp having finished. Previously the
 * function resolved immediately and an error thrown inside the callback
 * could not be caught by the caller.
 *
 * @param {string} path - Directory path to create.
 * @returns {Promise<void>} Resolves when mkdirp reports success; rejects with
 *   the error mkdirp passes to its callback.
 */
function makeDirectory(path) {
  return new Promise((resolve, reject) => {
    mkdirp(path, (err) => {
      if (err) {
        reject(err);
        return;
      }
      resolve();
    });
  });
}
/**
 * Writes `html` to the file at `path`.
 *
 * The callback form of fs.writeFile returns nothing, so awaiting it directly
 * does not wait for the write. It is wrapped in a promise here so that the
 * caller's `await` really waits for the file to be written and receives any
 * write error as a rejection.
 *
 * @param {string} path - Destination file path.
 * @param {string|Buffer} html - Content to write; despite the name, run() also
 *   passes the screenshot data returned by page.screenshot().
 * @returns {Promise<void>} Resolves once the write has completed; rejects with
 *   the error reported by fs.writeFile.
 */
function saveFile(path, html) {
  return new Promise((resolve, reject) => {
    fs.writeFile(path, html, (err) => {
      if (err) {
        reject(err);
        return;
      }
      resolve();
    });
  });
}
// Start the scraper. Report a failure and exit with a non-zero status instead
// of leaving the rejection unhandled.
run().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});