我刚开始使用 puppeteer,还不太熟悉。我想抓取某个网站上的一些数据,但运行时收到了下面这些警告,而且控制台上没有显示任何数据。
为什么会收到以下警告,以及如何消除它们?
这是我使用的代码:
const puppeteer = require("puppeteer");
/**
 * Launches headless Chrome, opens the New York Times home page with a
 * desktop User-Agent, then logs the effective User-Agent and an object
 * `{ posts: [...] }` holding the title, link and summary of every
 * `.assetWrapper` element found on the page.
 *
 * The browser is always closed, even when a step fails, and any failure is
 * reported through the trailing `.catch` instead of surfacing as an
 * unhandled promise rejection.
 */
(async () => {
  // prepare for headless chrome
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    // set user agent (override the default headless User Agent)
    await page.setUserAgent(
      "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.129 Safari/537.36"
    );
    // go to website home page
    await page.goto("https://www.nytimes.com/");
    await page.waitForSelector("body");
    // get the User Agent on the context of Puppeteer
    const userAgent = await page.evaluate(() => navigator.userAgent);
    // This callback works on the page's `document`, so every variable is
    // declared locally to avoid leaking implicit globals into the page.
    const rposts = await page.evaluate(() => {
      const postItems = [];
      document.body.querySelectorAll(".assetWrapper").forEach((item) => {
        const heading = item.querySelector("h2");
        const anchor = item.querySelector("a");
        const paragraph = item.querySelector("p");
        // Skip wrappers that lack any of the three parts; this replaces the
        // former empty catch that silently dropped such items.
        if (!heading || !anchor || !paragraph) {
          return;
        }
        postItems.push({
          title: heading.innerText,
          link: anchor.href,
          summary: paragraph.innerText,
        });
      });
      return { posts: postItems };
    });
    // If everything correct then no 'HeadlessChrome' sub string on userAgent
    console.log(userAgent);
    console.log(rposts);
  } finally {
    // Release the Chrome process whether or not scraping succeeded.
    await browser.close();
  }
})().catch((error) => {
  // Report the failure explicitly and signal it through the exit code.
  console.error(error);
  process.exitCode = 1;
});
这是我收到的错误信息: (node:4072) UnhandledPromiseRejectionWarning: ReferenceError: browser is not defined (node:15452) UnhandledPromiseRejectionWarning: ReferenceError: Cannot access 'page' before initialization
答案 0 :(得分:2)
创建无头浏览器,然后就可以使用这些代码。
#include <stdio.h>
// Consumes one point record from `in`: an integer followed by two floats.
// The parsed values live only in locals and are discarded on return; the
// fscanf result is not inspected.
void get_point(FILE* in)
{
    float px, py;
    int number;

    fscanf(in, "%d %f %f", &number, &px, &py);
}
// Consumes one rectangle record from `in`: an integer followed by four
// floats. The parsed values live only in locals and are discarded on return;
// the fscanf result is not inspected.
void get_rectangle(FILE* in)
{
    float v0, v1, v2, v3;
    int number;

    fscanf(in, "%d %f %f %f %f", &number, &v0, &v1, &v2, &v3);
}
// Reads one-character commands from the input stream and dispatches on them:
//   P/p -> get_point, R/r -> get_rectangle, Q/q -> quit.
// Any other character is skipped. Returns 0 on quit or clean end of input,
// and 1 if reading stopped because of a stream error.
int main()
{
    FILE* in = stdin; // you can replace stdin with fopen to read from a file
    while (1)
    {
        int ch = getc(in);
        if (ch == EOF)
        {
            // Without this check EOF would hit the default branch and the
            // loop would spin forever once the input is exhausted.
            return ferror(in) ? 1 : 0;
        }
        switch (ch)
        {
        case 'P': case 'p': // follows a point
            get_point(in);
            break;
        case 'R': case 'r': // follows a rectangle
            get_rectangle(in);
            break;
        case 'Q': case 'q': // quit
            return 0;
        default: // unknown
            continue;
        }
    }
}
此致