当我尝试使用 puppeteer 抓取某些页面时,被 chegg.com 拦截了。有什么办法可以解决这个问题?也许可以在 Chromium 中使用隐身(无痕)模式?
当我尝试登录时会出现错误,网站阻止了我登录。
我还记得读到过,可以通过设置或删除某些请求头(headers)来解决这个问题?
我只打算大约每分钟抓取一次该页面,甚至可能每小时才抓取一次,并不算频繁。
const puppeteer = require("puppeteer");
const CREDS = require("./creds");
var SlackBot = require("slackbots");
// Slack channel the bot posts its replies to.
const channel = "testing";

// Hint posted back when a message is not recognised as a chegg link.
const tryAgain =
  "Try again, nerd. Post a plain text chegg link Ex. https://www.chegg.com/homework-help/questions-and-answers/assume-unc...";

// The same hint with its link wrapped in angle brackets; handleMessage
// compares incoming text against it so that this text is never answered
// (presumably the form in which Slack echoes the hint back — confirm).
const tryAgainTwo =
  "Try again, nerd. Post a plain text chegg link Ex. <https://www.chegg.com/homework-help/questions-and-answers/assume-unc>...";

// Base name of the screenshot file; messages mentioning it are ignored by
// handleMessage.
const imageName = "workplease2";
// Slack client used below to listen for messages and to post replies.
const bot = new SlackBot({ name: "cheggy", token: "--SNIP--" });
// bot.on("start", function() {
// bot.postMessageToChannel(channel, "Hello world!");
// });
// Hand the text of every Slack event of type "message" to handleMessage;
// events of any other type are dropped.
bot.on("message", event => {
  if (event.type === "message") {
    handleMessage(event.text);
  }
});
/**
 * Reacts to the text of a Slack message.
 *
 * - Text containing a chegg link (and not the bot's own "Try again" hint)
 *   is acknowledged in the channel and triggers a scrape via run().
 * - Text that matches one of the bot's own outputs is ignored so the bot
 *   does not answer itself.
 * - Anything else gets the "Try again" hint.
 *
 * @param {string} message - Text of the incoming message. Non-string values
 *   (for instance when the event carries no `text`) are ignored.
 * @returns {void}
 */
function handleMessage(message) {
  // Without this guard a missing text would throw on `.includes`.
  if (typeof message !== "string") {
    return;
  }

  const isCheggLink =
    message.includes("chegg.com/") && !message.includes("Try again, nerd.");
  if (isCheggLink) {
    bot.postMessageToChannel(
      channel,
      "Give me a sec to find that for you. XOXO"
    );
    // run() is asynchronous and nobody awaits it here, so failures are
    // reported explicitly instead of surfacing as unhandled rejections.
    run().catch(error => {
      console.error("Failed to fetch the chegg page:", error);
    });
    return;
  }

  // The bot's own posts (hint echo, acknowledgement, uploaded screenshot)
  // must not be answered.
  const isOwnOutput =
    message === tryAgainTwo ||
    message.includes("sec to find") ||
    message.includes(imageName);
  if (isOwnOutput) {
    return;
  }

  bot.postMessageToChannel(channel, tryAgain);
  console.log("XXXXXXXXXXXXXXXXXXXXthis is the message!" + message);
}
/**
 * Posts one greeting, chosen by getGreeting(), to the bot's channel.
 */
function sendGreeting() {
  bot.postMessageToChannel(channel, getGreeting());
}
/**
 * Picks one of the canned greetings at random.
 *
 * @returns {string} A greeting from the fixed list below.
 */
function getGreeting() {
  const options = ["hello!", "hi there!", "cheerio!", "how do you do!", "¡hola!"];
  const pick = Math.floor(Math.random() * options.length);
  return options[pick];
}
/**
 * Pauses for the given number of milliseconds.
 *
 * @param {number} ms - Delay handed to setTimeout.
 * @returns {Promise<void>} Settles once the timer has fired.
 */
async function timeout(ms) {
  await new Promise(done => {
    setTimeout(done, ms);
  });
}
/**
 * Logs in to Chegg with the stored credentials, takes a full-page
 * screenshot of a question page and uploads the image to Slack.
 *
 * @param {string} [url] - Page to capture. Defaults to the sample question
 *   this script was originally hard-coded to.
 * @returns {Promise<void>} Resolves once Slack has answered the upload
 *   request; rejects if a browser step or the upload fails.
 */
async function run(
  url = "https://www.chegg.com/homework-help/questions-and-answers/assume-uncle-holds-one-stock-east-coast-bank-ecb-thinks-little-risk-agree-stock-relatively-q9069609"
) {
  const fs = require("fs");
  const request = require("request");

  const USERNAME_SELECTOR = "#emailForSignIn";
  const PASSWORD_SELECTOR = "#passwordForSignIn";
  const SLACK_TOKEN = "--SNIP--";
  const SLACK_CHANNEL = "general";
  // One path for both writing and uploading. Previously the screenshot was
  // saved without the ".png" extension while the upload read the ".png" file.
  const screenshotPath = `./myfolder/${imageName}.png`;

  const browser = await puppeteer.launch({
    headless: false
  });
  try {
    const page = await browser.newPage();
    await page.goto("https://www.chegg.com/auth?action=login&reset_password=0");
    await page.click(USERNAME_SELECTOR);
    await page.keyboard.type(CREDS.email);
    await page.click(PASSWORD_SELECTOR);
    await page.keyboard.type(CREDS.password);
    // Register the navigation wait before submitting the form so a fast
    // navigation cannot finish before anyone is listening for it.
    await Promise.all([page.waitForNavigation(), page.keyboard.press("Enter")]);
    await page.goto(url);
    // Fixed one-second pause before the capture (presumably to let the page
    // finish rendering).
    await timeout(1000);
    await page.screenshot({ path: screenshotPath, fullPage: true });
  } finally {
    // Always shut the browser down, even when a step above failed.
    await browser.close();
  }
  console.log("do you reach me?");

  const options = {
    method: "POST",
    url: "https://slack.com/api/files.upload",
    headers: { "cache-control": "no-cache" },
    formData: {
      token: SLACK_TOKEN,
      channels: SLACK_CHANNEL,
      file: fs.createReadStream(screenshotPath)
    }
  };
  // Adapt the callback API to a promise so an upload failure rejects run()
  // instead of being thrown from inside the callback where no caller can
  // catch it.
  await new Promise((resolve, reject) => {
    request(options, (error, response, body) => {
      if (error) {
        reject(error);
        return;
      }
      console.log(body);
      resolve();
    });
  });
}
答案 0 :(得分:0)
保存 cookie,设置好浏览器,并降低 puppeteer 的操作速度,使其接近人类的操作速度。
// Launch a visible browser with a persistent user-data directory and every
// Puppeteer operation slowed down.
const browser = await puppeteer.launch({
  headless: false,
  devtools: true,
  slowMo: 250, // delay, in milliseconds, added to each Puppeteer operation
  userDataDir: 'C:\\userData' // userDataDir <string> Path to a User Data Directory.
});
// browser.pages() resolves to an array of the open pages, hence the [0] below.
const page = await browser.pages();
// Full browser-style user-agent string, including the leading "Mozilla/" token.
await page[0].setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36');