我有这个基本的职位要求:
'use strict';
const puppeteer = require('puppeteer');
const request_client = require('request-promise-native');
(async () => {
// Create browser instance, and give it a first tab
const browser = await puppeteer.launch({headless: false});
const page = await browser.newPage();
// Allows you to intercept a request; must appear before
// your first page.goto()
await page.setRequestInterception(true);
// Request intercept handler... will be triggered with
// each page.goto() statement
page.on('request', interceptedRequest => {
// Here, is where you change the request method and
// add your post data
var data = {
'method': 'POST',
'postData': 'HasSell=true&ModelYears=2005&MaxModelYearHidden=2020&Brands=BMW&ModelNames=SERIES+3&TransmissionTypes=Otomatik&FuelTypes=Dizel&CarCases=Sedan&Versions=320D+AUTO&HorsePowers=150&MaxHorsePowerHidden=163&IsDontKnowHorsePowerHidden=0&Km=5.000&IsDontKnowKmHidden=0&OuterDemage-1=1-1&OuterDemage-2=2-1&OuterDemage-3=3-1&OuterDemage-4=4-1&OuterDemage-5=5-1&OuterDemage-6=6-1&OuterDemage-7=7-1&OuterDemage-8=8-1&OuterDemage-9=9-1&OuterDemage-10=10-1&OuterDemage-11=11-1&EquipmentCheckBoxSIS+FARI=SIS+FARI&EquipmentCheckBoxSUNROOF=SUNROOF&EquipmentCheckBoxYOL+BILGISAYARI=YOL+BILGISAYARI&progressValue=10',
headers: {
"Content-Type": "application/x-www-form-urlencoded"
}
};
// Request modified... finish sending!
interceptedRequest.continue(data);
});
// Navigate, trigger the intercept, and resolve the response
const response = await page.goto('https://www.ikinciyeni.com/fiyatlandirici');
const responseBody = await response.text();
console.log(responseBody);
// Close the browser - done!
//await browser.close();
})();
在上面的代码中,我已给出了将发布请求发送到的URL。它被重定向,并最终登陆登录页面。在这种情况下如何登录?如何提供登录凭据?
编辑: 我在最后添加了以下代码:
// Navigate, trigger the intercept, and resolve the response
const response = await page.goto('https://www.ikinciyeni.com/fiyatlandirici');
await page.waitForNavigation({
waitUntil: 'networkidle0',
});
await page.type('#EmailRetail', 'scott');
await page.type('#Password', 'tiger');
await page.click('#LoginSubmitBtn');
await page.waitForNavigation();
const responseBody = await response.text();
console.log(responseBody);
但是它抛出了这个异常:
UnhandledPromiseRejectionWarning: TimeoutError: Navigation timeout of 30000 ms exceeded
at E:\code\generic_scrapper\node_modules\puppeteer\lib\LifecycleWatcher.js:100:111
at async FrameManager.waitForFrameNavigation (E:\code\generic_scrapper\node_modules\puppeteer\lib\FrameManager.js:107:23)
at async Frame.waitForNavigation (E:\code\generic_scrapper\node_modules\puppeteer\lib\FrameManager.js:298:16)
at async Page.waitForNavigation (E:\code\generic_scrapper\node_modules\puppeteer\lib\Page.js:492:16)
at async E:\code\generic_scrapper\post.js:39:2
-- ASYNC --
at Frame.<anonymous> (E:\code\generic_scrapper\node_modules\puppeteer\lib\helper.js:94:19)
at Page.waitForNavigation (E:\code\generic_scrapper\node_modules\puppeteer\lib\Page.js:492:53)
at Page.<anonymous> (E:\code\generic_scrapper\node_modules\puppeteer\lib\helper.js:95:27)
at E:\code\generic_scrapper\post.js:39:13
at processTicksAndRejections (internal/process/task_queues.js:97:5)
(node:11400) UnhandledPromiseRejectionWarning: Unhandled promise rejection. This error originated either by throwing inside of an async function without a catch block, or by rejecting a promise which was not handled with .catch(). To terminate the node process on unhandled promise rejection, use the CLI flag `--unhandled-rejections=strict` (see https://nodejs.org/api/cli.html#cli_unhandled_rejections_mode). (rejection id: 1)
(node:11400) [DEP0018] DeprecationWarning: Unhandled promise rejections are deprecated. In the future, promise rejections that are not handled will terminate the Node.js process with a non-zero exit code.
^C
E:\code\generic_scrapper>node post.js
(node:18352) UnhandledPromiseRejectionWarning: TimeoutError: Navigation timeout of 30000 ms exceeded
at E:\code\generic_scrapper\node_modules\puppeteer\lib\LifecycleWatcher.js:100:111
at async FrameManager.waitForFrameNavigation (E:\code\generic_scrapper\node_modules\puppeteer\lib\FrameManager.js:107:23)
at async Frame.waitForNavigation (E:\code\generic_scrapper\node_modules\puppeteer\lib\FrameManager.js:298:16)
at async Page.waitForNavigation (E:\code\generic_scrapper\node_modules\puppeteer\lib\Page.js:492:16)
at async E:\code\generic_scrapper\post.js:39:2
-- ASYNC --
at Frame.<anonymous> (E:\code\generic_scrapper\node_modules\puppeteer\lib\helper.js:94:19)
at Page.waitForNavigation (E:\code\generic_scrapper\node_modules\puppeteer\lib\Page.js:492:53)
at Page.<anonymous> (E:\code\generic_scrapper\node_modules\puppeteer\lib\helper.js:95:27)
at E:\code\generic_scrapper\post.js:39:13
at processTicksAndRejections (internal/process/task_queues.js:97:5)
(node:18352) UnhandledPromiseRejectionWarning: Unhandled promise rejection. This error originated either by throwing inside of an async function without a catch block, or by rejecting a promise which was not handled with .catch(). To terminate the node process on unhandled promise rejection, use the CLI flag `--unhandled-rejections=strict` (see https://nodejs.org/api/cli.html#cli_unhandled_rejections_mode). (rejection id: 1)
(node:18352) [DEP0018] DeprecationWarning: Unhandled promise rejections are deprecated. In the future, promise rejections that are not handled will terminate the Node.js process with a non-zero exit code.
^C
E:\code\generic_scrapper>node post.js
(node:11528) UnhandledPromiseRejectionWarning: TimeoutError: Navigation timeout of 30000 ms exceeded
at E:\code\generic_scrapper\node_modules\puppeteer\lib\LifecycleWatcher.js:100:111
at async FrameManager.waitForFrameNavigation (E:\code\generic_scrapper\node_modules\puppeteer\lib\FrameManager.js:107:23)
at async Frame.waitForNavigation (E:\code\generic_scrapper\node_modules\puppeteer\lib\FrameManager.js:298:16)
at async Page.waitForNavigation (E:\code\generic_scrapper\node_modules\puppeteer\lib\Page.js:492:16)
at async E:\code\generic_scrapper\post.js:39:2
-- ASYNC --
at Frame.<anonymous> (E:\code\generic_scrapper\node_modules\puppeteer\lib\helper.js:94:19)
at Page.waitForNavigation (E:\code\generic_scrapper\node_modules\puppeteer\lib\Page.js:492:53)
at Page.<anonymous> (E:\code\generic_scrapper\node_modules\puppeteer\lib\helper.js:95:27)
at E:\code\generic_scrapper\post.js:39:13
at processTicksAndRejections (internal/process/task_queues.js:97:5)
(node:11528) UnhandledPromiseRejectionWarning: Unhandled promise rejection. This error originated either by throwing inside of an async function without a catch block, or by rejecting a promise which was not handled with .catch(). To terminate the node process on unhandled promise rejection, use the CLI flag `--unhandled-rejections=strict` (see https://nodejs.org/api/cli.html#cli_unhandled_rejections_mode). (rejection id: 1)
(node:11528) [DEP0018] DeprecationWarning: Unhandled promise rejections are deprecated. In the future, promise rejections that are not handled will terminate the Node.js process with a non-zero exit code.
如何解决此问题?我的直觉是等到页面加载完毕,然后单击并输入信息。
Edit1:
我删除了waitForNavigation(),这是我得到的例外情况
UnhandledPromiseRejectionWarning: Error: Protocol error (Network.getResponseBody): No resource with given identifier found
at E:\code\generic_scrapper\node_modules\puppeteer\lib\Connection.js:152:63
at new Promise (<anonymous>)
at CDPSession.send (E:\code\generic_scrapper\node_modules\puppeteer\lib\Connection.js:151:16)
at E:\code\generic_scrapper\node_modules\puppeteer\lib\HTTPResponse.js:58:53
at processTicksAndRejections (internal/process/task_queues.js:97:5)
at async HTTPResponse.text (E:\code\generic_scrapper\node_modules\puppeteer\lib\HTTPResponse.js:67:25)
at async E:\code\generic_scrapper\post.js:45:26
-- ASYNC --
at HTTPResponse.<anonymous> (E:\code\generic_scrapper\node_modules\puppeteer\lib\helper.js:94:19)
at E:\code\generic_scrapper\post.js:45:41
at processTicksAndRejections (internal/process/task_queues.js:97:5)
(node:15292) UnhandledPromiseRejectionWarning: Unhandled promise rejection. This error originated either by throwing inside of an async function without a catch block, or by rejecting a promise which was not handled with .catch(). To terminate the node process on unhandled promise rejection, use the CLI flag `--unhandled-rejections=strict` (see https://nodejs.org/api/cli.html#cli_unhandled_rejections_mode). (rejection id: 1)
(node:15292) [DEP0018] DeprecationWarning: Unhandled promise rejections are deprecated. In the future, promise rejections that are not handled will terminate the Node.js process with a non-zero exit code.
Edit2: 有或没有凭据,它都会登陆到该页面:https://www.ikinciyeni.com/giris但是,它应该登陆到这样的页面:https://www.ikinciyeni.com/konsinye-hesap-sonuc?tempId= ....
这可能是什么原因?可能是因为它没有发送凭据吗?
Edit3: 这是来自网站(Chrome)的请求
POST https://www.ikinciyeni.com/giris HTTP/1.1
Host: www.ikinciyeni.com
Connection: keep-alive
Content-Length: 296
Cache-Control: max-age=0
Upgrade-Insecure-Requests: 1
Origin: https://www.ikinciyeni.com
Content-Type: application/x-www-form-urlencoded
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9
Sec-Fetch-Site: same-origin
Sec-Fetch-Mode: navigate
Sec-Fetch-User: ?1
Sec-Fetch-Dest: document
Referer: https://www.ikinciyeni.com/giris?ReturnUrl=/konsinye-hesap-sonuc&tempId=2a26816b-62ed-4603-bfec-473342f58de7
Accept-Encoding: gzip, deflate, br
Accept-Language: en-US,en;q=0.9
Cookie: __gads=ID=b7b47e7d0a959312:T=1
__RequestVerificationToken=Amij.....
这就是Pupeteer的情况
POST https://www.ikinciyeni.com/giris HTTP/1.1
Host: www.ikinciyeni.com
Connection: keep-alive
Content-Length: 595
Pragma: no-cache
Cache-Control: no-cache
Content-Type: application/x-www-form-urlencoded
Sec-Fetch-Site: same-origin
Sec-Fetch-Mode: navigate
Sec-Fetch-User: ?1
Sec-Fetch-Dest: document
Referer: https://www.ikinciyeni.com/giris?ReturnUrl=/konsinye-hesap-sonuc&tempId=a92c918b-4745-4c79-9cd4-cb31084468b3
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.0 Safari/537.36
Accept-Encoding: gzip, deflate, br
Accept-Language: en-US,en;q=0.9
Cookie: ASP.NET_SessionId=mfy3sm5da1b1xdiro52dtqqn; Cookie_DimensionId=477094; NSC_xxx.jljodjzfoj.dpn_iuuqt=ffffffffaf1fc83045525d5f4f58455e445a4a423660; __RequestVerificationToken=-5v7ZraCIzfPQ4ViK6x1oEBjXzzxn9v0HV_q8lkdwCGiOOT9ChF7-N1Ya2tn3D1rbvIRdw1mw_BCPe50aluylMqS5qo1
HasSell=true&ModelYears=2005&MaxModelYearHidden=2020&Brands=BMW&ModelNames=SERIES+3&TransmissionTypes=Otomatik&FuelTypes=Dizel&CarCases=Sedan&Versions=320D+AUTO&HorsePowers=150&MaxHorsePowerHidden=163&IsDontKnowHorsePowerHidden=0&Km=5.000&IsDontKnowKmHidden=0&OuterDemage-1=1-1&OuterDemage-2=2-1&OuterDemage-3=3-1&OuterDemage-4=4-1&OuterDemage-5=5-1&OuterDemage-6=6-1&OuterDemage
在后一种情况下,我需要添加电子邮件地址和密码。
编辑: 浏览器本身生成的标头会被我在POST中提到的标头覆盖。有什么办法可以将它们添加到现有标头中,而不是覆盖它们?
答案 0 :(得分:0)
您首先需要拥有其网站的有效凭据。然后,您可以在脚本中添加一个条件:检查您是否在登录页面上,如果是,则像真实用户一样通过puppeteer键入凭据。通过单击输入字段来选择它们,然后使用api的keyboard.type
。 (您也可以使用keyboard.press()
在输入字段之间导航。)
如果您这样做,别忘了将凭据存储在环境变量中!
[...]
await page.goto('https://www.ikinciyeni.com/fiyatlandirici')
if ((await page.$('#EmailRetail')) !== null) {
// select and type the email
await page.click('#EmailRetail')
await page.keyboard.type('example@example.com')
// select and type the password
await page.click('#Password')
await page.keyboard.type('password0123')
// submit the form
await page.click('#LoginSubmitBtn')
}
const responseBody = await page.content()
console.log(responseBody)
[...]