在重定向请求的情况下如何登录

时间:2020-06-11 14:05:28

标签: node.js http http-post puppeteer

我有这个基本的 POST 请求:

 'use strict';

const puppeteer = require('puppeteer');
const request_client = require('request-promise-native');

// Script entry point: opens a visible browser, turns on request
// interception, continues intercepted requests as a form-encoded POST,
// navigates to the target URL and prints the body of the response
// returned by page.goto().
(async () => {

  // Create browser instance, and give it a first tab
    const browser = await puppeteer.launch({headless: false});
    const page = await browser.newPage();

    // Allows you to intercept a request; must appear before
    // your first page.goto()
    await page.setRequestInterception(true);

    // Request intercept handler: runs for each 'request' event the page
    // emits while interception is enabled.
    // NOTE(review): the same overrides below are applied to every request
    // this handler sees, not only to the first navigation - confirm that
    // this is intended.
    page.on('request', interceptedRequest => {

        // Overrides passed to continue(): the request method, the
        // url-encoded form body, and the request headers.
        // NOTE(review): only Content-Type is listed in `headers`; these
        // presumably replace the request's own headers rather than being
        // merged into them - verify against the Puppeteer documentation.
        var data = {
            'method': 'POST',
            'postData': 'HasSell=true&ModelYears=2005&MaxModelYearHidden=2020&Brands=BMW&ModelNames=SERIES+3&TransmissionTypes=Otomatik&FuelTypes=Dizel&CarCases=Sedan&Versions=320D+AUTO&HorsePowers=150&MaxHorsePowerHidden=163&IsDontKnowHorsePowerHidden=0&Km=5.000&IsDontKnowKmHidden=0&OuterDemage-1=1-1&OuterDemage-2=2-1&OuterDemage-3=3-1&OuterDemage-4=4-1&OuterDemage-5=5-1&OuterDemage-6=6-1&OuterDemage-7=7-1&OuterDemage-8=8-1&OuterDemage-9=9-1&OuterDemage-10=10-1&OuterDemage-11=11-1&EquipmentCheckBoxSIS+FARI=SIS+FARI&EquipmentCheckBoxSUNROOF=SUNROOF&EquipmentCheckBoxYOL+BILGISAYARI=YOL+BILGISAYARI&progressValue=10',
            headers: {

            "Content-Type": "application/x-www-form-urlencoded"
          }
       };

        // Let the intercepted request proceed with the overrides applied
        interceptedRequest.continue(data);
    });

    // Navigate (which triggers the intercept handler) and read the body
    // of the response object that page.goto() resolves with
    const response = await page.goto('https://www.ikinciyeni.com/fiyatlandirici');     
    const responseBody = await response.text();
    console.log(responseBody);

    // Closing the browser is currently disabled, so the window stays open
    //await browser.close();

    })();

在上面的代码中,我已给出了 POST 请求要发送到的 URL。该请求会被重定向,并最终落到登录页面。在这种情况下如何登录?如何提供登录凭据?

编辑: 我在最后添加了以下代码:

// Navigate, trigger the intercept, and resolve the response
    const response = await page.goto('https://www.ikinciyeni.com/fiyatlandirici'); 
    // NOTE(review): page.goto() is already awaited above, so this waits
    // for a further navigation; if none starts it will presumably run
    // into the navigation timeout - confirm.
    await page.waitForNavigation({
      waitUntil: 'networkidle0',
    });
    // Fill in the login form fields, addressed by their element ids
    await page.type('#EmailRetail', 'scott');
    await page.type('#Password', 'tiger');

    // Submit the form, then wait for a navigation.
    // NOTE(review): the wait is only registered after the click has
    // resolved, so a navigation that starts quickly could be missed -
    // verify.
    await page.click('#LoginSubmitBtn');
    await page.waitForNavigation();
    // `response` is the object returned by the page.goto() call above,
    // not a response for the page reached after the login submit.
    const responseBody = await response.text();
    console.log(responseBody);

但是它抛出了这个异常:

 UnhandledPromiseRejectionWarning: TimeoutError: Navigation timeout of 30000 ms exceeded
    at E:\code\generic_scrapper\node_modules\puppeteer\lib\LifecycleWatcher.js:100:111
    at async FrameManager.waitForFrameNavigation (E:\code\generic_scrapper\node_modules\puppeteer\lib\FrameManager.js:107:23)
    at async Frame.waitForNavigation (E:\code\generic_scrapper\node_modules\puppeteer\lib\FrameManager.js:298:16)
    at async Page.waitForNavigation (E:\code\generic_scrapper\node_modules\puppeteer\lib\Page.js:492:16)
    at async E:\code\generic_scrapper\post.js:39:2
  -- ASYNC --
    at Frame.<anonymous> (E:\code\generic_scrapper\node_modules\puppeteer\lib\helper.js:94:19)
    at Page.waitForNavigation (E:\code\generic_scrapper\node_modules\puppeteer\lib\Page.js:492:53)
    at Page.<anonymous> (E:\code\generic_scrapper\node_modules\puppeteer\lib\helper.js:95:27)
    at E:\code\generic_scrapper\post.js:39:13
    at processTicksAndRejections (internal/process/task_queues.js:97:5)
(node:11400) UnhandledPromiseRejectionWarning: Unhandled promise rejection. This error originated either by throwing inside of an async function without a catch block, or by rejecting a promise which was not handled with .catch(). To terminate the node process on unhandled promise rejection, use the CLI flag `--unhandled-rejections=strict` (see https://nodejs.org/api/cli.html#cli_unhandled_rejections_mode). (rejection id: 1)
(node:11400) [DEP0018] DeprecationWarning: Unhandled promise rejections are deprecated. In the future, promise rejections that are not handled will terminate the Node.js process with a non-zero exit code.
^C
E:\code\generic_scrapper>node post.js
(node:18352) UnhandledPromiseRejectionWarning: TimeoutError: Navigation timeout of 30000 ms exceeded
    at E:\code\generic_scrapper\node_modules\puppeteer\lib\LifecycleWatcher.js:100:111
    at async FrameManager.waitForFrameNavigation (E:\code\generic_scrapper\node_modules\puppeteer\lib\FrameManager.js:107:23)
    at async Frame.waitForNavigation (E:\code\generic_scrapper\node_modules\puppeteer\lib\FrameManager.js:298:16)
    at async Page.waitForNavigation (E:\code\generic_scrapper\node_modules\puppeteer\lib\Page.js:492:16)
    at async E:\code\generic_scrapper\post.js:39:2
  -- ASYNC --
    at Frame.<anonymous> (E:\code\generic_scrapper\node_modules\puppeteer\lib\helper.js:94:19)
    at Page.waitForNavigation (E:\code\generic_scrapper\node_modules\puppeteer\lib\Page.js:492:53)
    at Page.<anonymous> (E:\code\generic_scrapper\node_modules\puppeteer\lib\helper.js:95:27)
    at E:\code\generic_scrapper\post.js:39:13
    at processTicksAndRejections (internal/process/task_queues.js:97:5)
(node:18352) UnhandledPromiseRejectionWarning: Unhandled promise rejection. This error originated either by throwing inside of an async function without a catch block, or by rejecting a promise which was not handled with .catch(). To terminate the node process on unhandled promise rejection, use the CLI flag `--unhandled-rejections=strict` (see https://nodejs.org/api/cli.html#cli_unhandled_rejections_mode). (rejection id: 1)
(node:18352) [DEP0018] DeprecationWarning: Unhandled promise rejections are deprecated. In the future, promise rejections that are not handled will terminate the Node.js process with a non-zero exit code.
^C
E:\code\generic_scrapper>node post.js
(node:11528) UnhandledPromiseRejectionWarning: TimeoutError: Navigation timeout of 30000 ms exceeded
    at E:\code\generic_scrapper\node_modules\puppeteer\lib\LifecycleWatcher.js:100:111
    at async FrameManager.waitForFrameNavigation (E:\code\generic_scrapper\node_modules\puppeteer\lib\FrameManager.js:107:23)
    at async Frame.waitForNavigation (E:\code\generic_scrapper\node_modules\puppeteer\lib\FrameManager.js:298:16)
    at async Page.waitForNavigation (E:\code\generic_scrapper\node_modules\puppeteer\lib\Page.js:492:16)
    at async E:\code\generic_scrapper\post.js:39:2
  -- ASYNC --
    at Frame.<anonymous> (E:\code\generic_scrapper\node_modules\puppeteer\lib\helper.js:94:19)
    at Page.waitForNavigation (E:\code\generic_scrapper\node_modules\puppeteer\lib\Page.js:492:53)
    at Page.<anonymous> (E:\code\generic_scrapper\node_modules\puppeteer\lib\helper.js:95:27)
    at E:\code\generic_scrapper\post.js:39:13
    at processTicksAndRejections (internal/process/task_queues.js:97:5)
(node:11528) UnhandledPromiseRejectionWarning: Unhandled promise rejection. This error originated either by throwing inside of an async function without a catch block, or by rejecting a promise which was not handled with .catch(). To terminate the node process on unhandled promise rejection, use the CLI flag `--unhandled-rejections=strict` (see https://nodejs.org/api/cli.html#cli_unhandled_rejections_mode). (rejection id: 1)
(node:11528) [DEP0018] DeprecationWarning: Unhandled promise rejections are deprecated. In the future, promise rejections that are not handled will terminate the Node.js process with a non-zero exit code.

如何解决此问题?我的直觉是等到页面加载完毕,然后单击并输入信息。

Edit1:

我删除了waitForNavigation(),这是我得到的异常:

UnhandledPromiseRejectionWarning: Error: Protocol error (Network.getResponseBody): No resource with given identifier found
    at E:\code\generic_scrapper\node_modules\puppeteer\lib\Connection.js:152:63
    at new Promise (<anonymous>)
    at CDPSession.send (E:\code\generic_scrapper\node_modules\puppeteer\lib\Connection.js:151:16)
    at E:\code\generic_scrapper\node_modules\puppeteer\lib\HTTPResponse.js:58:53
    at processTicksAndRejections (internal/process/task_queues.js:97:5)
    at async HTTPResponse.text (E:\code\generic_scrapper\node_modules\puppeteer\lib\HTTPResponse.js:67:25)
    at async E:\code\generic_scrapper\post.js:45:26
  -- ASYNC --
    at HTTPResponse.<anonymous> (E:\code\generic_scrapper\node_modules\puppeteer\lib\helper.js:94:19)
    at E:\code\generic_scrapper\post.js:45:41
    at processTicksAndRejections (internal/process/task_queues.js:97:5)
(node:15292) UnhandledPromiseRejectionWarning: Unhandled promise rejection. This error originated either by throwing inside of an async function without a catch block, or by rejecting a promise which was not handled with .catch(). To terminate the node process on unhandled promise rejection, use the CLI flag `--unhandled-rejections=strict` (see https://nodejs.org/api/cli.html#cli_unhandled_rejections_mode). (rejection id: 1)
(node:15292) [DEP0018] DeprecationWarning: Unhandled promise rejections are deprecated. In the future, promise rejections that are not handled will terminate the Node.js process with a non-zero exit code.

Edit2: 有或没有凭据,它都会登陆到该页面:https://www.ikinciyeni.com/giris 。但是,它应该登陆到这样的页面:https://www.ikinciyeni.com/konsinye-hesap-sonuc?tempId= ....

这可能是什么原因?可能是因为它没有发送凭据吗?

Edit3: 这是来自网站(Chrome)的请求

POST https://www.ikinciyeni.com/giris HTTP/1.1
Host: www.ikinciyeni.com
Connection: keep-alive
Content-Length: 296
Cache-Control: max-age=0
Upgrade-Insecure-Requests: 1
Origin: https://www.ikinciyeni.com
Content-Type: application/x-www-form-urlencoded
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9
Sec-Fetch-Site: same-origin
Sec-Fetch-Mode: navigate
Sec-Fetch-User: ?1
Sec-Fetch-Dest: document
Referer: https://www.ikinciyeni.com/giris?ReturnUrl=/konsinye-hesap-sonuc&tempId=2a26816b-62ed-4603-bfec-473342f58de7
Accept-Encoding: gzip, deflate, br
Accept-Language: en-US,en;q=0.9
Cookie: __gads=ID=b7b47e7d0a959312:T=1
__RequestVerificationToken=Amij.....

这就是Puppeteer的情况

POST https://www.ikinciyeni.com/giris HTTP/1.1
Host: www.ikinciyeni.com
Connection: keep-alive
Content-Length: 595
Pragma: no-cache
Cache-Control: no-cache
Content-Type: application/x-www-form-urlencoded
Sec-Fetch-Site: same-origin
Sec-Fetch-Mode: navigate
Sec-Fetch-User: ?1
Sec-Fetch-Dest: document
Referer: https://www.ikinciyeni.com/giris?ReturnUrl=/konsinye-hesap-sonuc&tempId=a92c918b-4745-4c79-9cd4-cb31084468b3
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.0 Safari/537.36
Accept-Encoding: gzip, deflate, br
Accept-Language: en-US,en;q=0.9
Cookie: ASP.NET_SessionId=mfy3sm5da1b1xdiro52dtqqn; Cookie_DimensionId=477094; NSC_xxx.jljodjzfoj.dpn_iuuqt=ffffffffaf1fc83045525d5f4f58455e445a4a423660; __RequestVerificationToken=-5v7ZraCIzfPQ4ViK6x1oEBjXzzxn9v0HV_q8lkdwCGiOOT9ChF7-N1Ya2tn3D1rbvIRdw1mw_BCPe50aluylMqS5qo1

HasSell=true&ModelYears=2005&MaxModelYearHidden=2020&Brands=BMW&ModelNames=SERIES+3&TransmissionTypes=Otomatik&FuelTypes=Dizel&CarCases=Sedan&Versions=320D+AUTO&HorsePowers=150&MaxHorsePowerHidden=163&IsDontKnowHorsePowerHidden=0&Km=5.000&IsDontKnowKmHidden=0&OuterDemage-1=1-1&OuterDemage-2=2-1&OuterDemage-3=3-1&OuterDemage-4=4-1&OuterDemage-5=5-1&OuterDemage-6=6-1&OuterDemage

在后一种情况下,我需要添加电子邮件地址和密码。

编辑: 浏览器本身生成的标头会被我在POST中提到的标头覆盖。有什么办法可以将它们添加到现有标头中,而不是覆盖它们?

1 个答案:

答案 0 :(得分:0)

您首先需要拥有其网站的有效凭据。然后,您可以在脚本中添加一个条件:检查您是否在登录页面上,如果是,则像真实用户一样通过puppeteer键入凭据。通过单击输入字段来选择它们,然后使用api的keyboard.type。 (您也可以使用keyboard.press()在输入字段之间导航。)

如果您这样做,别忘了将凭据存储在环境变量中!

[...]
  await page.goto('https://www.ikinciyeni.com/fiyatlandirici')

  // The login form is recognised by its e-mail field; when that field is
  // absent the login step is skipped entirely.
  if ((await page.$('#EmailRetail')) !== null) {
    // Read the credentials from the environment (LOGIN_EMAIL and
    // LOGIN_PASSWORD) so real values never have to live in the script;
    // the literals are only placeholders used when the variables are unset.
    const email = process.env.LOGIN_EMAIL || 'example@example.com'
    const password = process.env.LOGIN_PASSWORD || 'password0123'

    // select and type the email
    await page.click('#EmailRetail')
    await page.keyboard.type(email)
    // select and type the password
    await page.click('#Password')
    await page.keyboard.type(password)
    // submit the form; the navigation wait is registered together with the
    // click so the page load triggered by the submit cannot be missed and
    // the content below is read only after that navigation has finished
    await Promise.all([
      page.waitForNavigation(),
      page.click('#LoginSubmitBtn'),
    ])
  }

  // Serialized HTML of whatever page is loaded at this point
  const responseBody = await page.content()
  console.log(responseBody)
[...]