我正在尝试使用 Google Apps 脚本 (GAS) 登录外部网站 https://login.test-aankoop.be/SignIn?wa=wsignin1.0&wtrealm=eur://euroconsumers.pro.flinesc.nl-be/。目标是先通过身份验证,然后进行网页抓取。我已经做了一些研究,编写了代码,并在下面列出了相关信息。
一般步骤
问题
在浏览器中,使用无效凭据登录会导致显示 div .error-panel。 但是,当我运行 GAS 代码时,页面中不存在 div .error-panel。此外,网页的标题是“Fout”(英文 = 错误,另见 GAS 输出)意味着身份验证有问题。有人可以帮助我吗?我的 GAS 代码中遗漏了什么?
在浏览器中,无效凭据会导致警告 div .error-panel
HTML(来自 Firefox Web 开发者工具)
<div class="error-panel">De gebruikersnaam waarmee je probeert aan te melden is bij ons onbekend. Je moet eerst <a href="...')">een account aanmaken</a> voor je kunt aanmelden.</div>
GAS(Google Apps 脚本)
代码
/**
 * Tries to sign in to the test-aankoop.be login page and reports whether the
 * server answered with the "invalid credentials" error panel.
 *
 * Flow: GET the login page to obtain the session cookies and the anti-forgery
 * token, then POST the login form with those cookies and that token, and
 * finally inspect the returned HTML for a `.error-panel` element.
 *
 * Relies on the Apps Script services UrlFetchApp and Logger, on the Cheerio
 * library, and on the helpers GetReqVerTokenFromHtml and GetConstructedCookie.
 * Any exception is caught and logged; nothing is returned.
 */
function login() {
  const url = "https://login.test-aankoop.be/SignIn?wa=wsignin1.0&wtrealm=eur://euroconsumers.pro.flinesc.nl-be/"; //URL with login form (Dutch)
  try {
    //1. load the login form (make a GET)
    Logger.log("make a GET for %s", url);
    const formResponse = UrlFetchApp.fetch(url);
    // getAllHeaders() returns an attribute/value map of headers for the HTTP response,
    // with headers that have multiple values returned as arrays.
    const responseSetCookie = formResponse.getAllHeaders()['Set-Cookie'];
    Logger.log("Response headers - raw Set-Cookie \n %s", responseSetCookie);

    const payload = {
      "__RequestVerificationToken": GetReqVerTokenFromHtml(formResponse),
      "Identification": "bob@example.com", //not a real e-mail address
      "Password": "ThePassword", //not a real password
      // An object literal keeps only the last of two identical keys, so the field is
      // listed once. The login form's checkbox submits the literal value "true".
      "RememberMe": "true"
    };
    const headers = {
      "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
      "Accept-Encoding": "gzip, deflate, br",
      "Accept-Language": "en-US,en;q=0.5",
      "Connection": "keep-alive",
      "Cookie": GetConstructedCookie(responseSetCookie), //include the constructed cookie in the header for the POST
      "Origin": "https://login.test-aankoop.be",
      "Referer": "https://login.test-aankoop.be/SignIn?wa=wsignin1.0&wtrealm=eur%3A%2F%2Feuroconsumers.pro.flinesc.nl-be%2F",
      "TE": "Trailers",
      "Upgrade-Insecure-Requests": "1",
      "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0"
    };
    const parameters = {
      "headers": headers,
      "method": "post",
      "payload": payload
    };

    //2. submit the form (make a POST)
    Logger.log("make a POST for %s", url);
    const loginResponse = UrlFetchApp.fetch(url, parameters); //Note: even with invalid credentials the server returns status 200

    //a div with class 'error-panel' appears when user tries to authenticate with invalid credentials
    // Declared locally so the parsed document does not leak into the global scope.
    const $ = Cheerio.load(loginResponse.getContentText());
    if ($('.error-panel').html() == null) {
      Logger.log("No error panel found!");
      Logger.log("Title of page is %s", $("title").text()); //page title is fout ("fout" means Error in English)
    } else {
      Logger.log("OK error panel found");
    }
  } catch (ex) {
    Logger.log("something went wrong...\n %s", ex);
  }
}
/**
 * Reads the value of the hidden `__RequestVerificationToken` input inside the
 * `form.form-horizontal` element of a fetched page and logs it.
 *
 * @param {Object} response - Object exposing getContentText(), which yields the page HTML.
 * @returns {*} Whatever Cheerio's `.val()` yields for the token input.
 */
function GetReqVerTokenFromHtml(response) {
  const page = Cheerio.load(response.getContentText());
  const loginForm = page("form.form-horizontal");
  const tokenInput = loginForm.find("input[name='__RequestVerificationToken']");
  const token = tokenInput.val();
  Logger.log("the __RequestVerificationToken of the form is %s \t", token);
  return token;
}
/**
 * Builds the value for a `Cookie` request header from the `Set-Cookie` values
 * of a previous response, and logs the result.
 *
 * @param {string[]|string|null|undefined} responseSetCookie - The `Set-Cookie`
 *   entry taken from the response header map: an array when the header occurs
 *   several times, a single string when it occurs once, or null/undefined when
 *   the response set no cookie.
 * @returns {string} The unique `name=value` pairs followed by `ecpolicy=1`
 *   twice, joined with "; ".
 */
function GetConstructedCookie(responseSetCookie) {
  // Normalize to an array: iterating a lone string would walk it character by
  // character, and a missing header has no length at all.
  const rawCookies = responseSetCookie == null ? [] : [].concat(responseSetCookie);

  // Only the leading name=value pair of each Set-Cookie belongs in the request;
  // the attributes after the first ';' (path, HttpOnly, ...) are dropped.
  const pairs = rawCookies
    .map((cookie) => String(cookie).split(';')[0].trim())
    .filter((pair) => pair !== '');

  // Set removes repeated pairs while keeping first-seen order.
  const uniquePairs = [...new Set(pairs)];
  // ecpolicy=1 is intentionally appended twice, after de-duplication.
  uniquePairs.push("ecpolicy=1", "ecpolicy=1");

  const constructedCookie = uniquePairs.join("; ");
  Logger.log("Constructed cookie \n %s:", constructedCookie);
  return constructedCookie;
}
输出
make a GET for https://login.test-aankoop.be/SignIn?wa=wsignin1.0&wtrealm=eur://euroconsumers.pro.flinesc.nl-be/
Response headers - raw Set-Cookie
[ASP.NET_SessionId=a3sk22fbb0kbunu1dlbrmabj; path=/; HttpOnly; SameSite=None; Secure, ASP.NET_SessionId=a3sk22fbb0kbunu1dlbrmabj; path=/; HttpOnly; SameSite=None; Secure, beanContextCookie=latestMessage=d2E9d3NpZ25pbjEuMCZ3dHJlYWxtPWV1ciUzYSUyZiUyZmV1cm9jb25zdW1lcnMucHJvLmZsaW5lc2MubmwtYmUlMmY; path=/; HttpOnly; SameSite=None; Secure, __RequestVerificationToken=gKk6_0AFD9R9rXPAUsRh0LqDrG1-7JlrmHPAiBUX-wz0ojhKaacF3Yt9NFZvWggyv7ysv6cm4XGkbbKB6kFrMmRr1FgIEfKqup6_AD_luX41; path=/; HttpOnly; SameSite=None; Secure]
the __RequestVerificationToken of the form is 08bqWPQ995Sbm1qlqJCp8a1qkV-pvzfSDnUTnVEEg-M6NhQmcpNV_XXizKlCKsiKmyMrTpdb2xuW7witkILktmYsLqPNHIFeSAfQrS64qWk1
Constructed cookie
ASP.NET_SessionId=a3sk22fbb0kbunu1dlbrmabj; beanContextCookie=latestMessage=d2E9d3NpZ25pbjEuMCZ3dHJlYWxtPWV1ciUzYSUyZiUyZmV1cm9jb25zdW1lcnMucHJvLmZsaW5lc2MubmwtYmUlMmY; __RequestVerificationToken=gKk6_0AFD9R9rXPAUsRh0LqDrG1-7JlrmHPAiBUX-wz0ojhKaacF3Yt9NFZvWggyv7ysv6cm4XGkbbKB6kFrMmRr1FgIEfKqup6_AD_luX41; ecpolicy=1; ecpolicy=1:
make a POST for https://login.test-aankoop.be/SignIn?wa=wsignin1.0&wtrealm=eur://euroconsumers.pro.flinesc.nl-be/
No error panel found!
Title of page is Fout
标头
GET 原始响应标头
HTTP/2 200 OK
date: Sat, 10 Jul 2021 06:28:15 GMT
content-type: text/html; charset=utf-8
cache-control: private
vary: Accept-Encoding
p3p: CP="NONE"
set-cookie: ASP.NET_SessionId=...; path=/; HttpOnly; SameSite=None; Secure
ASP.NET_SessionId=...; path=/; HttpOnly; SameSite=None; Secure
beanContextCookie=latestMessage=...; path=/; HttpOnly; SameSite=None; Secure
__RequestVerificationToken=...; path=/; HttpOnly; SameSite=None; Secure
x-aspnetmvc-version: 5.2
x-frame-options: ALLOW-FROM https://www.test-aankoop.be/ https://*.test-aankoop.be
content-security-policy: upgrade-insecure-requests; frame-ancestors https://www.test-aankoop.be/ https://*.test-aankoop.be http://*.conseur.org
x-aspnet-version: 4.0.30319
request-context: appId=cid-v1:40f5f67e-1270-480e-b3cc-f108255e2977
access-control-expose-headers: Request-Context
x-powered-by: ASP.NET
cf-cache-status: DYNAMIC
expect-ct: max-age=604800, report-uri="https://report-uri.cloudflare.com/cdn-cgi/beacon/expect-ct"
strict-transport-security: max-age=15552000; includeSubDomains; preload
x-content-type-options: nosniff
server: cloudflare
cf-ray: 66c7bb19aa842dd6-BRU
content-encoding: br
alt-svc: h3-27=":443"; ma=86400, h3-28=":443"; ma=86400, h3-29=":443"; ma=86400, h3=":443"; ma=86400
X-Firefox-Spdy: h2
POST 原始请求标头
POST /SignIn?wa=wsignin1.0&wtrealm=eur%3A%2F%2Feuroconsumers.pro.flinesc.nl-be%2F%22 HTTP/1.1
Cookie: ASP.NET_SessionId=...; beanContextCookie=latestMessage=...; __RequestVerificationToken=...; ecpolicy=1; ecpolicy=1
Host: login.test-aankoop.be
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8
Accept-Language: en-US,en;q=0.5
Accept-Encoding: gzip, deflate, br
Content-Type: application/x-www-form-urlencoded
Content-Length: 212
Origin: https://login.test-aankoop.be
DNT: 1
Connection: keep-alive
Referer: https://login.test-aankoop.be/SignIn?wa=wsignin1.0&wtrealm=eur://euroconsumers.pro.flinesc.nl-be/%22
Upgrade-Insecure-Requests: 1
表单
HTML 代码段
<form action="/SignIn?wa=wsignin1.0&wtrealm=eur%3A%2F%2Feuroconsumers.pro.flinesc.nl-be%2F%22" class="form-horizontal" method="post">
<input name="__RequestVerificationToken" type="hidden" value="...">
<input name="Identification" type="text" > <!-- username or e-mail address-->
<input name="Password" type="password">
<input checked="checked" name="RememberMe" type="checkbox" value="true">
<input type="submit" class="btn" id="LoginButton">
</form>
原始数据请求
__RequestVerificationToken=...&Identification=...&Password=...&RememberMe=true&RememberMe=false