我想用 curl 抓取像 this 这样的 URL。(如果您没有该网站的 Cookie,在欧盟国家/地区访问此类 URL 之前需要先完成同意确认。)
我拼凑了一个可以完成此操作的 Puppeteer 脚本,但在我看来它显得很笨重。有更好的解决方案吗?
#!/usr/bin/env node
const url = process.argv[2];
const puppeteer = require('puppeteer');

// First entry of the consent text list inside the consent wizard; the script
// clicks it before submitting the form.
const CONSENT_ITEM_SELECTOR =
  '.con-wizard > .wizard-body > #consent-text > .content-list > .list-item:nth-child(1)';
// Primary button of the consent form.
const CONSENT_SUBMIT_SELECTOR =
  '.con-wizard > .wizard-body > .actions > .consent-form > .primary';
// Seconds to wait after submitting the consent form when `cfTimeout` is unset.
const DEFAULT_SETTLE_SECONDS = 20;

/**
 * Converts the `cfTimeout` environment value (seconds) into milliseconds.
 *
 * @param {string | undefined} rawSeconds - Raw value of `process.env.cfTimeout`.
 * @returns {number} Delay in milliseconds; the 20-second default is used when
 *   the value is missing, empty, non-numeric or negative.
 */
function resolveSettleDelayMs(rawSeconds) {
  const seconds = rawSeconds ? Number(rawSeconds) : DEFAULT_SETTLE_SECONDS;
  const isUsable = Number.isFinite(seconds) && seconds >= 0;
  return (isUsable ? seconds : DEFAULT_SETTLE_SECONDS) * 1000;
}

/**
 * Resolves after `ms` milliseconds. Used instead of `page.waitFor(ms)`, which
 * is deprecated and no longer available in current Puppeteer releases.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

/**
 * Opens the URL given as the first CLI argument in a headless browser, clicks
 * through the consent wizard, waits for the page to settle and prints the
 * resulting HTML to stdout.
 */
(async () => {
  if (!url) {
    console.error('usage: <script> URL');
    process.exitCode = 1;
    return;
  }

  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto(url);

    await page.waitForSelector(CONSENT_ITEM_SELECTOR);
    await page.click(CONSENT_ITEM_SELECTOR);

    await page.waitForSelector(CONSENT_SUBMIT_SELECTOR);
    await page.click(CONSENT_SUBMIT_SELECTOR);

    // Fixed delay to let the post-consent navigation finish before reading
    // the page content.
    await sleep(resolveSettleDelayMs(process.env.cfTimeout));

    const html = await page.content();
    console.log(html);
  } finally {
    // Always release the browser process, even when a step above throws.
    await browser.close();
  }
})().catch((err) => {
  // Report the failure and signal it through the exit code instead of
  // leaving an unhandled promise rejection.
  console.error(err);
  process.exitCode = 1;
});
答案 0 :(得分:0)
通过监视Chrome的“网络”面板中的请求,我成功地做到了这一点:
# techcrunch-curl URL
#
# Fetches URL with curl and writes the response body to stdout. If following
# redirects from URL ends on a consent.yahoo.com "collectConsent" page, the
# consent form is submitted for that session (with --location, so curl keeps
# following redirects after the POST); otherwise URL is fetched directly.
#
# Arguments:
#   $1  URL to fetch (required; the function aborts via ${1:?} when missing).
#
# Requires zsh (uses the `match` array and `=()` substitution) and an external
# `url-encode.py` command on PATH that reads text on stdin.
function techcrunch-curl() {
  local url="${1:?}"
  # --location is needed so %{url_effective} reports the final URL after
  # redirects rather than the URL that was passed in. --silent/--show-error
  # hide the progress meter but still report failures.
  local con="$(curl --silent --show-error --location -o /dev/null -w '%{url_effective}' "$url")"
  if [[ "$con" =~ 'https://consent\.yahoo\.com/v2/collectConsent\?sessionId=(.*)' ]] ; then
    # Everything after "sessionId=" in the consent URL.
    local sid="${match[1]}"
    # The headers and form fields below mirror a browser request.
    # NOTE(review): the here-string feeding url-encode.py appends a trailing
    # newline; presumably the helper strips it - confirm.
    curl -o /dev/stdout --fail --location --cookie-jar =() 'https://consent.yahoo.com/v2/collectConsent?sessionId='"$sid" \
      -H 'Connection: keep-alive' \
      -H 'Pragma: no-cache' \
      -H 'Cache-Control: no-cache' \
      -H 'Origin: https://consent.yahoo.com' \
      -H 'Upgrade-Insecure-Requests: 1' \
      -H 'DNT: 1' \
      -H 'Content-Type: application/x-www-form-urlencoded' \
      -H 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36' \
      -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9' \
      -H 'Sec-Fetch-Site: same-origin' \
      -H 'Sec-Fetch-Mode: navigate' \
      -H 'Sec-Fetch-User: ?1' \
      -H 'Sec-Fetch-Dest: document' \
      -H 'Referer: https://consent.yahoo.com/v2/collectConsent?sessionId='"$sid" \
      -H 'Accept-Language: en-US,en;q=0.9' \
      --data-raw 'sessionId='"$sid"'&originalDoneUrl='"$(<<<$url url-encode.py)"'&namespace=techcrunch&agree=agree&agree=agree' \
      --compressed
  else
    # No consent page: fetch the URL directly, following redirects the same
    # way the probe above did.
    curl --location "$url"
  fi
}