Question

我想通过单击按钮来下载pdf文件，单击后该文件会在新标签页中打开。pdf文件的网址是隐藏的，因此我无法直接通过网址下载。

pdf文件在Web服务器上。我想以无头模式使用puppeteer下载它。

scrape.js

const fs = require('fs');
const puppeteer = require('puppeteer');


// set up, invoke the function, wait for the download to complete
let scrape = async () => {
    const browser = await puppeteer.launch({headless:true, ignoreHTTPSErrors: false, userDataDir: "./download", slowMo: 100}); // , dumpio: true, , executablePath: '/usr/bin/google-chrome-stable'



    const page = await browser.newPage();

    await page.goto('http://learningphp.example.com/openlink.php', {waitUntil: 'networkidle2'});


    //await page._client.send('Page.setDownloadBehavior', {behavior: 'allow', downloadPath: './'})
    await page.click('body > button');
    await page.waitFor(10 * 1000);

    let result = {key: 'ok'};


    browser.close();
    return result;
};

// Run the scraper; report failures instead of leaving an unhandled rejection.
scrape()
    .then((value) => {
        console.log(value); // Success!
    })
    .catch((err) => {
        console.error(err);
        process.exitCode = 1;
    });

openlink.php

<?php
// Landing page: a single button that opens download.php in a new tab.
// The PHP tag is closed before the markup; left open, the HTML below would be
// parsed as PHP code and fail with a parse error.
?>
<button id="link" class="downloadLink">
    Download it!
</button>

<script type="text/javascript">
// Open the download endpoint in a new tab when the button is clicked.
document.getElementById("link").addEventListener("click", function(){
    window.open("download.php",'_blank');
});
</script>

download.php

<?php
/**
 * Send sample.pdf to the client as an attachment download.
 *
 * Responds with 404 when the file does not exist.
 */
ob_start(); // Capture stray output so the headers below can still be sent.
$file = "sample.pdf";

if (!file_exists($file))
{
    // An explicit 404 instead of an empty 200 that looks like a successful download.
    ob_end_clean();
    http_response_code(404);
    exit();
}

header('Content-Description: File Transfer');
header('Content-Type: application/octet-stream');
// Quote the filename so names with spaces or separators are parsed as one value.
header('Content-Disposition: attachment; filename="' . basename($file) . '"');
header('Content-Transfer-Encoding: binary');
header('Expires: 0');
header('Cache-Control: must-revalidate');
header('Pragma: public');
header('Content-Length: ' . filesize($file));

// Drop every active output buffer so readfile() streams the file instead of
// accumulating all of it in memory first.
while (ob_get_level() > 0)
{
    ob_end_clean();
}
readfile($file);
exit();

如果headless为假，我可以下载pdf文件

使用puppeteer headless模式在新选项卡中打开时下载pdf文件

0 个答案: