Question

我有通过https下载xlsx文件的问题。

这里我的代码用来获取下载网址，但它似乎将我重定向到了一个新网站。但当我把链接放在我的浏览器中时，它会立即下载文件。

有什么东西我做错了吗？

这是我用来抓取网站的代码：

import contextlib
import OpenSSL.crypto
import os
import requests
import ssl
import tempfile
import http.client
import shutil
from OpenSSL import crypto
import pem
import html2text

url = "https://signonssl.site.com"
base_url = "basedownloadurl"

p12_cert = "cert_path"
password = "password"

@contextlib.contextmanager
def pfx_to_pem(p12_path, pfx_password):
    ''' Decrypts the .p12 file to be used with requests. '''
    with tempfile.NamedTemporaryFile(suffix='.pem') as t_pem:
        f_pem = open(t_pem.name, 'wb')
        pfx = open(p12_path, 'rb').read()
        p12 = OpenSSL.crypto.load_pkcs12(pfx, pfx_password)
        f_pem.write(OpenSSL.crypto.dump_privatekey(OpenSSL.crypto.FILETYPE_PEM, p12.get_privatekey()))
        f_pem.write(OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM, p12.get_certificate()))
        ca = p12.get_ca_certificates()
        f_pem.close()
        yield t_pem.name

with pfx_to_pem(p12_cert, password) as cert:

     html_response = requests.get(url, cert=cert).content.decode("utf-8")
     htmlconv = html2text.html2text(html_response).split("name")[1]
     dl_link = htmlconv.split(")")[0].split("(")[1]

     dl = requests.get(dl_link, cert=cert, stream=True, allow_redirects=False)

     output = open('test.xlsx', 'wb')
     output.write(dl.content)
     output.close()

非常感谢任何指导。

谢谢！ PON

下载带有pem证书请求的xlsx

0 个答案: