我有通过https下载xlsx文件的问题。
这里我的代码用来获取下载网址,但它似乎将我重定向到了一个新网站。但当我把链接放在我的浏览器中时,它会立即下载文件。
有什么东西我做错了吗?
这是我用来抓取网站的代码:
import contextlib
import OpenSSL.crypto
import os
import requests
import ssl
import tempfile
import http.client
import shutil
from OpenSSL import crypto
import pem
import html2text
url = "https://signonssl.site.com"
base_url = "basedownloadurl"
p12_cert = "cert_path"
password = "password"
@contextlib.contextmanager
def pfx_to_pem(p12_path, pfx_password):
''' Decrypts the .p12 file to be used with requests. '''
with tempfile.NamedTemporaryFile(suffix='.pem') as t_pem:
f_pem = open(t_pem.name, 'wb')
pfx = open(p12_path, 'rb').read()
p12 = OpenSSL.crypto.load_pkcs12(pfx, pfx_password)
f_pem.write(OpenSSL.crypto.dump_privatekey(OpenSSL.crypto.FILETYPE_PEM, p12.get_privatekey()))
f_pem.write(OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM, p12.get_certificate()))
ca = p12.get_ca_certificates()
f_pem.close()
yield t_pem.name
with pfx_to_pem(p12_cert, password) as cert:
html_response = requests.get(url, cert=cert).content.decode("utf-8")
htmlconv = html2text.html2text(html_response).split("name")[1]
dl_link = htmlconv.split(")")[0].split("(")[1]
dl = requests.get(dl_link, cert=cert, stream=True, allow_redirects=False)
output = open('test.xlsx', 'wb')
output.write(dl.content)
output.close()
非常感谢任何指导。
谢谢! PON