Question

import requests
from bs4 import BeautifulSoup

# Browser-like User-Agent sent with every request made through the session.
# Adjacent string literals are concatenated verbatim, so each piece must end
# with a space or the product tokens run together
# ("...10_10_1)AppleWebKit..." / "...2214.115Safari/...").
useragent = ("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) "
             "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.115 "
             "Safari/537.36")
ref = "https://czdap.icann.org/en"
org = "https://czdap.icann.org"
# NOTE(review): `source` is opened but never read or closed in this snippet.
# If it is used further down, prefer `with open(...) as source:` around that
# use so the handle is released.
source = open('./tldlocations.txt')

url = 'https://czdap.icann.org/en'

# URL-encoded login form body, split around the per-page form_build_id value.
pload1 = "name=username&pass=pass&form_build_id="
pload2 = "&form_id=user_login_block&op=Log+in"

s = requests.Session()
r = s.get(url)

# Name the parser explicitly so the result does not depend on which optional
# parsers happen to be installed.
soup = BeautifulSoup(r.content, "html.parser")

# We have to include the form_build_id in our post request.
form_build_id = None
for field in soup.find_all("input"):
    # .get() because not every <input> carries a name attribute.
    if field.get("name") == "form_build_id":
        form_build_id = field.get("value")

if form_build_id is None:
    raise RuntimeError("form_build_id input not found on " + url)

# Insert the scraped token itself, not the literal text 'form_build_id'.
# NOTE(review): the value is inserted unescaped; this assumes it contains
# only URL-safe characters -- confirm against the real page.
pload = pload1 + form_build_id + pload2

# Spoof some of the headers
s.headers.update({'Accept-Encoding': '', 'Referer': ref, 'Origin': org,
                  'User-Agent': useragent,
                  'Content-Type': 'application/x-www-form-urlencoded'})

r = s.post(url, data=pload)

r = s.get('https://czdap.icann.org/en/download-zone-data/1885')
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(r.content)

我尝试使用这段代码从 ICANN 网站下载一组文件，但无法让它正常工作。我得到的输出是一大段 Unicode 字符，看起来像是我想要下载的 .txt.gz 文件的内容。

Answer 1

在您的代码中，您只是发出了请求，并没有实际保存任何数据。试试下面这段代码：它未经测试，但应该可以下载文件，并将其保存到名为 dlfile 的文件中。

# Stream the response so the archive is written to disk chunk by chunk
# instead of being held in memory all at once.
r = s.get('https://czdap.icann.org/en/download-zone-data/1885', stream=True)
try:
    # Fail loudly on an HTTP error instead of saving an error page as the file.
    r.raise_for_status()

    with open('dlfile', 'wb') as f:
        for chunk in r.iter_content(chunk_size=1024):
            if chunk:  # filter out keep-alive new chunks
                # No per-chunk flush: the file object buffers writes and the
                # `with` block flushes and closes it on exit.
                f.write(chunk)
finally:
    # Release the streamed connection even if the download fails part-way.
    r.close()

更多信息可在此处找到：http://docs.python-requests.org/en/latest/user/advanced/#body-content-workflow

使用请求库下载文件的问题

1 个答案: