import requests
from bs4 import BeautifulSoup
# Log in to the ICANN CZDAP site with a requests session, then fetch one
# zone-data download URL and print the raw response body.

# Browser-like User-Agent. Adjacent string literals are joined with no
# separator, so each fragment carries its own trailing space.
useragent = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) "
    "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.115 "
    "Safari/537.36"
)
ref = "https://czdap.icann.org/en"
org = "https://czdap.icann.org"
# NOTE(review): this handle is opened but never read or closed in the visible
# part of the script; kept so any later code relying on `source` still works.
source = open('./tldlocations.txt')
url = 'https://czdap.icann.org/en'
# The login form body is assembled as: pload1 + <form_build_id value> + pload2.
pload1 = "name=username&pass=pass&form_build_id="
pload2 = "&form_id=user_login_block&op=Log+in"
s = requests.Session()
r = s.get(url)
# Name the parser explicitly so the parse does not depend on which optional
# parsers happen to be installed.
soup = BeautifulSoup(r.content, "html.parser")
# We have to include the form_build_id in our post request.
# As in the original loop, the last matching <input> wins.
form_build_id = None
for field in soup.find_all("input"):
    # .get() avoids a KeyError on <input> tags that have no name attribute.
    if field.get("name") == "form_build_id":
        form_build_id = field["value"]
if form_build_id is None:
    raise RuntimeError("form_build_id input not found on " + url)
# Use the scraped value itself, not the literal text 'form_build_id'.
pload = pload1 + form_build_id + pload2
# Spoof some of the headers
s.headers.update({
    'Accept-Encoding': '',
    'Referer': ref,
    'Origin': org,
    'User-Agent': useragent,
    'Content-Type': 'application/x-www-form-urlencoded',
})
r = s.post(url, data=pload)
r = s.get('https://czdap.icann.org/en/download-zone-data/1885')
# r.content is the raw response bytes; for a compressed download this prints
# binary data. To keep the file, write these bytes to a file opened in 'wb'.
print(r.content)
我尝试用这段代码从 ICANN 网站下载一组文件,但始终无法让它正常工作。程序的输出是一大段看似 Unicode 的乱码,看起来正是我想要下载的 .txt.gz 文件的内容。
答案 0 :(得分:0)
在您的代码中,您只是发出了请求,并没有把返回的数据实际保存下来。请试试下面的代码;它未经测试,但应该可以完成下载,并将内容保存到名为 dlfile 的文件中:
# Download the zone file and save it to 'dlfile' without loading the whole
# body into memory.
# NOTE: relies on `s`, the requests.Session created by the login script.
r = s.get('https://czdap.icann.org/en/download-zone-data/1885', stream=True)
# Binary mode: the body is written byte-for-byte, with no text decoding.
with open('dlfile', 'wb') as f:
    for chunk in r.iter_content(chunk_size=1024):
        if chunk:  # filter out keep-alive new chunks
            # No per-chunk flush needed: leaving the `with` block flushes
            # and closes the file.
            f.write(chunk)
更多信息请参阅:http://docs.python-requests.org/en/latest/user/advanced/#body-content-workflow