import urllib2
def download(url,user_agent = 'wswp',num_retries=2):
print 'downloading:',url
headers = {'User-Agent': 'Mozilla/5.0'}
request = urllib2.Request(url,headers=headers)
try:
html = urllib2.urlopen(request).read()
except urllib2.URLError as e:
print "download error:"
html = None
if num_retries>0:
if hasattr(e,'code') and 500<=e.code<600:
print "e.code = ",e.code
return download(url,num_retries-1)
return html
print download("http://www.huaru.cc/mobile/product/xsim.html")
结果:
C:\Python27\python.exe E:/py2_7/untitled1/secondClass_Agent
downloading: http://www.huaru.cc/mobile/product/xsim.html
处理完成,退出代码为0
答案 0 :(得分:0)
在Python中,缩进是关键。
import urllib2
def download(url, user_agent='wswp', num_retries=2):
    """Download ``url`` and return the response body, or None on failure.

    Args:
        url: Address of the page to fetch.
        user_agent: Accepted for interface compatibility and forwarded to
            retry attempts. NOTE(review): the request always sends the
            fixed 'Mozilla/5.0' User-Agent header, so this value is not
            applied to the request -- confirm whether it should be.
        num_retries: Number of further attempts allowed after a response
            whose HTTP status code is in the 5xx range.

    Returns:
        The data returned by ``read()`` on the opened URL, or None when
        ``urllib2.URLError`` was raised and no retry produced a result.
    """
    print('downloading:', url)
    headers = {'User-Agent': 'Mozilla/5.0'}
    request = urllib2.Request(url, headers=headers)
    try:
        html = urllib2.urlopen(request).read()
    except urllib2.URLError as e:
        print("download error: {}".format(e))
        html = None
        # Retry only on 5xx status codes; errors without a ``code``
        # attribute and non-5xx codes (e.g. 404) fall through and
        # return None.
        if num_retries > 0 and hasattr(e, 'code') and 500 <= e.code < 600:
            print("e.code = ", e.code)
            # user_agent must be passed explicitly: as the second positional
            # argument, the decremented counter would be bound to user_agent
            # and num_retries would reset to its default on every call,
            # so the recursion would never terminate on a persistent 5xx.
            return download(url, user_agent, num_retries - 1)
    return html
print download("http://www.huaru.cc/mobile/product/xsim.html")
它显示如下:
('downloading:', 'http://www.huaru.cc/mobile/product/xsim.html')
download error: HTTP Error 404: Not Found
None
这是因为网页返回404。
已在 Python 2.7.10 和 3.6 上进行了测试。