我有一个脚本,在运行时我收到一条错误消息:
urllib2.HTTPError: HTTP Error 400: Bad Request
你能帮助我吗?
from lxml import html
import urllib2
import urllib
# Collect proxy entries from the proxylist.me listing pages.
# The three lists are parallel: entry k of each one describes the same row.
ip_list = []
port_list = []
protocol_list = []

# Values appended to the listing URL; one request is made per value.
array = [20, 40]

for page in array:
    h = urllib2.urlopen('http://proxylist.me/proxys/index/' + str(page))
    try:
        HTML_CODE = h.read()
    finally:
        # Release the connection even if reading the body fails.
        h.close()

    tree = html.fromstring(HTML_CODE)
    for block in tree.xpath('//tbody/tr'):
        # Keep only the non-blank text nodes of the table row.  The
        # unpacking below expects exactly nine of them and raises
        # ValueError for a row with any other count, which keeps a changed
        # table layout from silently shifting the columns.
        ip, port, _, protocol, _, _, _, _, _ = [
            x.strip()
            for x in block.xpath('.//text()')
            if x.strip()
        ]
        # The values are already stripped strings, so they are stored as-is.
        ip_list.append(ip)
        port_list.append(port)
        protocol_list.append(protocol)

# The parenthesised form prints the same on Python 2 and is also valid
# Python 3 syntax.
print(ip_list)
我收到此错误:
Traceback (most recent call last):
File "C:/Users/PC0308-PC/Desktop/get_data_html.py", line 11, in <module>
h = urllib2.urlopen('http://proxylist.me/proxys/index/'+str(i))
File "C:\Python27\lib\urllib2.py", line 154, in urlopen
return opener.open(url, data, timeout)
File "C:\Python27\lib\urllib2.py", line 437, in open
response = meth(req, response)
File "C:\Python27\lib\urllib2.py", line 550, in http_response
'http', request, response, code, msg, hdrs)
File "C:\Python27\lib\urllib2.py", line 475, in error
return self._call_chain(*args)
File "C:\Python27\lib\urllib2.py", line 409, in _call_chain
result = func(*args)
File "C:\Python27\lib\urllib2.py", line 558, in http_error_default
raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
urllib2.HTTPError: HTTP Error 400: Bad Request
答案 0 :(得分:0)
# Values appended to the listing URL; one request is made per value.
array = [0, 20, 40]

# Results are appended to ip_list, port_list and protocol_list, which this
# fragment expects to exist already (it does not create them itself).
for item in array:
    h = urllib2.urlopen('http://proxylist.me/proxys/index/%s' % (item))
    try:
        HTML_CODE = h.read()
    finally:
        # Release the connection even if reading the body fails.
        h.close()

    tree = html.fromstring(HTML_CODE)
    for block in tree.xpath('//tbody/tr'):
        # Keep only the non-blank text nodes of the table row.  The
        # unpacking below expects exactly nine of them and raises
        # ValueError for a row with any other count, which keeps a changed
        # table layout from silently shifting the columns.
        ip, port, _, protocol, _, _, _, _, _ = [
            x.strip()
            for x in block.xpath('.//text()')
            if x.strip()
        ]
        # The values are already stripped strings, so they are stored as-is.
        ip_list.append(ip)
        port_list.append(port)
        protocol_list.append(protocol)

# The parenthesised form prints the same on Python 2 and is also valid
# Python 3 syntax.
print(ip_list)
这段代码在我的 Windows 机器上可以正常运行,能够解析 http://proxylist.me/proxys/index 的前 3 页。
顺便说一句,你的代码本身从一开始就能运行,只是它只解析了第一页。