I'm trying to use Python 2.7 for various tasks that require pulling data from the internet. I haven't had much success, and I'm looking for help diagnosing what I'm doing wrong.

First, I did manage to get pip working through the proxy by specifying it on the command line, e.g. pip install --proxy=http://username:password@someproxy.com:8080 numpy, so Python must be able to get through it!

However, when I write a .py script that should do the same thing, I have no success. I first tried urllib2 with the following code:
import urllib2

uri = "http://www.python.org"
http_proxy_server = "someproxyserver.com"
http_proxy_port = "8080"
http_proxy_realm = http_proxy_server
http_proxy_user = "username"
http_proxy_passwd = "password"

# Next line = "http://username:password@someproxyserver.com:8080"
http_proxy_full_auth_string = "http://%s:%s@%s:%s" % (http_proxy_user,
                                                      http_proxy_passwd,
                                                      http_proxy_server,
                                                      http_proxy_port)

def open_url_no_proxy():
    urllib2.urlopen(uri)
    print "Apparent success without proxy server!"

def open_url_installed_opener():
    proxy_handler = urllib2.ProxyHandler({"http": http_proxy_full_auth_string})
    opener = urllib2.build_opener(proxy_handler)
    urllib2.install_opener(opener)
    urllib2.urlopen(uri)
    print "Apparent success through proxy server!"

if __name__ == "__main__":
    open_url_no_proxy()
    open_url_installed_opener()
But I got this error:
URLError: <urlopen error [Errno 10060] A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond>
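For completeness, the urllib2 docs also describe a ProxyBasicAuthHandler that takes the credentials separately instead of embedding them in the proxy URL. Here is a sketch of how I understand that would be wired up, reusing the variables above (I haven't been able to verify this against my proxy, and the realm is a guess):

def open_url_basic_auth_handler():
    # Proxy without credentials in the URL
    proxy_url = "http://%s:%s" % (http_proxy_server, http_proxy_port)
    proxy_handler = urllib2.ProxyHandler({"http": proxy_url})
    # Register username/password for the proxy's realm separately
    auth_handler = urllib2.ProxyBasicAuthHandler()
    auth_handler.add_password(realm=http_proxy_realm,
                              uri=proxy_url,
                              user=http_proxy_user,
                              passwd=http_proxy_passwd)
    opener = urllib2.build_opener(proxy_handler, auth_handler)
    urllib2.install_opener(opener)
    urllib2.urlopen(uri)
    print "Apparent success through proxy server (basic auth handler)!"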
Then I tried urllib3, since that is the module pip uses to handle proxies:
from urllib3 import ProxyManager, make_headers
# Establish the Authentication Settings
default_headers = make_headers(basic_auth='username:password')
http = ProxyManager("https://www.proxy.com:8080/", headers=default_headers)
# Now you can use `http` as you would a normal PoolManager
r = http.request('GET', 'https://www.python.org/')
# Check data is from destination
print(r.data)
I got this error:
raise MaxRetryError(_pool, url, error or ResponseError(cause))
MaxRetryError: HTTPSConnectionPool(host='www.python.org', port=443): Max retries exceeded with url: / (Caused by ProxyError('Cannot connect to proxy.', error('Tunnel connection failed: 407 Proxy Authorization Required',)))
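From what I can tell, the 407 means the proxy itself never saw the credentials: make_headers(basic_auth=...) builds an Authorization header for the destination site, while the proxy expects a Proxy-Authorization header, which ProxyManager seems to take via a separate proxy_headers argument. A sketch of what I assume that would look like, with the same placeholder proxy and credentials (untested):

from urllib3 import ProxyManager, make_headers

# Proxy-Authorization header for the proxy, not Authorization for the target
proxy_auth_headers = make_headers(proxy_basic_auth='username:password')
http = ProxyManager("http://www.proxy.com:8080/", proxy_headers=proxy_auth_headers)

r = http.request('GET', 'https://www.python.org/')
print(r.status)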
I would really appreciate any help diagnosing this.
Answer 0 (score: 1)
The solution to my problem was to use the requests module; see the following thread: Proxies with Python 'Requests' module
mtt2p listed the code that worked for me.
import requests
import time

class BaseCheck():
    def __init__(self, url):
        self.http_proxy  = "http://user:pw@proxy:8080"
        self.https_proxy = "http://user:pw@proxy:8080"
        self.ftp_proxy   = "http://user:pw@proxy:8080"
        self.proxyDict = {
            "http"  : self.http_proxy,
            "https" : self.https_proxy,
            "ftp"   : self.ftp_proxy
        }
        self.url = url

        def makearr(tsteps):
            global stemps
            global steps
            stemps = {}
            for step in tsteps:
                stemps[step] = { 'start': 0, 'end': 0 }
            steps = tsteps
        makearr(['init','check'])

        def starttime(typ = ""):
            for stemp in stemps:
                if typ == "":
                    stemps[stemp]['start'] = time.time()
                else:
                    stemps[stemp][typ] = time.time()
        starttime()

    def __str__(self):
        return str(self.url)

    def getrequests(self):
        g = requests.get(self.url, proxies=self.proxyDict)
        print g.status_code
        print g.content
        print self.url
        stemps['init']['end'] = time.time()
        #print stemps['init']['end'] - stemps['init']['start']
        x = stemps['init']['end'] - stemps['init']['start']
        print x

test = BaseCheck(url='http://google.com')
test.getrequests()
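Stripped of the timing scaffolding, the part that actually fixed things for me boils down to passing a proxies dict to requests.get. A minimal sketch of just that, with placeholder credentials and proxy host:

import requests

# Same proxy URL format as above; replace user, pw and proxy with real values
proxies = {
    "http":  "http://user:pw@proxy:8080",
    "https": "http://user:pw@proxy:8080",
}

r = requests.get("http://www.python.org", proxies=proxies)
print r.status_code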