我正在编写的脚本需要多次更改我的IP,并使用Tor浏览器访问网站。更改IP的部分已经可以正常工作,但在同时使用Selenium和代理时出错。我的代码是:
import socket
import socks
import httplib
from subprocess import check_call
from selenium import webdriver
from selenium.webdriver.common.proxy import *
from selenium.webdriver.support.wait import WebDriverWait
def connectTor():
    """Send every socket created from now on through the local Tor SOCKS5 proxy.

    Registers 127.0.0.1:9150 as the default proxy of the ``socks`` module and
    then replaces ``socket.socket`` with ``socks.socksocket``.  The replacement
    is process-wide and this function never undoes it, so any library that
    opens a socket afterwards (not only the IP check) is proxied as well.
    """
    tor_host, tor_port = "127.0.0.1", 9150
    # NOTE(review): the fourth positional argument is presumably the
    # remote-DNS flag of SocksiPy's setdefaultproxy -- confirm against the
    # installed socks module.
    socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, tor_host, tor_port, True)
    socket.socket = socks.socksocket
def newIdentity():
    """Send SIGHUP to every process named ``tor``, then call connectTor().

    Raises:
        subprocess.CalledProcessError: if ``killall`` exits with a non-zero
            status.
    """
    hup_command = ["killall", "-HUP", "tor"]
    check_call(hup_command)
    connectTor()
def showIP():
    """Print the body returned by ``GET /`` on my-ip.herokuapp.com.

    The connection is opened with whatever ``socket.socket`` is installed at
    call time, so the printed address reflects the proxy currently in effect.
    The connection is always closed, even when the request or the read fails.
    """
    conn = httplib.HTTPConnection("my-ip.herokuapp.com")
    try:
        conn.request("GET", "/")
        response = conn.getresponse()
        print(response.read())
    finally:
        # Release the underlying socket instead of leaking one per call.
        conn.close()
def process():
    """Open https://www.google.bg in Firefox through the local HTTP proxy.

    Firefox is started with a manual proxy configuration that points HTTP,
    FTP and SSL traffic at 127.0.0.1:8118.  The browser is always shut down
    afterwards, even if loading the page raises.
    """
    url = "https://www.google.bg"
    port = "8118"  # The Privoxy (HTTP) port
    myProxy = "127.0.0.1:" + port
    proxy = Proxy({
        'proxyType': ProxyType.MANUAL,
        'httpProxy': myProxy,
        'ftpProxy': myProxy,
        'sslProxy': myProxy,
        'noProxy': ''
    })
    browser = webdriver.Firefox(proxy=proxy)
    try:
        browser.get(url)
        # NOTE(review): constructing WebDriverWait does not pause by itself;
        # it only waits when .until(...) / .until_not(...) is called on it.
        WebDriverWait(browser, 10)
    finally:
        # quit() ends the whole WebDriver session; close() would only close
        # the current window and was skipped entirely when get() raised.
        browser.quit()
def main():
    """Show the IP and load the page once, request a new identity, then repeat.

    Sequence: connectTor(), showIP(), process(), newIdentity(), showIP(),
    process(), with a progress message printed before each round.
    """
    def check_ip_then_browse():
        # One round: report the current exit IP, then drive the browser.
        showIP()
        process()

    connectTor()
    print("Connected to Tor")
    check_ip_then_browse()

    print("Hew Id is")
    newIdentity()
    check_ip_then_browse()
# Run the demo only when this file is executed as a script, so that importing
# it does not launch Tor requests and a browser as a side effect.
if __name__ == "__main__":
    main()
我得到的回溯(traceback)信息是:
Traceback (most recent call last):
File "/home/peter/.spyder2/.temp.py", line 60, in <module>
main()
File "/home/peter/.spyder2/.temp.py", line 53, in main
process()
File "/home/peter/.spyder2/.temp.py", line 43, in process
browser = webdriver.Firefox(proxy=proxy)
File "/usr/local/lib/python2.7/dist-packages/selenium/webdriver/firefox/webdriver.py", line 59, in __init__
self.binary, timeout),
File "/usr/local/lib/python2.7/dist-packages/selenium/webdriver/firefox/extension_connection.py", line 47, in __init__
self.binary.launch_browser(self.profile)
File "/usr/local/lib/python2.7/dist-packages/selenium/webdriver/firefox/firefox_binary.py", line 66, in launch_browser
self._wait_until_connectable()
File "/usr/local/lib/python2.7/dist-packages/selenium/webdriver/firefox/firefox_binary.py", line 97, in _wait_until_connectable
while not utils.is_connectable(self.profile.port):
File "/usr/local/lib/python2.7/dist-packages/selenium/webdriver/common/utils.py", line 43, in is_connectable
socket_.connect(("127.0.0.1", port))
File "/usr/lib/python2.7/dist-packages/socks.py", line 369, in connect
self.__negotiatesocks5(destpair[0],destpair[1])
File "/usr/lib/python2.7/dist-packages/socks.py", line 236, in __negotiatesocks5
raise Socks5Error(ord(resp[1]),_generalerrors[ord(resp[1])])
TypeError: __init__() takes exactly 2 arguments (3 given)
按照路易斯的建议修改代码后,浏览器中出现了以下错误消息:
代理服务器拒绝连接。Firefox被配置为使用一个拒绝连接的代理服务器。
我得到的输出是:
Connected to Tor
78.108.63.46
Hew Id is
tor(991): Operation not permitted
62.212.89.116
答案 0 :(得分:0)
问题在于,在您的代码中,socks代理一旦设置,就会对之后的所有连接生效。因此,当Selenium客户端(您的脚本)尝试与Selenium服务器(您的浏览器)通信时,它也会尝试使用socks代理。您需要把代理的使用范围缩小到真正需要它的地方,也就是检查IP的时候。所以请将 connectTor 更改为:
import contextlib
@contextlib.contextmanager
def connectTor():
    """Route new sockets through the Tor SOCKS5 proxy only inside ``with``.

    On entry, 127.0.0.1:9150 is registered as the default proxy of the
    ``socks`` module and ``socket.socket`` is replaced by ``socks.socksocket``.
    On exit the previous ``socket.socket`` is put back, including when the
    body of the ``with`` block raises, so a failed request cannot leave the
    rest of the program (e.g. Selenium) stuck behind the proxy.
    """
    socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, "127.0.0.1", 9150, True)
    # Save the old value so that we can restore it later.
    old_socket = socket.socket
    # We are now using the proxy for all connections.
    socket.socket = socks.socksocket
    try:
        # Let the contents of the `with` block that uses this function execute.
        yield
    finally:
        # We are no longer using the proxy, whether or not the body raised.
        socket.socket = old_socket
并将您的 main 更改为:
def main():
    """Check the IP through Tor, browse without the SOCKS patch, then repeat.

    Only the showIP() calls run inside ``with connectTor()``; process() is
    called after each ``with`` block has exited.
    """
    # Round one: the IP lookup is proxied, the browser session is not.
    with connectTor():
        print("Connected to Tor")
        showIP()
    process()

    print("Hew Id is")
    newIdentity()

    # Round two: same pattern after asking for a new identity.
    with connectTor():
        showIP()
    process()
使用此代码后,socks代理仅在 with connectTor() 块内生效。contextmanager 装饰器及其工作原理请参阅Python官方文档中关于 contextlib 的部分。我上面提到的更改是有效的(我已经测试过了),但我以前从未使用过 socks 库。我很确定有比我的写法更好的方式把 connectTor 写成上下文管理器,但至少现在您已经知道问题是什么以及如何解决它。
您还需要在为Selenium设置代理地址时加上 http:// 前缀,即:
# Replacement for the `myProxy` assignment inside process(): the same local
# proxy address, now with an explicit "http://" scheme prefix.  `port` is the
# port string that process() defines just before that assignment.
myProxy = "http://127.0.0.1:" + port