我正在编写脚本以使用具有多个线程的代理访问网站,但是现在我陷入了多个线程中,当我运行以下脚本时,它将打开5个浏览器,但全部5个都使用1个代理,我想要5个浏览器使用不同的代理,有人可以帮我完成它吗?谢谢
这是我的剧本:
from selenium import webdriver
from selenium import webdriver
import time , random
import threading
def e():
a = open("sock2.txt", "r")
for line in a.readlines():
b = line
prox = b.split(":")
IP = prox[0]
PORT = int(prox[1].strip("\n"))
print(IP)
print(PORT)
profile = webdriver.FirefoxProfile()
profile.set_preference("network.proxy.type", 1)
profile.set_preference("network.proxy.socks", IP)
profile.set_preference("network.proxy.socks_port", PORT)
try:
driver = webdriver.Firefox(firefox_profile=profile)
driver.get("http://www.whatsmyip.org/")
except:
print("Proxy Connection Error")
driver.quit()
else:
time.sleep(random.randint(40, 70))
driver.quit()
for i in range(5):
t = threading.Thread(target=e)
t.start()
(祝大家新年快乐!
答案 0 :(得分:2)
(我个人认为存在一个问题,当您启动程序时,它将进入新线程,这将从头开始遍历整个文本文件,因为您没有删除它们)
当我做与您现在相同的事情时,我遇到了同样的问题。我知道您希望在代码方面寻求帮助,但是我急于对其进行测试并希望为您提供帮助;),所以这是一个对我有用的代码...甚至还有Chrome的杀手kill(您只是必须将其编辑为firefox)
links = [ // Link you want to go to ]
def funk(xxx , website):
link = website
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--proxy-server=%s' % str(xxx))
chromedriver = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'chromedriver')
chrome = webdriver.Chrome(chromedriver, chrome_options=chrome_options)
try :
// Do stuff
except:
print('exception')
chrome.close()
for link in links:
f = open('proxies.txt')
line = f.readline()
x = 1
xx = 0
while line:
if number_of_used_proxies < 10:
print(line)
line = f.readline()
try:
threading.Timer(40, funk, [line, link]).start()
except Exception as e:
print(e)
time.sleep(1)
x += 1
number_of_used_proxies += 1
else:
time.sleep(100)
for x in range(1, 10):
try:
xzxzx = 'os.system("taskkill /f /im chrome.exe")'
os.system("killall 'Google Chrome'")
except:
print("NoMore")
time.sleep(10)
number_of_used_proxies = 0
f.close()
希望它会有所帮助:)
答案 1 :(得分:2)
DominikLašo正确捕获了它-每个线程从头开始处理文件。大概是这样的:
from selenium import webdriver
from selenium import webdriver
import time , random
import threading
def e(ip, port):
profile = webdriver.FirefoxProfile()
profile.set_preference("network.proxy.type", 1)
profile.set_preference("network.proxy.socks", IP)
profile.set_preference("network.proxy.socks_port", PORT)
try:
driver = webdriver.Firefox(firefox_profile=profile)
driver.get("http://www.whatsmyip.org/")
except:
print("Proxy Connection Error")
driver.quit()
else:
time.sleep(random.randint(40, 70))
driver.quit()
my_threads = []
with open("sock2.txt", "r") as fd:
for line in fd.readlines():
line = line.strip()
if not line:
continue
prox = line.split(":")
ip = prox[0]
port = int(prox[1])
print('-> {}:{}'.format(ip, port))
t = threading.Thread(target=e, args=(ip, port,))
t.start()
my_threads.append(t)
for t in my_threads:
t.join()
答案 2 :(得分:1)
vantuong :这是使用ThreadPoolExecutor解决问题的方法。
参考:https://docs.python.org/3/library/concurrent.futures.html
from selenium import webdriver
from selenium import webdriver
import time , random
#import threading
import concurrent.futures
MAX_WORKERS = 5
def get_proxys(data_file):
proxys = []
with open(data_file, "r") as fd:
for line in fd.readlines():
line = line.strip()
if not line:
continue
prox = line.split(":")
ip = prox[0]
port = int(prox[1])
proxys.append((ip, port))
return proxys
def e(ip, port):
profile = webdriver.FirefoxProfile()
profile.set_preference("network.proxy.type", 1)
profile.set_preference("network.proxy.socks", IP)
profile.set_preference("network.proxy.socks_port", PORT)
try:
driver = webdriver.Firefox(firefox_profile=profile)
driver.get("http://www.whatsmyip.org/")
except:
print("Proxy Connection Error")
driver.quit()
else:
time.sleep(random.randint(40, 70))
driver.quit()
with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
proxys = get_proxys('sock2.txt')
tasks = {executor.submit(e, proxy[0], proxy[1]): proxy for proxy in proxys}
for task in concurrent.futures.as_completed(tasks):
proxy = tasks[task]
try:
data = task.result()
except Exception as exc:
print('{} generated an exception: {}'.format(proxy, exc))
else:
print('{} completed successfully'.format(proxy))
有趣的练习:尝试使用不同的MAX_WORKERS值。