这是我的线程模块:
import threading
import proxies
# Shared module state for the worker threads defined below.

# Every Worker started by set_threads() is appended here; run_loop() joins them.
threads_list = []

# good_counter: only declared ``global`` in Worker.run(); presumably updated
# by good_count(), which is not visible in this module -- TODO confirm.
# run_counter: incremented by count(), read back through get_counter().
good_counter = run_counter = 0

# Module-level placeholder; set_threads() binds its own local of the same
# name, so nothing visible here ever rebinds this global.
worker = None

# Batch size and timeout that run_loop() hands to set_threads().
num_threads = 10
timeout = 10
class Worker(threading.Thread):
    """Thread that checks a single proxy and records it when it works.

    Args:
        timeout: Stored as ``self.timeout``. ``run()`` does not read it.
        proxy_list: Stored as ``self.proxy_list``. ``run()`` does not read
            it either; it asks ``proxies.get_proxylist()`` directly.
    """

    def __init__(self, timeout, proxy_list):
        threading.Thread.__init__(self)
        self.timeout = timeout
        self.proxy_list = proxy_list

    def run(self):
        """Bump the run counter, fetch one proxy and test it.

        On success the proxy is passed to ``good_count()`` / ``save_proxy()``
        (both defined outside the visible code) and a HIT line is printed.
        """
        # No ``global`` statement is needed: this method only calls
        # functions and never assigns a module-level name itself.
        count()
        proxy_ip = proxies.get_proxy(proxies.get_proxylist())
        if proxies.is_proxy_good(proxy_ip):
            good_count()
            save_proxy(proxy_ip)
            # One-element tuple so the formatting also works if proxy_ip is
            # ever a tuple; output is unchanged for dicts and strings.
            print('[+] HIT ! - %s' % (proxy_ip,))
def set_threads(num_threads, timeout, proxy_list):
    """Start ``num_threads`` daemon Worker threads and record each one.

    Args:
        num_threads: How many Worker threads to create and start.
        timeout: Passed through to ``Worker``.
        proxy_list: Passed through to ``Worker`` (previously this argument
            was ignored and ``proxies.get_proxylist()`` was called instead).

    Every started thread is appended to the module-level ``threads_list``.
    """
    for _ in range(num_threads):
        # Local name differs from the module-level ``worker`` on purpose,
        # so the global is not shadowed.
        thread = Worker(timeout, proxy_list)
        # ``daemon`` attribute replaces the deprecated setDaemon() call.
        thread.daemon = True
        thread.start()
        threads_list.append(thread)
def run_loop():
    """Run batches of workers until ``proxies.proxy_list`` reports size 0.

    Each pass starts one batch through ``set_threads()`` and waits for that
    batch to finish before checking the queue size again.
    """
    while proxies.proxy_list.qsize() > 0:
        # Remember where this batch begins so only the newly started threads
        # are joined; earlier entries were already joined on a previous pass.
        batch_start = len(threads_list)
        set_threads(num_threads, timeout, proxies.get_proxylist())
        for started in threads_list[batch_start:]:
            started.join()
    print('[!] Proxylist Qsize < 0 QUITTING ....')
def get_counter():
    """Return the current value of the module-level ``run_counter``.

    This is the shared total at the moment of the call, not a number that
    belongs to the calling thread.
    """
    current = run_counter
    return current
# Guards run_counter: "+= 1" on a global is a read-modify-write sequence, and
# count() is called from Worker.run() on several threads.
_run_counter_lock = threading.Lock()


def count():
    """Increment the module-level ``run_counter`` and return the new value.

    Returns:
        The counter value produced by this call. A caller that keeps the
        returned number gets a value unique to its own call, unlike
        re-reading the shared counter later.
    """
    global run_counter
    with _run_counter_lock:
        run_counter += 1
        return run_counter
这是代理模块中的 is_proxy_good() 方法,每当捕获到异常时,它都会打印出 run_counter:
def is_proxy_good(proxy_ip):
    """Return True if a GET through ``proxy_ip`` answers with HTTP 200.

    Args:
        proxy_ip: Passed unchanged as the ``proxies`` argument of
            ``requests.get``.

    Returns:
        True when the response status code equals 200. False for any other
        status code, or when one of the handled request exceptions is raised
        (a diagnostic line with the current run counter is printed then).

    ``headers`` and ``threads`` are module-level names defined outside the
    visible code.
    """
    try:
        r = requests.get('https://www.example.com', proxies=proxy_ip, timeout=15, headers=headers)
        # Compare by value: ``is`` tests object identity and only appears to
        # work for small integers as an interpreter implementation detail.
        return r.status_code == 200
    # Handler order matters: in requests, ProxyError and SSLError derive from
    # ConnectionError, so the more specific handlers must come first.
    except requests.exceptions.Timeout:
        print('N%d - %s - Proxy Timeout\n' % (threads.get_counter(), proxy_ip))
        return False
    except requests.exceptions.ProxyError:
        print('N%d - %s - Proxy ProxyError\n' % (threads.get_counter(), proxy_ip))
        return False
    except requests.exceptions.SSLError:
        print('N%d - %s - Proxy SSLError\n' % (threads.get_counter(), proxy_ip))
        return False
    except requests.exceptions.ConnectionError:
        print('N%d - %s - Proxy ConnectionError\n' % (threads.get_counter(), proxy_ip))
        return False
输出为:
N10 - {'https': 'https://x.xxx.xx.xxx:1080'} - Proxy ProxyError
N10 - {'https': 'https://x.xxx.xx.xxx:1080'} - Proxy ProxyError
N10 - {'https': 'https://x.xxx.xx.xxx:1080'} - Proxy ProxyError
..............
N20 - {'https': 'https://x.xxx.xx.xxx:1080'} - Proxy ProxyError
N20 - {'https': 'https://x.xxx.xx.xxx:1080'} - Proxy ProxyError
N20 - {'https': 'https://x.xxx.xx.xxx:1080'} - Proxy ProxyError
...........
为什么每个线程打印出来的都是当前批次的线程总数(N10、N20),而不是各自的序号? 我应该如何正确地递增计数器,才能打印出每次加 1 的升序数字(N1、N2、N3……)? 谢谢!
答案 0 :(得分:2)
在您的代码中,run_counter 是全局变量。等到您读取它的时候,所有线程都已经对它进行过修改了。您需要把这个值保存在对应 Thread 实例自己的持久位置上。我大概会这样写:
import itertools
import threading


class Worker(threading.Thread):
    """Thread that carries its own sequence number in ``self.id``."""

    # One counter shared by all instances; every construction takes the
    # next value, so each Worker gets a distinct, ascending id.
    _ids = itertools.count()

    def __init__(self):
        # The base initializer must run, otherwise start() raises.
        threading.Thread.__init__(self)
        # Assigned here, i.e. in the thread that constructs the Worker,
        # so the number is fixed before the worker starts running.
        self.id = next(self._ids)
然后,在代理代码中的某处就可以通过类似 threading.current_thread().id 的方式取得当前线程自己的编号。