I am using the threading library in a multithreaded script, and I want to implement a timeout on the threads: if a thread has not returned after the specified time, it should exit the function and return task_done.
Here is my code:
def create_workers():
    for _ in range(NUMBER_OF_THREADS):
        t = threading.Thread(target=work)
        t.daemon = True
        t.start()

def create_jobs():
    for d in Date_set:
        queue.put(d)
    queue.join()
    scrape()

def scrape_page(thread_name, page_url):
    print(thread_name + ' now working on ' + page_url)
    get_active_urls_perDay(session=s, Date=page_url, County=Cty, courtSystem=CS, PT=P)

def work():
    while True:
        url = queue.get()
        scrape_page(threading.current_thread().name, url)
        Date_set.remove(url)
        print str(len(Date_set)) + " days more to go!"
        print "Number of threads active", threading.activeCount()
        queue.task_done()

def scrape():
    queued_links = Date_set
    if len(queued_links) > 0:
        print(str(len(queued_links)) + ' days in the queue')
        create_jobs()
In the work function, I want to implement the timeout on the threads. Otherwise the code runs fine, but the threads that never return stall the script, and it just keeps waiting for them to come back.
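For reference, a minimal standalone sketch of the general mechanism for bounding how long the main thread waits on stuck workers, under the assumption that a blocked scrape call can simply be abandoned: keep the Thread objects, mark them as daemons, and join each one with a timeout. All names here are illustrative, and time.sleep stands in for a call that never returns.

import threading
import time

def stuck_worker():
    time.sleep(1000)       # stands in for a scrape call that never returns

threads = []
for _ in range(3):
    t = threading.Thread(target=stuck_worker)
    t.daemon = True        # daemon threads are killed when the main thread exits
    t.start()
    threads.append(t)

for t in threads:
    t.join(5)              # wait at most 5 seconds per thread

print('gave up waiting; the stuck daemon workers are abandoned on exit')

After the join timeouts expire, the main thread continues even though the workers are still blocked; because they are daemons, they do not keep the process alive.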
Answer 0 (score: 0)
def create_jobs():
    for d in Date_set:
        queue.put(d)
    scrape()

def create_workers():
    thread_list = []
    for _ in range(NUMBER_OF_THREADS):
        t = threading.Thread(target=work)
        thread_list.append(t)
        t.daemon = True
        t.start()
    return thread_list

def join_all(thread_list):
    # wait at most 5 seconds for each worker thread
    [t.join(5) for t in thread_list]

def scrape_page(thread_name, page_url):
    print(thread_name + ' now working on ' + page_url)
    get_active_urls_perDay(session=s, Date=page_url, County=Cty, courtSystem=CS, PT=P)

def work():
    while True:
        url = queue.get()
        try:
            scrape_page(threading.current_thread().name, url)
            Date_set.remove(url)
            print str(len(Date_set)) + " days more to go!"
            print "Number of threads active", threading.activeCount()
        finally:
            # mark the job done even if scrape_page raises
            queue.task_done()

def scrape():
    queued_links = Date_set
    if len(queued_links) > 0:
        print(str(len(queued_links)) + ' days in the queue')
        create_jobs()

# driver code (in the full script this presumably sits inside the function that returns case_urls)
s = session
Cty = County
CS = courtSystem
P = PT
Date_set = create_dates_set(start_filingDate, end_filingDate)
t_list = create_workers()
join_all(t_list)
scrape()
return case_urls
Answer 1 (score: 0)
import threading
import Queue
import time

lock = threading.Lock()
Date_set = ['127.0.0.1/test1', '127.0.0.1/test3', '127.0.0.1/test3', '127.0.0.1/test4']
queue = Queue.Queue()
NUMBER_OF_THREADS = 3

def create_jobs():
    for d in Date_set:
        queue.put(d)
    # scrape()

thread_list = []

def create_workers():
    for _ in range(NUMBER_OF_THREADS):
        t = threading.Thread(target=work)
        thread_list.append(t)
        t.daemon = True
        t.start()

def join_all():
    # wait at most 5 seconds for each worker thread
    [t.join(5) for t in thread_list]

def scrape_page(thread_name, page_url):
    time.sleep(1)
    lock.acquire()
    print(thread_name + ' now working on ' + page_url)
    print page_url + ' done'
    lock.release()
    # get_active_urls_perDay(session=s, Date=page_url, County=Cty, courtSystem=CS, PT=P)

def work():
    while True:
        if queue.empty() is True:
            # stop the worker once there is nothing left to fetch
            break
        url = queue.get()
        try:
            scrape_page(threading.current_thread().name, url)
            # Date_set.remove(url)
            lock.acquire()
            print str(len(Date_set)) + " days more to go!"
            print "Number of threads active", threading.activeCount()
            lock.release()
        finally:
            queue.task_done()

def scrape():
    queued_links = Date_set
    if len(queued_links) > 0:
        print(str(len(queued_links)) + ' days in the queue')
        create_jobs()

# s = session
# Cty = County
# CS = courtSystem
# P = PT
# Date_set = create_dates_set(start_filingDate, end_filingDate)
create_jobs()
create_workers()
join_all()
print 'main thread quit and all worker thread quit even if it is not finished'
# scrape()
# return case_urls
This example works. I used sleep(200) to simulate get_active_urls_perDay, and the script stops after about 15 seconds (3 worker threads, each joined with a 5-second timeout, after which the main thread gives up and the daemon workers are abandoned with it). If you replace sleep(200) with sleep(1), all the threads finish their jobs and the main thread exits normally.
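Since the per-thread join(5) makes the worst-case wait scale with the number of workers, one possible variation if a single overall deadline is wanted (join_all_with_deadline is an illustrative name, not part of the answer above) is to shrink each successive timeout against a shared deadline:

import time

def join_all_with_deadline(thread_list, total_timeout=5.0):
    # wait at most total_timeout seconds for all the threads combined
    deadline = time.time() + total_timeout
    for t in thread_list:
        remaining = deadline - time.time()
        if remaining <= 0:
            break
        t.join(remaining)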