# Version 1
import pandas as pd
from threading import Thread, Lock

lock = Lock()
main_df = pd.read_csv('Million_rows.csv')

def myfunction(*args, start, end):
    for i in range(start, end):
        noLuck = True
        if condition1:
            for item in mainTreeSearch:
                ...
                lock.acquire()
                ### write to main_df
                lock.release()
                noLuck = False
                break
        if noLuck and Acondition:
            lock.acquire()
            ### write to main_df
            lock.release()
        elif ...:
            ...  # various asymmetric decision trees

t1 = Thread(target=myfunction, args=args, kwargs={'start': 0,   'end': 250})
t2 = Thread(target=myfunction, args=args, kwargs={'start': 250, 'end': 500})
t3 = Thread(target=myfunction, args=args, kwargs={'start': 500, 'end': 750})
t4 = Thread(target=myfunction, args=args, kwargs={'start': 750, 'end': 1000})
My problem is that I don't know how to hand the remaining rows out to threads; I tried a Queue, but without success.
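One way to cover every row without hard-coding the four ranges is to derive the chunk boundaries from the length of the DataFrame and start one thread per chunk. This is only a sketch: it assumes the main_df, myfunction and args from Version 1 above, and the thread count is illustrative.

from threading import Thread

num_threads = 4
total_rows = len(main_df)
chunk = (total_rows + num_threads - 1) // num_threads   # ceiling division

threads = []
for n in range(num_threads):
    start = n * chunk
    end = min(start + chunk, total_rows)
    # start/end are passed as keywords, matching the Version 1 signature
    t = Thread(target=myfunction, args=args, kwargs={'start': start, 'end': end})
    threads.append(t)
    t.start()

for t in threads:
    t.join()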
# Version 2
from queue import Queue

def myfunction(*args, q):
    while True:
        i = q.get()
        # ... same search as above, without locking ...
        q.task_done()

q = Queue(maxsize=0)
num_threads = 5
threads = []
for i in range(num_threads):
    worker = Thread(target=myfunction, args=args)
    worker.setDaemon(True)
    threads.append(worker)
    worker.start()

for x in range(1000):
    # time.sleep(.005)
    q.put(x)
q.join()
In Version 2, without the sleep, one thread grabs all of the work, or the program crashes randomly.
In Version 1, should I be using the notify() mechanism (threading.Condition), and if so, how is it implemented?
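For reference, notify() lives on threading.Condition rather than on the threading module itself. Below is a minimal, generic wait/notify sketch, not a drop-in fix for the code above; pending, worker and producer are illustrative names.

import threading
from collections import deque

cond = threading.Condition()
pending = deque()            # work items shared by the producer and the workers

def worker():
    while True:
        with cond:
            while not pending:
                cond.wait()              # sleep until the producer calls notify()
            row = pending.popleft()
        # ... process 'row' outside the lock ...

def producer(rows):
    for row in rows:
        with cond:
            pending.append(row)
            cond.notify()                # wake one waiting worker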
Answer 0 (score: 0)
I reformatted it to this and it works as expected:
from Queue import Queue
import threading

q = Queue()

def myfunction(q):
    while True:
        # Block until a work item (here, a row index) is available.
        val = q.get()
        print('\n' + str(threading.currentThread()))
        print('\n' + str(val))
        # Tell the queue this item has been processed.
        q.task_done()

num_threads = 5
threads = []
for i in range(num_threads):
    # Pass the queue to the worker as a one-element tuple.
    worker = threading.Thread(target=myfunction, args=(q,))
    worker.setDaemon(True)
    threads.append(worker)
    worker.start()

# Enqueue the 1000 row indices, then wait until every item has been processed.
for x in range(1000):
    q.put(x)
q.join()
Check it out. I think the way you are passing the arguments is wrong.
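For example, if the worker also needs the extra search arguments from the question, they can go into the same args tuple as the queue. A sketch only; other_arg is a placeholder, not something from the original post:

def myfunction(q, other_arg):
    while True:
        val = q.get()
        # ... run the search for row 'val' using 'other_arg' ...
        q.task_done()

worker = threading.Thread(target=myfunction, args=(q, other_arg))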