import threading
import Queue

class Job(object):
    def __init__(self, name):
        self.name = name
        self.depends = []
        self.waitcount = 0

    def work(self):
        # does some work
        pass

    def add_dependent(self, another_job):
        self.depends.append(another_job)
        self.waitcount += 1
So waitcount depends on the number of jobs you have.
job_board = {}

# create a dependency tree
for i in range(1000):
    # create random jobs
    j = Job(<new name goes here>)
    # add jobs to depends if dependent
    # record it in job_board
    job_board[j.name] = j
# example
# jobC is in self.depends of jobA and jobB
# jobC would have a waitcount of 2
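For concreteness, here is a minimal sketch of the wiring that example describes (the job names are hypothetical, and waitcount is set by hand to match the comment):

jobA, jobB, jobC = Job("A"), Job("B"), Job("C")
# jobC is recorded as a dependent of both jobA and jobB,
# so it should become ready only after both of them finish
jobA.depends.append(jobC)
jobB.depends.append(jobC)
jobC.waitcount = 2  # two predecessors must finish before jobC runs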
rdyQ = Queue.Queue()
def worker():
    while True:
        job = rdyQ.get()
        success = job.work()
        # if this job was successful, release its dependent jobs
        if success:
            for dependent_job in job.depends:
                dependent_job.waitcount -= 1
                if dependent_job.waitcount == 0:
                    rdyQ.put(dependent_job)
Then I create the threads:
for i in range(10):
    t = threading.Thread(target=worker)
    t.daemon = True
    t.start()

for job_name, job_obj in job_board.iteritems():
    if job_obj.waitcount == 0:
        rdyQ.put(job_obj)

while True:
    # until all jobs are finished, wait
    pass
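(A less busy way to wait, assuming each worker also calls rdyQ.task_done() once per finished job, which the code above does not yet do, is to block on the queue itself:)

# sketch, assuming workers call rdyQ.task_done() after each job:
rdyQ.join()  # blocks until every queued job has been marked done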
Now here is an example:
# example
# jobC is in self.depends of jobA and jobB
# jobC would have a waitcount of 2
Now in this scenario, if jobA and jobB are both running and both try to decrement jobC's waitcount at the same time, weird things happen.
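That decrement really can race: waitcount -= 1 is a read-modify-write, not an atomic operation, even under CPython's GIL. Here is a standalone sketch (none of these names come from the code above) that usually loses updates:

import threading

counter = 0

def bump():
    global counter
    for _ in range(100000):
        counter += 1  # read-modify-write; a thread switch here can lose updates

threads = [threading.Thread(target=bump) for _ in range(10)]
for t in threads:
    t.start()
for t in threads:
    t.join()
print counter  # without a lock this usually prints less than 1000000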
So I put in a lock:
waitcount_lock = threading.Lock()
and changed the code to:
# if this job was successful create dependent jobs
if success:
    for dependent_job in job.depends:
        with waitcount_lock:
            dependent_job.waitcount -= 1
            if dependent_job.waitcount == 0:
                rdyQ.put(dependent_job)
And the weird things kept happening: multiple threads were processing the same job, as if the job had been put into the queue twice.
Is it not best practice to touch/modify nested objects when complex objects are being passed between threads?
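(One cheap way to confirm that the same job really is being worked twice is an assertion guard in work(); the answer below uses exactly this trick. The started attribute here is hypothetical:)

def work(self):
    # blow up loudly if the same job is ever worked on twice
    assert not getattr(self, 'started', False)
    self.started = True
    # does some work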
Answer 0 (score: 1)
Here's a complete, executable program that seems to work fine. I expect you're mostly seeing "weird" behavior because, as I suggested in a comment, you're counting job successors rather than job predecessors. So I renamed things to use "succ" and "pred" to make that clearer. daemon threads are also usually a bad idea, so this code arranges to shut down all the threads cleanly when the work is over. Note also the use of assertions to verify that implicit beliefs are actually true ;-)
import threading
import Queue
import random

NTHREADS = 10
NJOBS = 10000

class Job(object):
    def __init__(self, name):
        self.name = name
        self.done = False
        self.succs = []
        self.npreds = 0

    def work(self):
        assert not self.done
        self.done = True
        return True

    def add_dependent(self, another_job):
        self.succs.append(another_job)
        another_job.npreds += 1

def worker(q, lock):
    while True:
        job = q.get()
        if job is None:
            break
        success = job.work()
        if success:
            for succ in job.succs:
                with lock:
                    assert succ.npreds > 0
                    succ.npreds -= 1
                    if succ.npreds == 0:
                        q.put(succ)
        q.task_done()

jobs = [Job(i) for i in range(NJOBS)]
for i, job in enumerate(jobs):
    # pick some random successors
    possible = xrange(i+1, NJOBS)
    succs = random.sample(possible,
                          min(len(possible),
                              random.randrange(10)))
    for succ in succs:
        job.add_dependent(jobs[succ])

q = Queue.Queue()
for job in jobs:
    if job.npreds == 0:
        q.put(job)
print q.qsize(), "ready jobs initially"

lock = threading.Lock()
threads = [threading.Thread(target=worker,
                            args=(q, lock))
           for _ in range(NTHREADS)]
for t in threads:
    t.start()
q.join()

# add sentinels so threads end cleanly
for t in threads:
    q.put(None)
for t in threads:
    t.join()

for job in jobs:
    assert job.done
    assert job.npreds == 0
In a sense, the lock in this code "protects too much". The real problem it solves is that multiple threads may try to decrement the .npreds member of the same Job object simultaneously. Without mutual exclusion, the stored value at the end could be anything from 1 less than its initial value to the correct result (the initial value minus the number of threads that tried to decrement it).
But there's no need to mutate the queue while holding the lock; queues do their own thread-safe locking. So, for example, the code could be written like this:
for succ in job.succs:
    with lock:
        npreds = succ.npreds = succ.npreds - 1
    assert npreds >= 0
    if npreds == 0:
        q.put(succ)
It's generally best practice to hold a lock for as short a time as possible. But I find this rewrite somewhat harder to follow. Pick your poison ;-)