Python threading not working

Time: 2017-02-08 20:36:42

Tags: python multithreading

Setup: init.py

import threading
...other imports ...

... vars ...

for drive in drives:
    series = folder.getFolders(drive)
    for serie in series:        
        print(str(datetime.datetime.now()))
        t = threading.Thread(target=serienchecker, args=(drive, serie, blacklist, apikeyv3, language))
        t.start()
        t.join()

serienchecker.py

from threading import Thread
from themoviedb import *
from folderhelper import *
class serienchecker(Thread):
    ...
    def __init__(self, path, seriesname, blacklist, apikeytmdb='', language='eng'):
        ...
        self.startSearch()
    ...

    def startSearch(self):
        print("start")
        ...

Output:

2017-02-08 21:29:04.481536
start
2017-02-08 21:29:17.385611
start
2017-02-08 21:30:00.548471
start

But I expected all of them to run at roughly the same time. Is there maybe even a way to queue all the tasks and process them with N threads at a time? [This is just a small example; the script will check several hundred folders.] Am I doing something wrong?

I have tried several approaches without success, please help me.

Thanks!

Edit://

def job():
    while jobs:
        tmp = jobs.pop()
        task(drive=tmp[0], serie=tmp[1])

def task(drive, serie):
    print("Serie[{0}]".format(serie))
    sc = serienchecker(drive, serie, blacklist, apikeyv3, language)
    sc.start()
    result = sc.result
    resultString = ''
    for obj in result:
        resultString += obj + "\n"
    print(resultString)

for drive in drives:
    series = folder.getFolders(drive)
    for serie in series:
        jobs.append([drive,serie])

while jobs:
    job()
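
For reference, one way to actually process the queued jobs with N threads at a time is to start a fixed number of worker threads that all consume the jobs list. This is only a minimal sketch built on the names used above (jobs, task, drives, folder); the worker count of 8 is an arbitrary choice:

import threading

# build the job list exactly as above
jobs = [(drive, serie)
        for drive in drives
        for serie in folder.getFolders(drive)]

lock = threading.Lock()

def worker():
    while True:
        with lock:                   # check-and-pop atomically so no job runs twice
            if not jobs:
                return
            drive, serie = jobs.pop()
        task(drive=drive, serie=serie)

workers = [threading.Thread(target=worker) for _ in range(8)]  # N = 8 threads
for w in workers:
    w.start()
for w in workers:
    w.join()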

2 Answers:

Answer 0: (score: 2)

join() waits for the thread to finish, so you should not call it right after starting a thread (otherwise no new thread gets created until the previous one has ended).
Create a list at the beginning to store your threads:

threads = []

Then append each thread to the list as you create it:

threads.append(t)

Join all the threads at the end of the program:

for t in threads:
    t.join()
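
Putting those three pieces together, the corrected loop from init.py might look like the following sketch (reusing the names drives, folder, serienchecker, blacklist, apikeyv3 and language from the question):

import threading

threads = []

for drive in drives:
    series = folder.getFolders(drive)
    for serie in series:
        t = threading.Thread(target=serienchecker,
                             args=(drive, serie, blacklist, apikeyv3, language))
        t.start()          # start the thread without waiting for it
        threads.append(t)  # remember it so it can be joined later

# wait only after every thread has been started
for t in threads:
    t.join()

This way all checks run concurrently while the program still waits for every thread before it exits.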

Answer 1: (score: 0)

As mentioned above, you need to defer the join until all threads have been started. Consider using a ThreadPool that limits the number of concurrent threads; it can be swapped for a process pool if Python's GIL slows the processing down. It starts, dispatches and joins the threads for you.
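
Before the full process-pool version below, this is roughly what the thread-pool variant could look like on its own. It is only a sketch: multiprocessing.pool.ThreadPool is used here because it shares the Pool API of the code below, and serienchecker, drives, folder, blacklist, apikeyv3 and language are the names from the question:

from multiprocessing.pool import ThreadPool  # thread-backed pool, same API as Pool

def check(drive_serie):
    drive, serie = drive_serie
    return serienchecker(drive, serie, blacklist, apikeyv3, language)

tasks = [(drive, serie)
         for drive in drives
         for serie in folder.getFolders(drive)]

pool = ThreadPool(8)                  # at most 8 checks run at the same time
for result in pool.map(check, tasks):
    print(result)
pool.close()
pool.join()

The process-pool version below follows the same pattern, but needs an extra initializer step on Windows because a child process there does not inherit the parent's globals.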

import multiprocessing
import itertools
import platform

...

# helper functions for process pool
#
#     linux - worker process gets a view of parent memory at time pool
#     is created, including global variables that exist at that time.
#     
#     windows - a new process is created and all needed state must be
#     passed to the child. we could pass these values on every call,
#     but assuming blacklist is large, it's more efficient to set it
#     up once

do_init = platform.system() == "Windows"

if do_init:

    def init_serienchecker_process(_blacklist, _apikeyv3, _language):
        """Call once when process pool worker created to set static config"""
        global blacklist, apikeyv3, language
        blacklist, apikeyv3, language = _blacklist, _apikeyv3, _language

# this is the worker in the child process. It is called with items iterated
# in the parent Pool.map function. In our case, the item is a (drive, serie)
# tuple. Unpack, combine w/ globals and call the real function.

def serienchecker_worker(drive_serie):
    """Calls serienchecker with global blacklist, apikeyv3, language set by
    init_serienchecker_process"""
    return serienchecker(drive_serie[0], drive_serie[1], blacklist,
        apikeyv3, language)

def drive_serie_iter(folder, drives):
    """Yields (drive, serie) tuples"""
    for drive in drives:
        for serie in folder.getFolders(drive):
            yield drive, serie


# decide the number of workers. Here I just chose a random max value,
# but your number will depend on your desired workload.

max_workers = 24
items = list(drive_serie_iter(folder, drives))
num_items = len(items)
num_workers = min(num_items, max_workers)

# setup a process pool. we need to initialize windows with the global
# variables but since linux already has a view of the globals, its 
# not needed

initializer = init_serienchecker_process if do_init else None
initargs = (blacklist, apikeyv3, language) if do_init else ()
pool = multiprocessing.Pool(num_workers, initializer=initializer, 
    initargs=initargs)

# map calls serienchecker_worker in a worker process for each (drive, serie)
# pair prepared above

for result in pool.map(serienchecker_worker, items):
    print(result) # not sure you care what the results are

pool.close()
pool.join()