这段 Python 线程池代码似乎只处理了 repoPathList 中的一个条目,之后就什么也不做了,看起来像是挂起了。我不确定两层嵌套的并行化是否会有问题。下面是精简后的代码:
它只打印了 print("STARTED:" + repoPath) 这一行,而在第一层被映射的函数 computeRepoMetrics(repoPath) 中,永远不会执行到 print("COMPLETED:" + repoPath) 这一行。之后,它也不再处理 repoPathList 中的其他条目。我的并行化方式有问题吗?
# Local checkouts of the Git repositories to analyze, one path per entry.
# Every entry carries its own trailing comma: adjacent string literals with
# no comma between them are silently concatenated into a single (bogus) path.
repoPathList = [
    "/Users/soft/PycharmProjects/Pydriller/gitRepos/attic-whirr",
    "/Users/soft/PycharmProjects/Pydriller/gitRepos/sqoop",
    "/Users/soft/PycharmProjects/Pydriller/gitRepos/ranger",
    "/Users/soft/PycharmProjects/Pydriller/gitRepos/falcon",
    "/Users/soft/PycharmProjects/Pydriller/gitRepos/aries",
]
def computeRepoMetrics(repoPath):
    """Run the metric pipeline for the repository at ``repoPath``.

    Prints a ``STARTED:`` marker, opens the repository, gathers the commit
    interval data and the scattering data, hands both to
    ``parallelMetricProcessing`` (with 30 worker threads), and finally prints
    a ``COMPLETED:`` marker. Returns ``None``.
    """
    print("STARTED:" + repoPath)
    repository = GitRepository(repoPath)
    # Both inputs are computed up front, in this order, before any worker
    # thread is started.
    intervals = analyzeCommits(repoPath, repository)
    scattering = calculateStructAndSemanticScattering(repository)
    parallelMetricProcessing(intervals, scattering, 30, repository)
    print("COMPLETED:" + repoPath)
def parallelMetricProcessing(intervalsData, scatteringFilePairData, threads, gr):
    """Run ``compute_fileMetrics`` over every interval on a thread pool.

    Args:
        intervalsData: Indexable sequence of per-interval data.
        scatteringFilePairData: Data passed unchanged to every worker call.
        threads: Number of worker threads in the pool.
        gr: Repository object passed unchanged to every worker call.

    Each worker receives one argument of the form
    ``((interval, previous_interval, index), scatteringFilePairData, gr)``,
    where ``previous_interval`` is ``[]`` for the first interval.

    Raises:
        Whatever ``compute_fileMetrics`` raises; ``pool.map`` re-raises it in
        the calling thread.
    """
    # Pair each interval with its predecessor and its position.
    tupledIntervalsData = [
        (current, intervalsData[i - 1] if i > 0 else [], i)
        for i, current in enumerate(intervalsData)
    ]
    # NOTE(review): the same `gr` object is handed to every worker thread;
    # confirm it is safe to use concurrently.
    data = [(item, scatteringFilePairData, gr) for item in tupledIntervalsData]
    pool = ThreadPool(threads)
    try:
        pool.map(compute_fileMetrics, data)
    finally:
        # Always release the worker threads, even when a worker raised;
        # otherwise the pool's threads are leaked.
        pool.close()
        pool.join()
def main():
    """Process every path in ``repoPathList`` on a pool of five threads.

    Each path is handed to ``computeRepoMetrics``. An exception raised by any
    worker is re-raised here by ``pool.map``.
    """
    pool = ThreadPool(5)
    try:
        pool.map(computeRepoMetrics, repoPathList)
    finally:
        # Shut the pool down on failure as well, so worker threads are not
        # left behind when a repository fails.
        pool.close()
        pool.join()