我正在使用Linux。我有一个名为“combine”的外部可执行文件和一个20次迭代的循环。 每次迭代时,需要使用依赖于第i次迭代的参数调用“combine”。例如:
arguments = " "
for i in range(1,20):
arguments += str(i) + "_image.jpg "
# begin of pseudo-code
execute: "./combine" + arguments # in parallel using all cores
# pseudo-code continues
wait_for_all_previous_process_to_terminate
execute: "./merge_resized_images" # use all cores - possible for one single command?
如何使用Python中的多处理模块实现这一目标?
答案 0 :(得分:10)
您可以使用subprocess.Popen
异步启动外部命令,并存储列表中返回的每个Popen
对象。一旦启动了所有流程,只需迭代它们并等待每个流程完成popen_object.wait
。
from subprocess import Popen
processes = []
for i in range(1,20):
arguments += str(i) + "_image.jpg "
processes.append(subprocess.Popen(shlex.split("./combine" + arguments)))
for p in processes:
p.wait()
subprocess.call("./merge_resized_images")
然而,这将启动20个并发进程,这可能会损害性能。
为避免这种情况,您可以使用ThreadPool
将自己限制为较少数量的并发进程(multiprocessing.cpu_count
是一个很好的数字),然后使用pool.join
等待它们一切都完成了。
import multiprocessing
import subprocess
import shlex
from multiprocessing.pool import ThreadPool
def call_proc(cmd):
""" This runs in a separate thread. """
#subprocess.call(shlex.split(cmd)) # This will block until cmd finishes
p = subprocess.Popen(shlex.split(cmd), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
out, err = p.communicate()
return (out, err)
pool = ThreadPool(multiprocessing.cpu_count())
results = []
for i in range(1,20):
arguments += str(i) + "_image.jpg "
results.append(pool.apply_async(call_proc, ("./combine" + arguments,)))
# Close the pool and wait for each running task to complete
pool.close()
pool.join()
for result in results:
out, err = result.get()
print("out: {} err: {}".format(out, err))
subprocess.call("./merge_resized_images")
每个线程都会在等待子进程完成时释放GIL,因此它们都会并行运行。
答案 1 :(得分:0)
我对此问题的解决方案是创建和管理子进程列表。请特别注意startencoder
和manageprocs
。这就是开始和管理实际工作的地方。
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
#
# Author: R.F. Smith
# $Date: 2014-02-15 14:44:31 +0100 $
#
# To the extent possible under law, Roland Smith has waived all copyright and
# related or neighboring rights to vid2mkv.py. This work is published from the
# Netherlands. See http://creativecommons.org/publicdomain/zero/1.0/
"""Convert all video files given on the command line to Theora/Vorbis streams
in a Matroska container."""
from __future__ import print_function, division
__version__ = '$Revision: a42ef58 $'[11:-2]
import os
import sys
import subprocess
from multiprocessing import cpu_count
from time import sleep
def warn(s):
"""Print a warning message.
:param s: Message string
"""
s = ' '.join(['Warning:', s])
print(s, file=sys.stderr)
def checkfor(args, rv=0):
"""Make sure that a program necessary for using this script is
available.
:param args: String or list of strings of commands. A single string may
not contain spaces.
:param rv: Expected return value from evoking the command.
"""
if isinstance(args, str):
if ' ' in args:
raise ValueError('no spaces in single command allowed')
args = [args]
try:
with open(os.devnull, 'w') as bb:
rc = subprocess.call(args, stdout=bb, stderr=bb)
if rc != rv:
raise OSError
except OSError as oops:
outs = "Required program '{}' not found: {}."
print(outs.format(args[0], oops.strerror))
sys.exit(1)
def startencoder(fname):
"""Use ffmpeg to convert a video file to Theora/Vorbis
streams in a Matroska container.
:param fname: Name of the file to convert.
:returns: a 3-tuple of a Process, input path and output path
"""
basename, ext = os.path.splitext(fname)
known = ['.mp4', '.avi', '.wmv', '.flv', '.mpg', '.mpeg', '.mov', '.ogv']
if ext.lower() not in known:
warn("File {} has unknown extension, ignoring it.".format(fname))
return (None, fname, None)
ofn = basename + '.mkv'
args = ['ffmpeg', '-i', fname, '-c:v', 'libtheora', '-q:v', '6', '-c:a',
'libvorbis', '-q:a', '3', '-sn', ofn]
with open(os.devnull, 'w') as bitbucket:
try:
p = subprocess.Popen(args, stdout=bitbucket, stderr=bitbucket)
print("Conversion of {} to {} started.".format(fname, ofn))
except:
warn("Starting conversion of {} failed.".format(fname))
return (p, fname, ofn)
def manageprocs(proclist):
"""Check a list of subprocesses tuples for processes that have ended and
remove them from the list.
:param proclist: a list of (process, input filename, output filename)
tuples.
"""
print('# of conversions running: {}\r'.format(len(proclist)), end='')
sys.stdout.flush()
for p in proclist:
pr, ifn, ofn = p
if pr is None:
proclist.remove(p)
elif pr.poll() is not None:
print('Conversion of {} to {} finished.'.format(ifn, ofn))
proclist.remove(p)
sleep(0.5)
def main(argv):
"""Main program.
:param argv: command line arguments
"""
if len(argv) == 1:
binary = os.path.basename(argv[0])
print("{} version {}".format(binary, __version__), file=sys.stderr)
print("Usage: {} [file ...]".format(binary), file=sys.stderr)
sys.exit(0)
checkfor(['ffmpeg', '-version'])
avis = argv[1:]
procs = []
maxprocs = cpu_count()
for ifile in avis:
while len(procs) == maxprocs:
manageprocs(procs)
procs.append(startencoder(ifile))
while len(procs) > 0:
manageprocs(procs)
if __name__ == '__main__':
main(sys.argv)