Python多处理,打开文件过多

时间:2020-05-03 12:47:11

标签: python pandas multiprocessing

我目前正在尝试构建一个基于单个数据帧保存10,000-50,000张图像的Scipt

为了加快处理过程,我正在尝试使用以下脚本使多处理工作。在出现有关太多打开文件的错误之前,我得到了约4000张图像

# import multiprocessing
import multiprocessing as mp

# Create all image files and save to folder
week_list = df[['instrument_id', 'year', 'week_of_year']].drop_duplicates().to_numpy().tolist()

# define function that creates plot image
def create_image(instrument_id, year, week_of_year):
    plt.clf()
    # first slice the dataframe to only contain data from the first week
    weekDF = df[(df['instrument_id'] == instrument_id) & (df['year'] == year) & (df['week_of_year'] == week_of_year)]

    # plot day_pct
    fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(2,2))

    weekDF.plot(
        ax=axes[0,0],
        x='previous_months_rows_id',
        y='day_pct',
        kind='kde'
    ),
    weekDF.plot(
        ax=axes[0,1],
        x='previous_months_rows_id',
        y='day_pct'
    ),
    weekDF.plot(
        ax=axes[1,0],
        x='previous_months_rows_id',
        y='day_volume',
        kind='kde'
    ),
    weekDF.plot(
        ax=axes[1,1],
        x='previous_months_rows_id',
        y='day_volume'
    )
    axes[0,0].axis("off")
    axes[1,0].axis("off")
    axes[0,1].axis("off")
    axes[1,1].axis("off")

    axes[0,0].get_legend().remove()
    axes[1,0].get_legend().remove()
    axes[0,1].get_legend().remove()
    axes[1,1].get_legend().remove()

    plt.tight_layout()
    plt.ioff()
    fig.savefig('/home/henrik/Dokumenter/Stock/img/{instrument_id}_{year}_{week_of_year}.jpg'.format(instrument_id = instrument_id, year =year, week_of_year = week_of_year))
    plt.close(fig)

processes = [mp.Process(target=create_image, args=(week[0], week[1], week[2])) for week in week_list]

# Run processes
for p in processes:
    p.start()

# Exit the completed processes
for p in processes:
    p.join()

我得到的错误:

OSError                                   Traceback (most recent call last)
<ipython-input-6-901de27246d0> in <module>
     56 # Run processes
     57 for p in processes:
---> 58     p.start()
     59 
     60 # Exit the completed processes

~/anaconda3/lib/python3.7/multiprocessing/process.py in start(self)
    110                'daemonic processes are not allowed to have children'
    111         _cleanup()
--> 112         self._popen = self._Popen(self)
    113         self._sentinel = self._popen.sentinel
    114         # Avoid a refcycle if the target function holds an indirect

~/anaconda3/lib/python3.7/multiprocessing/context.py in _Popen(process_obj)
    221     @staticmethod
    222     def _Popen(process_obj):
--> 223         return _default_context.get_context().Process._Popen(process_obj)
    224 
    225 class DefaultContext(BaseContext):

~/anaconda3/lib/python3.7/multiprocessing/context.py in _Popen(process_obj)
    275         def _Popen(process_obj):
    276             from .popen_fork import Popen
--> 277             return Popen(process_obj)
    278 
    279     class SpawnProcess(process.BaseProcess):

~/anaconda3/lib/python3.7/multiprocessing/popen_fork.py in __init__(self, process_obj)
     18         self.returncode = None
     19         self.finalizer = None
---> 20         self._launch(process_obj)
     21 
     22     def duplicate_for_child(self, fd):

~/anaconda3/lib/python3.7/multiprocessing/popen_fork.py in _launch(self, process_obj)
     67     def _launch(self, process_obj):
     68         code = 1
---> 69         parent_r, child_w = os.pipe()
     70         self.pid = os.fork()
     71         if self.pid == 0:

OSError: [Errno 24] Too many open files

关于此问题的原因和解决方案的任何想法吗?

1 个答案:

答案 0 :(得分:0)

以下代码基于CJR建议解决了我的问题

some_random_X