用pandas和pyplot绘图时发生内存泄漏

时间:2019-11-28 20:50:46

标签: python pandas matplotlib

我想为某个目录下的所有 csv 文件绘图。运行下面的脚本时,内存(RAM)占用一直单调增长。代码本身很简单,只是运行时间较长:

import multiprocessing
import os
from glob import glob
import pandas as pd
from matplotlib import pyplot as plt

from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

# Directory that is searched for the compressed CSV inputs.
root_data_dir = '/home/user1/data/20191121'
# Figures are written to a 'figures' sub-directory of the data directory.
root_img_dir = os.path.join(root_data_dir, 'figures')

# Create the output directory if it is missing. Attempting the mkdir and
# tolerating FileExistsError avoids the check-then-create race that occurs
# when several processes execute this module-level code at the same time.
# A missing parent directory still raises, as it did before.
try:
    os.mkdir(root_img_dir)
except FileExistsError:
    pass

def plot_file(file):
    """Plot one xz-compressed CSV file and save the figure as a PNG.

    The CSV is read with its ``date`` column parsed as dates and used as the
    index, and the resulting frame is passed to matplotlib as-is. The image
    is written into ``root_img_dir`` and named after the input's base name
    (with its last extension removed) plus ``.png``.

    Args:
        file: Path of the compressed CSV file to plot.
    """
    print("Processing {}".format(file))
    df = pd.read_csv(file, parse_dates=['date'], index_col='date', compression='xz')

    # splitext strips only the final extension, so 'a.csv.xz' becomes 'a.csv'.
    base_file = os.path.splitext(os.path.basename(file))[0]
    img_file = os.path.join(root_img_dir, base_file + '.png')

    # Work on an explicit figure rather than pyplot's implicit "current
    # figure", so that exactly this figure is the one closed below.
    fig, ax = plt.subplots()
    try:
        ax.plot(df)
        ax.set_title(base_file)
        fig.savefig(img_file, dpi=300)
        print("Saved {}".format(img_file))
    finally:
        # pyplot keeps every open figure registered until it is closed;
        # close it even when plotting or saving raises so that failed files
        # do not leave figures behind in the worker process.
        plt.close(fig)

# The __main__ guard keeps worker processes from re-running the dispatch when
# the multiprocessing start method re-imports this module (spawn/forkserver).
if __name__ == '__main__':
    # Sorted so files are submitted in a deterministic order.
    csv_files = sorted(glob(os.path.join(root_data_dir, '*.csv.xz')))
    # The context manager shuts the 16 worker processes down once map() has
    # returned, instead of leaving the pool open.
    with multiprocessing.Pool(16) as pool:
        pool.map(plot_file, csv_files)

1 个答案:

答案 0 :(得分:3)

添加以下代码

import gc

……然后在 plot_file 函数的末尾调用 gc.collect():

def plot_file(file):
    """Plot one xz-compressed CSV file, save it as a PNG, then run the GC.

    The CSV is read with its ``date`` column parsed as dates and used as the
    index, and the resulting frame is passed to matplotlib as-is. The image
    is written into ``root_img_dir`` and named after the input's base name
    (with its last extension removed) plus ``.png``. After the figure is
    closed, a garbage collection pass is forced before returning.

    Args:
        file: Path of the compressed CSV file to plot.
    """
    print("Processing {}".format(file))
    df = pd.read_csv(file, parse_dates=['date'], index_col='date', compression='xz')

    # splitext strips only the final extension, so 'a.csv.xz' becomes 'a.csv'.
    base_file = os.path.splitext(os.path.basename(file))[0]
    img_file = os.path.join(root_img_dir, base_file + '.png')

    # Work on an explicit figure rather than pyplot's implicit "current
    # figure", so that exactly this figure is the one closed below.
    fig, ax = plt.subplots()
    try:
        ax.plot(df)
        ax.set_title(base_file)
        fig.savefig(img_file, dpi=300)
        print("Saved {}".format(img_file))
    finally:
        # Close the figure and force a collection even when plotting or
        # saving raises, so a failed file gets the same cleanup as a
        # successful one.
        plt.close(fig)
        gc.collect()