我想为目录中的所有 CSV 文件生成图表。运行下面的脚本时，内存（RAM）占用持续单调增长。代码本身很简单，只是稍长一些：
import multiprocessing
import os
from glob import glob
import pandas as pd
from matplotlib import pyplot as plt
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

# Input directory holding the *.csv.xz files; figures go into a subdirectory of it.
root_data_dir = '/home/user1/data/20191121'
root_img_dir = os.path.join(root_data_dir, 'figures')

# exist_ok=True avoids the check-then-create race when several processes
# execute this module's top level at the same time.
os.makedirs(root_img_dir, exist_ok=True)
def plot_file(file):
    """Plot one xz-compressed CSV file and save the figure as a PNG.

    The CSV is read with its ``date`` column parsed as dates and used as the
    index, then the whole frame is plotted. The image is written into
    ``root_img_dir`` under the input's base name with ``.png`` appended.

    Args:
        file: Path to the ``.csv.xz`` file to plot.
    """
    print("Processing {}".format(file))
    df = pd.read_csv(file, parse_dates=['date'], index_col='date', compression='xz')

    # splitext strips only the last extension, so "x.csv.xz" becomes "x.csv".
    base_file = os.path.splitext(os.path.basename(file))[0]
    img_file = os.path.join(root_img_dir, base_file + '.png')

    # Hold the figure explicitly so it can be closed even when plotting or
    # saving raises; otherwise it stays registered with pyplot in this worker.
    fig, ax = plt.subplots()
    try:
        ax.plot(df)
        ax.set_title(base_file)
        fig.savefig(img_file, dpi=300)
        print("Saved {}".format(img_file))
    finally:
        plt.close(fig)
# The guard keeps worker processes that re-import this module (spawn start
# method) from launching pools of their own.
if __name__ == "__main__":
    csv_files = sorted(glob(os.path.join(root_data_dir, '*.csv.xz')))
    # The context manager shuts the 16 workers down once map() has returned.
    with multiprocessing.Pool(16) as pool:
        pool.map(plot_file, csv_files)
答案 0 :(得分:3)
添加以下代码
import gc
……然后在 plot_file 函数的末尾调用 gc.collect()：
def plot_file(file):
    """Plot one xz-compressed CSV file, save it as a PNG, then run the GC.

    The CSV is read with its ``date`` column parsed as dates and used as the
    index, then the whole frame is plotted. The image is written into
    ``root_img_dir`` under the input's base name with ``.png`` appended.
    A garbage collection pass is forced before returning.

    Args:
        file: Path to the ``.csv.xz`` file to plot.
    """
    print("Processing {}".format(file))
    df = pd.read_csv(file, parse_dates=['date'], index_col='date', compression='xz')

    # splitext strips only the last extension, so "x.csv.xz" becomes "x.csv".
    base_file = os.path.splitext(os.path.basename(file))[0]
    img_file = os.path.join(root_img_dir, base_file + '.png')

    # Hold the figure explicitly so cleanup also runs when plotting or saving
    # raises, instead of leaving the figure registered with pyplot.
    fig, ax = plt.subplots()
    try:
        ax.plot(df)
        ax.set_title(base_file)
        fig.savefig(img_file, dpi=300)
        print("Saved {}".format(img_file))
    finally:
        plt.close(fig)
        # Explicit collection pass after the figure has been closed.
        gc.collect()