我是 Python 新手,正在尝试使用 matplotlib 的子图(subplots)和 PdfPages 将大量数据保存到 PDF 中。问题是我遇到了一个不知道如何解决的性能瓶颈,代码如下:
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
# Write 1000 pages to a single PDF, one 2x3 grid of subplots per page.
# NOTE(review): x1, y1, x2 and y2 are not defined in this snippet; they are
# assumed to be provided by the surrounding code.
with PdfPages('myfigures.pdf') as pdf:
    for _ in range(1000):
        # Create exactly one figure per page. Calling plt.subplots() a second
        # time here would build an extra, never-used figure on every
        # iteration and needlessly slow the loop down.
        f, axarr = plt.subplots(2, 3)
        axarr[0, 0].plot(x1, y1)
        axarr[1, 0].plot(x2, y2)
        pdf.savefig(f)
        # Release the figure so open figures do not accumulate in memory.
        plt.close('all')
每次循环都创建一个新的图形(figure)非常耗时,但如果把创建图形的语句放到循环之外,就无法在每次迭代时清除各个子图。我也尝试过 clear() 或 clf() 等其他方法,但它们要么不起作用,要么最终创建出多个不同的图形。有人知道如何换一种写法让它更快吗?
答案 0 :(得分:5)
要把大量子图以多页形式输出到单个 PDF 中,只需在检测到最新添加的子图已经填满当前页面的子图数组布局后,立即创建新页面。下面是一种实现方法,其中控制每页子图数量的维度可以很容易地修改:
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import sys
import timeit
from matplotlib.backends.backend_pdf import PdfPages
matplotlib.rcParams.update({'font.size': 6})

# Dimensions for any n-rows x m-cols array of subplots / pg.
n, m = 4, 5

# Don't forget to indent after the with statement
with PdfPages('auto_subplotting.pdf') as pdf:

    # Let's time the execution required to create and save
    # each full page of subplots to the pdf
    start_time = timeit.default_timer()

    # Before beginning the iteration through all the data,
    # initialize the layout for the plots and create a
    # representation of the subplots that can be easily
    # iterated over for knowing when to create the next page
    # (and also for custom settings like partial axes labels).
    # squeeze=False keeps axarr two-dimensional even when n or m is 1,
    # so the row-major flattening below works for any layout.
    f, axarr = plt.subplots(n, m, sharex='col', sharey='row', squeeze=False)
    subplots = list(axarr.flat)

    # To conserve needed plotting real estate,
    # only label the bottom row and leftmost subplots
    # as determined automatically using n and m
    splot_index = 0
    for s, splot in enumerate(subplots):
        splot.set_ylim(0, .15)
        splot.set_xlim(0, 50)
        last_row = (n*m - s < m + 1)
        first_in_row = (s % m == 0)
        if last_row:
            splot.set_xlabel("X-axis label")
        if first_in_row:
            splot.set_ylabel("Y-axis label")

    # Iterate through each sample in the data
    for sample in range(33):

        # As a stand-in for real data, let's just make numpy take 100 random draws
        # from a poisson distribution centered around say ~25 and then display
        # the outcome as a histogram
        scaled_y = np.random.randint(20, 30)
        random_data = np.random.poisson(scaled_y, 100)
        # density=True normalizes the histogram; it replaces the `normed`
        # keyword, which current Matplotlib releases no longer accept.
        subplots[splot_index].hist(random_data, bins=12, density=True,
                                   fc=(0, 0, 0, 0), lw=0.75, ec='b')

        # Keep subplotting through the samples in the data and increment
        # a counter each time. The page will be full once the count is equal
        # to the product of the user-set dimensions (i.e. n * m)
        splot_index += 1

        # We can basically repeat the same exact code block used for the
        # first layout initialization, but with a few extra lines: saving
        # and closing the just-finished page, printing the page's
        # execution time, and resetting the subplot index
        if splot_index == n*m:
            # Save the figure explicitly rather than relying on whichever
            # figure happens to be current.
            pdf.savefig(f)
            plt.close(f)
            print(timeit.default_timer() - start_time)
            start_time = timeit.default_timer()
            f, axarr = plt.subplots(n, m, sharex='col', sharey='row',
                                    squeeze=False)
            subplots = list(axarr.flat)
            splot_index = 0
            for s, splot in enumerate(subplots):
                splot.set_ylim(0, .15)
                splot.set_xlim(0, 50)
                last_row = ((n*m) - s < m + 1)
                first_in_row = (s % m == 0)
                if last_row:
                    splot.set_xlabel("X-axis label")
                if first_in_row:
                    splot.set_ylabel("Y-axis label")

    # Done!
    # But don't forget the last page. Only save it when it actually holds
    # at least one plot; otherwise a sample count that is an exact multiple
    # of n*m would append a completely blank page to the pdf.
    if splot_index > 0:
        pdf.savefig(f)
    plt.close(f)
对于 2x3 布局,只需在代码块开头相应地修改 n 和 m 的赋值即可。作为更紧凑布局的示例,上面的 4x5 矩阵用 33 个样本绘图,得到如下两页输出:
在第二页上可以看到,坐标轴框架是为整个布局创建的,但最后 7 个是空白的(4×5×2 = 40,40 − 33 = 7)。
`timeit` 的打印输出显示,创建第一页用时 1.81540203094 秒。
注意:可以通过创建一个 `new_page` 函数来简化多页处理;最好不要逐字重复代码,尤其是当你开始自定义绘图时,你不会希望每处改动都要同步修改两遍、把同样的内容输入两次。另外,借助 seaborn 并利用如下所示的 matplotlib 参数来获得更个性化的外观,可能也是可取的。
添加 `new_page` 函数,并对子图样式做了一些自定义:
from __future__ import print_function
import matplotlib.pyplot as plt
import numpy as np
import random
import seaborn as sns
import timeit
from matplotlib.backends.backend_pdf import PdfPages
# A font scale of zero erases the labels of any blank plots on the last page.
sns.set(font_scale=0.0)

# Page layout: n rows by m columns of subplots.
n = 4
m = 6

# Number of samples to plot (one histogram per sample).
datasize = 37

# Anchor colors that are blended into a palette of `datasize` colors.
ctheme = [
    'k',
    'gray',
    'magenta',
    'fuchsia',
    '#be03fd',
    '#1e488f',
    (0.44313725490196076, 0.44313725490196076, 0.88627450980392153),
    '#75bbfd',
    'teal',
    'lime',
    'g',
    (0.6666674, 0.6666663, 0.29078014184397138),
    'y',
    '#f1da7a',
    'tan',
    'orange',
    'maroon',
    'r',
]
colors = sns.blend_palette(ctheme, datasize)

# Font size used for axis labels and subplot titles.
fz = 7
def new_page(n, m):
    """Create a fresh page holding an n x m grid of pre-styled subplots.

    As a side effect, the module-level ``splot_index`` counter is reset to 0
    so that the caller starts filling the new page from its first subplot.

    Args:
        n: Number of subplot rows on the page.
        m: Number of subplot columns on the page.

    Returns:
        A ``(fig, subplots)`` tuple: the new figure and a flat, row-major
        list of its axes.
    """
    global splot_index
    splot_index = 0
    # squeeze=False keeps axarr two-dimensional even when n or m is 1, so
    # the flattening below works for every layout.
    fig, axarr = plt.subplots(n, m, sharey='row', squeeze=False)
    fig.subplots_adjust(hspace=0.5, wspace=0.15)
    subplots = list(axarr.flat)
    for s, splot in enumerate(subplots):
        # The on/off flag is passed positionally because its keyword name
        # changed between Matplotlib releases (`b` -> `visible`).
        splot.grid(True, which='major', color='gray', linestyle='-',
                   alpha=0.25, zorder=1, lw=0.5)
        splot.set_ylim(0, .15)
        splot.set_xlim(0, 50)
        # Only the bottom row gets x labels and only the leftmost column
        # gets y labels, to save plotting space.
        last_row = (n*m - s < m + 1)
        first_in_row = (s % m == 0)
        if last_row:
            splot.set_xlabel("X-axis label", labelpad=8, fontsize=fz)
        if first_in_row:
            splot.set_ylabel("Y-axis label", labelpad=8, fontsize=fz)
    return fig, subplots
# Plot `datasize` histograms into a multi-page pdf, n*m subplots per page.
# new_page() resets the module-level splot_index counter each time it is
# called, so the counter always refers to the next free subplot on the
# current page.
with PdfPages('auto_subplotting_colors.pdf') as pdf:
    start_time = timeit.default_timer()
    fig, subplots = new_page(n, m)

    # range (not xrange) so the script runs on Python 3 as well as Python 2.
    for sample in range(datasize):
        splot = subplots[splot_index]
        splot_index += 1
        # Stand-in data: 100 random draws from a poisson distribution.
        scaled_y = np.random.randint(20, 30)
        random_data = np.random.poisson(scaled_y, 100)
        # density=True normalizes the histogram; it replaces the `normed`
        # keyword, which current Matplotlib releases no longer accept.
        splot.hist(random_data, bins=12, density=True, zorder=2, alpha=0.99,
                   fc='white', lw=0.75, ec=colors.pop())
        splot.set_title("Sample {}".format(sample+1), fontsize=fz)
        # tick fontsize & spacing
        splot.xaxis.set_tick_params(pad=4, labelsize=6)
        splot.yaxis.set_tick_params(pad=4, labelsize=6)

        # make new page:
        if splot_index == n*m:
            pdf.savefig(fig)
            plt.close(fig)
            print(timeit.default_timer()-start_time)
            start_time = timeit.default_timer()
            fig, subplots = new_page(n, m)

    # Save the last, partially filled page only if it holds any plots.
    if splot_index > 0:
        pdf.savefig(fig)
    # Close the current figure (`fig`; no variable named `f` exists in this
    # script) whether or not it was saved, so it is not left open.
    plt.close(fig)
这次第一页用时 2.51897096634 秒: