我有一个简单格式的大型(~6GB)文本文件
x1 y1 z1
x2 y2 z2
...
由于我可能会多次加载此数据,因此出于效率原因我创建了一个np.memmap
文件:
# Map the binary file read-only; transposing lets the three columns unpack.
X, Y, Z = np.memmap(f_np_mmap, dtype='float32', mode='r', shape=shape).T
我想做的是绘图:
# Scatter every point as a tiny, nearly transparent square coloured from Z.
plt.scatter(
    X,
    Y,
    color=custom_colorfunction(Z),
    alpha=.01,
    s=.001,
    marker='s',
    linewidth=0,
)
这对较小的数据集效果很好。但是,对于这个更大的数据集,我的内存不足。我已经确认是plt.scatter
占用了所有的内存;我可以顺利地遍历X,Y,Z
。有没有一种方法可以让我“光栅化”画布,这样就不会耗尽内存?我不需要缩放和平移图像,它将直接保存到磁盘。我意识到我可以先对数据进行分箱(binning)再绘制,但我不确定如何在使用自定义色图的同时让alpha值生效。
答案 0 :(得分:7)
@tcaswell建议覆盖Axes.draw
方法绝对是解决此问题的最灵活方式。
但是,您可以使用/滥用blitting来执行此操作,而无需继承Axes
。每次只需使用draw_artist
而无需恢复画布。
还有一个额外的技巧:我们需要一个特殊的save
方法,因为所有其他方法在保存前绘制画布,这将消除我们之前在其上绘制的所有内容。
另外,正如tcaswell注意到的那样,为每个点调用draw_artist
的速度相当慢,因此对于大量的点,您需要对输入数据进行分块。分块将带来显著的加速,但这种方法总是比绘制单个PathCollection
慢。
无论如何,这些答案中的任何一个都应该可以缓解您的记忆问题。这是一个简单的例子。
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import _png
from itertools import izip
def main():
    """Stream random points onto an already-drawn canvas and save the result.

    A single scatter collection is reused: for every chunk coming out of
    ``datastream`` its offsets and colors are replaced and it is drawn on
    top of what is already on the canvas, so only one chunk is held by the
    collection at a time.
    """
    # The figure background ends up in the saved image, so keep it transparent.
    figure, axes = plt.subplots(facecolor='none')

    # The data extent has to be fixed up front with this approach.
    axes.axis([0, 10, 0, 10])

    # The canvas must be drawn once before any points are added on top.
    figure.canvas.draw()

    # Placeholder artist: it only exists so that it can be updated and
    # re-drawn for each chunk below.
    placeholder = axes.scatter([5], [5], color=[0.1, 0.1, 0.1], alpha=0.3)

    total_points = int(1e6)
    points_per_chunk = int(1e4)
    for offsets, rgb in datastream(total_points, chunksize=points_per_chunk):
        placeholder.set_offsets(offsets)
        placeholder.set_color(rgb)
        axes.draw_artist(placeholder)

    save(figure, 'test.png')
def datastream(n, chunksize=1):
    """Returns a generator over "n" random xy positions and rgb colors.

    Each item is an ``(xy, color)`` pair where ``xy`` is a ``(k, 2)`` array
    scaled by 10 and ``color`` is a ``(k, 3)`` array of values from
    ``np.random.random``. ``k`` equals ``chunksize`` for every chunk except
    possibly the last one, which holds the remainder when ``n`` is not a
    multiple of ``chunksize`` -- so exactly ``n`` points are produced in
    total. Nothing is yielded when ``n`` is zero or negative.

    Raises ZeroDivisionError if ``chunksize`` is 0.
    """
    full_chunks, leftover = divmod(n, chunksize)
    # One extra, shorter chunk carries the remainder instead of dropping it.
    for index in range(full_chunks + bool(leftover)):
        size = chunksize if index < full_chunks else leftover
        xy = 10 * np.random.random((size, 2))
        color = np.random.random((size, 3))
        yield xy, color
def save(fig, filename):
    """Write the canvas' current pixel buffer to ``filename`` as a PNG.

    We have to work around `fig.canvas.print_png`, etc calling `draw`:
    a redraw would wipe everything that was drawn onto the canvas
    incrementally, so the existing renderer buffer is written out directly.

    fig      -- figure whose ``canvas.renderer`` holds the pixels to save
                (NOTE(review): presumably requires the Agg backend -- confirm).
    filename -- path of the PNG file to create or overwrite.
    """
    renderer = fig.canvas.renderer
    # PNG is binary data, so the file must be opened in binary mode; text
    # mode can translate newline bytes and corrupt the image.
    with open(filename, 'wb') as outfile:
        # NOTE: `_png` and `renderer._renderer` are private matplotlib APIs.
        _png.write_png(renderer._renderer.buffer_rgba(),
                       renderer.width, renderer.height,
                       outfile, fig.dpi)
# Run the demo only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
另外,你可能会注意到顶部和左侧的轴脊(spines)被数据点覆盖了。你可以通过在保存之前重新绘制这两条轴脊(ax.draw_artist(ax.spines['top'])
等)来解决这个问题。
答案 1 :(得分:6)
这样的事情(对于长代码感到抱歉,大部分是从标准axes.Axes.draw
复制的):
from operator import itemgetter
class generator_scatter_axes(matplotlib.axes.Axes):
    """Axes that additionally renders points streamed from ``_big_data``.

    ``draw`` first draws the regular artists (the logic is copied from the
    stock ``Axes.draw``) and then iterates over ``_big_data``, creating,
    drawing and removing one scatter artist per item so that the axes never
    keeps the artists for the whole data set.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments to ``Axes.__init__``; start with no big data."""
        matplotlib.axes.Axes.__init__(self, *args, **kwargs)
        # Iterable of (x, y, z) items to stream in ``draw``; None disables it.
        self._big_data = None

    def draw(self, renderer=None, inframe=None):
        """Draw the axes' artists, then stream ``_big_data`` to the renderer.

        renderer -- renderer to draw with; falls back to the cached renderer
                    from a previous draw when None.
        inframe  -- when truthy, the axis objects and the titles are skipped.

        Raises RuntimeError if no renderer is given and none is cached.
        """
        # copied from original draw (so you can still add normal artists etc)
        if renderer is None:
            renderer = self._cachedRenderer

        if renderer is None:
            raise RuntimeError('No renderer defined')
        if not self.get_visible():
            return
        renderer.open_group('axes')

        locator = self.get_axes_locator()
        if locator:
            pos = locator(self, renderer)
            self.apply_aspect(pos)
        else:
            self.apply_aspect()

        artists = []

        artists.extend(self.collections)
        artists.extend(self.patches)
        artists.extend(self.lines)
        artists.extend(self.texts)
        artists.extend(self.artists)
        if self.axison and not inframe:
            if self._axisbelow:
                self.xaxis.set_zorder(0.5)
                self.yaxis.set_zorder(0.5)
            else:
                self.xaxis.set_zorder(2.5)
                self.yaxis.set_zorder(2.5)
            artists.extend([self.xaxis, self.yaxis])
        if not inframe:
            artists.append(self.title)
            artists.append(self._left_title)
            artists.append(self._right_title)
        artists.extend(self.tables)
        if self.legend_ is not None:
            artists.append(self.legend_)

        # the frame draws the edges around the axes patch -- we
        # decouple these so the patch can be in the background and the
        # frame in the foreground.
        if self.axison and self._frameon:
            # ``values()`` instead of the Python-2-only ``itervalues()``.
            artists.extend(self.spines.values())

        # Animated artists are only included while the canvas is saving.
        if self.figure.canvas.is_saving():
            dsu = [(a.zorder, a) for a in artists]
        else:
            dsu = [(a.zorder, a) for a in artists
                   if not a.get_animated()]

        # add images to dsu if the backend support compositing.
        # otherwise, does the manual compositing without adding images to dsu.
        if len(self.images) <= 1 or renderer.option_image_nocomposite():
            dsu.extend([(im.zorder, im) for im in self.images])
            _do_composite = False
        else:
            _do_composite = True

        dsu.sort(key=itemgetter(0))

        # rasterize artists with negative zorder
        # if the minimum zorder is negative, start rasterization
        rasterization_zorder = self._rasterization_zorder
        if (rasterization_zorder is not None and
                len(dsu) > 0 and dsu[0][0] < rasterization_zorder):
            renderer.start_rasterizing()
            dsu_rasterized = [l for l in dsu if l[0] < rasterization_zorder]
            dsu = [l for l in dsu if l[0] >= rasterization_zorder]
        else:
            dsu_rasterized = []

        # the patch draws the background rectangle -- the frame below
        # will draw the edges
        if self.axison and self._frameon:
            self.patch.draw(renderer)

        if _do_composite:
            # make a composite image blending alpha
            # list of (mimage.Image, ox, oy)
            zorder_images = [(im.zorder, im) for im in self.images
                             if im.get_visible()]
            zorder_images.sort(key=lambda x: x[0])

            mag = renderer.get_image_magnification()
            ims = [(im.make_image(mag), 0, 0, im.get_alpha()) for z, im in zorder_images]

            l, b, r, t = self.bbox.extents
            width = mag * ((round(r) + 0.5) - (round(l) - 0.5))
            height = mag * ((round(t) + 0.5) - (round(b) - 0.5))
            im = mimage.from_images(height,
                                    width,
                                    ims)

            im.is_grayscale = False
            l, b, w, h = self.bbox.bounds
            # composite images need special args so they will not
            # respect z-order for now
            gc = renderer.new_gc()
            gc.set_clip_rectangle(self.bbox)
            gc.set_clip_path(mtransforms.TransformedPath(
                self.patch.get_path(),
                self.patch.get_transform()))

            renderer.draw_image(gc, round(l), round(b), im)
            gc.restore()

        if dsu_rasterized:
            for zorder, a in dsu_rasterized:
                a.draw(renderer)
            renderer.stop_rasterizing()

        for zorder, a in dsu:
            a.draw(renderer)

        ############################
        # new bits
        ############################
        if self._big_data is not None:
            # NOTE: z is unpacked but not used; every point is drawn in red.
            for x, y, z in self._big_data:
                # add the (single point) to the axes
                a = self.scatter(x, y, color='r',
                                 alpha=1, s=10, marker='s', linewidth=0)
                # add the point, in Agg this will render + composite
                a.draw(renderer)
                # remove the artist from the axes, shouldn't let the render know
                a.remove()
                # delete the artist for good measure
                del a
        #######################
        # end new bits
        #######################

        # again, from original to clean up
        renderer.close_group('axes')
        self._cachedRenderer = renderer
像这样使用它:
In [42]: fig = figure()
In [43]: ax = generator_scatter_axes(fig, [.1, .1, .8, .8])
In [44]: fig.add_axes(ax)
Out[44]: <__main__.generator_scatter_axes at 0x56fe090>
In [45]: ax._big_data = rand(500, 3)
In [46]: draw()
我修改了scatter的参数,使得点数较少时形状也能看得见。由于每次都要创建一个scatter
对象,因此速度会非常慢。我会要么把数据分成合理大小的块再绘制,要么将对scatter
的调用替换为底层的Artist对象,或者采用Joe的建议,只更新单个Artist。
答案 2 :(得分:0)
仅就接受的答案进行扩展,似乎“解决方法”保存功能由于write_png
的签名已更改而不再起作用。我的解决方法如下:
import numpy as np
from PIL import Image
def png_write(fig, filename):
    """Save the canvas' current ARGB buffer to ``filename`` without redrawing.

    fig      -- figure whose canvas provides ``tostring_argb()``.
    filename -- output path handed to ``PIL.Image.save``.
    """
    width, height = map(int, fig.get_size_inches() * fig.get_dpi())
    argb = np.frombuffer(fig.canvas.tostring_argb(), dtype='uint8')
    # Image buffers are stored row by row, so the row count (height) is the
    # first axis; reshaping as (width, height, 4) scrambles any figure that
    # is not square.
    image = argb.reshape(height, width, 4)
    # Rotate the channel axis by one so ARGB becomes RGBA.
    image = np.roll(image, -1, 2)
    Image.fromarray(image, 'RGBA').save(filename)