Question

我正在尝试使用Python和matplotlib绘制一个非常大的文件（约5 GB）。我能够将整个文件加载到内存中（机器可用内存共16 GB），但是当我用简单的imshow绘制它时，会出现段错误。这很可能是因为我的ulimit被设置为15000，而我无法将其调得更高。因此我得出结论：需要分批绘制数组，于是写了一段简单的代码来实现。我的主要问题是，当我分批绘制大数组时，每一批的x坐标总是从0开始，我无法把这些图像叠加起来拼成最终的大图。如果您有任何建议，请告诉我。此外，由于没有管理员权限，我无法在这台计算机上安装“Image”等新软件包。下面是一个代码示例，它读取数组的前12行并生成3个图。

import os
import sys
import scipy
import numpy as np
import pylab as pl
import matplotlib as mpl
import matplotlib.cm as cm
from optparse import OptionParser
from scipy import fftpack
from scipy.fftpack import *
from cmath import *
from pylab import *
import pp
import fileinput
import matplotlib.pylab as plt
import pickle

def readalllines(file1,rows,freqs):
    """Read the fifth column of the first ``rows*freqs`` lines of a text file.

    Each line is split on whitespace and its fifth field (index 4) is
    converted to float.

    Parameters:
        file1: path of the text file to read.
        rows: number of rows of the final 2-D array.
        freqs: number of values per row.

    Returns:
        A 1-D float numpy array of length ``int(rows*freqs)``.

    Raises:
        IndexError: a line has fewer than five fields (this includes
            reaching the end of the file early, where readline() returns
            an empty string).
        ValueError: the fifth field is not a valid float.
    """
    total = int(rows*freqs)
    report_every = 262144
    q = np.zeros(total,'float')
    # The context manager closes the file even if a line fails to parse.
    with open(file1,'r') as infile:
        for i in range(total):
            q[i] = float(infile.readline().split()[4])
            if i % report_every == 0:
                # Progress is relative to the number of lines actually
                # requested, not to a fixed file size.
                sys.stdout.write('\r  %d   percent complete' % int(i*100.0/total))
                sys.stdout.flush()
    return q

# Command-line interface. The numeric options declare a type so that values
# given on the command line are converted to numbers, like their defaults,
# instead of arriving as strings.
parser = OptionParser()
parser.add_option('-f',dest="filename",help="Read dynamic spectrum from FILE",metavar="FILE")
parser.add_option('-t',dest="dtime",type="float",help="The time integration used in seconds, default 10",default=10)
parser.add_option('-n',dest="dfreq",type="float",help="The bandwidth of each frequency channel in Hz",default=11.92092896)
parser.add_option('-w',dest="reduce",type="int",help="The chuncker divider in frequency channels, integer default 16",default=16)
(opts,args) = parser.parse_args()
if opts.filename is None:
    # Exit with a usage message instead of failing later inside open(None).
    parser.error("no input file given (use -f FILE)")

# Only the first `rows` rows of `freqs` values each are read.
rows=12
freqs = 262144

file1 = opts.filename

# Load the flat value list, shape it to (rows, freqs), then transpose so the
# array handed to the plotting code is (freqs, rows).
s = readalllines(file1,rows,freqs)
s = np.reshape(s,(rows,freqs))
s = s.T
print(s.shape)
#raw_input()

#s_shift = scipy.fftpack.fftshift(s)


#fig = plt.figure()

#fig.patch.set_alpha(0.0)
#axes = plt.axes()
#axes.patch.set_alpha(0.0)
###plt.ylim(0,8)

# Plot the array in chunks of 4 columns, one new figure per chunk.
# Interactive mode is switched on before any figure is created.
plt.ion()

# i counts the chunks drawn; it is not read anywhere in this snippet.
i = 0
for o in range(0,rows,4):

    # A fresh figure is created for every chunk, so the chunks are not drawn
    # on shared axes.
    fig = plt.figure()
    #plt.clf()

    # Show columns o..o+3 of s. No extent is passed here.
    plt.imshow(s[:,o:o+4],interpolation='nearest',aspect='auto', cmap=cm.gray_r, origin='lower')
    # The full-size axis limits are applied to the first chunk only.
    if o == 0:
        axis([0,rows,0,freqs])
    # Read the current x tick locations (fdf) and labels (fdff), then move
    # the tick locations by the chunk offset o.
    fdf, fdff = xticks()
    print fdf
    xticks(fdf+o)
    print xticks()
    #axis([o,o+4,0,freqs])
    plt.draw()

    #w, h = fig.canvas.get_width_height()
    #buf = np.fromstring(fig.canvas.tostring_argb(), dtype=np.uint8)
    #buf.shape = (w,h,4)

    #buf = np.rol(buf, 3, axis=2)
    #w,h,_ = buf.shape
    #img = Image.fromstring("RGBA", (w,h),buf.tostring())

    #if prev:
    #    prev.paste(img)
    #    del prev
    #prev = img
    i += 1
# Called once after the loop; presumably this applies to the last figure
# created -- confirm against the pylab current-figure behaviour.
pl.colorbar()
pl.show()

Answer 1

如果你绘制的数组在任一方向上超过约2k像素，图形处理链中的某个环节总会以某种方式对图像进行下采样，才能把它显示在显示器上。我建议以受控的方式自行下采样，例如

# NOTE(review): convert_raw_data_to_fft and args are not defined in this
# snippet; presumably they stand in for the reader's own loading code.
data = convert_raw_data_to_fft(args) # make sure data is row major
def ds_decimate(row,step = 100):
    """Keep every ``step``-th element of ``row``, starting with the first."""
    stride = slice(None, None, step)
    return row[stride]
def ds_sum(row,step = 100):
    """Downsample ``row`` by summing consecutive groups of ``step`` samples.

    Trailing samples that do not fill a complete group are dropped.
    ``step`` defaults to 100, matching ds_decimate, so the function can be
    called with the row alone.

    Parameters:
        row: 1-D numpy array.
        step: number of samples per group.

    Returns:
        1-D array with one sum per complete group.
    """
    usable = step*(len(row)//step)
    return np.sum(row[:usable].reshape(-1,step),axis=1)
# as per suggestion from tom10 in comments
def ds_max(row,step = 100):
    """Downsample ``row`` by taking the maximum of each group of ``step`` samples.

    Trailing samples that do not fill a complete group are dropped.
    ``step`` defaults to 100, matching ds_decimate, so the function can be
    called with the row alone.

    Parameters:
        row: 1-D numpy array.
        step: number of samples per group.

    Returns:
        1-D array with one maximum per complete group.
    """
    usable = step*(len(row)//step)
    return np.max(row[:usable].reshape(-1,step),axis=1)
# Pass the bin width explicitly; 100 matches ds_decimate's default step.
data_plotable = [ds_sum(d, 100) for d in data] # plug in whichever function you want

或者使用插值（interpolation）。

Answer 2

在绘制图像时，Matplotlib的内存效率非常低。它创建了几个全分辨率的中间数组，这可能是你的程序崩溃的原因。

正如@tcaswell建议的那样，一种解决方案是在将图像送入matplotlib之前对图像进行下采样。

我还写了一些包装代码，根据你的屏幕分辨率自动进行下采样。它位于https://github.com/ChrisBeaumont/mpl-modest-image，如果有用的话。它还具有即时重新采样图像的优势，因此您仍然可以平移和缩放，而不会在您需要的地方牺牲分辨率。

Answer 3

我认为您忽略了plt.imshow的关键字参数extent=(left, right, bottom, top)。

# Two demo images, each 10 columns wide: x has 2 rows, y has 4 rows.
x = np.random.randn(2, 10)
x[0] = 0  # To make it clear which side is up, etc
y = np.ones((4, 10))
y[0] = -1

# Place both images on the same axes through extent=(left, right, bottom, top):
# x covers the y-range 0..2 and y covers 2..6.
for image, (bottom, top) in ((x, (0, 2)), (y, (2, 6))):
    plt.imshow(image, extent=(0, 10, bottom, top))

# This is necessary, else the plot gets scaled and only shows the last array
plt.ylim(0, 6)
plt.colorbar()
plt.show()

enter image description here

在Python中绘制带x轴偏移的超大文件（5GB）

3 个答案: