绘制数据帧时的内存错误(matplotlib)

时间:2016-04-23 06:28:28

标签: python pandas matplotlib jupyter-notebook

我正在使用Pandas和Jupyter Notebook,并尝试绘制一个很小的数据帧:(原文此处为一张图片,未能显示)

当我执行以下这行代码时:

df9.plot(x='Time', y='Pressure mean')

我收到以下错误:

---------------------------------------------------------------------------
MemoryError                               Traceback (most recent call last)
<ipython-input-8-c789b8162a1a> in <module>()
----> 1 df9.plot(x='Time', y='Pressure mean')

C:\Anaconda3\lib\site-packages\pandas\tools\plotting.py in __call__(self, x, y, kind, ax, subplots, sharex, sharey, layout, figsize, use_index, title, grid, legend, style, logx, logy, loglog, xticks, yticks, xlim, ylim, rot, fontsize, colormap, table, yerr, xerr, secondary_y, sort_columns, **kwds)
   3735                           fontsize=fontsize, colormap=colormap, table=table,
   3736                           yerr=yerr, xerr=xerr, secondary_y=secondary_y,
-> 3737                           sort_columns=sort_columns, **kwds)
   3738     __call__.__doc__ = plot_frame.__doc__
   3739 

C:\Anaconda3\lib\site-packages\pandas\tools\plotting.py in plot_frame(data, x, y, kind, ax, subplots, sharex, sharey, layout, figsize, use_index, title, grid, legend, style, logx, logy, loglog, xticks, yticks, xlim, ylim, rot, fontsize, colormap, table, yerr, xerr, secondary_y, sort_columns, **kwds)
   2609                  yerr=yerr, xerr=xerr,
   2610                  secondary_y=secondary_y, sort_columns=sort_columns,
-> 2611                  **kwds)
   2612 
   2613 

C:\Anaconda3\lib\site-packages\pandas\tools\plotting.py in _plot(data, x, y, subplots, ax, kind, **kwds)
   2436         plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds)
   2437 
-> 2438     plot_obj.generate()
   2439     plot_obj.draw()
   2440     return plot_obj.result

C:\Anaconda3\lib\site-packages\pandas\tools\plotting.py in generate(self)
   1029 
   1030         for ax in self.axes:
-> 1031             self._post_plot_logic_common(ax, self.data)
   1032             self._post_plot_logic(ax, self.data)
   1033 

C:\Anaconda3\lib\site-packages\pandas\tools\plotting.py in _post_plot_logic_common(self, ax, data)
   1157                 ax.set_xticklabels(xticklabels)
   1158             self._apply_axis_properties(ax.xaxis, rot=self.rot,
-> 1159                                         fontsize=self.fontsize)
   1160             self._apply_axis_properties(ax.yaxis, fontsize=self.fontsize)
   1161         elif self.orientation == 'horizontal':

C:\Anaconda3\lib\site-packages\pandas\tools\plotting.py in _apply_axis_properties(self, axis, rot, fontsize)
   1205 
   1206     def _apply_axis_properties(self, axis, rot=None, fontsize=None):
-> 1207         labels = axis.get_majorticklabels() + axis.get_minorticklabels()
   1208         for label in labels:
   1209             if rot is not None:

C:\Anaconda3\lib\site-packages\matplotlib\axis.py in get_majorticklabels(self)
   1159     def get_majorticklabels(self):
   1160         'Return a list of Text instances for the major ticklabels'
-> 1161         ticks = self.get_major_ticks()
   1162         labels1 = [tick.label1 for tick in ticks if tick.label1On]
   1163         labels2 = [tick.label2 for tick in ticks if tick.label2On]

C:\Anaconda3\lib\site-packages\matplotlib\axis.py in get_major_ticks(self, numticks)
   1288         'get the tick instances; grow as necessary'
   1289         if numticks is None:
-> 1290             numticks = len(self.get_major_locator()())
   1291         if len(self.majorTicks) < numticks:
   1292             # update the new tick label properties from the old

C:\Anaconda3\lib\site-packages\pandas\tseries\converter.py in __call__(self)
    876             vmin, vmax = vmax, vmin
    877         if self.isdynamic:
--> 878             locs = self._get_default_locs(vmin, vmax)
    879         else:  # pragma: no cover
    880             base = self.base

C:\Anaconda3\lib\site-packages\pandas\tseries\converter.py in _get_default_locs(self, vmin, vmax)
    857 
    858         if self.plot_obj.date_axis_info is None:
--> 859             self.plot_obj.date_axis_info = self.finder(vmin, vmax, self.freq)
    860 
    861         locator = self.plot_obj.date_axis_info

C:\Anaconda3\lib\site-packages\pandas\tseries\converter.py in _daily_finder(vmin, vmax, freq)
    481                     Period(ordinal=int(vmax), freq=freq))
    482     span = vmax.ordinal - vmin.ordinal + 1
--> 483     dates_ = PeriodIndex(start=vmin, end=vmax, freq=freq)
    484     # Initialize the output
    485     info = np.zeros(span,

C:\Anaconda3\lib\site-packages\pandas\tseries\period.py in __new__(cls, data, ordinal, freq, start, end, periods, copy, name, tz, **kwargs)
    186             else:
    187                 data, freq = cls._generate_range(start, end, periods,
--> 188                                                  freq, kwargs)
    189         else:
    190             ordinal, freq = cls._from_arraylike(data, freq, tz)

C:\Anaconda3\lib\site-packages\pandas\tseries\period.py in _generate_range(cls, start, end, periods, freq, fields)
    200                 raise ValueError('Can either instantiate from fields '
    201                                  'or endpoints, but not both')
--> 202             subarr, freq = _get_ordinal_range(start, end, periods, freq)
    203         elif field_count > 0:
    204             subarr, freq = _range_from_fields(freq=freq, **fields)

C:\Anaconda3\lib\site-packages\pandas\tseries\period.py in _get_ordinal_range(start, end, periods, freq, mult)
   1026                              dtype=np.int64)
   1027     else:
-> 1028         data = np.arange(start.ordinal, end.ordinal + 1, mult, dtype=np.int64)
   1029 
   1030     return data, freq

MemoryError: 

问题出在哪里?我想不明白。谢谢!

2 个答案:

答案 0 :(得分:2)

问题源于使用TimedeltaIndex(或timedelta)作为时间列。该问题已在 https://github.com/pydata/pandas/issues/8711 中报告,目前还没有解决方案。

作为变通办法,建议您将数据转换为datetime或DatetimeIndex。下面假设YourDate是表示观测起始日期的字符串:

df9.index = pd.DatetimeIndex(pd.datetime.strptime(YourDate,'%d.%m.%Y %H:%M:%S')
                            +df9['Time'])
df9.plot(y='Pressure mean')

请注意,如果数据的时间跨度不足24小时,则坐标轴上只会绘制小时。

编辑(2016-11-07): 我现在可以使用timedelta作为索引并正确绘图了。我的做法如下(假设时间列是以浮点数表示的小时数):

converter = {'Time[h]' : lambda x: pd.to_timedelta(float(x),unit='h')}#converts float to timedelta
df = pd.read_csv(fpath, sep='\t', 
                 skiprows=len(comments),#header
                 names=dt.keys(),#you need of course your own dtype
                 dtype=dt,#you need of course your own dtype
                 encoding='latin-1',#European data...
                 skipinitialspace=True,
                 converters=converter)
df = df.set_index('Time[h]')#time column to index.

答案 1 :(得分:0)

正如Wli所说,这个问题仍有待解决。不过作为一种变通办法,下面的写法对我有效:

plt.plot(s.index,s.values)