我正在尝试使用to_hdf()方法保存一个相对较大的DataFrame(根据info()方法的内存使用量返回663+ MB)到HDFstore。
但每次都会遇到“内存错误”(MemoryError)。
因此,我有两个问题:
我知道可以将其保存为“表格”(table)格式而不是“固定”(fixed)格式,这样可以支持追加写入,但我还没有尝试过,主要是因为我希望能有一个更简单的替代方案。
提前多多感谢:)
P.S. 我想补充一点:我试过 to_pickle(),运行顺利,所以我猜测这应该不是物理内存(RAM)不足的问题。
错误:
MemoryError Traceback (most recent call last)
<ipython-input-10-05bb5886160a> in <module>()
1 train_data = pd.HDFStore('strat_train_data.h5')
----> 2 strat_train_set.to_hdf(train_data, 'strat_train_set')
C:\Users\IQBALSH\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\generic.py in to_hdf(self, path_or_buf, key, **kwargs)
1280
1281 from pandas.io import pytables
-> 1282 return pytables.to_hdf(path_or_buf, key, self, **kwargs)
1283
1284 def to_msgpack(self, path_or_buf=None, encoding='utf-8', **kwargs):
C:\Users\IQBALSH\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\io\pytables.py in to_hdf(path_or_buf, key, value, mode, complevel, complib, append, **kwargs)
268 f(store)
269 else:
--> 270 f(path_or_buf)
271
272
C:\Users\IQBALSH\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\io\pytables.py in <lambda>(store)
260 f = lambda store: store.append(key, value, **kwargs)
261 else:
--> 262 f = lambda store: store.put(key, value, **kwargs)
263
264 path_or_buf = _stringify_path(path_or_buf)
C:\Users\IQBALSH\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\io\pytables.py in put(self, key, value, format, append, **kwargs)
869 format = get_option("io.hdf.default_format") or 'fixed'
870 kwargs = self._validate_format(format, kwargs)
--> 871 self._write_to_group(key, value, append=append, **kwargs)
872
873 def remove(self, key, where=None, start=None, stop=None):
C:\Users\IQBALSH\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\io\pytables.py in _write_to_group(self, key, value, format, index, append, complib, encoding, **kwargs)
1311
1312 # write the object
-> 1313 s.write(obj=value, append=append, complib=complib, **kwargs)
1314
1315 if s.is_table and index:
C:\Users\IQBALSH\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\io\pytables.py in write(self, obj, **kwargs)
2890 # I have no idea why, but writing values before items fixed #2299
2891 blk_items = data.items.take(blk.mgr_locs)
-> 2892 self.write_array('block%d_values' % i, blk.values, items=blk_items)
2893 self.write_index('block%d_items' % i, blk_items)
2894
C:\Users\IQBALSH\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\io\pytables.py in write_array(self, key, value, items)
2658 vlarr = self._handle.create_vlarray(self.group, key,
2659 _tables().ObjectAtom())
-> 2660 vlarr.append(value)
2661 else:
2662 if empty_array:
C:\Users\IQBALSH\AppData\Local\Continuum\Anaconda3\lib\site-packages\tables\vlarray.py in append(self, sequence)
517 atom = self.atom
518 if not hasattr(atom, 'size'): # it is a pseudo-atom
--> 519 sequence = atom.toarray(sequence)
520 statom = atom.base
521 else:
C:\Users\IQBALSH\AppData\Local\Continuum\Anaconda3\lib\site-packages\tables\atom.py in toarray(self, object_)
1051
1052 def toarray(self, object_):
-> 1053 buffer_ = self._tobuffer(object_)
1054 array = numpy.ndarray(buffer=buffer_, dtype=self.base.dtype,
1055 shape=len(buffer_))
C:\Users\IQBALSH\AppData\Local\Continuum\Anaconda3\lib\site-packages\tables\atom.py in _tobuffer(self, object_)
1171
1172 def _tobuffer(self, object_):
-> 1173 return pickle.dumps(object_, pickle.HIGHEST_PROTOCOL)
1174
1175 def fromarray(self, array):
MemoryError: