试图用另一个列表过滤我的pandas列,但收到内存错误

时间:2018-02-23 21:43:58

标签: python pandas indexing out-of-memory

我的数据框ag_data包含一个名为state的列,其中包含州缩写,但并非状态中的所有数据都是正确且已确认的US州缩写。 SA是州缩写的列表。我想检查我的状态列中的状态缩写是否也在SA列表中并过滤我的数据集,但是我一直收到错误。还有另一种方法吗?

ag_data[ag_data.state.isin(SA)]

---------------------------------------------------------------------------
MemoryError                               Traceback (most recent call last)
<ipython-input-26-8e3eefc5127e> in <module>()
      1 #ag_data["state"] = ag_data[ag_data[ag_data.columns[0]].isin(SA)]
----> 2 ag_data[ag_data.state.isin(SA)]

~\AppData\Local\Continuum\anaconda3\envs\acnPJ\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
   1956         if isinstance(key, (Series, np.ndarray, Index, list)):
   1957             # either boolean or fancy integer index
-> 1958             return self._getitem_array(key)
   1959         elif isinstance(key, DataFrame):
   1960             return self._getitem_frame(key)

~\AppData\Local\Continuum\anaconda3\envs\acnPJ\lib\site-packages\pandas\core\frame.py in _getitem_array(self, key)
   1998             key = check_bool_indexer(self.index, key)
   1999             indexer = key.nonzero()[0]
-> 2000             return self.take(indexer, axis=0, convert=False)
   2001         else:
   2002             indexer = self.loc._convert_to_indexer(key, axis=1)

~\AppData\Local\Continuum\anaconda3\envs\acnPJ\lib\site-packages\pandas\core\generic.py in take(self, indices, axis, convert, is_copy, **kwargs)
   1926         new_data = self._data.take(indices,
   1927                                    axis=self._get_block_manager_axis(axis),
-> 1928                                    convert=True, verify=True)
   1929         result = self._constructor(new_data).__finalize__(self)
   1930 

~\AppData\Local\Continuum\anaconda3\envs\acnPJ\lib\site-packages\pandas\core\internals.py in take(self, indexer, axis, verify, convert)
   4009         new_labels = self.axes[axis].take(indexer)
   4010         return self.reindex_indexer(new_axis=new_labels, indexer=indexer,
-> 4011                                     axis=axis, allow_dups=True)
   4012 
   4013     def merge(self, other, lsuffix='', rsuffix=''):

~\AppData\Local\Continuum\anaconda3\envs\acnPJ\lib\site-packages\pandas\core\internals.py in reindex_indexer(self, new_axis, indexer, axis, fill_value, allow_dups, copy)
   3895             new_blocks = [blk.take_nd(indexer, axis=axis, fill_tuple=(
   3896                 fill_value if fill_value is not None else blk.fill_value,))
-> 3897                 for blk in self.blocks]
   3898 
   3899         new_axes = list(self.axes)

~\AppData\Local\Continuum\anaconda3\envs\acnPJ\lib\site-packages\pandas\core\internals.py in <listcomp>(.0)
   3895             new_blocks = [blk.take_nd(indexer, axis=axis, fill_tuple=(
   3896                 fill_value if fill_value is not None else blk.fill_value,))
-> 3897                 for blk in self.blocks]
   3898 
   3899         new_axes = list(self.axes)

~\AppData\Local\Continuum\anaconda3\envs\acnPJ\lib\site-packages\pandas\core\internals.py in take_nd(self, indexer, axis, new_mgr_locs, fill_tuple)
   1044             fill_value = fill_tuple[0]
   1045             new_values = algos.take_nd(values, indexer, axis=axis,
-> 1046                                        allow_fill=True, fill_value=fill_value)
   1047 
   1048         if new_mgr_locs is None:

~\AppData\Local\Continuum\anaconda3\envs\acnPJ\lib\site-packages\pandas\core\algorithms.py in take_nd(arr, indexer, axis, out, fill_value, mask_info, allow_fill)
   1465             out = np.empty(out_shape, dtype=dtype, order='F')
   1466         else:
-> 1467             out = np.empty(out_shape, dtype=dtype)
   1468 
   1469     func = _get_take_nd_function(arr.ndim, arr.dtype, out.dtype, axis=axis,

MemoryError: 

0 个答案:

没有答案