ValueError: Wrong number of items passed 47, placement implies 1(传入的项目数量为 47,而目标位置只能容纳 1 个)以及 KeyError: 'size'

时间:2018-05-24 11:39:56

标签: python pandas dataframe

这是我的数据框形状

a.shape
(4899, 48)

然后我做了

a['size'] = a.groupby(['customer_id']).transform(np.size)

结果报错如下:

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
~/anaconda3/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   2441             try:
-> 2442                 return self._engine.get_loc(key)
   2443             except KeyError:

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 'size'

During handling of the above exception, another exception occurred:

KeyError                                  Traceback (most recent call last)
~/anaconda3/lib/python3.6/site-packages/pandas/core/internals.py in set(self, item, value, check)
   3714         try:
-> 3715             loc = self.items.get_loc(item)
   3716         except KeyError:

~/anaconda3/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   2443             except KeyError:
-> 2444                 return self._engine.get_loc(self._maybe_cast_indexer(key))
   2445 

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 'size'

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
<ipython-input-18-413c0b2fb69e> in <module>()
----> 1 a['size'] = a.groupby(['customer_id']).transform(np.size)

~/anaconda3/lib/python3.6/site-packages/pandas/core/frame.py in __setitem__(self, key, value)
   2329         else:
   2330             # set column
-> 2331             self._set_item(key, value)
   2332 
   2333     def _setitem_slice(self, key, value):

~/anaconda3/lib/python3.6/site-packages/pandas/core/frame.py in _set_item(self, key, value)
   2396         self._ensure_valid_index(value)
   2397         value = self._sanitize_column(key, value)
-> 2398         NDFrame._set_item(self, key, value)
   2399 
   2400         # check if we are modifying a copy

~/anaconda3/lib/python3.6/site-packages/pandas/core/generic.py in _set_item(self, key, value)
   1757 
   1758     def _set_item(self, key, value):
-> 1759         self._data.set(key, value)
   1760         self._clear_item_cache()
   1761 

~/anaconda3/lib/python3.6/site-packages/pandas/core/internals.py in set(self, item, value, check)
   3716         except KeyError:
   3717             # This item wasn't present, just insert at end
-> 3718             self.insert(len(self.items), item, value)
   3719             return
   3720 

~/anaconda3/lib/python3.6/site-packages/pandas/core/internals.py in insert(self, loc, item, value, allow_duplicates)
   3817 
   3818         block = make_block(values=value, ndim=self.ndim,
-> 3819                            placement=slice(loc, loc + 1))
   3820 
   3821         for blkno, count in _fast_count_smallints(self._blknos[loc:]):

~/anaconda3/lib/python3.6/site-packages/pandas/core/internals.py in make_block(values, placement, klass, ndim, dtype, fastpath)
   2717                      placement=placement, dtype=dtype)
   2718 
-> 2719     return klass(values, ndim=ndim, fastpath=fastpath, placement=placement)
   2720 
   2721 # TODO: flexible with index=None and/or items=None

~/anaconda3/lib/python3.6/site-packages/pandas/core/internals.py in __init__(self, values, placement, ndim, fastpath)
    113             raise ValueError('Wrong number of items passed %d, placement '
    114                              'implies %d' % (len(self.values),
--> 115                                              len(self.mgr_locs)))
    116 
    117     @property

ValueError: Wrong number of items passed 47, placement implies 1

1 个答案:

答案 0 :(得分:1)

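您需要在 groupby 之后指定一列再调用 transform。如果不指定列,transform(np.size) 会对除分组列以外的每一列都计算 size,返回的是一个 DataFrame(每列的计数),无法赋值给单个列 a['size']。原数据有 48 列,去掉分组列 customer_id 后剩下 47 列,这正是报错 "Wrong number of items passed 47, placement implies 1" 的原因。示例: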

a = pd.DataFrame({'A':list('abcdef'),
                   'B':[4,5,4,5,5,4],
                   'C':[7,8,9,4,2,3],
                   'D':[1,3,5,7,1,0],
                   'E':[5,3,6,9,2,4],
                   'customer_id':list('aaabbc')})

print (a)
   A  B  C  D  E customer_id
0  a  4  7  1  5           a
1  b  5  8  3  3           a
2  c  4  9  5  6           a
3  d  5  4  7  9           b
4  e  5  2  1  2           b
5  f  4  3  0  4           c
a['size'] = a.groupby(['customer_id'])['customer_id'].transform(np.size)

#a['size'] = a.groupby(['customer_id'])['A'].transform(np.size)
print (a)
   A  B  C  D  E customer_id  size
0  a  4  7  1  5           a     3
1  b  5  8  3  3           a     3
2  c  4  9  5  6           a     3
3  d  5  4  7  9           b     2
4  e  5  2  1  2           b     2
5  f  4  3  0  4           c     1

# without selecting a column, transform returns the counts for every column
print (a.groupby(['customer_id']).transform(np.size))
   A  B  C  D  E  size
0  3  3  3  3  3     3
1  3  3  3  3  3     3
2  3  3  3  3  3     3
3  2  2  2  2  2     2
4  2  2  2  2  2     2
5  1  1  1  1  1     1