这是我的数据框形状
a.shape
(4899, 48)
然后我做了
a['size'] = a.groupby(['customer_id']).transform(np.size)
结果报错,完整的错误信息如下:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~/anaconda3/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2441 try:
-> 2442 return self._engine.get_loc(key)
2443 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'size'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
~/anaconda3/lib/python3.6/site-packages/pandas/core/internals.py in set(self, item, value, check)
3714 try:
-> 3715 loc = self.items.get_loc(item)
3716 except KeyError:
~/anaconda3/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2443 except KeyError:
-> 2444 return self._engine.get_loc(self._maybe_cast_indexer(key))
2445
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'size'
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-18-413c0b2fb69e> in <module>()
----> 1 a['size'] = a.groupby(['customer_id']).transform(np.size)
~/anaconda3/lib/python3.6/site-packages/pandas/core/frame.py in __setitem__(self, key, value)
2329 else:
2330 # set column
-> 2331 self._set_item(key, value)
2332
2333 def _setitem_slice(self, key, value):
~/anaconda3/lib/python3.6/site-packages/pandas/core/frame.py in _set_item(self, key, value)
2396 self._ensure_valid_index(value)
2397 value = self._sanitize_column(key, value)
-> 2398 NDFrame._set_item(self, key, value)
2399
2400 # check if we are modifying a copy
~/anaconda3/lib/python3.6/site-packages/pandas/core/generic.py in _set_item(self, key, value)
1757
1758 def _set_item(self, key, value):
-> 1759 self._data.set(key, value)
1760 self._clear_item_cache()
1761
~/anaconda3/lib/python3.6/site-packages/pandas/core/internals.py in set(self, item, value, check)
3716 except KeyError:
3717 # This item wasn't present, just insert at end
-> 3718 self.insert(len(self.items), item, value)
3719 return
3720
~/anaconda3/lib/python3.6/site-packages/pandas/core/internals.py in insert(self, loc, item, value, allow_duplicates)
3817
3818 block = make_block(values=value, ndim=self.ndim,
-> 3819 placement=slice(loc, loc + 1))
3820
3821 for blkno, count in _fast_count_smallints(self._blknos[loc:]):
~/anaconda3/lib/python3.6/site-packages/pandas/core/internals.py in make_block(values, placement, klass, ndim, dtype, fastpath)
2717 placement=placement, dtype=dtype)
2718
-> 2719 return klass(values, ndim=ndim, fastpath=fastpath, placement=placement)
2720
2721 # TODO: flexible with index=None and/or items=None
~/anaconda3/lib/python3.6/site-packages/pandas/core/internals.py in __init__(self, values, placement, ndim, fastpath)
113 raise ValueError('Wrong number of items passed %d, placement '
114 'implies %d' % (len(self.values),
--> 115 len(self.mgr_locs)))
116
117 @property
ValueError: Wrong number of items passed 47, placement implies 1
答案 0 :(得分:1)
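您需要在 groupby 之后指定一列。如果不指定列,transform(np.size) 会对每一列分别计算 size,此时得到的是一个 DataFrame(每列的计数),而不是单独的一列,因此无法赋值给 a['size'](这正是 "Wrong number of items passed 47, placement implies 1" 的原因):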
a = pd.DataFrame({'A':list('abcdef'),
'B':[4,5,4,5,5,4],
'C':[7,8,9,4,2,3],
'D':[1,3,5,7,1,0],
'E':[5,3,6,9,2,4],
'customer_id':list('aaabbc')})
print (a)
A B C D E customer_id
0 a 4 7 1 5 a
1 b 5 8 3 3 a
2 c 4 9 5 6 a
3 d 5 4 7 9 b
4 e 5 2 1 2 b
5 f 4 3 0 4 c
a['size'] = a.groupby(['customer_id'])['customer_id'].transform(np.size)
#a['size'] = a.groupby(['customer_id'])['A'].transform(np.size)
print (a)
A B C D E customer_id size
0 a 4 7 1 5 a 3
1 b 5 8 3 3 a 3
2 c 4 9 5 6 a 3
3 d 5 4 7 9 b 2
4 e 5 2 1 2 b 2
5 f 4 3 0 4 c 1
# without selecting a column after groupby, transform returns the count for every column
print (a.groupby(['customer_id']).transform(np.size))
A B C D E size
0 3 3 3 3 3 3
1 3 3 3 3 3 3
2 3 3 3 3 3 3
3 2 2 2 2 2 2
4 2 2 2 2 2 2
5 1 1 1 1 1 1