我有一个数据框,尝试使用np.select创建新列时遇到错误。
# One boolean mask per payment-type code. np.select checks the masks in list
# order and, for each row, uses the first mask that is True.
conditions_decisions = [(Daily_Report['Payment_type']=='a'),
(Daily_Report['Payment_type']=='b'),
(Daily_Report['Payment_type']=='c'),
(Daily_Report['Payment_type']=='d'),
(Daily_Report['Payment_type']=='e'),
(Daily_Report['Payment_type']=='f'),
(Daily_Report['Payment_type']=='g')]
# Value emitted for each mask above, matched by position ('a' -> '1', ..., 'g' -> '7').
choices_decision=['1', '2', '3', '4', '5', '6', '7']
# Rows that match none of the masks get np.select's `default` (0 when not given).
# NOTE(review): this assumes Daily_Report['Payment_type'] selects a single
# column (a Series); confirm the frame has no duplicated column labels.
Daily_Report['Paymentmethod']=np.select(conditions_decisions, choices_decision)
我想根据数据框 Daily_Report 中“Payment_type”列的值,生成一个名为“Paymentmethod”的新列:如果“Payment_type”为“a”,则“Paymentmethod”为“1”,依此类推。但运行时出现如下错误:
KeyError Traceback (most recent call last)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3077 try:
-> 3078 return self._engine.get_loc(key)
3079 except KeyError:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine._get_loc_duplicates()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine._maybe_get_bool_indexer()
KeyError: 'Paymentmethod'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\internals.py in set(self, item, value, check)
4242 try:
-> 4243 loc = self.items.get_loc(item)
4244 except KeyError:
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3079 except KeyError:
-> 3080 return self._engine.get_loc(self._maybe_cast_indexer(key))
3081
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine._get_loc_duplicates()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine._maybe_get_bool_indexer()
KeyError: 'Paymentmethod'
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-16-4fca93a5ac39> in <module>()
7 (Daily_Report['Payment_type']=='g')]
8 choices_decision=['1', '2', '3', '4', '5', '6', '7']
----> 9 Daily_Report['Paymentmethod']=np.select(conditions_decisions, choices_decision)
10 #Daily_Report
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\frame.py in __setitem__(self, key, value)
3117 else:
3118 # set column
-> 3119 self._set_item(key, value)
3120
3121 def _setitem_slice(self, key, value):
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\frame.py in _set_item(self, key, value)
3193 self._ensure_valid_index(value)
3194 value = self._sanitize_column(key, value)
-> 3195 NDFrame._set_item(self, key, value)
3196
3197 # check if we are modifying a copy
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\generic.py in _set_item(self, key, value)
2598
2599 def _set_item(self, key, value):
-> 2600 self._data.set(key, value)
2601 self._clear_item_cache()
2602
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\internals.py in set(self, item, value, check)
4244 except KeyError:
4245 # This item wasn't present, just insert at end
-> 4246 self.insert(len(self.items), item, value)
4247 return
4248
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\internals.py in insert(self, loc, item, value, allow_duplicates)
4345
4346 block = make_block(values=value, ndim=self.ndim,
-> 4347 placement=slice(loc, loc + 1))
4348
4349 for blkno, count in _fast_count_smallints(self._blknos[loc:]):
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\internals.py in make_block(values, placement, klass, ndim, dtype, fastpath)
3203 placement=placement, dtype=dtype)
3204
-> 3205 return klass(values, ndim=ndim, placement=placement)
3206
3207 # TODO: flexible with index=None and/or items=None
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\internals.py in __init__(self, values, placement, ndim)
2301
2302 super(ObjectBlock, self).__init__(values, ndim=ndim,
-> 2303 placement=placement)
2304
2305 @property
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\internals.py in __init__(self, values, placement, ndim)
123 raise ValueError(
124 'Wrong number of items passed {val}, placement implies '
--> 125 '{mgr}'.format(val=len(self.values), mgr=len(self.mgr_locs)))
126
127 def _check_ndim(self, values, ndim):
ValueError: Wrong number of items passed 2, placement implies 1
答案 0 :(得分:0)
我认为更好的做法是使用 Series.replace,或者使用 Series.map 并配合 Series.fillna:
# Lookup table: payment-type code -> payment-method label.
d = dict(zip('abcdefg', '1234567'))
payment_type = Daily_Report['Payment_type']
# Alternative 1: replace() swaps the values found in the table and keeps the rest.
Daily_Report['Paymentmethod'] = payment_type.replace(d)
# Alternative 2: map() yields NaN for values missing from the table, so fall
# back to the original value for those rows.
mapped = payment_type.map(d)
Daily_Report['Paymentmethod'] = mapped.fillna(payment_type)
您的错误意味着列名重复:
# Build a demo frame whose two columns end up sharing the label 'Payment_type'.
# A dict cannot hold the same key twice, so the second column starts out as 'F'
# and is renamed afterwards.
Daily_Report = pd.DataFrame(
    {'Payment_type': list('abcdef'), 'F': list('aaabbb')}
)
Daily_Report = Daily_Report.rename(columns={'F': 'Payment_type'})
print(Daily_Report)
Payment_type Payment_type
0 a a
1 b a
2 c a
3 d b
4 e b
5 f b
# Same mapping logic as in the question, run against the frame above whose
# 'Payment_type' label is duplicated; this is the failing case.
conditions_decisions = [
    Daily_Report['Payment_type'] == code for code in 'abcdefg'
]
choices_decision = list('1234567')
Daily_Report['Paymentmethod'] = np.select(conditions_decisions, choices_decision)
print(Daily_Report)
ValueError: Wrong number of items passed 2, placement implies 1
解决方法是重命名重复的列(为重复的列名追加序号后缀),例如:
# Demo frame with three columns; after the rename the first and last columns
# both carry the label 'Payment_type'.
Daily_Report = pd.DataFrame(
    {
        'Payment_type': list('abcdef'),
        'A': list('aaabbb'),
        'F': list('aaabbb'),
    }
)
Daily_Report = Daily_Report.rename(columns={'F': 'Payment_type'})
print(Daily_Report)
Payment_type A Payment_type
0 a a a
1 b a a
2 c a a
3 d b b
4 e b b
5 f b b
# Make the column labels unique: the first occurrence of a label is kept as is,
# and the n-th repeat (n >= 1) gets the suffix '_n'.
s = Daily_Report.columns.to_series()
repeat_no = s.groupby(s).cumcount()
# '_0' belongs to first occurrences, so it is blanked out to leave them unchanged.
suffix = repeat_no.astype(str).radd('_').replace('_0', '')
Daily_Report.columns = s + suffix
print(Daily_Report)
Payment_type A Payment_type_1
0 a a a
1 b a a
2 c a a
3 d b b
4 e b b
5 f b b
# Map each payment-type code to its label. One boolean mask per code; np.select
# uses, for each row, the choice of the first mask that is True.
conditions_decisions = [
    Daily_Report['Payment_type'] == code for code in 'abcdefg'
]
choices_decision = ['1', '2', '3', '4', '5', '6', '7']
# Pass a string default explicitly. np.select's implicit default is the integer
# 0: older NumPy silently converted it to '0' next to string choices, while
# newer NumPy releases raise a TypeError because the dtypes have no common type.
# default='0' keeps the old result and works on both.
Daily_Report['Paymentmethod'] = np.select(
    conditions_decisions, choices_decision, default='0'
)
print(Daily_Report)
Payment_type A Payment_type_1 Paymentmethod
0 a a a 1
1 b a a 2
2 c a a 3
3 d b b 4
4 e b b 5
5 f b b 6