DataFrame是:
# Sample frame used throughout: two label columns ('key', 'Z') and three
# integer columns ('d', 'e', 'f'), five rows each.
A = pd.DataFrame(
    {
        'key': ['II', 'I', 'I', 'III', 'II'],
        'Z': ['a', 'b', 'c', 'd', 'e'],
        'd': [1, 2, 0, 2, 0],
        'e': [0, 2, 0, 3, 0],
        'f': [0, 3, 0, 4, 0],
    }
)
我有一个函数(它只是对 data_aggregate 中由 features 指定的列进行编码):
def Encode(data_aggregate, features):
    """Intended to overwrite each column in `features` with value frequencies.

    For every column label in `features`, the rows are grouped by that
    column and each row's value is meant to be replaced, in place, by
    (size of its group) / (total number of rows).

    Returns the same `data_aggregate` object that was passed in.

    NOTE(review): as written this does not get that far; the mask on the
    assignment line uses attribute access (`data_aggregate.feature`) and
    fails with the AttributeError quoted in the traceback after this snippet
    when the frame has no column literally named "feature".
    """
    for feature in features:
        l = len(data_aggregate)  # total row count, denominator of every ratio
        groups = data_aggregate.groupby(by=feature)
        groups_sizes = groups.size()  # number of rows per distinct value
        for key in groups_sizes.keys():
            ratio = groups_sizes[key] / l
            # NOTE(review): `.feature` looks up a column named "feature", not
            # the column whose label is stored in the variable `feature`.
            data_aggregate.loc[data_aggregate.feature == key, [feature]] = ratio
    return data_aggregate
当我将A
传递给Encode
时:
# Encode only the 'key' column; with the Encode defined above this is the call
# reported to fail with the AttributeError in the traceback that follows.
new = Encode(A, ['key'])
它返回错误:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-31-e896b315a285> in <module>()
----> 1 A.loc[A.feature == key, [feature]] = 111
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\generic.py in __getattr__(self, name)
2742 if name in self._info_axis:
2743 return self[name]
-> 2744 return object.__getattribute__(self, name)
2745
2746 def __setattr__(self, name, value):
AttributeError: 'DataFrame' object has no attribute 'feature'
答案 0 :(得分:2)
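当列名保存在字符串变量中时,不能用点表示法来引用该列:`data_aggregate.feature` 查找的是名为 "feature" 的列,而不是变量 feature 所保存的列名对应的列。应改用方括号索引 `data_aggregate[feature]`。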
试试这个
def Encode(data_aggregate, features):
    """Frequency-encode the given columns of a DataFrame in place.

    Each column named in ``features`` is overwritten with the fraction of
    rows that share that row's value (group size divided by the total
    number of rows in the frame).

    Args:
        data_aggregate: pandas DataFrame to encode; it is modified in place.
        features: iterable of column labels to encode.

    Returns:
        The same ``data_aggregate`` object, after modification.
    """
    total_rows = len(data_aggregate)
    for feature in features:
        # Bracket indexing is required: `feature` is a variable holding the
        # column label, so `data_aggregate.feature` would look for a column
        # literally named "feature".
        ratios = data_aggregate.groupby(by=feature).size() / total_rows
        # Map the whole column in one step from its original values. Writing
        # the ratios back key by key compares against a column that is being
        # overwritten, so a ratio equal to a later key (possible for numeric
        # columns) would be re-matched and encoded a second time.
        data_aggregate[feature] = data_aggregate[feature].map(ratios)
    return data_aggregate
**解决方案**
话虽如此,你也可以直接使用:
# Replace 'key' with the relative frequency of each of its values; assign
# returns a new DataFrame, so A itself is not modified.
A.assign(key=A['key'].map(A['key'].value_counts(normalize=True)))
Z d e f key
0 a 1 0 0 0.4
1 b 2 2 3 0.4
2 c 0 0 0 0.4
3 d 2 3 4 0.2
4 e 0 0 0 0.4
封装成函数:
def Encode(df, features):
    """Return a copy of ``df`` with the given columns frequency-encoded.

    Each column named in ``features`` is replaced by the relative frequency
    of its values, as computed by ``value_counts(normalize=True)``. The
    input frame is left untouched.

    Args:
        df: pandas DataFrame to encode.
        features: iterable of column labels to encode; labels may be of any
            hashable type, not only strings.

    Returns:
        A new DataFrame with the same columns as ``df``.
    """
    encoded = df.copy()
    for column in features:
        # Item assignment instead of ``df.assign(**mapping)``: keyword
        # arguments must be strings, so assign() rejects non-string labels.
        # Frequencies are always read from the untouched ``df``.
        encoded[column] = df[column].map(df[column].value_counts(normalize=True))
    return encoded
# Encode a single column; the result is a new DataFrame and A is unchanged.
Encode(A, ['key'])
Z d e f key
0 a 1 0 0 0.4
1 b 2 2 3 0.4
2 c 0 0 0 0.4
3 d 2 3 4 0.2
4 e 0 0 0 0.4
# Several columns can be encoded in one call by listing their labels.
Encode(A, ['key', 'Z'])
Z d e f key
0 0.2 1 0 0 0.4
1 0.2 2 2 3 0.4
2 0.2 0 0 0 0.4
3 0.2 2 3 4 0.2
4 0.2 0 0 0 0.4