我想编写一个通用函数，用平均值或中位数替换空值（Null），或者删除含有空值的行：
# First attempt from the question: apply the operation named by `action` to
# one column of `df`.
# NOTE: this version fails. `.action()` looks up a Series attribute that is
# literally called "action"; it does not use the string stored in the `action`
# parameter, which is what raises the AttributeError quoted below.
def fill_null(df,col_name,action):
df_tmp = df.apply(df[col_name].action())
return df_tmp
# Script entry point: load the sample spreadsheet, run fill_null on its
# "Salary" column with the "mean" action, and print what comes back.
# NOTE(review): `pd` is presumably `import pandas as pd`; the import is not
# shown in this snippet.
if __name__ == '__main__':
df_sal = pd.read_excel("sample_sal.xlsx")
df = fill_null(df_sal,"Salary","mean")
print(df)
得到的错误：
AttributeError: 'Series' object has no attribute 'action'
我可以通过以下方式得到所需的输出，但这不符合要求：
# Working but non-generic variant: one explicit branch per supported action.
# NOTE(review): presumably the body of fill_null(df, col_name, action); the
# `def` line is not part of this snippet.
# NOTE(review): fillna(..., inplace=True) is called on the column selection
# df[col_name]; whether that updates `df` itself depends on the pandas version
# (Copy-on-Write) - confirm before relying on it.
if action == "mean":
# Fill the column's missing values with the column mean.
df[col_name].fillna(df[col_name].mean(), inplace=True)
return df
elif action == "median":
# Fill the column's missing values with the column median.
df[col_name].fillna(df[col_name].median(), inplace=True)
return df
elif action == "drop":
# Keep only the rows in which this column is not missing.
df = df.dropna(subset = [col_name])
return df
答案 0 :(得分:0)
我认为需要使用 `aggregate`（即 `agg`）或 `apply`，按名称调用聚合函数：
def fill_null(df, col_name, action):
    """Fill the missing values of one column with an aggregate of that column.

    Args:
        df: pandas DataFrame to update. It is modified in place.
        col_name: Label of the column whose missing values are filled.
        action: Aggregation handed to ``Series.agg``, e.g. ``"mean"`` or
            ``"median"``.

    Returns:
        The same DataFrame object, with the column's missing values replaced
        by the aggregated value.
    """
    column = df[col_name]
    # Assign the filled column back instead of calling
    # ``df[col_name].fillna(..., inplace=True)``: an in-place fill on a column
    # selection is a chained operation that pandas deprecates and that leaves
    # ``df`` untouched when Copy-on-Write is active.
    df[col_name] = column.fillna(column.agg(action))
    # Alternative spelling of the same aggregation:
    # df[col_name] = column.fillna(column.apply(action))
    return df
# Example call: fill the missing values of column 'D' with that column's mean
# and print the returned frame.
print(fill_null(df, col_name='D', action='mean'))
但如果还需要支持 `dropna`：
def fill_null(df, col_name, action):
    """Fill or drop the missing values of one column.

    Args:
        df: pandas DataFrame to process.
        col_name: Label of the column to inspect for missing values.
        action: ``"dropna"`` (or ``"drop"``) to remove the rows in which the
            column is missing; any other value is handed to ``Series.agg``
            (e.g. ``"mean"``, ``"median"``) and the result is used as the
            fill value.

    Returns:
        For the drop actions, a new DataFrame without the affected rows; the
        input frame is left unchanged. Otherwise the same DataFrame object,
        modified in place, with the column's missing values filled.
    """
    # "drop" is accepted next to "dropna" because the question's own code
    # uses that spelling for the same operation.
    if action in ("drop", "dropna"):
        return df.dropna(subset=[col_name])

    column = df[col_name]
    # Assign the filled column back instead of calling
    # ``df[col_name].fillna(..., inplace=True)``: an in-place fill on a column
    # selection is a chained operation that pandas deprecates and that leaves
    # ``df`` untouched when Copy-on-Write is active.
    df[col_name] = column.fillna(column.agg(action))
    return df