import pandas as pd
def dummies_(df_e):
df_e = pd.get_dummies(df_e)
df_e.fillna(value=0, inplace=True)
dummies_(df)
df.head()
customerID gender SeniorCitizen Partner Dependents tenure PhoneService MultipleLines InternetService OnlineSecurity ... DeviceProtection TechSupport StreamingTV StreamingMovies Contract PaperlessBilling PaymentMethod MonthlyCharges TotalCharges Churn
185 1024-GUALD Female 0 Yes No 1 No No phone service DSL No ... No No No No Month-to-month Yes Electronic check 24.80 24.8 Yes
2715 0484-JPBRU Male 0 No No 41 Yes Yes No No internet service ... No internet service No internet service No internet service No internet service Month-to-month Yes Bank transfer (automatic) 25.25 996.45 No
3825 3620-EHIMZ Female 0 Yes Yes 52 Yes No No No internet service ... No internet service No internet service No internet service No internet service Two year No Mailed check 19.35 1031.7 No
1807 6910-HADCM Female 0 No No 1 Yes No Fiber optic No ... Yes No No No Month-to-month No Electronic check 76.35 76.35 Yes
132 8587-XYZSF Male 0 No No 67 Yes No DSL No ... No Yes No No Two year No Bank transfer (automatic) 50.55 3260.1 No
什么都没有发生,也没有改变!有什么我可能会想念的东西吗?
----更新---- 如果我将return df_e放在函数dummies_()上,它将起作用,但是,如果我尝试将其放在sklearn管道上,则会发生错误。有解决这个问题的主意吗?
def dummies_(df_e):
df_e = pd.get_dummies(df_e)
df_e.fillna(value=0, inplace=True)
return df_e
from sklearn.pipeline import Pipeline
pre_pipeline = Pipeline([
('dummy',dummies_(df2)),
])
prepared_data = pre_pipeline.fit_transform(df2)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-315-ee004e435831> in <module>
----> 7 ('dummy',dummies_(df2)),
8 ])
9
/anaconda3/lib/python3.7/site-packages/sklearn/pipeline.py in __init__(self, steps, memory)
117 def __init__(self, steps, memory=None):
118 self.steps = steps
--> 119 self._validate_steps()
120 self.memory = memory
121
/anaconda3/lib/python3.7/site-packages/sklearn/pipeline.py in _validate_steps(self)
171 raise TypeError("Last step of Pipeline should implement fit. "
172 "'%s' (type %s) doesn't"
--> 173 % (estimator, type(estimator)))
174
175 @property
TypeError: Last step of Pipeline should implement fit. ' SeniorCitizen Partner Dependents tenure PhoneService \
185 0 1 0 1 0
2715 0 0 0 41 1
3825 0 1 1 52 1
1807 0 0 0 1 1
132 0 0 0 67 1
1263 1 1 0 68 1
3732 0 1 1 23 1
1672 0 1 1 72 1
811 0 0 0 70 1
2526 0 0 0 1 1
2892 0 0 0 1 1
4129 0 1 1 34 1
2622 1 1 1 28 1
3801 1 0 0 18 1
4357 0 1 0 15 1
6179 1 0 0 11 1
5275 0 0 0 11 1
4640 0 1 1 1 1
833 0 1 1 37 1
5194 0 0 1 22 1
3328 0 0 0 1 1
101 0 1 1 1 1
6298 0 1 1 46 1
1090 0 0 0 51 1
1744 0 1 0 41 1
6486 0 1 0 5 1
6370 0 0 0 45 1
1882 0 1 1 29 1
3469 1 0 0 15 1
2310 0 0 0 72 0
... ... ... ... ... ...
6848 0 1 0 2 1
710 0 0 0 1 1
2357 0 1 0 67 1
6493 0 1 0 17 0
558 0 0 0 11 1
5927 0 0 0 51 1
2306 0 0 0 3 1
1371 1 0 0 1 1
5872 0 0 1 1 1
6629 1 1 0 72 1
907 0 0 0 19 1
6226 0 0 0 65 1
4376 0 0 0 45 1
5970 0 1 0 5 1
6253 0 0 0 56 0
2453 1 0 0 4 1
4715 0 0 0 64 1
1872 0 1 1 41 1
5773 0 0 0 35 1
3773 0 0 0 1 0
926 1 0 0 3 1
2481 1 1 0 61 1
5481 1 0 0 1 1
3404 0 0 1 35 1
15 0 1 1 69 1
6366 0 1 0 64 1
315 0 1 1 51 1
2439 0 1 1 17 1
5002 0 1 1 69 0
1161 0 0 1 1 0
[1409 rows x 42 columns]' (type <class 'pandas.core.frame.DataFrame'>) doesn't
当我尝试在管道上应用该功能时,发生错误。关于如何使其在sklearn.pipeline中起作用的任何想法或建议?