这是我的代码。
import pandas as pd
import numpy as np
import json
import seaborn as sb
from sklearn.metrics import log_loss
from sklearn import linear_model
from sklearn.model_selection import StratifiedKFold
from sklearn.svm import SVC
from scipy.stats import zscore
from Transformers import TextTransformer
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import GridSearchCV
%matplotlib inline
from sklearn.pipeline import Pipeline, FeatureUnion

# Load the training data; 'description' is forced to be read as a string column.
df = pd.read_json('data/train.json', encoding='utf-8', dtype={'description': str})

# One TextTransformer per raw column (project-local class; presumably turns the
# named column into a feature matrix -- confirm against Transformers.py).
a = TextTransformer('description', max_features=50)
b = TextTransformer('features', max_features=10)

# Concatenate both feature blocks side by side, then classify with an SVM.
pipeline = Pipeline([
    ('feats', FeatureUnion([
        ('description', a),  # can pass in either a pipeline
        ('features', b),  # or a transformer
    ])),
    ('clf', SVC()),  # classifier
])

# SVC is a supervised estimator, so the target must be passed as the second
# argument to fit().
# NOTE: Pipeline.fit forwards y to every step, i.e. each transformer is called
# as fit(X, y) / fit_transform(X, y). TextTransformer.fit therefore has to be
# declared as `def fit(self, X, y=None)` (and return self); with a
# `fit(self, X)` signature this call raises
# "TypeError: fit() takes exactly 2 arguments (3 given)".
pipeline.fit(df, df['interest_level'])
我想知道的是:我要预测的目标变量是 df['interest_level'],但 pipeline.fit 似乎只接受 2 个参数,其中一个还是 self。我该如何传入目标变量呢?
另外需要说明的是,我也试过 pipeline.fit(df, y=df['interest_level']),它同样会抛出相同的异常。我使用的是最新版本的 pandas / numpy / sklearn。
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-17-0a34f1c24eca> in <module>()
7 ('clf', SVC()) # classifier
8 ])
----> 9 pipeline.fit(df,df['interest_level'])
10 # pg = {'clf__C': [0.1,1]}
11 # grid = GridSearchCV(pipeline, param_grid= pg ,cv = 2)
/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/sklearn/pipeline.pyc in fit(self, X, y, **fit_params)
266 This estimator
267 """
--> 268 Xt, fit_params = self._fit(X, y, **fit_params)
269 if self._final_estimator is not None:
270 self._final_estimator.fit(Xt, y, **fit_params)
/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/sklearn/pipeline.pyc in _fit(self, X, y, **fit_params)
232 pass
233 elif hasattr(transform, "fit_transform"):
--> 234 Xt = transform.fit_transform(Xt, y, **fit_params_steps[name])
235 else:
236 Xt = transform.fit(Xt, y, **fit_params_steps[name]) \
/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/sklearn/base.pyc in fit_transform(self, X, y, **fit_params)
495 else:
496 # fit method of arity 2 (supervised transformation)
--> 497 return self.fit(X, y, **fit_params).transform(X)
498
499
TypeError: fit() takes exactly 2 arguments (3 given)
答案 0 :(得分:2)
根据文档,你的做法是对的:
您还可以查看此示例:
pipeline / featureunion example
你得到的错误是什么?我会运行以下内容:
# Positional split: every column except the last as X, the last column as y.
# Assumes the target is the last column of df -- otherwise select it by name,
# e.g. df['interest_level']. DataFrames need .iloc for positional slicing.
pipeline.fit(df.iloc[:, :-1], df.iloc[:, -1])