以下是“passing an extra argument to GenericUnivariateSelect without scope tricks”这一问题的后续。
我需要在 sklearn 中使用自定义评分函数执行单变量特征选择,因此我使用的是 GenericUnivariateSelect。但是,如文档中所述,
选择器的模式只有:{'percentile', 'k_best', 'fpr', 'fdr', 'fwe'}
在我的情况下,我需要选择得分高于某个值的特征,所以我实现了如下的类:
class SelectMinScore(_BaseFilter):
    """Select the features whose score is strictly greater than ``minScore``.

    The scikit-learn documentation lists these selector modes:
    {'percentile', 'k_best', 'fpr', 'fdr', 'fwe'}. None of them keeps the
    features scoring above a fixed value, which is what this filter does.

    Parameters
    ----------
    score_func : callable, default f_classif
        Scoring function, forwarded to ``_BaseFilter.__init__``.
    minScore : number in [0, 1] or the string "all", default 0.7
        Features with a score strictly above this value are kept.
        ``"all"`` keeps every feature.
    """

    def __init__(self, score_func=f_classif, minScore=0.7):
        super(SelectMinScore, self).__init__(score_func)
        self.minScore = minScore
        self.score_func = score_func

    def _check_params(self, X, y):
        """Raise ValueError unless ``minScore`` is "all" or lies in [0, 1]."""
        if not (self.minScore == "all" or 0 <= self.minScore <= 1):
            # The message names the sentinel the code actually accepts
            # ("all") and keeps a space between the two sentences.
            raise ValueError("minScore should be >=0, <= 1; got %r. "
                             "Use minScore='all' to return all features."
                             % self.minScore)

    def _get_support_mask(self):
        """Return a boolean mask over ``scores_`` marking the kept features.

        If no score exceeds ``minScore``, the single highest-scoring feature
        is kept so the mask is never all False.
        """
        check_is_fitted(self, 'scores_')

        if self.minScore == 'all':
            return np.ones(self.scores_.shape, dtype=bool)

        scores = _clean_nans(self.scores_)
        # Custom part: keep only the scores above the minimum.
        mask = scores > self.minScore
        if not np.any(mask):
            mask[np.argmax(scores)] = True
        return mask
这在我看来没有问题。但当我尝试像下面这样使用它时:
from sklearn.feature_selection.univariate_selection import _clean_nans
from sklearn.feature_selection.univariate_selection import f_classif
import numpy as np
import pandas as pd
from sklearn.feature_selection import GenericUnivariateSelect
from sklearn.metrics import make_scorer
from sklearn.feature_selection.univariate_selection import _BaseFilter
from sklearn.pipeline import Pipeline
def Custom_Score(X, Y, extradata):
    """Placeholder score function: ignore every argument and return 1."""
    placeholder_score = 1
    return placeholder_score
# NOTE(review): the indentation of this snippet was lost, so where the bodies
# of myClass / my_method end cannot be determined from the text; the traceback
# quoted below reports fit() being called from <module>.
class myClass:
def my_method():
# Random 500x1 array that is meant to reach Custom_Score as `extradata`.
_extradata=np.random.rand(500,1)
# Wrap Custom_Score with make_scorer, supplying extradata as a keyword argument.
my_scorer = make_scorer(Custom_Score,extradata=_extradata)
# Build the selector with the custom mode name 'MinScore' and param=0.7
# (presumably forwarded to SelectMinScore as minScore -- TODO confirm).
custom_filter=GenericUnivariateSelect(my_scorer,mode='MinScore',param=0.7)
# Add a 'MinScore' -> SelectMinScore entry to the selector's mode mapping.
custom_filter._selection_modes.update({'MinScore': SelectMinScore})
# Single-step pipeline containing only the custom filter.
MyProcessingPipeline=Pipeline(steps=[('filter_step', custom_filter)])
# finally test it
# Random data: 500 rows x 3 columns for X, 500 rows x 1 column for y.
X=pd.DataFrame(data=np.random.rand(500,3))
y=pd.DataFrame(data=np.random.rand(500,1))
# This fit() call is the line the quoted tracebacks point at.
MyProcessingPipeline.fit(X,y)
# NOTE(review): transform is called here with (X, y) and on the next line with
# X only -- confirm whether passing y is intended/accepted.
MyProcessingPipeline.transform(X,y)
Xt=MyProcessingPipeline.transform(X)
我的期望是让Xt与X相同
这可行吗?
相反,我得到以下内容:
Traceback (most recent call last):
File "<ipython-input-31-f493745d7e1b>", line 1, in <module>
runfile('C:/Users/\_______\/Desktop/pd-sk-integration.py', wdir='C:/Users/\_______\/Desktop')
File "C:\Users\\_______\\AppData\Local\Continuum\Anaconda2\lib\site-packages\spyder\utils\site\sitecustomize.py", line 866, in runfile
execfile(filename, namespace)
File "C:\Users\\_______\\AppData\Local\Continuum\Anaconda2\lib\site-packages\spyder\utils\site\sitecustomize.py", line 87, in execfile
exec(compile(scripttext, filename, 'exec'), glob, loc)
File "C:/Users/\_______/Desktop/pd-sk-integration.py", line 59, in <module>
MyProcessingPipeline.fit(X,y)
File "C:\Users\\_______\\AppData\Local\Continuum\Anaconda2\lib\site-packages\sklearn\pipeline.py", line 270, in fit
self._final_estimator.fit(Xt, y, **fit_params)
File "C:\Users\_______\AppData\Local\Continuum\Anaconda2\lib\site-packages\sklearn\feature_selection\univariate_selection.py", line 330, in fit
score_func_ret = self.score_func(X, y)
TypeError: __call__() takes at least 4 arguments (3 given)
编辑:
如果我直接传入评分函数
custom_filter=GenericUnivariateSelect(Custom_Score,mode='MinScore',param=0.7)
我得到如下错误:
File "C:\Users\\_____\\AppData\Local\Continuum\Anaconda2\lib\site-packages\spyder\utils\site\sitecustomize.py", line 87, in execfile
exec(compile(scripttext, filename, 'exec'), glob, loc)
File "C:/Users/\_____\/Desktop/pd-sk-integration.py", line 59, in <module>
MyProcessingPipeline.fit(X,y)
File "C:\Users\\_____\\AppData\Local\Continuum\Anaconda2\lib\site-packages\sklearn\pipeline.py", line 270, in fit
self._final_estimator.fit(Xt, y, **fit_params)
File "C:\Users\_____\AppData\Local\Continuum\Anaconda2\lib\site-packages\sklearn\feature_selection\univariate_selection.py", line 330, in fit
score_func_ret = self.score_func(X, y)
TypeError: Custom_Score() takes exactly 3 arguments (2 given)