具有最低分数的单变量特征选择

时间:2017-10-06 06:20:51

标签: python scikit-learn

本问题是《passing an extra argument to GenericUnivariateSelect without scope tricks》一问的后续。

我需要在 sklearn 中使用自定义评分函数执行单变量特征选择,因此我使用的是 GenericUnivariateSelect。但是,如文档所述,选择器支持的模式只有:{'percentile', 'k_best', 'fpr', 'fdr', 'fwe'}

在我的情况下,我需要选择得分高于某个阈值的特征,所以我实现了:

 class SelectMinScore(_BaseFilter):
     """Univariate selector that keeps features scoring above ``minScore``.

     The stock selector modes are {'percentile', 'k_best', 'fpr', 'fdr',
     'fwe'}; this class adds a plain score-threshold criterion.

     Parameters
     ----------
     score_func : callable, default f_classif
         Scoring function handed to the ``_BaseFilter`` constructor.
     minScore : float in [0, 1] or the string 'all', default 0.7
         Features whose score is strictly greater than this value are kept.
         ``'all'`` disables the threshold and keeps every feature.
     """

     def __init__(self, score_func=f_classif, minScore=0.7):
         super(SelectMinScore, self).__init__(score_func)
         self.minScore = minScore
         self.score_func = score_func

     def _check_params(self, X, y):
         """Validate ``minScore``.

         Raises
         ------
         ValueError
             If ``minScore`` is neither 'all' nor a number in [0, 1].
         """
         if not (self.minScore == "all" or 0 <= self.minScore <= 1):
             # The sentinel checked above is 'all', so the hint names 'all';
             # the trailing space keeps the two sentences from running together.
             raise ValueError("minScore should be >=0, <= 1; got %r. "
                              "Use minScore='all' to return all features."
                              % self.minScore)

     def _get_support_mask(self):
         """Return a boolean mask over ``scores_`` marking the kept features.

         The mask is True where the score (after ``_clean_nans``) is strictly
         greater than ``minScore``. If no score passes the threshold, the
         single highest-scoring feature is kept so the mask is never empty.
         """
         check_is_fitted(self, 'scores_')

         if self.minScore == 'all':
             return np.ones(self.scores_.shape, dtype=bool)

         scores = _clean_nans(self.scores_)
         # Custom part: keep only the scores above the minimum.
         mask = scores > self.minScore
         if not np.any(mask):
             mask[np.argmax(scores)] = True
         return mask

这在我看来是合理的。但当我尝试使用它时:

from sklearn.feature_selection.univariate_selection import _clean_nans
from sklearn.feature_selection.univariate_selection import f_classif                        
import numpy as np
import pandas as pd
from  sklearn.feature_selection import GenericUnivariateSelect
from sklearn.metrics import make_scorer 
from sklearn.feature_selection.univariate_selection import _BaseFilter
from sklearn.pipeline import Pipeline 

def Custom_Score(X,Y,extradata):
    """Placeholder score: ignores X, Y and extradata and returns the constant 1."""
    constant_score = 1
    return constant_score
# Reproduction harness: wraps a GenericUnivariateSelect configured with the
# custom 'MinScore' mode in a one-step Pipeline, then fits and transforms
# random data.
class myClass:
    # NOTE(review): declared without a `self` parameter -- confirm how this
    # method is meant to be invoked.
    def my_method():
     # Extra data that Custom_Score takes as its third argument.
     _extradata=np.random.rand(500,1)

     # NOTE(review): the first reported TypeError ("__call__() takes at least
     # 4 arguments (3 given)") is raised when the selector calls
     # score_func(X, y) on this object. A make_scorer result is presumably not
     # usable as a score_func; binding the extra argument with
     # functools.partial(Custom_Score, extradata=_extradata) is the likely
     # alternative -- confirm against the scikit-learn documentation.
     my_scorer = make_scorer(Custom_Score,extradata=_extradata)
     custom_filter=GenericUnivariateSelect(my_scorer,mode='MinScore',param=0.7)   
     # Adds a 'MinScore' entry mapping to SelectMinScore in the selector's
     # mode table, after the selector has already been constructed.
     custom_filter._selection_modes.update({'MinScore': SelectMinScore})
     MyProcessingPipeline=Pipeline(steps=[('filter_step', custom_filter)])
     # Finally test it on random data: 500 rows, 3 feature columns, 1 target column.
     X=pd.DataFrame(data=np.random.rand(500,3))
     y=pd.DataFrame(data=np.random.rand(500,1))
     MyProcessingPipeline.fit(X,y)
     # NOTE(review): transform is called with (X, y) here and with X alone on
     # the next line; the result of this first call is discarded.
     MyProcessingPipeline.transform(X,y)
     Xt=MyProcessingPipeline.transform(X)

我的期望是让Xt与X相同

这可行吗?

相反,我得到以下内容:

 Traceback (most recent call last):

File "<ipython-input-31-f493745d7e1b>", line 1, in <module>
runfile('C:/Users/\_______\/Desktop/pd-sk-integration.py', wdir='C:/Users/\_______\/Desktop')
File "C:\Users\\_______\\AppData\Local\Continuum\Anaconda2\lib\site-packages\spyder\utils\site\sitecustomize.py", line 866, in runfile
execfile(filename, namespace)
File "C:\Users\\_______\\AppData\Local\Continuum\Anaconda2\lib\site-packages\spyder\utils\site\sitecustomize.py", line 87, in execfile
exec(compile(scripttext, filename, 'exec'), glob, loc)
File "C:/Users/\_______/Desktop/pd-sk-integration.py", line 59, in <module>
MyProcessingPipeline.fit(X,y)
File "C:\Users\\_______\\AppData\Local\Continuum\Anaconda2\lib\site-packages\sklearn\pipeline.py", line 270, in fit
self._final_estimator.fit(Xt, y, **fit_params)
File "C:\Users\_______\AppData\Local\Continuum\Anaconda2\lib\site-packages\sklearn\feature_selection\univariate_selection.py", line 330, in fit
score_func_ret = self.score_func(X, y)
TypeError: __call__() takes at least 4 arguments (3 given)

编辑:

如果我直接传入评分函数:custom_filter=GenericUnivariateSelect(Custom_Score,mode='MinScore',param=0.7)

我会得到:

File "C:\Users\\_____\\AppData\Local\Continuum\Anaconda2\lib\site-packages\spyder\utils\site\sitecustomize.py", line 87, in execfile
exec(compile(scripttext, filename, 'exec'), glob, loc)
File "C:/Users/\_____\/Desktop/pd-sk-integration.py", line 59, in <module>
MyProcessingPipeline.fit(X,y)
File "C:\Users\\_____\\AppData\Local\Continuum\Anaconda2\lib\site-packages\sklearn\pipeline.py", line 270, in fit
self._final_estimator.fit(Xt, y, **fit_params)
File "C:\Users\_____\AppData\Local\Continuum\Anaconda2\lib\site-packages\sklearn\feature_selection\univariate_selection.py", line 330, in fit
score_func_ret = self.score_func(X, y)
TypeError: Custom_Score() takes exactly 3 arguments (2 given)

0 个答案:

没有答案