在sklearn的GridSearchCV中使用自定义估计器

时间:2020-07-16 15:21:10

标签: python scikit-learn cross-validation gridsearchcv

我正在尝试实现一个自定义估计器,并使用GridSearchCV优化四个参数。我对此尚未完全理解,并且遇到了一些问题(请参见下面的更新)。

这是我的估计器,其中coeffs包含我要优化的四个参数。v1、v2和sm是输入数据(NumPy数组)。

import numpy as np
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.utils.validation import check_array, check_is_fitted, check_X_y

class wc(BaseEstimator, RegressorMixin):
    """Four-coefficient model evaluated at a fixed 40-degree angle.

    ``predict`` computes::

        t     = exp(-2 * b * v2 * theta)
        sigma = a * v1 * cos(theta) * (1 - t**2) + t**2 * (c + d * sm)

    where ``theta`` is 40 degrees expressed in radians and ``(a, b, c, d)``
    are taken from the ``coeffs`` argument.

    NOTE(review): scikit-learn meta-estimators such as ``GridSearchCV`` call
    ``fit(X, y)`` and set hyper-parameters through ``__init__`` arguments;
    the four-argument signatures below are kept only for backward
    compatibility and will not work with them.

    Parameters
    ----------
    verbose : bool, default=False
        Stored on the instance; not read by ``fit`` or ``predict``.
    """

    def __init__(self, verbose=False):
        self.verbose = verbose

    def fit(self, coeffs, v1, v2, sm):
        """Return ``self`` unchanged; nothing is learned from the arguments."""
        return self

    def predict(self, coeffs, v1, v2, sm):
        """Evaluate the model for the given coefficients and inputs.

        Parameters
        ----------
        coeffs : indexable
            ``coeffs[0]`` .. ``coeffs[3]`` are used as ``a, b, c, d``.
        v1, v2, sm : scalar or numpy.ndarray
            Inputs combined element-wise by the formula.

        Returns
        -------
        sigma
            Result of the formula, same shape as the broadcast inputs.
        """
        a, b, c, d = coeffs[0], coeffs[1], coeffs[2], coeffs[3]

        theta = 40.0  # degrees
        theta_rad = theta * np.pi / 180

        # arccos(cos(x)) equals x for x in [0, pi]; kept as in the original.
        # NOTE(review): confirm whether 1 / cos(theta) was intended here.
        t = np.exp(-2 * b * v2 * np.arccos(np.cos(theta_rad)))

        # Fix: np.cos expects radians. The original passed 40.0 (degrees)
        # directly, which evaluates cos(40 rad) ~ -0.667 instead of
        # cos(40 deg) ~ 0.766 and flips the sign of this term.
        sigmav = a * v1 * np.cos(theta_rad) * (1 - t ** 2)

        sigmas = c + d * sm
        sigma = sigmav + t ** 2 * sigmas
        return sigma
    
    
# Shared estimator instance handed to GridSearchCV by the grid-search helper below.
wcm = wc()

现在,我定义一个函数来进行网格搜索:

from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer

def wcm_params_selection(v1, v2, sm, y, nfolds):
    """Set up a grid search over ``a``, ``b``, ``c`` and ``d`` for ``wcm``.

    Each coefficient is scanned in steps of 0.1 (``a``, ``b``, ``d`` over
    [0, 10) and ``c`` over [-10, 0)), scored by negated mean squared error
    with ``nfolds``-fold cross-validation.

    NOTE(review): ``GridSearchCV.fit`` requires ``X`` as its first argument,
    so the keyword-only call below raises
    ``TypeError: fit() missing 1 required positional argument: 'X'``.

    Returns
    -------
    dict
        ``best_params_`` of the fitted search (only reached if ``fit``
        succeeds).
    """
    search_space = {
        'a': np.arange(0.0, 10.0, 0.1),
        'b': np.arange(0.0, 10.0, 0.1),
        'c': np.arange(-10.0, 0, 0.1),
        'd': np.arange(0.0, 10.0, 0.1),
    }
    # greater_is_better=False negates the MSE so that larger scores are better.
    neg_mse = make_scorer(mean_squared_error, greater_is_better=False)
    searcher = GridSearchCV(wcm, search_space, scoring=neg_mse, cv=nfolds)
    searcher.fit(v1=v1, v2=v2, sm=sm, y=y)
    return searcher.best_params_

运行

# single predictor test values
v1 = np.array([5.0])
v2 = np.array([5.0])
sm = np.array([0.5])
y = np.array([-10])  # measurement

wcm_params_selection(v1=v1, v2=v2, sm=sm, y=y, nfolds=5)

这会给我错误消息TypeError: fit() missing 1 required positional argument: 'X'

更新: 我对代码做了不少修改,感觉离目标越来越近了。如果能得到一些关于此方法是否正确的反馈,那就太好了。使用一些测试数据时,最终得到的a和b始终为0。

估计器类:

class wc(BaseEstimator, RegressorMixin):
    """Regressor whose four coefficients are hyper-parameters, not learned.

    ``predict`` computes, at a fixed angle ``theta`` of 40 degrees::

        t     = exp(-2 * b * v2 * theta_rad)
        sigma = a * v1 * cos(theta_rad) * (1 - t**2) + t**2 * (c + d * sm)

    When ``b == 0`` the factor ``t`` equals 1, so the ``a`` term is
    multiplied by zero and the prediction does not depend on ``a``.

    Parameters
    ----------
    a, b, c, d : float
        Coefficients of the formula above; stored unchanged on the instance.
    """

    def __init__(self, a, b, c, d):
        self.a = a
        self.b = b
        self.c = c
        self.d = d

    def fit(self, X, y):
        """Validate ``X`` and ``y`` and mark the estimator as fitted.

        No coefficient is estimated from the data; ``a``..``d`` keep the
        values given to ``__init__`` (or set through ``set_params``).
        """
        X, y = check_X_y(X, y)
        self.is_fitted_ = True
        return self

    def predict(self, X):
        """Evaluate the formula for every row of ``X``.

        Parameters
        ----------
        X : array-like with at least two columns
            Column 0 is used as both ``v1`` and ``v2``; column 1 as ``sm``.

        Returns
        -------
        numpy.ndarray
            One value per row of ``X``.
        """
        X = check_array(X)
        check_is_fitted(self, 'is_fitted_')

        v1 = X[:, 0]
        # NOTE(review): v2 reads the same column as v1 — confirm this is
        # intended and not a typo for a separate column.
        v2 = X[:, 0]
        sm = X[:, 1]

        theta = 40.0  # degrees
        theta_rad = theta * np.pi / 180

        # arccos(cos(x)) equals x for x in [0, pi]; kept as in the original.
        t = np.exp(-2 * self.b * v2 * np.arccos(np.cos(theta_rad)))
        t_squared = np.power(t, 2)

        # Fix: np.cos expects radians. The original passed 40.0 (degrees)
        # directly, which evaluates cos(40 rad) ~ -0.667 instead of
        # cos(40 deg) ~ 0.766 and flips the sign of this term.
        sigmav = self.a * v1 * np.cos(theta_rad) * (1 - t_squared)

        sigmas = self.c + self.d * sm
        sigma = sigmav + t_squared * sigmas
        return sigma

网格搜索函数:

def wcm_params_selection(X, y, nfolds):
    """Grid-search the coefficients ``a``, ``b``, ``c`` and ``d`` of ``wc``.

    Parameters
    ----------
    X : array-like
        Feature matrix forwarded to ``GridSearchCV.fit``.
    y : array-like
        Target values forwarded to ``GridSearchCV.fit``.
    nfolds : int
        Number of cross-validation folds.

    Returns
    -------
    dict
        ``best_params_`` of the fitted search, keyed by ``'a'``, ``'b'``,
        ``'c'`` and ``'d'``.
    """
    a = np.arange(0.0, 5.0, 1)
    b = np.arange(0.0, 10.0, 1)
    c = np.arange(-35, 0, 2)
    d = np.arange(15, 60, 1)

    param_grid = {'a': a, 'b': b, 'c': c, 'd': d}
    # greater_is_better=False negates the MSE so that larger scores are better.
    my_func = make_scorer(mean_squared_error, greater_is_better=False)

    # Fix: GridSearchCV needs an estimator *instance*, not the class itself.
    # The seed values are placeholders; the search overrides them for every
    # candidate in param_grid.
    base_estimator = wc(a=a[0], b=b[0], c=c[0], d=d[0])

    # Fix: honour the nfolds argument instead of a hard-coded cv=2.
    grid_search = GridSearchCV(base_estimator, param_grid, scoring=my_func,
                               cv=nfolds)
    grid_search.fit(X, y)
    return grid_search.best_params_

并拟合模型以预测值:

wcm_params = wcm_params_selection(X, y, 5)
# Rebuild the estimator from the selected coefficients, then fit, predict and
# score on the same data that was used for the search.
wcmfit = wc(**{key: wcm_params[key] for key in ('a', 'b', 'c', 'd')}).fit(X, y)
out = wcmfit.predict(X)
score = wcmfit.score(X, y)

不幸的是,我完全没有头绪:a和b的结果始终为0,这是不正确的。如能提供任何帮助,将不胜感激。

0 个答案:

没有答案