我正在尝试实现一个自定义估计器,并使用 GridSearchCV 优化其中的四个参数。我对这套机制还不太熟悉,因此遇到了一些问题(请参见下面的更新)。
下面是我的估计器,其中 coeffs 包含我要优化的四个参数,v1、v2、sm 是输入数据(NumPy 数组)。
import numpy as np
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
class wc(BaseEstimator, RegressorMixin):
    """Closed-form backscatter model evaluated from an explicit coefficient vector.

    The estimator itself only stores ``verbose``; the four model
    coefficients are passed to :meth:`predict` on every call.
    """

    def __init__(self, verbose=False):
        self.verbose = verbose

    def fit(self, coeffs, v1, v2, sm):
        """Do nothing and return ``self``; no state is learned from the inputs."""
        return self

    def predict(self, coeffs, v1, v2, sm):
        """Evaluate the model for the given coefficients and inputs.

        Parameters
        ----------
        coeffs : indexable
            ``coeffs[0]`` .. ``coeffs[3]`` are used as ``a``, ``b``, ``c``, ``d``.
        v1, v2, sm : array-like or scalar
            Model inputs; combined element-wise with NumPy arithmetic.

        Returns
        -------
        sigma
            ``a*v1*cos(theta)*(1 - t**2) + t**2 * (c + d*sm)`` with
            ``t = exp(-2*b*v2*theta)`` and ``theta`` equal to 40 degrees
            expressed in radians.
        """
        a, b, c, d = coeffs[0], coeffs[1], coeffs[2], coeffs[3]

        theta_deg = 40.0
        # Convert once and use radians everywhere: the original fed the raw
        # degree value to np.cos() in the vegetation term, which evaluates
        # cos(40 rad) instead of cos(40 deg).
        theta = theta_deg * np.pi / 180

        # NOTE(review): arccos(cos(theta)) simply returns theta for angles in
        # [0, pi]. If 1/cos(theta) was intended here, this needs changing -
        # confirm against the model definition.
        t = np.exp(-2 * b * v2 * np.arccos(np.cos(theta)))
        t_sq = t ** 2

        sigmav = a * v1 * np.cos(theta) * (1 - t_sq)
        sigmas = c + d * sm
        return sigmav + t_sq * sigmas
# Module-level estimator instance that the grid-search helper passes to GridSearchCV.
wcm = wc(verbose=False)
现在,我定义一个函数来进行网格搜索:
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer
def wcm_params_selection(v1, v2, sm, y, nfolds):
    """Grid-search the coefficients ``a``, ``b``, ``c``, ``d`` for the module-level ``wcm``.

    Candidates are scored with negated mean squared error and evaluated
    with ``nfolds``-fold cross-validation. Returns ``best_params_`` of the
    fitted search.

    NOTE(review): the search is fitted with keyword arguments only
    (``v1``, ``v2``, ``sm``, ``y``) and no ``X``; this is the call that
    produces the reported ``fit() missing 1 required positional argument``
    error.
    """
    step = 0.1
    param_grid = {
        'a': np.arange(0.0, 10.0, step),
        'b': np.arange(0.0, 10.0, step),
        'c': np.arange(-10.0, 0, step),
        'd': np.arange(0.0, 10.0, step),
    }

    # greater_is_better=False makes the scorer return the negated MSE, so
    # that a larger score still means a better candidate.
    neg_mse = make_scorer(mean_squared_error, greater_is_better=False)

    search = GridSearchCV(wcm, param_grid, scoring=neg_mse, cv=nfolds)
    search.fit(v1=v1, v2=v2, sm=sm, y=y)
    return search.best_params_
运行
# Smoke test with one sample: a single value per predictor and one measurement.
v1 = np.array([5.0])  # single predictor test values
v2 = np.array([5.0])
sm = np.array([0.5])
y = np.array([-10])  # measurement
wcm_params_selection(v1=v1, v2=v2, sm=sm, y=y, nfolds=5)
这会报错:TypeError: fit() missing 1 required positional argument: 'X'。
更新:
我对代码做了大量修改,感觉离目标越来越近了。如果能得到一些关于这种做法是否正确的反馈就太好了。用一些测试数据运行后,最终得到的 a 和 b 似乎始终为 0。
估计器类:
class wc(BaseEstimator, RegressorMixin):
    """Closed-form backscatter model with coefficients ``a``, ``b``, ``c``, ``d``.

    The coefficients are constructor parameters, so a search such as
    ``GridSearchCV`` can set them by name. :meth:`fit` learns nothing from
    the data; it only validates the inputs and marks the instance as fitted.

    Note on identifiability: when ``b == 0`` the attenuation ``t`` equals 1,
    so the factor ``(1 - t**2)`` is 0 and the prediction no longer depends
    on ``a`` at all. Every value of ``a`` then scores identically.
    """

    def __init__(self, a, b, c, d):
        self.a = a
        self.b = b
        self.c = c
        self.d = d

    def fit(self, X, y):
        """Validate ``X`` and ``y`` and flag the estimator as fitted.

        Returns ``self``. No parameter is estimated from the data.
        """
        X, y = check_X_y(X, y)
        self.is_fitted_ = True
        return self

    def predict(self, X):
        """Evaluate the model on ``X``.

        Parameters
        ----------
        X : 2-D array-like
            Column 0 is read as ``v1`` and ``v2``; column 1 is read as ``sm``.

        Returns
        -------
        ndarray
            ``a*v1*cos(theta)*(1 - t**2) + t**2 * (c + d*sm)`` with
            ``t = exp(-2*b*v2*theta)`` and ``theta`` equal to 40 degrees
            expressed in radians.
        """
        X = check_array(X)
        check_is_fitted(self, 'is_fitted_')

        v1 = X[:, 0]
        # NOTE(review): v2 reads the same column as v1; confirm that X is not
        # meant to carry v2 in a column of its own.
        v2 = X[:, 0]
        sm = X[:, 1]

        theta_deg = 40.0
        # Convert once and use radians everywhere: the original fed the raw
        # degree value to np.cos() in the vegetation term, which evaluates
        # cos(40 rad) instead of cos(40 deg).
        theta = theta_deg * np.pi / 180

        # NOTE(review): arccos(cos(theta)) simply returns theta for angles in
        # [0, pi]. If 1/cos(theta) was intended here, this needs changing -
        # confirm against the model definition.
        t = np.exp(-2 * self.b * v2 * np.arccos(np.cos(theta)))
        t_sq = np.power(t, 2)

        sigmav = self.a * v1 * np.cos(theta) * (1 - t_sq)
        sigmas = self.c + self.d * sm
        return sigmav + t_sq * sigmas
网格搜索函数:
def wcm_params_selection(X, y, nfolds):
    """Grid-search the ``wc`` coefficients ``a``, ``b``, ``c``, ``d`` on ``(X, y)``.

    Parameters
    ----------
    X : 2-D array-like
        Feature matrix passed through to ``GridSearchCV.fit``.
    y : array-like
        Target values.
    nfolds : int or cross-validation splitter
        Forwarded to ``GridSearchCV`` as ``cv``.

    Returns
    -------
    dict
        ``best_params_`` of the fitted search, keyed by ``'a'``, ``'b'``,
        ``'c'``, ``'d'``.
    """
    a = np.arange(0.0, 5.0, 1)
    b = np.arange(0.0, 10.0, 1)
    c = np.arange(-35, 0, 2)
    d = np.arange(15, 60, 1)
    param_grid = {'a': a, 'b': b, 'c': c, 'd': d}

    # greater_is_better=False makes the scorer return the negated MSE, so
    # that a larger score still means a better candidate.
    my_func = make_scorer(mean_squared_error, greater_is_better=False)

    # GridSearchCV needs an estimator *instance* to clone, not the class.
    # The constructor values are placeholders; the search overrides them
    # with each candidate from param_grid.
    estimator = wc(a=a[0], b=b[0], c=c[0], d=d[0])

    # Honour the caller's fold count; it was previously ignored (cv=2).
    grid_search = GridSearchCV(estimator, param_grid, scoring=my_func, cv=nfolds)
    grid_search.fit(X, y)
    return grid_search.best_params_
然后用得到的参数拟合模型并预测值:
# Pick the best coefficients by grid search, then build an estimator from them.
wcm_params = wcm_params_selection(X, y, 5)
best_coeffs = {name: wcm_params[name] for name in ('a', 'b', 'c', 'd')}

# fit() returns the estimator itself, so construction and fitting can be chained.
wcmfit = wc(**best_coeffs).fit(X, y)

# Predict on the same data and compute the estimator's score on it.
out = wcmfit.predict(X)
score = wcmfit.score(X, y)
不幸的是,我完全没有头绪:a 和 b 得到的结果总是 0,这显然不对。如能得到任何帮助,我将不胜感激。