我试图让这个估计器与 scikit-learn 兼容,以便可以使用 GridSearchCV 搜索参数空间。
编辑:
我已根据建议修改了脚本(见下文)。
fit(self, X, y)
__init__
GridSearchCV 仍然存在兼容性问题,可能是因为估计器是一个多标签分类器。
ValueError: Can't handle mix of multilabel-indicator and continuous-multioutput
但这不是重点;属性错误(AttributeError)现在已经消失了。因此,我们可以有把握地得出结论:所建议的修改使估计器与 scikit-learn 兼容。
最终代码脚本:
import numpy as np
from sklearn.grid_search import GridSearchCV
from sklearn.datasets import make_classification
from sklearn.preprocessing import LabelBinarizer
from sklearn.cross_validation import train_test_split
from sklearn.base import BaseEstimator, ClassifierMixin
class LogisticClassifier(BaseEstimator, ClassifierMixin):
    """Softmax (multinomial logistic) classifier trained by gradient descent.

    The targets passed to ``fit`` must be a 2-D indicator matrix with one
    column per class (for example the output of ``LabelBinarizer`` on a
    problem with three or more classes).

    Parameters
    ----------
    basis : {'rectifier', None}
        ``'rectifier'`` pushes the inputs through a random linear projection
        followed by ``max(., 0)``; any other value uses the raw inputs.  In
        both cases a constant bias feature is appended.
    itrs : int
        Number of gradient-descent iterations.
    learn_rate : float
        Step size of each update.
    reg : float
        L2 regularisation strength.
    momentum : float
        Weight given to the previous iteration's gradient in each update.
    proj_layer_size : int
        Number of rows of the random projection matrix ``A``.

    Notes
    -----
    ``get_params`` and ``set_params`` are inherited from ``BaseEstimator``,
    which derives them from the ``__init__`` signature.  That only works
    because every constructor argument is stored, unmodified, on an
    attribute of the same name.
    """

    def __init__(self, basis=None, itrs=100, learn_rate=0.1, reg=0.1,
                 momentum=0.5, proj_layer_size=10):
        # Hyper-parameters: stored verbatim so that cloning the estimator
        # reproduces all of them, including ``basis``.
        self.basis = basis
        self.itrs = itrs
        self.learn_rate = learn_rate
        self.reg = reg
        self.momentum = momentum
        self.proj_layer_size = proj_layer_size
        # Learned state, overwritten by fit().
        self.W = []
        self.A = None

    def basisfunc(self, x):
        """Apply the basis selected by ``self.basis`` to a single sample."""
        # Resolved at call time so a later change of ``basis`` takes effect.
        if self.basis == 'rectifier':
            return self.rectifier_basis(x)
        return self.identity(x)

    def identity(self, x):
        """Return the 1-D sample ``x`` with a constant 1 (bias) appended."""
        return np.hstack((x, 1))

    def rectifier_basis(self, x):
        """Project ``x`` with ``A``, rectify at zero, and append the bias."""
        xn = np.dot(self.A, x)
        return self.identity(np.maximum(xn, 0))

    def basismap(self, X):
        """Apply the selected basis to every row of ``X`` at once.

        Returns a float array with one row per sample and one extra trailing
        column of ones (the bias feature).
        """
        features = np.asarray(X)
        if self.basis == 'rectifier':
            features = np.maximum(np.dot(features, self.A.T), 0)
        bias = np.ones((features.shape[0], 1))
        return np.hstack((features, bias))

    def fit(self, X, Y):
        """Train the weights by gradient descent.

        Parameters
        ----------
        X : array of shape (n_samples, n_features)
        Y : array of shape (n_samples, n_classes), 0/1 indicator matrix

        Returns
        -------
        list of float
            Training loss after each iteration.  The same list is kept on
            ``self.costs_train_``.  (scikit-learn's convention is to return
            ``self``; the list is returned to stay compatible with existing
            callers of this class.)
        """
        X = np.asarray(X)
        Y = np.asarray(Y)
        # A is drawn first, then W, so a seeded global NumPy RNG yields the
        # same initialisation order as before.
        self.A = np.random.uniform(-1, 1, (self.proj_layer_size, X.shape[1]))
        Xn = self.basismap(X)
        self.W = np.random.uniform(-0.1, 0.1, (Y.shape[1], Xn.shape[1]))

        costs_train = []
        previous_grad = np.zeros(self.W.shape)
        for _ in range(self.itrs):
            grad = self.grad(Xn, Y)
            step = grad + self.momentum * previous_grad
            self.W = self.W - self.learn_rate * step
            previous_grad = grad
            # Xn does not change inside the loop, so reuse it instead of
            # re-mapping X on every iteration.
            costs_train.append(self._loss_mapped(Xn, Y))

        self.costs_train_ = costs_train
        return costs_train

    def softmax(self, Z):
        """Row-wise softmax of the 2-D score matrix ``Z``.

        The row maximum is subtracted before exponentiating, which leaves the
        result mathematically unchanged but keeps ``exp`` from overflowing.
        """
        Z = np.asarray(Z, dtype=float)
        shifted = Z - np.max(Z, axis=1, keepdims=True)
        numerator = np.exp(shifted)
        return numerator / np.sum(numerator, axis=1, keepdims=True)

    def predict_proba(self, X):
        """Return class probabilities, shape (n_samples, n_classes)."""
        return self.softmax(np.dot(self.basismap(X), self.W.T))

    def predict(self, X):
        """Return a 0/1 indicator matrix marking the most probable class.

        The output has the same layout as the ``Y`` given to ``fit``, so it
        can be compared against it directly; use ``predict_proba`` for the
        underlying probabilities.
        """
        proba = self.predict_proba(X)
        labels = np.zeros(proba.shape, dtype=int)
        rows = np.arange(proba.shape[0])
        labels[rows, np.argmax(proba, axis=1)] = 1
        return labels

    def grad(self, Xn, Y):
        """Gradient of the regularised cross-entropy with respect to ``W``.

        ``Xn`` must already be basis-mapped (see ``basismap``).
        """
        Yh = self.softmax(np.dot(Xn, self.W.T))
        return -np.dot(Y.T - Yh.T, Xn) / Xn.shape[0] + self.reg * self.W

    def loss(self, X, Y):
        """Regularised cross-entropy of the model on raw inputs ``X``."""
        return self._loss_mapped(self.basismap(X), np.asarray(Y))

    def _loss_mapped(self, Xn, Y):
        """Regularised cross-entropy on already basis-mapped inputs."""
        Yh = self.softmax(np.dot(Xn, self.W.T))
        # Floor the probabilities so an underflowed 0 cannot produce
        # 0 * log(0) = nan.
        log_yh = np.log(np.maximum(Yh, np.finfo(float).tiny))
        data_term = -np.mean(Y * log_yh)
        # The penalty is added: grad() adds reg * W, i.e. it minimises a
        # loss that grows with the size of W.
        penalty = self.reg * np.sum(self.W ** 2) / self.W.shape[0]
        return data_term + penalty
# Generate a three-class, two-feature toy data set and one-hot encode the
# labels so that Y has one indicator column per class.
X, Y = make_classification(
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_repeated=0,
    n_classes=3,
    n_clusters_per_class=1,
    random_state=31,
)
lb = LabelBinarizer()
Y = lb.fit_transform(Y)
Xtrain, Xtest, Ytrain, Ytest = train_test_split(
    X, Y, test_size=0.25, random_state=5
)

# Search over learning rate and regularisation strength on the training
# split, scoring each candidate by accuracy.
param_grid = {
    'learn_rate': [0.1, 0.01, 0.001],
    'reg': [0.001, 0.01],
}
clf = LogisticClassifier(basis='rectifier')
gs_cv = GridSearchCV(clf, param_grid, scoring='accuracy')
gs_cv.fit(Xtrain, Ytrain)
print('Best hyperparameters: %r' % gs_cv.best_params_)
答案 0 :(得分:4)
在 get_params 方法中,您访问了 self.itrs,但您的对象没有这个属性。
另外,我建议您将 fit 的签名改为 fit(self, X, y),并且不要在 __init__ 中传入数据;改用 sklearn.cross_validation.train_test_split 把 X 和 y 划分为训练集和测试集。这会使您的代码更符合 sklearn 的风格,并且与库函数更兼容。