I noticed that sklearn.linear_model.SGDClassifier implements gradient descent for a linear model, so one could say that the class combines the fitting procedure (SGD) and the model (a linear model) into a single class.
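For example (just to illustrate the coupling, using the standard scikit-learn API): the loss/penalty that define the model and the SGD schedule that defines the optimizer are all configured on the same object:

from sklearn.linear_model import SGDClassifier

# The model (linear, hinge loss, l2 penalty) and the optimizer
# (SGD learning-rate schedule, number of passes) live in one constructor.
clf = SGDClassifier(loss='hinge', penalty='l2', alpha=1e-4,
                    learning_rate='optimal', max_iter=1000)
# clf.fit(X, y); clf.predict(X_new)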
In particular, I implemented a model using theano and wrapped it in a fit/predict interface. Theano is cool because it lets you define a callable that applies one gradient descent step to a single sample or to a set of samples, and another callable that returns the error on a validation set. But there is nothing inherently theano-specific about this: many more models could simply define such update and error-evaluation functions, which different iteration and stopping strategies could then drive.
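To make that concrete, here is roughly the kind of pair of callables I mean (a minimal sketch for plain linear regression with a squared-error cost; all names are made up, this is not my actual model code):

import numpy
import theano
import theano.tensor as T

def make_update_and_error(X_shared, y_shared, learning_rate=0.01):
    n_features = X_shared.get_value(borrow=True).shape[1]
    w = theano.shared(numpy.zeros(n_features), name='w')
    b = theano.shared(0.0, name='b')

    index = T.lvector('index')                    # indices of the samples to use
    x, y = X_shared[index], y_shared[index]
    cost = T.mean((T.dot(x, w) + b - y) ** 2)
    updates = [(w, w - learning_rate * T.grad(cost, w)),
               (b, b - learning_rate * T.grad(cost, b))]

    # 'update' takes one gradient step on the given indices and returns the cost;
    # 'error' only evaluates the cost on the given indices.
    update = theano.function([index], cost, updates=updates)
    error = theano.function([index], cost)
    return update, error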
The theano examples usually use minibatches, and the minibatch code tends to get copy-pasted or re-implemented with only small tweaks, tweaks that could easily be factored out. So I was hoping sklearn would provide something that you initialize with a few parameters and that can then fit "any model" by calling its update/error callables. Or maybe there are good practices for how to do this yourself (in particular w.r.t. the fitter's interface).
Is there anything like this (in sklearn), i.e. fitters that do not define the model?
* In the particular case of a linear model and an l2 cost function there are of course no local optima, but still.
EDIT
Fair enough, this calls for a proposal. I coded up these two classes; they are not 100% clean, but they give an idea of what I mean:
import numpy


class StochasticUpdate():
    """Plain (per-sample) stochastic gradient descent driver.

    Only needs an 'update' callable (applies one step on the given sample
    indices and returns the cost) and, optionally, an 'error' callable.
    """

    def __init__(self, model, update, n_epochs, n_data_points, error=None, test_fraction=None):
        self.update = update
        self.n_epochs = n_epochs
        self.n_data_points = n_data_points
        self.error = error
        self.model = model
        if self.error is None and test_fraction is not None:
            raise ValueError('error parameter must be specified if a test_fraction (value: %s) should be used.' % test_fraction)
        self.do_test = test_fraction is not None
        # Hold out the last test_fraction of the data points for testing.
        self.n_train_samples = int(n_data_points * (1 - test_fraction)) if self.do_test else n_data_points
        if self.do_test:
            self.test_range = numpy.arange(self.n_train_samples, n_data_points)
            self.n_test_samples = n_data_points - self.n_train_samples
        self.train_range = numpy.arange(0, self.n_train_samples)

    def fit(self):
        if self.do_test:
            self.test_errors = []
        self.train_errors = []
        self.mean_cost_values = []
        for epoch in range(self.n_epochs):
            # Visit the training samples one by one in a fresh random order each epoch.
            order = numpy.random.permutation(self.n_train_samples)
            mean_cost_value = 0
            for i in range(self.n_train_samples):
                mean_cost_value += self.update([order[i]])
            self.mean_cost_values.append(mean_cost_value / self.n_train_samples)
            if self.error is not None:
                self.train_errors.append(self.error(self.train_range))
                if self.do_test:
                    self.test_errors.append(self.error(self.test_range))
        return self.model
from math import ceil


class MinibatchStochasticUpdate(StochasticUpdate):
    """Minibatch gradient descent driver with the usual patience-based early stopping."""

    def __init__(self, model, update, n_epochs, n_data_points, error, batch_size, patience=5000, patience_increase=2,
                 improvement_threshold=0.995, validation_frequency=None, validate_fraction=0.1, test_fraction=None):
        super().__init__(model, update, n_epochs, n_data_points, error=error, test_fraction=test_fraction)
        self.batch_size = batch_size
        self.patience = patience
        self.patience_increase = patience_increase
        self.improvement_threshold = improvement_threshold
        # Carve the validation set out of the end of the training range.
        self.n_validation_samples = int(n_data_points * validate_fraction)
        self.n_train_samples -= self.n_validation_samples
        self.train_range = numpy.arange(0, self.n_train_samples)
        self.validation_range = numpy.arange(self.n_train_samples, self.n_train_samples + self.n_validation_samples)
        self.n_train_batches = int(ceil(self.n_train_samples / self.batch_size))
        self.train_batch_ranges = [
            numpy.arange(minibatch_index * self.batch_size, min((minibatch_index + 1) * self.batch_size, self.n_train_samples))
            for minibatch_index in range(self.n_train_batches)
        ]
        self.validation_frequency = min(self.n_train_batches, patience // 2) if validation_frequency is None else validation_frequency

    def fit(self):
        self.best_validation_error = numpy.inf
        best_params = self.model.copy_parameters()
        iteration = 0
        for epoch in range(self.n_epochs):
            for minibatch_index in range(self.n_train_batches):
                self.update(self.train_batch_ranges[minibatch_index])
                if (iteration + 1) % self.validation_frequency == 0:
                    current_validation_error = self.error(self.validation_range)
                    if current_validation_error < self.best_validation_error:
                        # A sufficiently large improvement buys more patience.
                        if current_validation_error < self.best_validation_error * self.improvement_threshold:
                            self.patience = max(self.patience, iteration * self.patience_increase)
                        best_params = self.model.copy_parameters()
                        self.best_validation_error = current_validation_error
                if iteration > self.patience:
                    # Ran out of patience: restore the best parameters seen so far.
                    self.model.set_parameters(best_params)
                    return self.model
                iteration += 1
        self.model.set_parameters(best_params)
        return self.model
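As a usage sketch (hypothetical, names made up): driving StochasticUpdate with a plain numpy model, to show that the fitter never needs to look inside the model, only at the update/error callables:

import numpy

class TinyLinearModel:
    """Toy linear model, fitted entirely through the update closure below."""
    def __init__(self, n_features, learning_rate=0.01):
        self.w = numpy.zeros(n_features)
        self.learning_rate = learning_rate

rng = numpy.random.RandomState(0)
X = rng.randn(100, 3)
y = X.dot(numpy.array([1.0, -2.0, 0.5])) + 0.1 * rng.randn(100)
model = TinyLinearModel(n_features=3)

def update(idx):
    # One SGD step on the rows in idx; returns the (pre-update) cost.
    residual = X[idx].dot(model.w) - y[idx]
    model.w -= model.learning_rate * X[idx].T.dot(residual) / len(idx)
    return float(numpy.mean(residual ** 2))

def error(idx):
    return float(numpy.mean((X[idx].dot(model.w) - y[idx]) ** 2))

fitter = StochasticUpdate(model, update, n_epochs=20, n_data_points=100,
                          error=error, test_fraction=0.2)
fitter.fit()
print(fitter.train_errors[-1], fitter.test_errors[-1])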
Then, for fitting the model, different training methods and stopping criteria could be supported along these lines:
# (method on the model class; assumes 'import theano' at module level)
def fit(self, X, y):
    X_shared = theano.shared(X, borrow=True)
    y_shared = theano.shared(y, borrow=True)
    learning_rate = self.training_method_options['learning_rate']
    # Pick the trainer by name and hand it the model's update/error callables.
    trainer = {
        'stochastic_gradient_descent': lambda: StochasticUpdate(
            self,
            update=self.update_stochastic_gradient_descent_function(X_shared, y_shared, learning_rate),
            n_epochs=self.training_method_options['n_epochs'],
            n_data_points=X.shape[0],
            error=self.evaluation_function(X_shared, y_shared),
        ),
        'minibatch_gradient_descent': lambda: MinibatchStochasticUpdate(
            self,
            update=self.update_stochastic_gradient_descent_function(X_shared, y_shared, learning_rate),
            n_epochs=self.training_method_options['n_epochs'],
            n_data_points=X.shape[0],
            error=self.evaluation_function(X_shared, y_shared),
            batch_size=self.training_method_options['batch_size'],
        ),
    }[self.training_method]()
    trainer.fit()
    return self
Obviously the hash-map part is hacky, and it could be done more elegantly with a standardized interface for the two classes above (since for N fitters and M models the hash map would still be of size O(N * M)).
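One direction I can imagine for such a standardized interface (purely hypothetical, nothing like this exists in sklearn as far as I know): a single registry of fitter factories that all accept the same (model, update, error, options) arguments, so each model only contributes its update/error callables and the glue grows with N + M rather than N * M:

# Hypothetical registry; every factory has the same signature.
FITTERS = {
    'stochastic_gradient_descent': lambda model, update, error, opts: StochasticUpdate(
        model, update,
        n_epochs=opts['n_epochs'], n_data_points=opts['n_data_points'], error=error),
    'minibatch_gradient_descent': lambda model, update, error, opts: MinibatchStochasticUpdate(
        model, update,
        n_epochs=opts['n_epochs'], n_data_points=opts['n_data_points'], error=error,
        batch_size=opts['batch_size']),
}

def fit_with(model, method, update, error, **opts):
    """Look the fitter up once, instead of once per model class."""
    return FITTERS[method](model, update, error, opts).fit()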