我建立了一个线性回归模型。
import numpy as np
np.seterr(all='ignore');
def normalize(X):
    """Z-score each column of X.

    Returns a tuple ``(scaled, mean, std)`` where ``scaled`` has each
    column centered on its mean and divided by its (population) std.
    A constant column yields 0/0 -> NaN, which ``nan_to_num`` maps back
    to 0 (the module-level ``np.seterr`` silences the divide warning).
    """
    arr = np.array(X)
    mu = arr.mean(axis=0)
    sigma = arr.std(axis=0)
    scaled = np.nan_to_num((arr - mu) / sigma)
    return scaled, mu, sigma
class LinearModel():
    """Linear regression trained by batch gradient descent with optional
    L2 (ridge) regularization.

    Features are z-score normalized inside ``fit``; the fitted mean/std
    are stored on the instance and re-applied in ``predict``.
    """

    @staticmethod
    def _normalize(X):
        # Column-wise z-score; nan_to_num maps 0/0 (constant column) to 0.
        X = np.array(X)
        mean = X.mean(axis=0)
        std = X.std(axis=0)
        return np.nan_to_num(np.divide(np.subtract(X, mean), std)), mean, std

    def fit(self, X=[[]], y=[], alpha=0, lambda_=0, tol=0, max_iter=100000):
        """Fit parameters by gradient descent.

        Parameters
        ----------
        X : 2-D array-like, shape (m, n) — training features.
        y : 1-D array-like, length m — targets.
        alpha : learning rate (with the default 0 the parameters never move).
        lambda_ : L2 regularization strength; the intercept is NOT penalized.
        tol : relative tolerance between successive costs used as stop test
            (np.isclose also applies its default atol of 1e-8).
        max_iter : hard cap on iterations.  Without it a too-large alpha
            makes J diverge to inf/nan, np.isclose(nan, nan) is always
            False, and the original loop never terminated.

        Returns ``self`` on success.  On a shape/conversion ValueError it
        stores what it has and returns ``(theta, J_hist)`` as the original
        did.
        """
        # Pre-initialize so the except path cannot raise NameError when the
        # failure happens before these assignments (bug in the original).
        theta = np.zeros((0, 1))
        J_hist = np.array([-1, 0])  # two sentinels so [-1]/[-2] always exist
        try:
            X, self.mean, self.std = self._normalize(X)
            m = X.shape[0]
            X = np.concatenate([np.ones((m, 1)), X], axis=1)  # bias column
            n = X.shape[1]
            theta = np.zeros((n, 1))
            y = np.array(y).reshape(-1, 1)
        except ValueError:
            # Original set ``self.theta`` here but ``self.theta_`` on the
            # success path; use the same attribute name for consistency.
            self.theta_ = theta
            return theta, J_hist[2:]
        it = 0
        while not np.isclose(J_hist[-1], J_hist[-2], rtol=tol) and it < max_iter:
            pred = np.matmul(X, theta)
            # Regularized cost.  The intercept theta[0] is excluded from the
            # penalty (the original summed theta**2 over ALL entries, and
            # applied the gradient penalty from index 2, which skipped the
            # first real feature weight — an off-by-one).
            J = 1/(2*m) * np.sum((pred - y) ** 2) \
                + lambda_/(2*m) * np.sum(theta[1:] ** 2)
            grad = 1/m * np.matmul(X.T, pred - y)
            grad[1:] += lambda_/m * theta[1:]  # was grad[2:] += ... theta[2:]
            theta -= alpha * grad
            J_hist = np.append(J_hist, J)
            it += 1
        self.J_hist_ = J_hist[2:]  # drop the two sentinels
        self.theta_ = theta
        return self

    def predict(self, X=[[]]):
        """Predict targets for X using the stored normalization and theta_."""
        X = np.nan_to_num((np.array(X) - self.mean) / self.std)
        m = X.shape[0]
        return np.matmul(np.concatenate([np.ones((m, 1)), X], axis=1),
                         self.theta_)
以上是 Python 代码。对这个模型,我尝试绘制成本函数
$J = \frac{1}{2m}\sum_{i=1}^{m}\left(\hat{y}^{(i)} - y^{(i)}\right)^2$
(其中 $m$ 为训练样本数)在验证集上随正则化参数 $\lambda$ 变化的曲线。成本图如下所示:
为什么它表现得如此怪异、到处跳跃?是代码中有错误,还是这是成本函数本身的逻辑/数学性质?顺便说一句,我换了另一组数据试过,成本函数的行为方式也相同……