我建立了一个线性回归模型。
import numpy as np
np.seterr(all='ignore');
def normalize(X):
    """Z-score each column of X.

    Returns a tuple ``(scaled, mean, std)`` where ``scaled`` has each
    column centered on its mean and divided by its (population) std.
    A constant column yields 0/0 -> NaN, which ``nan_to_num`` maps back
    to 0 (the module-level ``np.seterr`` silences the divide warning).
    """
    arr = np.array(X)
    mu = arr.mean(axis=0)
    sigma = arr.std(axis=0)
    scaled = np.nan_to_num((arr - mu) / sigma)
    return scaled, mu, sigma
class LinearModel():
    """Linear regression trained by batch gradient descent with optional
    L2 (ridge) regularization.

    Features are z-score normalized inside ``fit``; the fitted mean/std
    are stored on the instance and re-applied in ``predict``.
    """

    @staticmethod
    def _normalize(X):
        # Column-wise z-score; nan_to_num maps 0/0 (constant column) to 0.
        X = np.array(X)
        mean = X.mean(axis=0)
        std = X.std(axis=0)
        return np.nan_to_num(np.divide(np.subtract(X, mean), std)), mean, std

    def fit(self, X=[[]], y=[], alpha=0, lambda_=0, tol=0, max_iter=100000):
        """Fit parameters by gradient descent.

        Parameters
        ----------
        X : 2-D array-like, shape (m, n) — training features.
        y : 1-D array-like, length m — targets.
        alpha : learning rate (with the default 0 the parameters never move).
        lambda_ : L2 regularization strength; the intercept is NOT penalized.
        tol : relative tolerance between successive costs used as stop test
            (np.isclose also applies its default atol of 1e-8).
        max_iter : hard cap on iterations.  Without it a too-large alpha
            makes J diverge to inf/nan, np.isclose(nan, nan) is always
            False, and the original loop never terminated.

        Returns ``self`` on success.  On a shape/conversion ValueError it
        stores what it has and returns ``(theta, J_hist)`` as the original
        did.
        """
        # Pre-initialize so the except path cannot raise NameError when the
        # failure happens before these assignments (bug in the original).
        theta = np.zeros((0, 1))
        J_hist = np.array([-1, 0])  # two sentinels so [-1]/[-2] always exist
        try:
            X, self.mean, self.std = self._normalize(X)
            m = X.shape[0]
            X = np.concatenate([np.ones((m, 1)), X], axis=1)  # bias column
            n = X.shape[1]
            theta = np.zeros((n, 1))
            y = np.array(y).reshape(-1, 1)
        except ValueError:
            # Original set ``self.theta`` here but ``self.theta_`` on the
            # success path; use the same attribute name for consistency.
            self.theta_ = theta
            return theta, J_hist[2:]
        it = 0
        while not np.isclose(J_hist[-1], J_hist[-2], rtol=tol) and it < max_iter:
            pred = np.matmul(X, theta)
            # Regularized cost.  The intercept theta[0] is excluded from the
            # penalty (the original summed theta**2 over ALL entries, and
            # applied the gradient penalty from index 2, which skipped the
            # first real feature weight — an off-by-one).
            J = 1/(2*m) * np.sum((pred - y) ** 2) \
                + lambda_/(2*m) * np.sum(theta[1:] ** 2)
            grad = 1/m * np.matmul(X.T, pred - y)
            grad[1:] += lambda_/m * theta[1:]  # was grad[2:] += ... theta[2:]
            theta -= alpha * grad
            J_hist = np.append(J_hist, J)
            it += 1
        self.J_hist_ = J_hist[2:]  # drop the two sentinels
        self.theta_ = theta
        return self

    def predict(self, X=[[]]):
        """Predict targets for X using the stored normalization and theta_."""
        X = np.nan_to_num((np.array(X) - self.mean) / self.std)
        m = X.shape[0]
        return np.matmul(np.concatenate([np.ones((m, 1)), X], axis=1),
                         self.theta_)
以上是 Python 代码。对这个模型,我尝试绘制成本函数
$J = \frac{1}{2m}\sum_{i=1}^{m}\left(\hat{y}^{(i)} - y^{(i)}\right)^2$
(其中 $m$ 为训练样本数)在验证集上随正则化参数 $\lambda$ 变化的曲线。成本图如下所示:
为什么它表现得如此怪异、到处跳跃?是代码中有错误,还是这是成本函数本身的逻辑/数学性质?顺便说一句,我换了另一组数据试过,成本函数的行为方式也相同……