I trained a simple machine learning model, a polynomial regression. The pseudocode of the prediction function is as follows:
def f(x):
    """
    x is a np.ndarray of shape (m,)
    """
    # X has columns x ** 0, x ** 1, x ** 2, ..., x ** (n - 1),
    # so X has shape (m, n), where m is the number of training examples
    X = generate(x)
    Y = np.dot(X, W)
    return Y
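For reference, a runnable version of this pseudocode could look like the sketch below; generate and the shape of W are my assumptions based on the comments above, not my actual training code.

import numpy as np

n = 3                      # number of polynomial terms (assumed for this sketch)
W = np.random.randn(n, 1)  # the trained weights would go here; random values just for the shapes

def generate(x):
    # stack x ** 0, x ** 1, ..., x ** (n - 1) as columns -> design matrix of shape (m, n)
    return np.stack([x ** i for i in range(n)], axis=1)

def f(x):
    X = generate(x)    # (m, n)
    Y = np.dot(X, W)   # (m, n) @ (n, 1) -> (m, 1)
    return Y

x = np.linspace(0.0, 1.0, 5)
print(f(x).shape)            # (5, 1)
print(f(x).squeeze().shape)  # (5,)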
W is the trained parameter vector. Here Y has shape (m, 1), but if I return Y.squeeze() instead, so the shape becomes (m,), I get a very different standard deviation on the test set: roughly 70 for the former versus 8 for the latter. I use random initialization, but I have trained and tested many times, and the squeezed version always gives a much smaller standard deviation, so I just want to know why.
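To make the two cases concrete, here is a small sketch with made-up numbers (not my real data). The only difference between the two returns is the shape, and subtracting each from a 1-D array such as y_test behaves differently because of NumPy broadcasting:

import numpy as np

ys = np.array([1.0, 2.0, 3.0])           # targets, shape (m,)
y_col = np.array([[1.5], [2.5], [3.5]])  # what returning Y gives, shape (m, 1)
y_flat = y_col.squeeze()                 # what returning Y.squeeze() gives, shape (m,)

print((ys - y_flat).shape)  # (3,): element-wise differences
print((ys - y_col).shape)   # (3, 3): broadcasting pairs every target with every prediction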
The full code is below so you can test it yourself. My question is about lines 90 and 91 of my original file, i.e. the return Y.T line and the commented-out return Y.squeeze() line in pred.
# python: 3.5.2
# encoding: utf-8
# numpy: 1.14.1
import numpy as np
import matplotlib.pyplot as plt
def load_data(filename):
    """Load whitespace-separated x y pairs, one pair per line."""
    xys = []
    with open(filename, 'r') as f:
        for line in f:
            xys.append(map(float, line.strip().split()))
    xs, ys = zip(*xys)
    return np.asarray(xs), np.asarray(ys)
def evaluate(ys, ys_pred):
    """Root of the mean squared difference between ys and ys_pred."""
    std = np.sqrt(np.mean(np.abs(ys - ys_pred) ** 2))
    return std
def linear_regression(x_train, y_train, n=2, learning_rate=0.0005, epochs=1000, l2=0, Print=False):
    """
    The target function is: y = b + w1 * x^1 + w2 * x^2 + ...
    i.e. y = b + np.dot(w.T, x)
    :param x_train: np.ndarray
    :param y_train: np.ndarray
    :return: a trained model (as a function), trained on x_train and y_train
    """
    # number of training examples
    m = x_train.shape[0]
    # set and initialize the parameters
    # intercept
    b = np.float64(-10)
    # weights
    w = np.float64(np.random.randn(n, 1))
    # convert x_train to a design matrix of shape (n, m): row i holds x_train ** (i + 1)
    X = np.zeros((n, m), dtype=np.float64)
    for i in range(n):
        X[i, :] = x_train ** (i + 1)
    Y = np.float64(np.reshape(y_train, newshape=(1, m)))
    # record the cost per epoch in case a plot of the training process is needed
    costs = []
    # train on the dataset
    for epoch in range(epochs):
        # forward pass and gradients of the cost w.r.t. w and b
        Z = b + np.dot(w.T, X)
        dZ = Z - Y
        dw = 1. / m * np.dot(X, dZ.T)
        db = 1. / m * np.squeeze(np.sum(dZ))
        # update the parameters; for w I also apply "weight decay" (l2)
        w -= learning_rate * dw + l2 * w
        b -= learning_rate * db
        cost = np.squeeze(0.5 / m * np.dot(dZ, dZ.T))
        costs.append(cost)
        if Print and epoch % 25 == 0:
            print("Cost after " + str(epoch) + " iterations: " + str(cost))
    # plot the costs
    if Print:
        plt.plot(costs)
        plt.show()

    def pred(x):
        assert isinstance(x, np.ndarray)
        m = x.shape[0]
        # build the design matrix for x, same layout as in training
        X = np.zeros((n, m))
        for i in range(n):
            X[i, :] = x ** (i + 1)
        # predict
        Y = b + np.dot(w.T, X)   # shape (1, m)
        return Y.T               # shape (m, 1)
        # return Y.squeeze()     # shape (m,)

    return pred
if __name__ == '__main__':
    train_file = 'train.txt'
    test_file = 'test.txt'
    # load data
    x_train, y_train = load_data(train_file)
    x_test, y_test = load_data(test_file)
    print(x_train.shape)
    print(x_test.shape)
    # train a linear-regression model
    f = linear_regression(x_train, y_train, n=2, epochs=10000, Print=False, learning_rate=1e-8, l2=5e-2)
    # compute the predictions
    y_test_pred = f(x_test)
    # evaluate the model on the test set
    std = evaluate(y_test, y_test_pred)
    print('the standard deviation: {:.1f}'.format(std))
    # show the result
    plt.plot(x_train, y_train, 'ro', markersize=3)
    plt.plot(x_test, y_test, 'k')
    plt.plot(x_test, y_test_pred)
    plt.xlabel('x')
    plt.ylabel('y')
    plt.title('Linear Regression')
    plt.legend(['train', 'test', 'pred'])
    plt.show()
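For completeness, these are the shape checks I would add right after y_test_pred = f(x_test); the print lines are hypothetical additions, not part of my original script.

# hypothetical shape checks after y_test_pred = f(x_test)
print(y_test.shape)                  # (m,)
print(y_test_pred.shape)             # (m, 1) with return Y.T, (m,) with return Y.squeeze()
print((y_test - y_test_pred).shape)  # this is the array evaluate() actually averages over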