I trained a simple machine learning model, a polynomial regression. The pseudocode of the prediction function is as follows:
def f(x):
    """
    x is a np.ndarray of shape (m,)
    """
    # X has columns x ** 0, x ** 1, x ** 2, ..., x ** (n - 1),
    # so X has shape (m, n), where m is the number of training examples
    X = generate(x)
    Y = np.dot(X, W)
    return Y
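For reference, a runnable version of this pseudocode could look like the sketch below; generate and the shape of W are my assumptions based on the comments above, not my actual training code.

import numpy as np

n = 3                      # number of polynomial terms (assumed for this sketch)
W = np.random.randn(n, 1)  # the trained weights would go here; random values just for the shapes

def generate(x):
    # stack x ** 0, x ** 1, ..., x ** (n - 1) as columns -> design matrix of shape (m, n)
    return np.stack([x ** i for i in range(n)], axis=1)

def f(x):
    X = generate(x)    # (m, n)
    Y = np.dot(X, W)   # (m, n) @ (n, 1) -> (m, 1)
    return Y

x = np.linspace(0.0, 1.0, 5)
print(f(x).shape)            # (5, 1)
print(f(x).squeeze().shape)  # (5,)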
W is the trained parameter vector. Here Y has shape (m, 1), but if I return Y.squeeze() instead, so the shape becomes (m,), I get a very different standard deviation on the test set: roughly 70 for the former versus 8 for the latter. I use random initialization, but I have trained and tested many times, and the squeezed version always gives a much smaller standard deviation, so I just want to know why.
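To make the two cases concrete, here is a small sketch with made-up numbers (not my real data). The only difference between the two returns is the shape, and subtracting each from a 1-D array such as y_test behaves differently because of NumPy broadcasting:

import numpy as np

ys = np.array([1.0, 2.0, 3.0])           # targets, shape (m,)
y_col = np.array([[1.5], [2.5], [3.5]])  # what returning Y gives, shape (m, 1)
y_flat = y_col.squeeze()                 # what returning Y.squeeze() gives, shape (m,)

print((ys - y_flat).shape)  # (3,): element-wise differences
print((ys - y_col).shape)   # (3, 3): broadcasting pairs every target with every prediction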
The full code is below so you can test it yourself. My question is about lines 90 and 91 of my original file, i.e. the return Y.T line and the commented-out return Y.squeeze() line in pred.
# python: 3.5.2
# encoding: utf-8
# numpy: 1.14.1
import numpy as np
import matplotlib.pyplot as plt
def load_data(filename):
    """Load whitespace-separated x y pairs, one pair per line."""
    xys = []
    with open(filename, 'r') as f:
        for line in f:
            xys.append(map(float, line.strip().split()))
    xs, ys = zip(*xys)
    return np.asarray(xs), np.asarray(ys)
def evaluate(ys, ys_pred):
    """Root of the mean squared difference between ys and ys_pred."""
    std = np.sqrt(np.mean(np.abs(ys - ys_pred) ** 2))
    return std
def linear_regression(x_train, y_train, n=2, learning_rate=0.0005, epochs=1000, l2=0, Print=False):
    """
    The target function is: y = b + w1 * x^1 + w2 * x^2 + ...
    i.e. y = b + np.dot(w.T, x)
    :param x_train: np.ndarray
    :param y_train: np.ndarray
    :return: a trained model (as a function), trained on x_train and y_train
    """
    # number of training examples
    m = x_train.shape[0]
    # set and initialize the parameters
    # intercept
    b = np.float64(-10)
    # weights
    w = np.float64(np.random.randn(n, 1))
    # convert x_train to a design matrix of shape (n, m): row i holds x_train ** (i + 1)
    X = np.zeros((n, m), dtype=np.float64)
    for i in range(n):
        X[i, :] = x_train ** (i + 1)
    Y = np.float64(np.reshape(y_train, newshape=(1, m)))
    # record the cost per epoch in case a plot of the training process is needed
    costs = []
    # train on the dataset
    for epoch in range(epochs):
        # forward pass and gradients of the cost w.r.t. w and b
        Z = b + np.dot(w.T, X)
        dZ = Z - Y
        dw = 1. / m * np.dot(X, dZ.T)
        db = 1. / m * np.squeeze(np.sum(dZ))
        # update the parameters; for w I also apply "weight decay" (l2)
        w -= learning_rate * dw + l2 * w
        b -= learning_rate * db
        cost = np.squeeze(0.5 / m * np.dot(dZ, dZ.T))
        costs.append(cost)
        if Print and epoch % 25 == 0:
            print("Cost after " + str(epoch) + " iterations: " + str(cost))
    # plot the costs
    if Print:
        plt.plot(costs)
        plt.show()

    def pred(x):
        assert isinstance(x, np.ndarray)
        m = x.shape[0]
        # build the design matrix for x, same layout as in training
        X = np.zeros((n, m))
        for i in range(n):
            X[i, :] = x ** (i + 1)
        # predict
        Y = b + np.dot(w.T, X)   # shape (1, m)
        return Y.T               # shape (m, 1)
        # return Y.squeeze()     # shape (m,)

    return pred
if __name__ == '__main__':
    train_file = 'train.txt'
    test_file = 'test.txt'
    # load data
    x_train, y_train = load_data(train_file)
    x_test, y_test = load_data(test_file)
    print(x_train.shape)
    print(x_test.shape)
    # train a linear-regression model
    f = linear_regression(x_train, y_train, n=2, epochs=10000, Print=False, learning_rate=1e-8, l2=5e-2)
    # compute the predictions
    y_test_pred = f(x_test)
    # evaluate the model on the test set
    std = evaluate(y_test, y_test_pred)
    print('the standard deviation: {:.1f}'.format(std))
    # show the result
    plt.plot(x_train, y_train, 'ro', markersize=3)
    plt.plot(x_test, y_test, 'k')
    plt.plot(x_test, y_test_pred)
    plt.xlabel('x')
    plt.ylabel('y')
    plt.title('Linear Regression')
    plt.legend(['train', 'test', 'pred'])
    plt.show()
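For completeness, these are the shape checks I would add right after y_test_pred = f(x_test); the print lines are hypothetical additions, not part of my original script.

# hypothetical shape checks after y_test_pred = f(x_test)
print(y_test.shape)                  # (m,)
print(y_test_pred.shape)             # (m, 1) with return Y.T, (m,) with return Y.squeeze()
print((y_test - y_test_pred).shape)  # this is the array evaluate() actually averages over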