Linear regression: cost function works on its own, but not inside a scipy.optimize function

Time: 2017-12-22 20:30:08

Tags: python machine-learning linear-regression

I am implementing regularized linear regression. The data is available here: https://onedrive.live.com/?cid=506A31CDF6E4A865&id=506A31CDF6E4A865%21107&parId=root&o=OneUp

My code is as follows:

import numpy as np 
import scipy.optimize as optimize
from scipy.io import loadmat

data = loadmat('ex5data1.mat')
X = data['X']
X = np.insert(X, 0, 1, axis=1)
y = data['y']
theta = np.ones((2, 1))

def cost_function(theta, X, y, reg_param):
    theta = np.matrix(theta)
    X = np.matrix(X)
    y = np.matrix(y)
    m = y.shape[0]
    h = X * theta
    error = np.power((h - y), 2)
    error = np.sum(error)
    term = error / (2*m)
    reg = (reg_param * np.sum(np.power(theta[1:, :], 2))) / (2*m)

    return term + reg

print "Cost function: \n %s" % (cost_function(theta, X, y, 1))

def cost_function_gradient(theta, X, y, reg_param):
    theta = np.matrix(theta)
    X = np.matrix(X)
    y = np.matrix(y)
    m = y.shape[0]

    grad = np.zeros((len(X[0]) + 1, 1))
    reg = np.multiply(theta[1:, :], reg_param/m)

    for j in xrange(len(X[0])):
        term = np.multiply((X * theta) - y, X[:, j + 1])
        term = np.sum(term) / m
        grad[j + 1, 0] = term + reg

    grad[0, 0] = np.sum(np.multiply((X*theta - y), X[:, 0])) / m

    return grad

print "Cost function gradient: \n %s" % (cost_function_gradient(theta, X, y, 1))

reg_param = 1
opt = optimize.fmin_cg(cost_function, theta, args=(X, y, reg_param), maxiter=200)
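
For reference, the quantities these two functions compute are the standard regularized squared-error cost and its gradient (written out below as a sketch in LaTeX; \lambda is reg_param, m is the number of training examples, and the bias term \theta_0 is not regularized):

    J(\theta) = \frac{1}{2m} \sum_{i=1}^{m} \left( h_\theta(x^{(i)}) - y^{(i)} \right)^2 + \frac{\lambda}{2m} \sum_{j=1}^{n} \theta_j^2

    \frac{\partial J}{\partial \theta_0} = \frac{1}{m} \sum_{i=1}^{m} \left( h_\theta(x^{(i)}) - y^{(i)} \right) x_0^{(i)}

    \frac{\partial J}{\partial \theta_j} = \frac{1}{m} \sum_{i=1}^{m} \left( h_\theta(x^{(i)}) - y^{(i)} \right) x_j^{(i)} + \frac{\lambda}{m} \theta_j \qquad (j \geq 1)

with h_\theta(x) = \theta^T x.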

My problem

Before I try to optimize the parameters to minimize the cost, cost_function() and cost_function_gradient() both work and print the correct results. However, as soon as I start the optimization, I get the following error:

Traceback (most recent call last):
  File "ex5.py", line 49, in <module>
    opt = optimize.fmin_cg(cost_function, theta, args=(X, y, reg_param), maxiter=200)
  File "/Users/Olly/anaconda2/lib/python2.7/site-packages/scipy/optimize/optimize.py", line 1177, in fmin_cg
    res = _minimize_cg(f, x0, args, fprime, callback=callback, **opts)
  File "/Users/Olly/anaconda2/lib/python2.7/site-packages/scipy/optimize/optimize.py", line 1228, in _minimize_cg
    gfk = myfprime(x0)
  File "/Users/Olly/anaconda2/lib/python2.7/site-packages/scipy/optimize/optimize.py", line 292, in function_wrapper
    return function(*(wrapper_args + args))
  File "/Users/Olly/anaconda2/lib/python2.7/site-packages/scipy/optimize/optimize.py", line 688, in approx_fprime
    return _approx_fprime_helper(xk, f, epsilon, args=args)
  File "/Users/Olly/anaconda2/lib/python2.7/site-packages/scipy/optimize/optimize.py", line 622, in _approx_fprime_helper
    f0 = f(*((xk,) + args))
  File "/Users/Olly/anaconda2/lib/python2.7/site-packages/scipy/optimize/optimize.py", line 292, in function_wrapper
    return function(*(wrapper_args + args))
  File "ex5.py", line 17, in cost_function
    h = X * theta
  File "/Users/Olly/anaconda2/lib/python2.7/site-packages/numpy/matrixlib/defmatrix.py", line 309, in __mul__
    return N.dot(self, asmatrix(other))
ValueError: shapes (12,2) and (1,2) not aligned: 2 (dim 1) != 1 (dim 0)

So when fmin_cg() runs, the dimensions of X and theta are somehow changed/used differently than in my direct calls. I tried converting X, y, and theta to matrices before applying fmin_cg(), but that did not change anything.
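
I can reproduce the shape mismatch with a minimal check (nothing beyond NumPy assumed), which suggests the optimizer passes theta around as a flat 1-D array:

import numpy as np

theta = np.ones((2, 1))       # my column vector, shape (2, 1)
flat = theta.ravel()          # flattened copy, shape (2,)
print np.matrix(theta).shape  # (2, 1) -> X * theta works
print np.matrix(flat).shape   # (1, 2) -> np.matrix turns 1-D input into a row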

Can someone explain why it works outside the fmin_cg() function but not inside it?

How do I change my code to make it work?

Thanks in advance.

1 Answer:

Answer 0 (score: 0):

The problem is in defining the parameter array as

theta = np.ones((2, 1))

which shows up in the error:

ValueError: shapes (12,2) and (1,2) not aligned: 2 (dim 1) != 1 (dim 0)

which describes a dimension mismatch between the theta array and the X array. What happens is that fmin_cg flattens the initial guess to a 1-D array before passing it to the objective function, and np.matrix then promotes that flat (2,) array to a (1, 2) row vector, so X * theta no longer lines up. This can be fixed by defining theta as:

theta = np.ones(2)

This creates exactly the same contents as np.ones((2, 1)), just reshaped to a flat 1-D array, which is the shape fmin_cg works with internally. The only other change the code above needs is to transpose theta wherever it is multiplied, so that the matrix products stay valid. Here is the fully working code:

import numpy as np 
import matplotlib.pyplot as plt 
import scipy.optimize as optimize 
from scipy.io import loadmat

data = loadmat('ex5data1.mat')
X = data['X']
X = np.insert(X, 0, 1, axis=1)
y = data['y']
theta = np.ones(2)

def cost_function(theta, X, y, reg_param):
    theta = np.matrix(theta)
    X = np.matrix(X)
    y = np.matrix(y)
    m = float(y.shape[0])
    h = X * theta.T
    error = np.power((h - y), 2)
    error = np.sum(error)
    term = error / (2*m)
    # theta is now a (1, n) row, so transpose before dropping theta_0
    reg = (reg_param * np.sum(np.power(theta.T[1:, :], 2))) / (2*m)

    return term + reg

print "Cost function: \n %s" % (cost_function(theta, X, y, 1))

def cost_function_gradient(theta, X, y, reg_param):
    theta = np.matrix(theta)
    X = np.matrix(X)
    y = np.matrix(y)
    m = float(y.shape[0])

    grad = np.zeros((len(X[0]) + 1, 1))
    reg = np.multiply(theta.T[1:, :], reg_param/m)

    for j in xrange(len(X[0])):
        term = np.multiply((X * theta.T) - y, X[:, j + 1])
        term = np.sum(term) / m
        grad[j + 1, 0] = term + reg

    grad[0, 0] = np.sum(np.multiply((X*theta.T - y), X[:, 0])) / m

    return grad

print "Cost function gradient: \n %s" % (cost_function_gradient(theta, X, y, 1))

reg_param = 0
opt = optimize.fmin_cg(cost_function, theta, args=(X, y, reg_param), maxiter=200)
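
Since cost_function_gradient() is already written, you could also hand fmin_cg the analytic gradient via its fprime argument instead of letting it approximate the derivatives numerically. A sketch (the wrapper gradient_flat is my addition, needed only to flatten the (n, 1) column into the 1-D array fmin_cg expects):

def gradient_flat(theta, X, y, reg_param):
    # fprime must return a 1-D array with one entry per parameter
    return np.asarray(cost_function_gradient(theta, X, y, reg_param)).ravel()

opt = optimize.fmin_cg(cost_function, theta, fprime=gradient_flat,
                       args=(X, y, reg_param), maxiter=200)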