在Octave成功解决问题后,我试图将Octave代码转换为Python。
这是我的代码,它没有使用fmin_cg。我看到人们在Python中使用各种各样的最小化器,但我会坚持使用相同名称的那个。
代码:
# Not yet working
#
#
# Reimplementation of Octave code
#
#
# X = [ones(m, 1) X]
#
# [theta] = trainLinearReg([ones(m, 1) X], y, lambda);
#
# initial_theta = zeros(size(X, 2), 1);
# costFunction = @(t) linearRegCostFunction(X, y, t, lambda);
# options = optimset('MaxIter', 200, 'GradObj', 'on');
# % Minimize using fmincg
# theta = fmincg(costFunction, initial_theta, options);
def lr_cost(X, y, theta, lambda_):
m = X.shape[0]
cost = np.sum(np.power(((X * theta.T) - y), 2))/2/m + np.sum(lambda_/2/m * np.power(theta[:, 0:-1], 2))
return cost
def lr_grad(X, y, theta, lambda_):
m = X.shape[0]
grad = 1/m * X.T * (X*theta.T - y) + lambda_/m * np.vstack((0,theta[:, 0:-1]))
return grad.flatten()
def train_lr(X, y, lambda_):
from scipy.optimize import fmin_cg
initial_theta = np.zeros((X.shape[1], 1))
#initial_theta = np.zeros((1, 2))
print(f'initial_theta {initial_theta.shape}')
print(f'X {X.shape}\n{X}')
print(f'y {y.shape}\n{y}')
theta = fmin_cg(lr_cost, x0=initial_theta, args=(X, y, lambda_), maxiter=200)
#theta = fmin_cg(lr_cost, x0=initial_theta, fprime=lr_grad , args=(X, y, lambda_), maxiter=200)
print(f'theta {theta}')
# return theta
lambda_ = 0
train_lr(np.insert(X, 0 , 1, axis=1), y, lambda_)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-171-6fc8b0780a49> in <module>()
39
40 lambda_ = 0
---> 41 train_lr(np.insert(X, 0 , 1, axis=1), y, lambda_)
<ipython-input-171-6fc8b0780a49> in train_lr(X, y, lambda_)
35 print(f'y {y.shape}\n{y}')
36
---> 37 theta = fmin_cg(lr_cost, x0=initial_theta, args=(X, y, lambda_), maxiter=200)
38 print(f'theta {theta}')
39
/Users/apple/anaconda/lib/python3.6/site-packages/scipy/optimize/optimize.py in fmin_cg(f, x0, fprime, args, gtol, norm, epsilon, maxiter, full_output, disp, retall, callback)
1175 'return_all': retall}
1176
-> 1177 res = _minimize_cg(f, x0, args, fprime, callback=callback, **opts)
1178
1179 if full_output:
/Users/apple/anaconda/lib/python3.6/site-packages/scipy/optimize/optimize.py in _minimize_cg(fun, x0, args, jac, callback, gtol, norm, eps, maxiter, disp, return_all, **unknown_options)
1226 else:
1227 grad_calls, myfprime = wrap_function(fprime, args)
-> 1228 gfk = myfprime(x0)
1229 k = 0
1230 xk = x0
/Users/apple/anaconda/lib/python3.6/site-packages/scipy/optimize/optimize.py in function_wrapper(*wrapper_args)
290 def function_wrapper(*wrapper_args):
291 ncalls[0] += 1
--> 292 return function(*(wrapper_args + args))
293
294 return ncalls, function_wrapper
/Users/apple/anaconda/lib/python3.6/site-packages/scipy/optimize/optimize.py in approx_fprime(xk, f, epsilon, *args)
686
687 """
--> 688 return _approx_fprime_helper(xk, f, epsilon, args=args)
689
690
/Users/apple/anaconda/lib/python3.6/site-packages/scipy/optimize/optimize.py in _approx_fprime_helper(xk, f, epsilon, args, f0)
620 """
621 if f0 is None:
--> 622 f0 = f(*((xk,) + args))
623 grad = numpy.zeros((len(xk),), float)
624 ei = numpy.zeros((len(xk),), float)
/Users/apple/anaconda/lib/python3.6/site-packages/scipy/optimize/optimize.py in function_wrapper(*wrapper_args)
290 def function_wrapper(*wrapper_args):
291 ncalls[0] += 1
--> 292 return function(*(wrapper_args + args))
293
294 return ncalls, function_wrapper
<ipython-input-171-6fc8b0780a49> in lr_cost(X, y, theta, lambda_)
18 def lr_cost(X, y, theta, lambda_):
19 m = X.shape[0]
---> 20 cost = np.sum(np.power(((X * theta.T) - y), 2))/2/m + np.sum(lambda_/2/m * np.power(theta[:, 0:-1], 2))
21 return cost
22
ValueError: operands could not be broadcast together with shapes (2,) (1,12)
lr_cost和lr_grad使用:
J = lr_cost(np.insert(X, 0 , 1, axis=1), y, theta, 1)
grad = lr_grad(np.insert(X, 0 , 1, axis=1), y, theta, 1)
J, grad
输出:
(303.99319222026429, matrix([[ -15.30301567, 598.25074417]]))
有没有更好的方法来调试它?
答案 0 :(得分:0)
---> 20 cost = np.sum(np.power(((X * theta.T) - y), 2))/2/m + np.sum(lambda_/2/m * np.power(theta[:, 0:-1], 2))
ValueError: operands could not be broadcast together with shapes (2,) (1,12)
这意味着对于其中一个操作*
,/
或`+&#39;,一个参数是1d,包含2个元素(shape(2,)),另一个参数是是2d(1,12)。
在numpy
数组中,数组可以是0或1d或更高。在Octave / MATLAB中,2d是最小的。 numpy
可以广播。 Octave复制了那个。但是numpy
可以在开始时自动添加维度,例如把a(2,)变成(1,2)。但是对于这对,这没有任何帮助。 A(2,1)可以用(1,12)广播以形成(2,12)结果。
所以你/我们需要打印一些形状,或者从之前的代码中推断其他形式的各种元素的形状。
如果我计算了(),sum
都没有指定轴,所以它们对所有值求和,产生标量。所以+
不是问题所在。