好的,这是我的代码,用于多分类任务,使用一对一逻辑回归和一些正则化。在过去的两天里,我一直在努力解决这个问题,我不知道为什么它不起作用。
import pandas as pd
import numpy as np
import scipy.optimize as sp
Data = pd.read_csv(Location,
sep=';',
dtype = np.float64,
header = None)
X = Data.ix[:,0:1]
y = Data.ix[:,2:]
y.columns = [0]
def sigmoid(z) :
g = 1.0/(1.0+np.exp(-z))
return g
def lrCostFunction(theta, X, y, lambd):
m , n = X.shape
J=-(y.T.dot(np.log(sigmoid(X.dot(theta))))+(1-y).T.dot(np.log(1-sigmoid(X.dot(theta)))))/m
J = J + (theta.T.dot(theta)- np.power(theta[0,0],2))*(lambd)/(2*m);
return J.ix[0,0]
def Gradient(theta, X, y, lambd):
m , n = X.shape
grad = X.T.dot(sigmoid(X.dot(theta))-y)/m
grad.ix[1:(n-1),:] = grad.ix[1:(n-1),:] + lambd*theta.ix[1:(n-1),:]/m;
return grad.values.flatten().tolist()
def oneVsAll(X, y, num_labels, lambd):
m , n = X.shape
all_theta = pd.DataFrame(data = [[0 for col in range(n+1)] for row in range(num_labels)])
ones = pd.DataFrame(data = [1 for i in range(X.shape[0])])
X = pd.concat([ones,X], axis = 1)
for c in range(0,num_labels-1) :
initial_theta = pd.DataFrame(data = [0 for i in range(n+1)])
theta = sp.minimize(fun = lrCostFunction,
x0 = initial_theta,
args = (X,y,lambd),
method = 'TNC',
jac = Gradient)
all_theta.ix[c,:] = theta
return all_theta
oneVsAll(X, y, 4, 0.1)
它说:
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-27-b18648b06674> in <module>()
1 theta = pd.DataFrame(data = [0 for i in range(X.shape[1])])
----> 2 oneVsAll(X, y, 4, 0.1)
<ipython-input-26-ba0f7093d1f6> in oneVsAll(X, y, num_labels, lambd)
10 args = (X,y,lambd),
11 method = 'TNC',
---> 12 jac = Gradient)
13 all_theta.ix[c,:] = theta
14 return all_theta
/Users/jean-marcmarty/anaconda/lib/python2.7/site-packages/scipy/optimize/_minimize.pyc in minimize(fun, x0, args, method, jac, hess, hessp, bounds, constraints, tol, callback, options)
381 elif meth == 'tnc':
382 return _minimize_tnc(fun, x0, args, jac, bounds, callback=callback,
--> 383 **options)
384 elif meth == 'cobyla':
385 return _minimize_cobyla(fun, x0, args, constraints, **options)
/Users/jean-marcmarty/anaconda/lib/python2.7/site-packages/scipy/optimize/tnc.pyc in _minimize_tnc(fun, x0, args, jac, bounds, eps, scale, offset, mesg_num, maxCGit, maxiter, eta, stepmx, accuracy, minfev, ftol, xtol, gtol, rescale, disp, callback, **unknown_options)
396 offset, messages, maxCGit, maxfun,
397 eta, stepmx, accuracy, fmin, ftol,
--> 398 xtol, pgtol, rescale, callback)
399
400 funv, jacv = func_and_grad(x)
/Users/jean-marcmarty/anaconda/lib/python2.7/site-packages/scipy/optimize/tnc.pyc in func_and_grad(x)
358 else:
359 def func_and_grad(x):
--> 360 f = fun(x, *args)
361 g = jac(x, *args)
362 return f, g
<ipython-input-24-5f31e87e00da> in lrCostFunction(theta, X, y, lambd)
2 m , n = X.shape
3 J=-(y.T.dot(np.log(sigmoid(X.dot(theta))))+(1-y).T.dot(np.log(1-sigmoid(X.dot(theta)))))/m
----> 4 J = J + (theta.T.dot(theta)- np.power(theta[0,0],2))*(lambd)/(2*m);
5 return J.ix[0,0]
IndexError: too many indices
答案 0 :(得分:0)
我对数学一无所知,但错误来自这段代码:
theta[0,0]
Theta是一个1d数组,所以你需要将其作为theta [0]索引,除非你有一些理由期望它是2d?