IndexError:使用Pandas Dataframe的索引太多了

时间:2014-06-05 14:15:51

标签: python

好的,这是我的代码,用于多分类任务,使用一对一逻辑回归和一些正则化。在过去的两天里,我一直在努力解决这个问题,我不知道为什么它不起作用。

import pandas as pd
import numpy as np
import scipy.optimize as sp

Data = pd.read_csv(Location, 
            sep=';',
            dtype = np.float64,
            header = None)
X = Data.ix[:,0:1]
y = Data.ix[:,2:]
y.columns = [0]

def sigmoid(z) :
    g = 1.0/(1.0+np.exp(-z))
    return g

def lrCostFunction(theta, X, y, lambd):
    m , n = X.shape
    J=-(y.T.dot(np.log(sigmoid(X.dot(theta))))+(1-y).T.dot(np.log(1-sigmoid(X.dot(theta)))))/m 
    J = J + (theta.T.dot(theta)- np.power(theta[0,0],2))*(lambd)/(2*m); 
    return J.ix[0,0]

def Gradient(theta, X, y, lambd):
    m , n  = X.shape
    grad = X.T.dot(sigmoid(X.dot(theta))-y)/m
    grad.ix[1:(n-1),:] = grad.ix[1:(n-1),:] + lambd*theta.ix[1:(n-1),:]/m;
    return grad.values.flatten().tolist()    

def oneVsAll(X, y, num_labels, lambd):
    m , n = X.shape
    all_theta = pd.DataFrame(data = [[0 for col in range(n+1)] for row in range(num_labels)])
    ones = pd.DataFrame(data = [1 for i in range(X.shape[0])])
    X = pd.concat([ones,X], axis = 1)
    for c in range(0,num_labels-1) : 
        initial_theta = pd.DataFrame(data = [0 for i in range(n+1)])
        theta = sp.minimize(fun = lrCostFunction, 
                                    x0 = initial_theta, 
                                    args = (X,y,lambd),
                                    method = 'TNC',
                                    jac = Gradient)
        all_theta.ix[c,:] = theta
    return all_theta
oneVsAll(X, y, 4, 0.1)

它说:

---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-27-b18648b06674> in <module>()
      1 theta = pd.DataFrame(data = [0 for i in range(X.shape[1])])
----> 2 oneVsAll(X, y, 4, 0.1)

<ipython-input-26-ba0f7093d1f6> in oneVsAll(X, y, num_labels, lambd)
     10                                     args = (X,y,lambd),
     11                                     method = 'TNC',
---> 12                                     jac = Gradient)
     13         all_theta.ix[c,:] = theta
     14     return all_theta

/Users/jean-marcmarty/anaconda/lib/python2.7/site-packages/scipy/optimize/_minimize.pyc in minimize(fun, x0, args, method, jac, hess, hessp, bounds, constraints, tol, callback, options)
    381     elif meth == 'tnc':
    382         return _minimize_tnc(fun, x0, args, jac, bounds, callback=callback,
--> 383                              **options)
    384     elif meth == 'cobyla':
    385         return _minimize_cobyla(fun, x0, args, constraints, **options)

/Users/jean-marcmarty/anaconda/lib/python2.7/site-packages/scipy/optimize/tnc.pyc in _minimize_tnc(fun, x0, args, jac, bounds, eps, scale, offset, mesg_num, maxCGit, maxiter, eta, stepmx, accuracy, minfev, ftol, xtol, gtol, rescale, disp, callback, **unknown_options)
    396                                         offset, messages, maxCGit, maxfun,
    397                                         eta, stepmx, accuracy, fmin, ftol,
--> 398                                         xtol, pgtol, rescale, callback)
    399 
    400     funv, jacv = func_and_grad(x)

/Users/jean-marcmarty/anaconda/lib/python2.7/site-packages/scipy/optimize/tnc.pyc in func_and_grad(x)
    358     else:
    359         def func_and_grad(x):
--> 360             f = fun(x, *args)
    361             g = jac(x, *args)
    362             return f, g

<ipython-input-24-5f31e87e00da> in lrCostFunction(theta, X, y, lambd)
      2     m , n = X.shape
      3     J=-(y.T.dot(np.log(sigmoid(X.dot(theta))))+(1-y).T.dot(np.log(1-sigmoid(X.dot(theta)))))/m
----> 4     J = J + (theta.T.dot(theta)- np.power(theta[0,0],2))*(lambd)/(2*m);
      5     return J.ix[0,0]

IndexError: too many indices

1 个答案:

答案 0 :(得分:0)

我对数学一无所知,但错误来自这段代码:

theta[0,0]

Theta是一个1d数组,所以你需要将其作为theta [0]索引,除非你有一些理由期望它是2d?