Regularized Logistic Regression in Python

Date: 2016-10-24 16:51:39

Tags: python machine-learning scipy logistic-regression

I am using the following code for logistic regression with regularization in Python. It gives me 80% accuracy on the training set.

I am using the minimization method 'TNC'. With BFGS the result is 50%. What is the ideal method for gradient descent (the equivalent of fminunc in Octave)? How can I increase or decrease the number of iterations, and what is the default? Are there any other suggestions/approaches to improve performance?

The same algorithm in Octave with fminunc achieves 83% accuracy on the training set.
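
For reference, these are the regularized cost and gradient that CostFunc and Gradient in the listing below are intended to implement (the standard formulation, with h_theta(x) = sigmoid(theta^T x), the bias term theta_0 excluded from the penalty, and lambda corresponding to the variable l in the code):

    J(\theta) = \frac{1}{m}\sum_{i=1}^{m}\Big[-y^{(i)}\log h_\theta(x^{(i)}) - (1-y^{(i)})\log\big(1-h_\theta(x^{(i)})\big)\Big] + \frac{\lambda}{2m}\sum_{j=1}^{n}\theta_j^2

    \frac{\partial J}{\partial \theta_j} = \frac{1}{m}\sum_{i=1}^{m}\big(h_\theta(x^{(i)}) - y^{(i)}\big)\,x_j^{(i)} + \frac{\lambda}{m}\theta_j \qquad (j \ge 1;\ \theta_0 \text{ is not regularized})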

import numpy as np
import scipy.optimize as op
from sklearn import preprocessing
import matplotlib.pyplot as plt
from matplotlib import style
from pylab import scatter, show, legend, xlabel, ylabel
from numpy import loadtxt, where
from sklearn.preprocessing import PolynomialFeatures

def sigmoid(z):
    return 1/(1 + np.exp(-z))

def Gradient(theta,X,y,l):
    m,n = X.shape
    theta = theta.reshape((n,1))
    thetaR = theta[1:n,:]                      # bias term excluded from regularization
    y = y.reshape((m,1))
    h = sigmoid(X.dot(theta))
    nonRegGrad = ((np.sum((h-y)*X,axis=0))/m).reshape(n,1)
    reg = np.insert((l/m)*thetaR,0,0,axis=0)   # prepend 0 so the bias is not penalized
    grad = nonRegGrad + reg
    return grad.flatten()

def CostFunc(theta,X,y,l):
    m,n = X.shape
    # Reshape theta and y *before* computing h; otherwise X.dot(theta) is 1-D
    # and broadcasting against the (m,1) y silently produces an (m,m) matrix,
    # making the cost wrong even though the gradient is fine.
    theta = theta.reshape((n,1))
    y = y.reshape((m,1))
    thetaR = theta[1:n,:]
    h = sigmoid(X.dot(theta))
    cost = np.sum(np.multiply(-y,np.log(h)) - np.multiply((1-y),np.log(1-h)))/m
    reg = (l/(2*m))*np.sum(np.square(thetaR))
    J = cost + reg
    return J


def predict(theta,X):
    m,n = X.shape
    return np.round(sigmoid(X.dot(theta.reshape(n,1))))

data = np.loadtxt(open("ex2data2.txt","rb"),delimiter=",",skiprows=1)
nr,nc = data.shape
X=data[:,0:nc - 1]
#X=preprocessing.scale(X)
#X=np.insert(X,0,1,axis=1)
y= data[:,[nc - 1]]

# y is (m,1), so index its single column to get 1-D masks for plotting
pos = where(y[:, 0] == 1)
neg = where(y[:, 0] == 0)
scatter(X[pos, 0], X[pos, 1], marker='o', c='b')
scatter(X[neg, 0], X[neg, 1], marker='x', c='r')
xlabel('Microchip Test 1')
ylabel('Microchip Test 2')
legend(['Passed', 'Failed'])
show()
storeX = X
poly = PolynomialFeatures(6)   # degree-6 mapping; include_bias=True already adds the intercept column
X = poly.fit_transform(X)
#print(X.shape)
m, n = X.shape
initial_theta = np.zeros((n,1))
l = 1   # regularization strength lambda

# Compute and display initial cost and gradient for regularized logistic
# regression
#cost, grad = cost_function_reg(initial_theta, X, y, l)

#def decorated_cost(theta):
#    return cost_function_reg(theta, X, y, l)

#print fmin_bfgs(decorated_cost, initial_theta, maxfun=400)
print("Calling optimization")
Result = op.minimize(fun=CostFunc,
                     x0=initial_theta,
                     args=(X, y, l),
                     method='TNC',
                     jac=Gradient)
optimal_theta = Result.x
print(Result.x.shape)
print("optimal theta")
print(optimal_theta)
p = predict(optimal_theta, X)
accuracy = np.mean(p == y)
print("accuracy")
print(accuracy)
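
Regarding the iteration questions above, here is a minimal sketch of how an iteration budget can be passed through the options dictionary of scipy.optimize.minimize, reusing CostFunc, Gradient, X, y, l and initial_theta from the listing above. The option names come from the SciPy documentation for each method (TNC limits function evaluations via maxfun, BFGS limits iterations via maxiter); the defaults depend on the installed SciPy version, so the numbers below are placeholders to adjust:

# Sketch: controlling the optimizer's budget via the options dict.
# scipy.optimize.show_options('minimize', 'TNC') prints the available
# options and their defaults for a given method.
res_tnc = op.minimize(fun=CostFunc, x0=initial_theta, args=(X, y, l),
                      method='TNC', jac=Gradient,
                      options={'maxfun': 500, 'disp': True})    # max function evaluations
res_bfgs = op.minimize(fun=CostFunc, x0=initial_theta, args=(X, y, l),
                       method='BFGS', jac=Gradient,
                       options={'maxiter': 400, 'disp': True})  # max iterations
print(res_tnc.nfev)    # function evaluations TNC actually used
print(res_bfgs.nit)    # iterations BFGS actually used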

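As a rough sanity check on the attainable training accuracy (a hedged comparison, not part of the original code): scikit-learn's LogisticRegression fitted on the same degree-6 polynomial features. Its objective is roughly C * sum(log-loss) + 0.5 * ||w||^2, so C ~ 1/lambda is only an approximate correspondence, and sklearn handles the intercept differently, so the numbers will not match exactly:

# Hedged comparison with sklearn's built-in solver on the same features.
# fit_intercept=False because PolynomialFeatures already added the bias column;
# C = 1/l is only an approximate translation of the regularization strength.
from sklearn.linear_model import LogisticRegression

clf = LogisticRegression(C=1.0 / l, fit_intercept=False, solver='lbfgs', max_iter=1000)
clf.fit(X, y.ravel())
print("sklearn training accuracy:", clf.score(X, y.ravel()))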

0 Answers:

No answers yet