Python:Logistic回归 - 将我的数据输入到我的算法中

时间:2015-11-27 15:46:22

标签: python logistic-regression

我正在尝试在python中实现逻辑回归算法,但我不习惯使用python。

我按照教程创建了算法:

import math

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from numpy import *

#matplotlib inline
sns.set(style='ticks', palette='Set2')

def logistic_func(theta, X):
    """Apply the logistic (sigmoid) function to the linear scores ``X.dot(theta)``.

    Returns ``1 / (1 + e**(-X.dot(theta)))``, evaluated element-wise.
    """
    scores = X.dot(theta)
    denominator = 1 + math.e ** (-scores)
    return float(1) / denominator
def log_gradient(theta, X, Y):
    """Return the prediction residual projected onto the columns of ``X``.

    The residual is ``logistic_func(theta, X) - np.squeeze(Y)``; the result
    has one entry per attribute (column of ``X``).
    """
    residual = logistic_func(theta, X) - np.squeeze(Y)
    return residual.T.dot(X)

def cost_func(theta, X, Y):
    """Return the mean binary cross-entropy of the predictions against ``Y``.

    Parameters:
        theta: coefficient vector passed to ``logistic_func``.
        X: feature matrix passed to ``logistic_func``.
        Y: labels; squeezed with ``np.squeeze`` before use.

    Returns:
        The mean over samples of
        ``-(Y * log(p) + (1 - Y) * log(1 - p))`` where
        ``p = logistic_func(theta, X)``.
    """
    log_func_v = logistic_func(theta, X)
    Y = np.squeeze(Y)
    # Contribution of positive samples (Y == 1).
    step1 = Y * np.log(log_func_v)
    # Contribution of negative samples (Y == 0).
    step2 = (1 - Y) * np.log(1 - log_func_v)
    final = -step1 - step2
    return np.mean(final)

def grad_desc(theta_values, X, Y, lr=.001, converge_change=.001):
    """Fit ``theta_values`` by gradient descent on the logistic cost.

    Parameters:
        theta_values: initial coefficient vector.
        X: feature matrix; standardized column-wise inside this function.
        Y: labels for the rows of ``X``.
        lr: learning rate applied to each gradient step.
        converge_change: iteration stops once the cost decreases by no more
            than this amount between two consecutive steps.

    Returns:
        A tuple ``(theta_values, cost_iter)`` where ``cost_iter`` is an array
        of ``[iteration, cost]`` rows, starting with iteration 0.
    """
    # Standardize each column to zero mean and unit standard deviation.
    X = (X - np.mean(X, axis=0)) / np.std(X, axis=0)
    # Record the cost before any update as iteration 0.
    cost_iter = []
    cost = cost_func(theta_values, X, Y)
    cost_iter.append([0, cost])
    change_cost = 1
    i = 1
    while change_cost > converge_change:
        old_cost = cost
        theta_values = theta_values - (lr * log_gradient(theta_values, X, Y))
        # The cost must be evaluated against the labels Y, not the features.
        cost = cost_func(theta_values, X, Y)
        cost_iter.append([i, cost])
        change_cost = old_cost - cost
        i += 1
    return theta_values, np.array(cost_iter)

def pred_values(theta, X, hard=True):
    """Predict class membership for the rows of ``X``.

    Parameters:
        theta: fitted coefficient vector.
        X: feature matrix; standardized column-wise inside this function.
        hard: when true, return 0/1 labels thresholded at 0.5; otherwise
            return the raw probabilities from ``logistic_func``.

    Returns:
        An array of 0/1 labels if ``hard`` is true, else the probabilities.
    """
    # Standardize each column to zero mean and unit standard deviation.
    X = (X - np.mean(X, axis=0)) / np.std(X, axis=0)
    pred_prob = logistic_func(theta, X)
    if hard:
        return np.where(pred_prob >= .5, 1, 0)
    return pred_prob

该算法应该预测3个类别。我可以读取数据:

# Load the dataset and encode the three class labels as the integers 0, 1, 2.
data = pd.read_csv('filepath')
data.loc[data["type"] == "type1", "type"] = 0
data.loc[data["type"] == "type2", "type"] = 1
# NOTE(review): the original mapped "type2" twice, leaving the third class
# unencoded; "type3" is assumed to be its label -- confirm against the CSV.
data.loc[data["type"] == "type3", "type"] = 2

# Pull each attribute column out as a plain Python list.
att1 = data["attribute1"].tolist()
att2 = data["attribute2"].tolist()
att3 = data["attribute3"].tolist()
att4 = data["attribute4"].tolist()
# Named ``labels`` rather than ``type`` so the builtin ``type`` is not shadowed.
labels = data["type"].tolist()

# Stack the attributes as rows, then transpose so each row of X is one sample.
combinedClassArray = np.array([att1, att2, att3, att4])

X = combinedClassArray.T
y = labels

# Number of samples in each encoded class.
type1 = data.loc[data["type"] == 0, "type"].count()
type2 = data.loc[data["type"] == 1, "type"].count()
type3 = data.loc[data["type"] == 2, "type"].count()

totalCount = type1 + type2 + type3
p = type1 + type2

我不确定如何将数据输入到算法中。我的做法偏离得很远吗?

1 个答案:

答案 0 :(得分:0)

您需要一个 main 函数:

def main():
    """Entry point: call the algorithm here with your data as parameters."""
    # your code here would be the calls to the algorithm with the parameters (your data)


if __name__ == "__main__":
    main()