我正在尝试用 Python 实现逻辑回归算法,但我对 Python 还不太熟悉。
我按照教程创建了算法:
import math

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from numpy import *

#matplotlib inline
sns.set(style='ticks', palette='Set2')
def logistic_func(theta, X):
    """Return the logistic (sigmoid) of the linear scores ``X.dot(theta)``.

    Args:
        theta: Coefficient vector; must be compatible with ``X.dot(theta)``.
        X: Object exposing ``.dot`` (e.g. a NumPy array of samples).

    Returns:
        ``1 / (1 + exp(-X.dot(theta)))``, evaluated element-wise.
    """
    # np.exp is the vectorized form of ``math.e ** (...)``.
    return 1.0 / (1.0 + np.exp(-X.dot(theta)))
def log_gradient(theta, X, Y):
    """Return the (unnormalized) gradient used by the gradient-descent step.

    Args:
        theta: Current coefficient vector.
        X: Sample matrix passed through to ``logistic_func``.
        Y: Labels; squeezed with ``np.squeeze`` so a column vector also works.

    Returns:
        ``(logistic_func(theta, X) - Y).T.dot(X)`` -- the prediction residuals
        projected onto the columns of ``X`` (summed over samples, not averaged).
    """
    residual = logistic_func(theta, X) - np.squeeze(Y)
    return residual.T.dot(X)
def cost_func(theta, X, Y):
    """Return the mean binary cross-entropy of the logistic model.

    Args:
        theta: Coefficient vector.
        X: Sample matrix passed through to ``logistic_func``.
        Y: Labels (expected to be 0/1); squeezed with ``np.squeeze``.

    Returns:
        ``mean(-Y * log(h) - (1 - Y) * log(1 - h))`` where
        ``h = logistic_func(theta, X)``.
    """
    probs = logistic_func(theta, X)
    Y = np.squeeze(Y)
    positive_term = Y * np.log(probs)
    # Only two terms belong in the log-loss; the former extra
    # ``(1.5 - Y) * log(1.5 - h)`` term was spurious and skewed the cost.
    negative_term = (1 - Y) * np.log(1 - probs)
    return np.mean(-positive_term - negative_term)
def grad_desc(theta_values, X, Y, lr=.001, converge_change=.001):
    """Fit logistic-regression coefficients by batch gradient descent.

    Args:
        theta_values: Initial coefficient vector.
        X: Sample matrix; standardized column-wise inside this function.
        Y: Labels passed to ``cost_func`` and ``log_gradient``.
        lr: Learning rate applied to each gradient step.
        converge_change: Iteration stops once the cost decrease of a step
            is no larger than this value (including when the cost rises).

    Returns:
        A tuple ``(theta_values, cost_iter)`` where ``cost_iter`` is an array
        of ``[iteration, cost]`` rows, starting with iteration 0.
    """
    # Standardize every column to zero mean and unit variance.
    X = (X - np.mean(X, axis=0)) / np.std(X, axis=0)

    cost = cost_func(theta_values, X, Y)
    cost_iter = [[0, cost]]
    change_cost = 1
    i = 1
    while change_cost > converge_change:
        old_cost = cost
        theta_values = theta_values - lr * log_gradient(theta_values, X, Y)
        # The cost must be evaluated against the labels Y (not X).
        cost = cost_func(theta_values, X, Y)
        cost_iter.append([i, cost])
        change_cost = old_cost - cost
        i += 1
    return theta_values, np.array(cost_iter)
def pred_values(theta, X, hard=True):
    """Predict with fitted coefficients.

    Args:
        theta: Fitted coefficient vector.
        X: Sample matrix; standardized column-wise inside this function.
        hard: When true, return 0/1 labels thresholded at 0.5; otherwise
            return the raw probabilities.

    Returns:
        An array of 0/1 labels if ``hard`` is true, else the probabilities
        returned by ``logistic_func``.
    """
    # NOTE: standardization uses the statistics of the X passed in here,
    # mirroring what grad_desc does with its own input.
    X = (X - np.mean(X, axis=0)) / np.std(X, axis=0)
    pred_prob = logistic_func(theta, X)
    if hard:
        return np.where(pred_prob >= .5, 1, 0)
    return pred_prob
该算法应该预测 3 个类别。我可以读取数据:
# Load the dataset and encode the class names as integer labels 0, 1, 2.
data = pd.read_csv('filepath')
data.loc[data["type"] == "type1", "type"] = 0
data.loc[data["type"] == "type2", "type"] = 1
# Previously this line repeated "type2", so the third class was never encoded.
# NOTE(review): assumes the third class is spelled "type3" in the CSV -- confirm.
data.loc[data["type"] == "type3", "type"] = 2

# Column values as plain Python lists.
att1 = data["attribute1"].tolist()
att2 = data["attribute2"].tolist()
att3 = data["attribute3"].tolist()
att4 = data["attribute4"].tolist()
# NOTE: `type` shadows the builtin; the name is kept so existing references
# to it keep working.
type = data["type"].tolist()

# Stack the four attribute lists as rows, then transpose so that each row of
# X is one sample and each column one attribute.
combinedClassArray = np.array([att1, att2, att3, att4])
X = combinedClassArray.T
y = type

# Per-class sample counts (the third class is encoded as 2, not 1).
type1 = data.loc[data["type"] == 0, "type"].count()
type2 = data.loc[data["type"] == 1, "type"].count()
type3 = data.loc[data["type"] == 2, "type"].count()
totalCount = type1 + type2 + type3
p = type1 + type2
我不确定如何将数据传入算法。我的做法偏差很大吗?
答案 0 :(得分:0)
您需要一个主函数(main 函数):
def main():
    """Entry point of the script; place the calls to the algorithm here."""
    # your code here would be the calls to the algorithm with the parameters (your data)


# Run main() only when the file is executed as a script, not when imported.
if __name__ == "__main__":
    main()