您好我是python和机器学习编码的初学者,我正在尝试了解逻辑回归的内容,并让它从头开始在python中运行。我的任务是绘制和排序下面的逻辑回归的权重/系数,以便删除影响最小的特征。但是,虽然我添加了一个基本的情节,但它并没有帮助我对系数/ thetas进行排名。我最初尝试使用seaborn的sns.coefplot(),但这已被弃用。任何指向正确方向的帮助都将受到赞赏。
这也是使用威斯康宁乳腺癌数据集(https://www.kaggle.com/uciml/breast-cancer-wisconsin-data)
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
df = pd.read_csv("cancerdata.csv")
X = df.values[:,2:-1].astype('float64')
X = (X - np.mean(X, axis =0)) / np.std(X, axis = 0)
X = np.hstack([np.ones((X.shape[0], 1)),X])
X = MinMaxScaler().fit_transform(X)
Y = df["diagnosis"].map({'M':1,'B':0})
Y = np.array(Y)
X_train,X_test,Y_train,Y_test =
train_test_split(X,Y,test_size=0.25)
def Sigmoid(z):
return 1/(1 + np.exp(-z))
def Hypothesis(theta, x):
return Sigmoid(x @ theta)
def Cost_Function(X,Y,theta,m):
hi = Hypothesis(theta, X)
_y = Y.reshape(-1, 1)
J = 1/float(m) * np.sum(-_y * np.log(hi) - (1-_y) * np.log(1-hi))
return J
def Cost_Function_Derivative(X,Y,theta,m,alpha):
hi = Hypothesis(theta,X)
_y = Y.reshape(-1, 1)
J = alpha/float(m) * X.T @ (hi - _y)
return J
def Gradient_Descent(X,Y,theta,m,alpha):
new_theta = theta - Cost_Function_Derivative(X,Y,theta,m,alpha)
return new_theta
def Accuracy(theta):
correct = 0
length = len(X_test)
prediction = (Hypothesis(theta, X_test) > 0.5)
_y = Y_test.reshape(-1, 1)
correct = prediction == _y
my_accuracy = (np.sum(correct) / length)*100
print ('LR Accuracy %: ', my_accuracy)
def Logistic_Regression(X,Y,alpha,theta,num_iters):
m = len(Y)
for x in range(num_iters):
new_theta = Gradient_Descent(X,Y,theta,m,alpha)
theta = new_theta
if x % 100 == 0:
#print ('theta: ', theta)
Accuracy(theta)
x = np.linspace(-6, 6, 50)
y = -(theta[0] + theta[1]*x)/theta[2]
plt.plot(x, y)
plt.plot(theta)
plt.show()
ep = .012
initial_theta = np.random.rand(X_train.shape[1],1) * 2 * ep - ep
alpha = 0.5
iterations = 2000
Logistic_Regression(X_train,Y_train,alpha,initial_theta,iterations)