Question

我一直在尝试使用python和Scipy.opt编写Andrew NG的Logistic回归问题，以优化该功能。但是，我收到一个VALUE错误，说我的尺寸不匹配。我已经尝试过用flatten（）我的theta数组，因为scipy.opt不能与单列/行向量一起很好地工作，但是问题仍然存在。我还调整了数组的形状，但是代码没有响应并且显示了相同的错误。

请向正确的方向指出导致该问题的原因以及如何避免该问题。

感谢一百万！

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.optimize as opt

dataset = pd.read_csv("Students Exam Dataset.txt", names=["Exam 1", "Exam 2", "Admitted"])
print(dataset.head())

positive = dataset[dataset["Admitted"] == 1]
negative = dataset[dataset["Admitted"] == 0]

#Visualizing Dataset
plt.scatter(positive["Exam 1"], positive["Exam 2"], color="blue", marker="o", label="Admitted")
plt.scatter(negative["Exam 1"], negative["Exam 2"], color="red", marker="x", label="Not Admitted")
plt.xlabel("Exam 1 Score")
plt.ylabel("Exam 2 Score")
plt.title("Admission Graph")
plt.legend()
#plt.show()

#Preprocessing Data
dataset.insert(0, "x0", 1)
col = len(dataset.columns)
x = dataset.iloc[:,0:col-1].values
y = dataset.iloc[:,col-1:col].values
b = np.zeros([1,col-1])
m = len(y)
print(f"X Shape: {x.shape}   Y Shape: {y.shape}   B Shape: {b.shape}")

#Defining Functions
def hypothesis(x, y, b):
    h = 1 / (1+np.exp(-x @ b.T))
    return h

def cost(x, y, b):
    first = (y.T @ np.log(hypothesis(x, y, b)))
    second = (1-y).T @ np.log(1 - hypothesis(x, y, b))
    j = (-1/m) * np.sum(first+second)
    return j

def gradient(x, y, b):
    grad_step = ((hypothesis(x, y, b) - y) @ x.T) / m
    return b

#Output
initial_cost = cost(x, y, b)
print(f"\nInitial Cost = {initial_cost}")
final_cost = opt.fmin_tnc(func=cost, x0=b.flatten() , fprime=gradient, args=(x,y))
print(f"Final Cost = {final_cost} \nTheta = {b}")

数据集：Student Dataset.txt

使用Scipy.opt进行Logistic回归的优化错误

0 个答案: