我一直在尝试使用python和Scipy.opt编写Andrew NG的Logistic回归问题,以优化该功能。但是,我收到一个VALUE错误,说我的尺寸不匹配。我已经尝试过用flatten()我的theta数组,因为scipy.opt不能与单列/行向量一起很好地工作,但是问题仍然存在。我还调整了数组的形状,但是代码没有响应并且显示了相同的错误。
请向正确的方向指出导致该问题的原因以及如何避免该问题。
感谢一百万!
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.optimize as opt
dataset = pd.read_csv("Students Exam Dataset.txt", names=["Exam 1", "Exam 2", "Admitted"])
print(dataset.head())
positive = dataset[dataset["Admitted"] == 1]
negative = dataset[dataset["Admitted"] == 0]
#Visualizing Dataset
plt.scatter(positive["Exam 1"], positive["Exam 2"], color="blue", marker="o", label="Admitted")
plt.scatter(negative["Exam 1"], negative["Exam 2"], color="red", marker="x", label="Not Admitted")
plt.xlabel("Exam 1 Score")
plt.ylabel("Exam 2 Score")
plt.title("Admission Graph")
plt.legend()
#plt.show()
#Preprocessing Data
dataset.insert(0, "x0", 1)
col = len(dataset.columns)
x = dataset.iloc[:,0:col-1].values
y = dataset.iloc[:,col-1:col].values
b = np.zeros([1,col-1])
m = len(y)
print(f"X Shape: {x.shape} Y Shape: {y.shape} B Shape: {b.shape}")
#Defining Functions
def hypothesis(x, y, b):
h = 1 / (1+np.exp(-x @ b.T))
return h
def cost(x, y, b):
first = (y.T @ np.log(hypothesis(x, y, b)))
second = (1-y).T @ np.log(1 - hypothesis(x, y, b))
j = (-1/m) * np.sum(first+second)
return j
def gradient(x, y, b):
grad_step = ((hypothesis(x, y, b) - y) @ x.T) / m
return b
#Output
initial_cost = cost(x, y, b)
print(f"\nInitial Cost = {initial_cost}")
final_cost = opt.fmin_tnc(func=cost, x0=b.flatten() , fprime=gradient, args=(x,y))
print(f"Final Cost = {final_cost} \nTheta = {b}")