我正在研究一个简单的线性回归预测。它是单变量线性回归,即y = mx + c。我的问题数据集只有一个自变量和因变量,分别是学生分数,分别研究了几个小时。我已经估计了系数(m,c)。现在,为了了解我的成本函数,我正在尝试使用CONTOUR PLOT将两个因子(m,c)绘制为关于成本函数的 Theta1 和 Theta2 。问题是这是不合适的。我在这里想念什么?这是我的数据集的link,这是我的轮廓图的代码段。
import numpy as np
import matplotlib.pyplot as plt
n = len(fobj.Hours)
x_val=np.array(fobj.Hours)
y_val=np.array(fobj.Scores)
x_val=x_val.reshape(-1,1)
y_val=y_val.reshape(-1,1)
alpha = 0.0001
a_0 = np.zeros((n,1))
a_1 = np.zeros((n,1))
N = 1000
theta1list = [0]
theta2list = [0]
mean_sq_er=0
epochs = 10000
while(epochs >0):
y_cap = a_0 + a_1 * x_val
error = y_cap - y_val
mean_sq_er = mean_sq_er+(error**2)
mean_sq_er = mean_sq_er/n
a_0 = a_0 - alpha * 2 * np.sum(error)/n
a_1 = a_1 - alpha * 2 * np.sum(error * x_val)/n
theta1list.append(a_0[0][0])
theta2list.append(a_1[0][0])
epochs -= 1
theta1_true = a_0[0][0]
theta2_true = a_1[0][0]
x=np.array(fobj.Hours)
y=theta1_true+theta2_true*x
def cost_func(theta1,theta2):
theta1 = np.atleast_3d(np.asarray(theta1))
theta2 = np.atleast_3d(np.asarray(theta2))
return np.average((y-hypothesis(x, theta1,theta2))**2, axis=2)
def hypothesis(x, theta1,theta2):
return (theta1+theta2*x)
J = [cost_func(theta1list[0],theta2list[0])[0]]
theta_array1=np.array(theta1list)
theta_array2=np.array(theta2list)
indx=0
while(indx<1000):
J.append(cost_func(theta_array1[indx],theta_array2[indx]))
indx+=1
print(r2_score(y_val,y_cap))
# The plot: LHS is the data, RHS will be the cost function.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10,6.15))
# First construct a grid of (theta0, theta1) parameter pairs and their
# corresponding cost function values.
theta0_grid=np.linspace(-3, 7, 100)
theta1_grid=np.linspace(-2, 22, 100)
J_grid = cost_func(theta0_grid[np.newaxis,:,np.newaxis],
theta1_grid[:,np.newaxis,np.newaxis])
# A labeled contour plot for the RHS cost function
X, Y = np.meshgrid(theta0_grid, theta1_grid)
contours = ax.contour(X, Y, J_grid, 30)
ax.clabel(contours)
# The target parameter values indicated on the cost function contour plot
ax.scatter([theta1_true]*2,[theta2_true]*2,s=[10,10], color=['k','w'])
# Annotate the cost function plot with colored points indicating the parameters chosen and red arrows
# indicating the steps down the gradient.
N=1000
for j in range(1,N):
ax.annotate('', xy=(theta1list[j], theta2list[j]), xytext=(theta1list[j-1], theta2list[j-1]),
arrowprops={'arrowstyle': '->', 'color': 'r', 'lw': 1},
va='center', ha='center')
ax.scatter(theta1list,theta2list, s=40, lw=0)
# Labels, titles and a legend.
ax.set_xlabel(r'$\theta_0$')
ax.set_ylabel(r'$\theta_1$')
ax.set_title('Cost function')
plt.show()`
fobj 是我的数据框,这就是我的Contour plot of linear regression with gradient descent的显示方式。