我正在尝试通过python中的梯度下降方法实现线性回归。 我的结果有误
这是一种获取要素空间的方法
def quadratic (arg_row) :
poly_arr = []
for i in range(len(arg_row)):
poly_arr.append([float(arg_row[i][0])])
poly_arr[i].append(float(arg_row[i][1]))
poly_arr[i].append(float(arg_row[i][3]))
poly_arr[i].append(float(arg_row[i][0])**2)
poly_arr[i].append(float(arg_row[i][1])**2)
poly_arr[i].append(float(arg_row[i][1])*float(arg_row[i][0]))
return poly_arr
我只是在做二次空间
这是我计算梯度下降的方法
def descent(features,train) :
m_val_avg = []
m_val_arr = []
m_val_avg_arr = []
weights =[]
rate = 0.001
precision = 0.000001
previous_step_size = 1
max_iters = 1000
iters = 0
for i in range(len(features[0])) : #weights initalization
weights.append(1)
print(weights)
while iters < max_iters :
m_val_avg_arr = []
my_new_list = []
current_weights = list(weights)
error_result = error_cal(features,current_weights,train)
square_errored = square_error(features,current_weights,train)
#print(error_result)
for j in range(len(features[0])) :
for i in range(len(features)):
m_val = error_result[i]*(-features[i][j])
m_val_arr.append(m_val)
m_val_avg = sum(m_val_arr)/len(m_val_arr)
m_val = []
m_val_avg_arr.append(m_val_avg)
#print(m_val_avg_arr)
for i in range(len(features[0])):
my_new_list = np.array(m_val_avg_arr)
my_new_list = my_new_list*rate
#print(my_new_list)
m_grad = current_weights[i]-my_new_list[i]
weights[i] = m_grad
iters = iters+1
print(iters)
print(square_errored)
这是我用于计算每次迭代的改进误差差异的方法
def error_cal(features, weights, train) :
arr_actcal = []
diff_cal_result =[]
for j in range(len(features)) :
for i in range(len(features[0])) :
act_cal = features[j][i]*weights[i]
arr_actcal.append(act_cal)
diff_cal = ((train[j][2])-sum(arr_actcal))
diff_cal_result.append(diff_cal)
arr_actcal =[]
return diff_cal_result
这是我用于计算每次迭代的平方误差的方法。
def square_error (features, weights, train) :
arr_actcal_squ = []
diff_cal_result_squ =[]
for j in range(len(features)) :
for i in range(len(features[0])) :
act_cal_squ = features[j][i]*weights[i]
arr_actcal_squ.append(act_cal_squ)
diff_cal_squ = ((train[j][2])-sum(arr_actcal_squ))
diff_cal_result_squ.append(diff_cal_squ)
new_square_error = np.array(diff_cal_result_squ)**2
new_square_error_avg = sum(new_square_error/len(new_square_error))
arr_actcal_squ =[]
return new_square_error_avg
我正在做1000次梯度下降。并且在每次迭代中,我都在改进错误计算。 根据该结果,我正在计算每次迭代的均方误差。 理想情况下,平方误差应继续减小,但对我而言,误差先增大后减小。我不知道我的代码在哪里出问题。
这是我要为其计算线性回归的数据
6.4432,9.6309,50.9155,1
3.7861,5.4681,29.9852,1
8.1158,5.2114,42.9626,1
5.3283,2.3159,24.7445,1
3.5073,4.8890,27.3704,1
9.3900,6.2406,51.1350,1
8.7594,6.7914,50.5774,1
5.5016,3.9552,30.5206,1
6.2248,3.6744,31.7380,1
5.8704,9.8798,49.6374,1
2.0774,0.3774,10.0634,1
3.0125,8.8517,38.0517,1
4.7092,9.1329,43.5320,1
2.3049,7.9618,33.2198,1
8.4431,0.9871,31.1220,1
1.9476,2.6187,16.2934,1
2.2592,3.3536,19.3899,1
1.7071,6.7973,28.4807,1
2.2766,1.3655,13.6945,1
4.3570,7.2123,36.9220,1
3.1110,1.0676,14.9160,1
9.2338,6.5376,51.2371,1
4.3021,4.9417,29.8112,1
1.8482,7.7905,32.0336,1
9.0488,7.1504,52.5188,1
9.7975,9.0372,61.6658,1
4.3887,8.9092,42.2733,1
1.1112,3.3416,16.5052,1
2.5806,6.9875,31.3369,1
4.0872,1.9781,19.9475,1
5.9490,0.3054,20.4239,1
2.6221,7.4407,32.6062,1
6.0284,5.0002,35.1676,1
7.1122,4.7992,38.2211,1
2.2175,9.0472,36.4109,1
1.1742,6.0987,25.0108,1
2.9668,6.1767,29.8861,1
3.1878,8.5944,37.9213,1
4.2417,8.0549,38.8327,1
5.0786,5.7672,34.4707,1
这是我的驱动程序
#driver
result_fea = quadratic(data_row_str)
result = descent(result_fea,data_row_str)
# print("-----------------------------------")
print(result)
我知道这里涉及很多数学。如果某些事情没有意义,我可以给出我正在使用的所有公式。预先谢谢