"我必须再次提出这个问题,因为以前没有适当的帮助。"
我最近学习了批量梯度下降算法,并尝试用Python实现它。我给出的数据集是非随机的。当我运行下面的代码时,该过程在3次迭代后就收敛了,但是误差非常大。有人能指出正确的做法吗? 样本数据集:(原始数据集共600行。)
6203.75 1 173.8 43.6 0.0 183.0
6329.75 1 115.0 60.1 0.0 236.2
5830.75 1 159.5 94.1 21.0 275.8
4061.75 1 82.5 45.0 11.0 75.7
3311 1 185.5 46.1 4.0 0.0
4349.75 1 169.5 40.3 5.0 73.5
5695.25 1 138.5 68.9 6.0 204.2
5633.5 1 50.0 117.3 4.0 263.9
第一列是输出。第二列是常数值。其余各列是特征。
谢谢
import time

# Read the whitespace-separated data file. The `with` block closes the file
# handle as soon as the rows have been read.
with open('Data_trial.txt', 'r') as data:
    lines = data.readlines()

# One list of string tokens per non-blank line. Blank lines are dropped here:
# they would otherwise add an empty feature row with no matching output and
# shift every later sample out of alignment.
dataSet = [line.split() for line in lines if line.strip()]

# Column 0 of each row is the observed output; every remaining column
# (including the constant column) is treated as a feature.
original_output = [float(row[0]) for row in dataSet]
features = [[float(value) for value in row[1:]] for row in dataSet]

# Scratch lists that the training loop fills on every iteration.
predict = []
grad = []

# One weight per feature column, all starting at zero. The original
# five-element default is kept when no rows were read.
weights = [0] * (len(features[0]) if features else 5)

# NOTE(review): this step size is applied to a gradient that is summed over
# all samples; with unscaled features it may be too large for the updates to
# settle -- TODO confirm against the real data set.
learning_factor = 0.01
def prediction(predict, weights, original_output, features):
    """Append the linear-model prediction for every sample to ``predict``.

    For each sample index in ``original_output`` the prediction is the dot
    product of ``weights`` with that sample's feature row.

    Args:
        predict: List extended in place with one prediction per sample.
        weights: Current weight for each feature column.
        original_output: Observed outputs; only its length is used, to decide
            how many samples to predict.
        features: Feature rows, indexed in step with ``original_output``.

    Returns:
        The same ``predict`` list, so the result can also be used directly.

    Raises:
        IndexError: If ``features`` has fewer rows than ``original_output``
            or a row has more entries than ``weights``.
    """
    for count, _ in enumerate(original_output):
        row = features[count]
        # Index into weights (rather than zip) so a row that is wider than
        # the weight vector fails loudly instead of being truncated.
        predict.append(sum(weights[i] * value for i, value in enumerate(row)))
    print("predicted values", predict)
    return predict
def gradient(predict, grad, original_output, features):
    """Append the summed error gradient for every feature column to ``grad``.

    The entry for column ``c`` is the sum over all samples of
    ``(predict[i] - original_output[i]) * features[i][c]``. The sum is not
    divided by the number of samples.

    Args:
        predict: Predicted value per sample.
        grad: List extended in place with one entry per feature column.
        original_output: Observed value per sample.
        features: Feature rows, indexed in step with ``original_output``.

    Raises:
        IndexError: If ``predict`` or ``features`` has fewer entries than
            ``original_output``, or a row is shorter than the first row.
    """
    # Take the column count from the data itself instead of a module-level
    # weight list that is not part of this function's parameters. With no
    # rows there are no columns, so ``grad`` is left untouched.
    column_count = len(features[0]) if features else 0

    # The residuals do not depend on the column, so compute them once.
    residuals = [predict[i] - target for i, target in enumerate(original_output)]

    for column in range(column_count):
        grad.append(sum(residual * features[i][column]
                        for i, residual in enumerate(residuals)))
    print("Gradient values", grad)
def weights_update(grad, learning_factor, weights):
    """Move every weight one step against its gradient component, in place.

    Each ``weights[i]`` is reduced by ``learning_factor * grad[i]``.

    Args:
        grad: Gradient component per weight; entries beyond ``len(weights)``
            are ignored.
        learning_factor: Step-size multiplier applied to every component.
        weights: Weight list modified in place.

    Raises:
        IndexError: If ``grad`` has fewer entries than ``weights``. The check
            runs before any weight is changed, so a failed call never leaves
            ``weights`` partly updated.
    """
    if len(grad) < len(weights):
        raise IndexError("grad has fewer entries than weights")
    for index in range(len(weights)):
        weights[index] -= learning_factor * grad[index]
    print("Updated weights", weights)
if __name__ == "__main__":
    import math

    # Largest absolute gradient component at which training is treated as
    # converged. NOTE(review): chosen as a small default; tune for the data.
    tolerance = 1e-6

    while True:
        prediction(predict, weights, original_output, features)
        gradient(predict, grad, original_output, features)
        weights_update(grad, learning_factor, weights)

        # Float overflow yields inf/nan rather than an exception, so without
        # this check a diverging run would keep printing forever.
        if not all(math.isfinite(weight) for weight in weights):
            print("Weights are no longer finite; stopping. "
                  "Try a smaller learning_factor or scale the features.")
            break

        # Stop once every gradient component is negligibly small.
        if all(abs(component) < tolerance for component in grad):
            print("Converged")
            break

        # Pause so the per-iteration output can be followed.
        time.sleep(1)
        # Start the next iteration with empty scratch lists; the functions
        # above append to whatever lists they are given.
        predict = []
        grad = []
        print()