我试图用梯度下降来拟合一个分段函数:当 x < 0 时 g(x) = 1,当 x ≥ 0 时 g(x) = 2x + 1。假设函数(模型)为 c·sigmoid(a·x + b) + d,其中 a、b、c、d 是待学习的参数。损失函数选用平方误差。但是,下面这段代码得到的结果显然不正确。
import math
cur_a = 1
cur_b = 3
cur_c = 3
cur_d = 3
rate = 0.0000000000001
def sigmoid(x):
return 1 / (1 + math.exp(-x))
data = [[-1, 1.0], [-0.75, 1.0], [-0.5, 1.0], [-0.25, 1], [0, 1], [0.25, 1.5], [0.5, 2], [0.75, 2.5], [1, 3], [1.25, 3.5], [1.5, 4]]
def gradienta():
result = 0
for i in range(len(data)):
ele = data[i]
result += 2 * (cur_c * sigmoid(cur_a * ele[0] + cur_b) + cur_d - ele[1]) * cur_c * sigmoid(
cur_a * ele[0] + cur_b) * (1 - sigmoid(cur_a * ele[0] + cur_b)) * ele[0]
return result/len(data)
def gradientb():
result = 0
for i in range(len(data)):
ele = data[i]
result += 2 * (cur_c * sigmoid(cur_a * ele[0] + cur_b) + cur_d - ele[1]) * cur_c * sigmoid(
cur_a * ele[0] + cur_b) * (1 - sigmoid(cur_a * ele[0] + cur_b))
return result/len(data)
def gradientc():
result = 0
for i in range(len(data)):
ele = data[i]
result += 2 * (cur_c * sigmoid(cur_a * ele[0] + cur_b) + cur_d - ele[1]) * sigmoid(
cur_a * ele[0] + cur_b)
return result/len(data)
def gradientd():
result = 0;
for i in range(len(data)):
ele = data[i]
result += 2 * cur_c * sigmoid(cur_a * ele[0] + cur_b) + cur_d - ele[1]
return result/len(data)
iters = 0
max_iters = 10000
precision = 0.00001
step_size = 1
while step_size > precision and iters < max_iters:
# save previous location
prev_a = cur_a
prev_b = cur_b
prev_c = cur_c
prev_d = cur_d
# move to new location according to steepest descent
cur_a = cur_a - rate * gradienta()
cur_b = cur_a - rate * gradientb()
cur_c = cur_a - rate * gradientc()
cur_d = cur_a - rate * gradientd()
# update step_size and iters
step_size = ((prev_a - cur_a) ** 2 + (prev_b - cur_b) ** 2 + (prev_c - cur_c) ** 2 + (prev_d - cur_d) ** 2) ** 0.5
iters += 1
print(cur_a)
print(cur_b)
print(cur_c)
print(cur_d)
虽然d的值似乎合理,但我希望a,b和c的值在5和10左右。但是,代码的输出是
1.0000000000000535
1.0000000000000615
1.0000000000003113
1.0000000000001017
其中每个值对应一个参数。这不是一个很好的匹配。
关于哪里出了问题的任何建议?