我想基于代价函数实现次梯度下降和随机梯度下降,求出为数据找到理想分类器所需的迭代次数,以及最终的权重 (w) 和偏置 (b)。数据集是四维的。
我运行代码时遇到很多错误,请有人帮忙。
这是我在python中的代码
import numpy as np
# Step size applied to the updates at the bottom of the script.
learn_rate = 1
# Initial weights, stored as a 4x1 column of zeros.
w = np.zeros((4,1))
# Initial bias.
b = 0
# Number of rows the gradient loops below iterate over.
M = 1000
# NOTE(review): this is only a path string; nothing in the visible code loads
# the file, yet the functions below index it as data[i,:].
data = '/Users/labuew/Desktop/dataset.data'
#calculating the gradient
def cal_grad_w(data, w, b):
# Intended to accumulate Ym*Xm over rows 0..M-1 of `data` for which
# -Ym*(w*Xm+b) >= 0, and to return the accumulated value.
# NOTE(review): the indentation of this body was lost in the paste, so the
# exact nesting (in particular where `return` sits) cannot be confirmed.
for i in range (M):
sample = data[i,:]
# Last entry of the row is used as the label, the first four as features.
Ym = sample[-1]
Xm = sample[0:4]
# NOTE(review): Ym is a single entry of the row, yet it is indexed with [i];
# presumably plain Ym was intended -- confirm.
# NOTE(review): w*Xm is an element-wise product, not a dot product; with the
# (4,1) w defined at module level it broadcasts to more than one element, which
# cannot be used as an `if` condition.
if -Ym[i]*(w*Xm+b) >= 0:
tmp = 1.0
else:
tmp = 0
value = Ym[i]*Xm*tmp
# NOTE(review): `sum` is assigned here without ever being initialised in this
# function, so the first read raises UnboundLocalError; the name also shadows
# the builtin sum().
sum = sum +value
return sum
def cal_grad_b(data, w, b):
# Bias counterpart of cal_grad_w: walks rows 0..M-1 of `data`, tests
# -Ym*(w*Xm+b) >= 0 and accumulates a per-row value.
# NOTE(review): the indentation of this body was lost in the paste, so the
# exact nesting (in particular where `return` sits) cannot be confirmed.
for i in range (M):
sample = data[i,:]
# Last entry of the row is used as the label, the first four as features.
Ym = sample[-1]
Xm = sample[0:4]
# NOTE(review): w*Xm is an element-wise product, not a dot product -- confirm
# that a scalar w.x + b was intended.
if -Ym*(w*Xm+b) >= 0:
tmp = 1.0
else:
tmp = 0
# NOTE(review): `x` is not defined anywhere in the visible code, and Ym (a
# single entry of the row) is indexed with [i].
value = Ym[i]*x*tmp
# NOTE(review): `sum` is never initialised in this function before being read,
# and the name shadows the builtin sum().
sum = sum +value
return sum
# Script entry point: repeats the update until both computed values are zero,
# counting the passes in `counter`.
if __name__ == '__main__':
counter = 0
while 1:
counter +=1
dw = cal_grad_w(data, w, b)
db = cal_grad_b(data, w, b)
# NOTE(review): if dw is a multi-element array, `dw == 0` is an element-wise
# comparison whose truth value is ambiguous inside `and`.
if dw == 0 and db == 0:
break
w = w - learn_rate*dw
# NOTE(review): b is updated with dw although db was computed just above and is
# otherwise used only in the stop test; presumably db was intended -- confirm.
b = b - learn_rate *dw
# NOTE(review): indentation was lost, so it is unclear whether this prints once
# after the loop or on every pass.
print(counter,w,b)
答案 0 :(得分:0)
您是不是漏掉了用 numpy 加载数据这一步?
data = np.load('/Users/labuew/Desktop/dataset.data')
看起来您是在对一个字符串(文件路径)做数值运算。
还
Ym = sample[-1]
Xm = sample[0:4]
如果每行只有 4 个元素,那就意味着 Ym = Xm[3]?您的数据是不是秩为 2 的数组(二维数组),且第二维的长度为 5?注意切片 [0:4] 会包含第 4 个元素,即
z = [1,2,3,4]
z[0:4] = [1,2,3,4]
下面是我能给出的最佳猜测,其中对您的数据格式做了一些合理的假设。
import numpy as np
# Training configuration.
learn_rate = 1
M = 1000  # number of samples

# Initial model state: one row of four zero weights plus a scalar bias.
w = np.zeros((1, 4))
b = 0

# The real data set would be loaded along the lines of:
#   data = np.load('/Users/labuew/Desktop/dataset.data')
# Its format is unknown, so an all-ones placeholder with M rows of
# [x1, x2, x3, x4, y] stands in for it.
data = np.ones((M, 5))
#calculating the gradient
def cal_grad_w(data, w, b):
    """Return the weight-update direction for the perceptron cost.

    Every row ``[x1, x2, x3, x4, ..., y]`` of ``data`` that satisfies
    ``-y * (w . x + b) >= 0`` (misclassified, or exactly on the decision
    boundary) contributes ``y * x`` to the result.

    Note that this sum is the *negative* of the subgradient of
    ``sum(max(0, -y * (w . x + b)))`` with respect to ``w``.

    All rows of ``data`` are used; the row count is taken from ``data``
    itself rather than from a module-level constant.

    Args:
        data: 2-D array-like; the first four columns are the features and
            the last column is the label.
        w: Weight vector with exactly four elements (any shape that can be
            reshaped to ``(4,)``, e.g. ``(1, 4)``).
        b: Scalar bias (a size-1 array is accepted as well).

    Returns:
        ``numpy.ndarray`` of shape ``(1, 4)``. It is all zeros when no row
        satisfies the condition, including when ``data`` has no rows.
    """
    samples = np.asarray(data, dtype=float)
    features = samples[:, 0:4]
    labels = samples[:, -1]
    weights = np.asarray(w, dtype=float).reshape(4)
    bias = np.asarray(b, dtype=float).item()

    # Boolean mask of the rows that violate the margin condition.
    violating = -labels * (features @ weights + bias) >= 0

    # Summing an empty selection yields zeros, so no special case is needed.
    direction = (labels[violating, np.newaxis] * features[violating]).sum(axis=0)
    return direction.reshape(1, 4)
def cal_grad_b(data, w, b):
    """Return the bias-update direction for the perceptron cost.

    Every row ``[x1, x2, x3, x4, ..., y]`` of ``data`` that satisfies
    ``-y * (w . x + b) >= 0`` (misclassified, or exactly on the decision
    boundary) contributes its label ``y`` to the result.

    Note that this sum is the *negative* of the subgradient of
    ``sum(max(0, -y * (w . x + b)))`` with respect to ``b``.

    All rows of ``data`` are used; the row count is taken from ``data``
    itself rather than from a module-level constant.

    Args:
        data: 2-D array-like; the first four columns are the features and
            the last column is the label.
        w: Weight vector with exactly four elements (any shape that can be
            reshaped to ``(4,)``, e.g. ``(1, 4)``).
        b: Scalar bias (a size-1 array is accepted as well).

    Returns:
        The sum of the labels of the violating rows as a ``float``;
        ``0.0`` when there are none.
    """
    samples = np.asarray(data, dtype=float)
    features = samples[:, 0:4]
    labels = samples[:, -1]
    weights = np.asarray(w, dtype=float).reshape(4)
    bias = np.asarray(b, dtype=float).item()

    # Boolean mask of the rows that violate the margin condition.
    violating = -labels * (features @ weights + bias) >= 0
    return float(labels[violating].sum())
if __name__ == '__main__':
    # Batch subgradient descent on the perceptron cost
    # sum(max(0, -y * (w . x + b))). `counter` is the number of passes over
    # the data that were needed before no sample asked for a further update.
    # NOTE: the loop only terminates if such a (w, b) is reached; for data
    # that is not linearly separable it may run forever.
    counter = 0
    while True:
        counter += 1
        dw = cal_grad_w(data, w, b)
        db = cal_grad_b(data, w, b)
        # Stop once *every* component of dw is zero and db is zero.
        # (`dw.all() == 0` would already fire when a single component is 0.)
        if not dw.any() and db == 0:
            break
        # cal_grad_w / cal_grad_b return sum(y * x) and sum(y) over the
        # violating samples, i.e. the negative of the cost's subgradient, so
        # a descent step has to ADD them. Subtracting them walks uphill and
        # never converges.
        w = w + learn_rate * dw
        b = b + learn_rate * db
    # Report the iteration count and the final classifier once, after
    # convergence.
    print([counter, w, b])
因为我不知道您的数据格式,所以用了伪造的数据来代替。