我有一个用于线性回归的神经网络:
class Neural(object):
def __init__(self,x,y,hiddensize1,output_size,cost=None,alpha=0.01,reg_coef=0.1):
std=0.1
self.x=x
self.y=y
self.hiddensize1=hiddensize1
self.output_size=output_size
self.cost=0
self.alpha=alpha
self.reg_coef=reg_coef
self.params = {}
self.params['W1'] = std * np.random.randn(x.shape[1], hiddensize1) # each w for each col
self.params['b1'] = np.zeros((1,hiddensize1))
self.params['W2'] = std * np.random.randn(hiddensize1, output_size)
if output_size==1:
self.params['W2'] = std * np.random.randn(hiddensize1)
self.params['b2'] = np.zeros((output_size))
def relu(self,input_x):
return(np.maximum(input_x, 0))
def relu_grad(self,input_x):
return(1. * (input_x > 0))
def nerual_cost(self, x,y):
N=x.shape[0] # rows
scores=0
#Make calculation tree
cal_one=np.dot(x,self.params['W1'])+self.params['b1']
act_one=Neural.relu(self,cal_one)
cal_two=np.dot(act_one,self.params['W2'])+self.params['b2']
scores=cal_two
scores=np.array(scores)
# scores=scores.reshape(scores.shape[0])
# This is then passed to cost with dcost/dscore=(1/N)*(diff)
#MSE
# y=y.reshape((1,y.shape[0]))
diff=scores-y
cost=(1/N)*np.sum(diff**2)+self.reg_coef*np.sum(np.square(self.params['W1'])+np.square(self.params['W2'])) # only reg w1,w2
data_cost=(1/N)*np.sum(diff**2)
reg_cost=self.reg_coef*np.sum(np.square(self.params['W1'])+np.square(self.params['W2']))
self.cost=(data_cost,reg_cost)
#grad
dcost_db2=2*(1/N)*(scores-y)
dcost_dw2=2/N*np.dot(diff,act_one)
#add contribution of reg to gradient
dcost_dw2+=2*self.reg_coef*self.params['W2']
dcost_db1=2/N*np.dot(diff,act_one)*Neural.relu_grad(self,act_one) # brach equally
#equivalent to dcost_dw2 * gradient of activation gate which is relu_grad
dcost_dw1=np.dot(x.T,dcost_db1) # brached and dot
dcost_dw1+=2*self.reg_coef*self.params['W1']
# update
self.params['W1']=self.params['W1']-self.alpha*dcost_dw1
self.params['b1']=self.params['b1']-self.alpha*dcost_db1
self.params['W2']=self.params['W2']-self.alpha*dcost_dw2
self.params['b2']=self.params['b2']-self.alpha*dcost_db2
return(self.cost)
可以看出,我的偏见2是output_size(在这种情况下为1)。但是当我计算梯度时,我得到一个(n,)向量,该向量具有批次中每个试验的梯度。然后当我将b2更新为
时 self.params['b2']=self.params['b2']-self.alpha*dcost_db2
我也将其转换为(n,)维向量。但是它应该保持(1,)正确,因为偏差是一个单独的术语,而我只是针对批次梯度更新该术语。如何将这个梯度列表转换为一个偏差变化?
也可以看到,我添加了一个L2正则化器。问题是当我运行它时,我的权重变为0。我得到的权重非常小。有人可以帮忙调试一下吗?