我意识到降低学习率时模型有时会发散。我无法确切定位问题所在,但我怀疑这与我的权重初始化有关,因为这种情况并不是每次都发生。
根据均值为零的正态分布初始化权重矩阵
def createW(numberOfRows, numberOfColumns):
    """Return a (numberOfRows, numberOfColumns) weight matrix drawn from N(0, 0.1**2).

    A zero-mean Gaussian with small standard deviation keeps the initial
    weights close to zero.
    """
    # Same draw as np.random.normal(0, 0.1, shape), spelled with keyword args.
    return np.random.normal(loc=0, scale=0.1, size=(numberOfRows, numberOfColumns))
数据的形状为 (3, 2n),目标的形状为 (1, 2n),因此权重矩阵的形状应为 (1, 3)。
# Initialize the (1, 3) weight matrix from the zero-mean Gaussian above.
W = createW(numberOfRows=1, numberOfColumns=3)
def deltaBatchLearning(data, target, W, numberOfEpochs, alpha):
    """Train W with the batch delta (Widrow-Hoff) rule.

    Parameters
    ----------
    data : ndarray, shape (features, samples) — here (3, 2n); TODO confirm
    target : ndarray, shape (1, samples)
    W : ndarray, shape (1, features) — initial weights
    numberOfEpochs : int — number of full-batch gradient steps
    alpha : float — learning rate

    Returns
    -------
    (costHistory, W) : list of float costs (one per epoch) and the final weights.
    """
    costHistory = []
    for epoch in range(numberOfEpochs):
        # Residual before the update: (1, samples).
        error = np.dot(W, data) - target
        # Gradient step: W += -alpha * error @ data.T
        W = W - alpha * np.dot(error, data.T)
        # BUG FIX: the original appended np.sum(target - W@data), a sum of
        # *signed* residuals. Positive and negative errors cancel, so the
        # history can read ~0 even while W diverges. Use the standard
        # sum-of-squared-errors cost instead, which is >= 0 and only reads
        # small when the fit is actually good.
        costHistory.append(0.5 * np.sum((np.dot(W, data) - target) ** 2))
    return costHistory, W
# Training hyper-parameters.
numberofEpochs = 20
alpha = 0.0001

# Run batch delta-rule learning from the current W, then keep the
# per-epoch cost magnitudes for inspection/plotting.
jvec, W = deltaBatchLearning(data, target, W, numberofEpochs, alpha)
jvec = np.abs(np.asarray(jvec))