我实现了一个用来预测 XOR 门的神经网络。它有 1 个输入层(2 个节点)、1 个隐藏层(2 个节点)和 1 个输出层(1 个节点)。无论我怎么尝试,成本(cost)都在不断增加。我已经尝试把学习率设置为较小的值,但这只会让成本增加得更慢一些。如有任何提示,不胜感激。
import numpy as np
# XOR truth table. Examples are stored column-wise: after the transpose
# train_data has shape (2, 4) and labels has shape (1, 4).
xor_inputs = [
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
]
train_data = np.transpose(np.array(xor_inputs))
labels = np.array([[0, 1, 1, 0]])
def sigmoid(z, deriv=False):
    """Return the logistic sigmoid of ``z``, or its derivative.

    Args:
        z: Scalar or NumPy array of pre-activation values.
        deriv: When true, return the derivative of the sigmoid evaluated
            at ``z``, i.e. ``sigmoid(z) * (1 - sigmoid(z))``.

    Returns:
        The element-wise sigmoid (or its derivative), same shape as ``z``.
    """
    sig = 1 / (1 + np.exp(-z))
    if deriv:
        return np.multiply(sig, 1 - sig)
    return sig
# Parameters of the 2-2-1 network. Weights start as small random values so
# the two hidden units do not stay identical; biases start at zero.
w1 = np.random.randn(2, 2) * 0.01
b1 = np.zeros((2, 1))
w2 = np.random.randn(1, 2) * 0.01
b2 = np.zeros((1, 1))
iterations = 1000
lr = 0.1
for i in range(iterations):
    # Forward propagation: input -> hidden (a1) -> output (al).
    z1 = np.dot(w1, train_data) + b1
    a1 = sigmoid(z1)
    z2 = np.dot(w2, a1) + b2
    al = sigmoid(z2)

    # Binary cross-entropy averaged over the 4 training examples.
    cost = np.dot(labels, np.log(al).T) + np.dot(1 - labels, np.log(1 - al).T)
    cost = cost * (-1 / 4)
    cost = np.squeeze(cost)

    # Backward propagation.
    # NOTE(review): this line is intentionally left exactly as posted; it is
    # the defect the question is about. The cross-entropy gradient is
    # -(labels / al) + (1 - labels) / (1 - al), i.e. the two terms have
    # opposite signs. Adding them inside the negation makes dal negative for
    # every example, so each update pushes the output towards 1 and the cost
    # keeps growing regardless of the learning rate.
    dal = (-1 / 4) * (np.divide(labels, al) + np.divide(1 - labels, 1 - al))
    dz2 = np.multiply(dal, sigmoid(z2, deriv=True))
    dw2 = np.dot(dz2, a1.T)
    db2 = np.sum(dz2, axis=1, keepdims=True)
    da1 = np.dot(w2.T, dz2)
    dz1 = np.multiply(da1, sigmoid(z1, deriv=True))
    dw1 = np.dot(dz1, train_data.T)
    db1 = np.sum(dz1, axis=1, keepdims=True)

    # Gradient-descent parameter update.
    w1 = w1 - lr * dw1
    w2 = w2 - lr * dw2
    b1 = b1 - lr * db1
    b2 = b2 - lr * db2
    print(cost, '------', str(i))
答案 0 :(得分:1)
主要错误出在交叉熵的反向传播(backprop)部分(建议参考这些笔记(these notes)进行核对)。正确的公式如下:
# Derivative of the binary cross-entropy with respect to the output activation al.
dal = -labels / al + (1 - labels) / (1 - al)
我还对代码做了一些简化。下面是一个完整的可运行版本:
import numpy as np
# XOR truth table, one training example per column: train_data is (2, 4).
train_data = np.array([[0, 0], [0, 1], [1, 0], [1, 1]]).T
# XOR targets for the four input pairs above (1 only when the inputs differ).
labels = np.array([0, 1, 1, 0])
def sigmoid(z):
    """Return the element-wise logistic sigmoid ``1 / (1 + exp(-z))``.

    Args:
        z: Scalar or NumPy array of pre-activation values.

    Returns:
        Value(s) in the open interval (0, 1), same shape as ``z``.
    """
    return 1 / (1 + np.exp(-z))
# Parameters of the 2-2-1 network: small random weights, zero biases.
w1 = np.random.randn(2, 2) * 0.001
b1 = np.zeros((2, 1))
w2 = np.random.randn(1, 2) * 0.001
b2 = np.zeros((1, 1))

lr = 0.1
for i in range(1000):
    # Forward pass: input -> hidden (a1) -> output (a2).
    z1 = np.dot(w1, train_data) + b1
    a1 = sigmoid(z1)
    z2 = np.dot(w2, a1) + b2
    a2 = sigmoid(z2)

    # Mean binary cross-entropy over the training examples.
    cost = -np.mean(labels * np.log(a2) + (1 - labels) * np.log(1 - a2))

    # Backward pass. Versions #1 and #2 of da2 are algebraically identical;
    # both are the derivative of the cross-entropy with respect to a2.
    da2 = (a2 - labels) / (a2 * (1 - a2))  # version #1
    # da2 = -labels / a2 + (1 - labels) / (1 - a2)  # version #2
    dz2 = np.multiply(da2, a2 * (1 - a2))  # a2 * (1 - a2) is sigmoid'(z2)
    # Gradients are summed over the examples (the cost above is a mean), so
    # they are scaled by the number of examples relative to the mean-cost
    # gradient; this only rescales the effective learning rate.
    dw2 = np.dot(dz2, a1.T)
    db2 = np.sum(dz2, axis=1, keepdims=True)
    da1 = np.dot(w2.T, dz2)
    dz1 = np.multiply(da1, a1 * (1 - a1))  # a1 * (1 - a1) is sigmoid'(z1)
    dw1 = np.dot(dz1, train_data.T)
    db1 = np.sum(dz1, axis=1, keepdims=True)

    # Gradient-descent parameter update.
    w1 = w1 - lr * dw1
    w2 = w2 - lr * dw2
    b1 = b1 - lr * db1
    b2 = b2 - lr * db2
    print(i, cost)