神经网络不产生结果

时间:2017-11-22 14:48:32

标签: python numpy machine-learning neural-network gradient-descent

这是我的项目。它包括:m = 24,其中 m 是训练样本的数量;3个隐藏层和1个输入层;3组权重矩阵依次连接各层;每个样本为 1x38 的特征向量,标签为 y(1x1)。

import numpy as np

# Training inputs: 24 samples x 38 binary features, shape (24, 38).
x = np.array([
[1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0],
[1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0],
[1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0],
[1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0],
[0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0],
[1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0],
[1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0],
[1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0],
[1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0],
[1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1],
[1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1],
[1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0],
[1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0],
[1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0],
[0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0],
[1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1],
[1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0],
[0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0],
[1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0],
[0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0],
[1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0],
[1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0],
[1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0],
[0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0]])

# Binary target for each sample; the transpose gives shape (24, 1)
# so it lines up with the network's single output unit.
y = np.array([
    [1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1,0]]).T

# Layer weights, uniform in [0, 1).
# NOTE(review): an all-positive init saturates every sigmoid from the
# first iteration; a zero-centered init (e.g. uniform in [-ep, ep])
# behaves much better.
w = np.random.random((38, 39))
w2 = np.random.random((39, 39))
w3 = np.random.random((39, 1))

# BUG(review): xrange is Python 2 only — use range() on Python 3.
for j in xrange(100000):
    # Forward pass: three sigmoid layers. The literal "+ 1" acts as a
    # fixed, untrainable bias added to every unit's pre-activation.
    a2 = 1/(1 + np.exp(-(np.dot(x, w) + 1)))
    a3 = 1/(1 + np.exp(-(np.dot(a2, w2) + 1)))
    a4 = 1/(1 + np.exp(-(np.dot(a3, w3) + 1)))
    # BUG(review): the sigmoid derivative is a*(1-a); writing 1*(1-a)
    # uses the wrong local gradient at every layer.
    a4delta = (y - a4) * (1 * (1 - a4))
    a3delta = a4delta.dot(w3.T) * (1 * (1 - a3))
    a2delta = a3delta.dot(w2.T) * (1 * (1 - a2))
    # NOTE(review): updates use an implicit learning rate of 1, which is
    # far too large here and drives the outputs straight to saturation
    # (the all-1.0 result shown below).
    w3 += a3.T.dot(a4delta)
    w2 += a2.T.dot(a3delta)
    w += x.T.dot(a2delta)
print(a4)

结果如下:

[[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]
[ 1.]]

有人能看出我错在哪里吗?我的网络结构是否需要修改?我也尝试过调整超参数,比如增加更多隐藏层和更多迭代次数,但都没有改善。

1 个答案:

答案 0 :(得分:1)

你有一些错误和一些我认为是错误的事情,但也许只是一个不同的实现。

你现在是把梯度直接加到权重上,而应该减去"梯度乘以步长"。这就是为什么你的输出只需很少几次迭代就全部饱和到 1.0。

这些:

w3 += a3.T.dot(a4delta)

应该是这样的:

 w3 -= addBias(a3).T.dot(a4delta) * step

另外,我不认为你对sigmoid函数的偏导数有正确的表述。我想这些:

a3delta = a4delta.dot(w3.T) * (1 * (1 - a3)) 

应该是:

a3delta = a4delta.dot(w3.T) * (a3 * (1 - a3))

你还应该像下面这样,把权重初始化为接近零的小随机值:

ep = 0.12
w = np.random.random((39, 39)) * 2 * ep - ep

大多数实现都会给每一层添加一个偏置节点,而你并没有这样做。这会让实现稍微复杂一些,但我认为它能让网络收敛得更快。

对我而言,这在200次迭代中收敛于一个自信的答案:

# Small symmetric random init keeps the sigmoids out of saturation.
# BUG FIX(review): the original snippet used `ep` on its first three
# init lines before defining it (NameError) and then repeated the same
# init block; define `ep` once, up front.
ep = 0.12
# Weight shapes have one extra input row to account for the bias node.
w = np.random.random((39, 39)) * 2 * ep - ep   # 38 features + bias -> 39 hidden
w2 = np.random.random((40, 39)) * 2 * ep - ep  # 39 hidden + bias -> 39 hidden
w3 = np.random.random((40, 1)) * 2 * ep - ep   # 39 hidden + bias -> 1 output

def addBias(mat):
    """Prepend a column of ones (the bias inputs) to mat."""
    return np.hstack((np.ones((mat.shape[0], 1)), mat))

# Learning rate. Equivalent to the original's `step = -.1` combined with
# `-=` updates, without the confusing double negative.
step = .1
for j in range(200):
    # Forward prop through three sigmoid layers, bias column added per layer.
    a2 = 1/(1 + np.exp(- addBias(x).dot(w)))
    a3 = 1/(1 + np.exp(- addBias(a2).dot(w2)))
    a4 = 1/(1 + np.exp(- addBias(a3).dot(w3)))

    # Back prop. Output delta is the raw error (y - a4); hidden deltas
    # use the correct sigmoid derivative a*(1-a).
    a4delta = (y - a4)
    # Drop the bias row of the weights (index 0) when propagating deltas
    # back — the bias node receives no error signal.
    a3delta = a4delta.dot(w3[1:, :].T) * (a3 * (1 - a3))
    a2delta = a3delta.dot(w2[1:, :].T) * (a2 * (1 - a2))

    # Since the deltas carry (y - a4), ADDING step * gradient moves the
    # weights downhill on the error.
    w3 += addBias(a3).T.dot(a4delta) * step
    w2 += addBias(a2).T.dot(a3delta) * step
    w += addBias(x).T.dot(a2delta) * step
print(np.rint(a4))