I'm trying to create a simple ANN that takes two inputs x1 and x2, counts the number of ones (ON bits) in each input, and returns x1 - x2 (for example, x1 = [1,1,1] and x2 = [1,1,0] encode 7 and 6, so the expected output is [0,0,1], i.e. 1). The network seems to train correctly when I use a single example, but as soon as I add more examples the results become very poor. I've tried adjusting the regularisation strength, the number of iterations, and the learning rate, but nothing seems to help. My understanding is that the optimal number of hidden-layer neurons is len(x1) + len(x2), which is what I've implemented. Any help would be great.
import numpy as np


class Neuron:
    def __init__(self, n):
        # create weights (+1 is for bias)
        self.weights = np.random.randn(n + 1) / np.sqrt(n)
        self.lr = 1e-2
        self.reg = 0.01

    def linear(self, x):
        # apply linear step
        x = np.append(x, 1)
        return np.dot(self.weights, x)

    def output(self, x):
        # apply sigmoid step
        preact = self.linear(x)
        self.out = 1 / (1 + np.exp(-preact))
        return self.out

    def apply_gradients(self):
        # update weights
        self.weights -= self.gradients * self.lr

    def calculate_gradients(self, loss):
        # calculate gradients
        self.gradients = loss * self.out * (1 - self.out)
        return self.gradients

    def _reg(self):
        return self.reg * np.sum(self.weights)


def main(argv=None):
    num_epochs = 1000
    # inputs
    x1 = [[1, 1, 1], [1, 1, 0]]
    x2 = [[1, 1, 0], [1, 1, 0]]
    x = np.append(x1, x2, axis=1)
    # labels: binary subtraction x1 - x2
    y = np.array([[0, 0, 1], [0, 0, 0]])
    _, n = y.shape
    hln = 2 * n
    hidden_out = np.zeros(hln)
    y_ = np.zeros(n)
    reg_value = np.zeros(n)
    loss = np.zeros(n)
    # initialise the hidden layer neurons
    hidden_layer = []
    for i in range(hln):
        hidden_layer.append(Neuron(2 * n))
    # initialise the output layer neurons
    out_layer = []
    for i in range(n):
        out_layer.append(Neuron(hln))
    rows, cols = x.shape
    # run through the epochs
    for epoch in range(num_epochs):
        # run through the samples
        for data in range(rows):
            # pass the data through the hidden layer
            for i in range(len(hidden_layer)):
                hidden_out[i] = hidden_layer[i].output(x[data, :])
            # pass the hidden layer output through the output layer
            for i in range(len(out_layer)):
                y_[i] = out_layer[i].output(hidden_out)
                # get the reg value
                reg_value[i] = out_layer[i]._reg()
            # calculate L1 loss
            loss = y_ - y[data, :] + reg_value
            # calculate gradients of output layer
            for i in range(len(out_layer)):
                out_layer[i].calculate_gradients(loss[i])
            # calculate gradients of hidden layer
            for i in range(len(hidden_layer)):
                back_vec = 0
                for j in range(len(out_layer)):
                    # sum all the output weights coming back into a hidden neuron
                    back_vec += out_layer[j].weights[i] * loss[j]
                # send the backwards value through the hidden neuron
                hidden_layer[i].calculate_gradients(back_vec)
            # apply gradients to output layer
            for i in range(len(out_layer)):
                out_layer[i].apply_gradients()
            # apply gradients to hidden layer
            for i in range(len(hidden_layer)):
                hidden_layer[i].apply_gradients()
        # output the final results
        if epoch == num_epochs - 1:
            print(y_)


if __name__ == "__main__":
    main()
Answer 0 (score: 0)
You need a training set to train an ML model (in this case a neural network). Generalising a moderately complex operation (binary subtraction) from only two training examples is simply not feasible.
My suggestion is to build a complete training set before tuning the network any further: since you are working with fixed-length (3-bit) binary arrays, you can easily enumerate every possibility (8 * 8 = 64 training samples), set a small portion aside for validation, and train on the rest, as in the sketch below.
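A minimal sketch of that enumeration, assuming the difference is clamped at zero whenever x2 > x1 (the answer does not say how negative results should be encoded) and using an arbitrary validation split of 8 samples:

import itertools
import numpy as np

def binary_subtract(a, b, bits=3):
    # interpret each bit tuple as an unsigned integer, subtract,
    # and re-encode the difference as a fixed-width bit list
    val = int("".join(map(str, a)), 2) - int("".join(map(str, b)), 2)
    val = max(val, 0)  # assumption: clamp negative differences to zero
    return [int(c) for c in format(val, "0{}b".format(bits))[-bits:]]

# all 8 * 8 = 64 ordered pairs of 3-bit inputs
pairs = list(itertools.product(itertools.product([0, 1], repeat=3), repeat=2))
x = np.array([list(a) + list(b) for a, b in pairs])      # shape (64, 6)
y = np.array([binary_subtract(a, b) for a, b in pairs])  # shape (64, 3)

# shuffle, then hold out a small validation split
rng = np.random.default_rng(0)
idx = rng.permutation(len(x))
x_val, y_val = x[idx[:8]], y[idx[:8]]
x_train, y_train = x[idx[8:]], y[idx[8:]]

The resulting x and y arrays drop straight into the training loop above in place of the two hand-written samples.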