Training the hidden layer doesn't work

Date: 2018-08-22 10:01:46

Tags: python python-3.x machine-learning neural-network deep-learning

I have been teaching myself machine learning, and deep learning in particular, for about a month now and have been working hard at it. After going through all the math concepts, I decided to write a Python program with a single neuron on my own, and it works fine (very high accuracy).

I have now decided to do the same with a hidden layer of 2 neurons, 1 output neuron and 2 inputs, but it doesn't work... The cost does not decrease and the accuracy does not improve. The program itself runs, though (output below).

import numpy as np
import matplotlib.pyplot as plt


def init_variables():
    """
        Init model variables (weights, biases)
    """
    weights_11 = np.random.normal(size=2)
    weights_12 = np.random.normal(size=2)
    weight_ouput = np.random.normal(size=2)
    bias_11 = 0
    bias_12 = 0
    bias_output = 0
    return weights_11, weights_12, weight_ouput, bias_11, bias_12, bias_output

def get_dataset():
    """
        Method used to generate the dataset
    """
    #Number of rows per class
    row_per_class = 100
    #generate rows
    sick_people =  (np.random.randn(row_per_class,2)) + np.array([-2,-2])
    sick_people2 =  (np.random.randn(row_per_class,2)) + np.array([2,2])
    healthy_people = (np.random.randn(row_per_class,2)) + np.array([-2,2])
    healthy_people2 =  (np.random.randn(row_per_class,2)) + np.array([2,-2])

    features = np.vstack([sick_people,sick_people2, healthy_people, healthy_people2])
    targets = np.concatenate((np.zeros(row_per_class*2), np.zeros(row_per_class*2)+1))

    #plt.scatter(features[:,0], features[:,1], c=targets, cmap = plt.cm.Spectral)
    #plt.show()

    return features, targets

def pre_activation(features, weights, bias):
    """
        compute the pre-activation of the neuron
    """
    return np.dot(features, weights) + bias

def activation(z):
    """
        compute the activation (sigmoid)
    """
    return 1 / ( 1 + np.exp(-z) )

def derivative_activation(z):
    """
        compute the derivative of the activation (derivative of the sigmoid)
    """
    return activation(z) * (1 - activation(z))


def cost(predictions, targets):
    """
        compute the mean squared error between predictions and targets
    """
    return np.mean((predictions - targets)**2)

def predict_hidden_layer(features, weights_11, weights_12, bias_11, bias_12):
    """
        This function is not generic at all; it is only meant to make explicit how the input for the output neuron is built
    """
    predictions_11 = activation(pre_activation(features, weights_11, bias_11))
    predictions_12 = activation(pre_activation(features, weights_12, bias_12))
    layer1_result = np.stack((predictions_11, predictions_12), axis=-1)
    return layer1_result

def predict_output_neural(features, weights_11, weights_12, weight_ouput, bias_11, bias_12, bias_output):
    """
        Determine the prediction of the output
    """
    layer1_result = predict_hidden_layer(features, weights_11, weights_12, bias_11, bias_12)
    output_result = activation(pre_activation(layer1_result, weight_ouput, bias_output))
    return layer1_result, output_result


def train_multiple_neurals(features, targets, weights_11, weights_12, weight_ouput, bias_11, bias_12, bias_output):
    """
        Train multiple neurons (adjust the weights and biases based on the features and targets).
        This function is not generic or optimized; it is written this way to better understand how training works.
    """
    epochs = 100
    learning_rate = 0.1

    #display Accuracy before the training
    layer1, prediction = predict_output_neural(features, weights_11, weights_12, weight_ouput, bias_11, bias_12, bias_output)
    predictions = np.around(prediction)
    print ("Accuracy", np.mean(predictions == targets))

    for epoch in range(epochs):
        layer1, predictions = predict_output_neural(features, weights_11, weights_12, weight_ouput, bias_11, bias_12, bias_output)
        if epoch % 10 == 0:
            layer1, predictions = predict_output_neural(features, weights_11, weights_12, weight_ouput, bias_11, bias_12, bias_output)
            print (cost(predictions, targets))
        """
            There is a lot to do here!
            To do the back propagation, we first train the output neuron
        """
        #Init gradient
        weights_gradient_output = np.zeros(weight_ouput.shape)
        bias_gradient_output = 0
        #Go through each row
        for neural_input, target, prediction in zip(layer1, targets, predictions):
            #compute pre activation
            z = pre_activation(neural_input, weight_ouput, bias_output)
            #Update the gradient
            weights_gradient_output += (prediction - target)* derivative_activation(prediction) * neural_input
            bias_gradient_output += (prediction - target)* derivative_activation(prediction)

        """
            Now we are going to train the hidden layer neurons
        """
        weights_gradient_11 = np.zeros(weights_11.shape)
        bias_gradient_11 = 0

        weights_gradient_12 = np.zeros(weights_12.shape)
        bias_gradient_12 = 0

        #Go through each row
        for neural_output, feature, target, prediction in zip(layer1, features, targets, predictions):
            #compute pre activation
            z = pre_activation(neural_input, weights_11, bias_11)
            #Update the gradient
            weights_gradient_11 += (prediction - target)* derivative_activation(prediction) * weight_ouput[0]  * derivative_activation(neural_output[0]) * feature
            bias_gradient_11 += (prediction - target)* derivative_activation(prediction) * weight_ouput[0]  * derivative_activation(neural_output[0])

            #print (weights_gradient_11)
            #Update the gradient
            weights_gradient_12 += (prediction - target)* derivative_activation(prediction) * weight_ouput[1]  * derivative_activation(neural_output[1]) * feature
            bias_gradient_12 += (prediction - target)* derivative_activation(prediction) * weight_ouput[1]  * derivative_activation(neural_output[1])

        #Update the weights and bias
        weight_ouput = weight_ouput - (learning_rate * weights_gradient_output)
        bias_output = bias_output - (learning_rate * bias_gradient_output)
        weights_11 =  weights_11 - (learning_rate * weights_gradient_11)
        bias_11 =  bias_11 - (learning_rate * bias_gradient_11)
        weights_12 =  weights_12 - (learning_rate * weights_gradient_12)
        bias_12 =  bias_12 - (learning_rate * bias_gradient_12)

    layer1, prediction = predict_output_neural(features, weights_11, weights_12, weight_ouput, bias_11, bias_12, bias_output)
    predictions = np.around(prediction)
    print ("Accuracy", np.mean(predictions == targets))


if __name__ == '__main__':
    #dataset
    features, targets  = get_dataset()
    #variables
    weights_11, weights_12, weight_ouput, bias_11, bias_12, bias_output = init_variables()
    layer1_result, output_result = predict_output_neural(features, weights_11, weights_12, weight_ouput, bias_11, bias_12, bias_output)
    train_multiple_neurals(features, targets, weights_11, weights_12, weight_ouput, bias_11, bias_12, bias_output)

The code is not efficient, because I am trying to build it step by step so that I understand everything. I know the problem lies in the training of the hidden layer, but it follows the formula I have seen on the internet (neural input * (prediction - target) * sigmoid'(prediction) * weightOfTheNextLayer), which is why I really don't understand.
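Spelled out with throwaway symbols (x = input features, h = hidden-neuron activation, p = output prediction, y = target, w_out = the output weight attached to that hidden neuron; these names are only for this explanation and do not appear in the code), the hidden-layer update my code applies is:

dCost/dw_hidden = (p - y) * sigmoid'(p) * w_out * sigmoid'(h) * x

which is exactly the weights_gradient_11 / weights_gradient_12 accumulation in the loop above.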

Here is my output (the accuracy at the start and at the end, with the cost in between); the accuracy does not increase and the cost does not decrease:

Accuracy 0.6025
0.32149563353794364
0.3216454935878719
0.32177853678600526
0.32189583396850424
0.32199849304998307
0.3220876323586574
0.3221644075538757
0.32223008209366144
0.32228608192864866
0.32233396315649065
0.3223752777740352
0.32241140511378036
0.3224434401200392
0.3224721764785219
0.32249815913581226
0.32252176039218206
0.32254324818743063
0.32256283493698107
0.32258070692435065
0.3225970387325917
0.3226119980415239
0.322625745368742
0.3226384319652169
0.32265019765826863
0.3226611692835548
0.32267145957097
0.3226811659211415
0.32269036836411585
0.3226991261062232
0.32270747252405985
0.3227154094426258
0.3227229031837465
0.32272988687106613
0.3227362744197289
0.3227419889521814
0.3227470002539846
0.32275135531703975
0.3227551824643601
0.3227586613182756
0.32276197240283183
0.32276525289471264
0.32276857750543586
0.3227719648351581
0.3227753969249716
0.32277883940346674
0.3227822558361521
0.32278561551026963
0.3227888964074382
0.322792085387534
0.3227951770494241
Accuracy 0.5

It would be awesome if you could help me!

1 Answer:

Answer 0 (score: 1)

Your derivative function probably has a bug.

def derivative_activation(z):
    """
        compute the derivative of the activation (derivative of the sigmoid)
    """
    return activation(z) * (1 - activation(z))

Suppose that at the final output layer you have out_F = sigmoid(in_F), where out_F is your prediction and in_F is the input fed into your last node.
As the name of your function suggests, the derivative here is presumably meant to be taken with respect to in_F. So it should be d{out_F}/d{in_F} = out_F * (1 - out_F).

Try this:

def derivative_activation(z):
    """
        compute the derivative of the activation (derivative of the sigmoid)
    """
    return z * (1 - z)
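As a quick sanity check (a minimal standalone sketch, not part of your program; the value 0.3 is arbitrary), you can verify that feeding the already-activated output into this new definition gives the same number as evaluating the sigmoid derivative from the pre-activation directly:

import numpy as np

def activation(z):
    # sigmoid
    return 1 / (1 + np.exp(-z))

def derivative_activation(out):
    # derivative of the sigmoid expressed through its output:
    # if out = sigmoid(z), then d(out)/dz = out * (1 - out)
    return out * (1 - out)

z = 0.3                                      # arbitrary pre-activation
out = activation(z)                          # what your code calls "prediction"
print(derivative_activation(out))            # ~0.2445
print(activation(z) * (1 - activation(z)))   # same value, computed from z

Since your training loop already calls derivative_activation(prediction) and derivative_activation(neural_output[...]), i.e. it always passes values that have already gone through the sigmoid, redefining the function this way should be enough for those calls to compute the derivative you intended.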