Backprop neural network learns simple functions, but not the sklearn iris dataset

Asked: 2013-06-26 01:53:28

Tags: python neural-network

I've written a back-propagation neural network in Python, using NumPy for the matrix math and batch updates. It learns binary functions like XOR without trouble, but when I train it one-vs-all on the iris dataset (from sklearn.datasets, with the target function y = iris.target == 1), it converges to putting either all 1s or all -1s on the output. I've tried learning rates in [0.01, 20], hidden layer sizes from 3 to 20 nodes, and up to 50,000 epochs, without any improvement.
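For reference, a minimal sketch of the one-vs-all target described above (variable names here are illustrative, not from the question's code). Note that a tanh output unit has range (-1, 1), so 0/1 targets are often rescaled to {-1, 1} for it:

import numpy as np
from sklearn.datasets import load_iris

iris = load_iris()
y = (iris.target == 1).astype(float)  # one-vs-all: 1.0 for class 1, 0.0 otherwise
y_tanh = 2 * y - 1                    # conventional rescaling to {-1, 1} for tanh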

Below is the relevant NN code. _sigmoid is NumPy's tanh function, and _dsigmoid is its derivative. I'd really appreciate any help!
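The question omits those two methods; here is a sketch of what they presumably look like, assuming the derivative is written in terms of the activation value (as the calls in back_propagate below suggest, since they pass activations rather than pre-activations):

def _sigmoid(self, x):
    # "sigmoid" here is NumPy's tanh, as stated above
    return np.tanh(x)

def _dsigmoid(self, a):
    # derivative of tanh expressed via the activation a = tanh(x)
    return 1 - a**2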

def __init__(self, n_input, n_hidden, n_output):
    self.n_input = n_input + 1  # +1 for the bias unit prepended in train()/predict()
    self.n_hidden = n_hidden
    self.n_output = n_output
    # weight matrices, initialised from a zero-mean normal distribution
    self.w1 = np.random.normal(scale=0.7, size=(self.n_input*self.n_hidden)).reshape(self.n_input, self.n_hidden)
    self.w2 = np.random.normal(scale=0.7, size=(self.n_hidden*self.n_output)).reshape(self.n_hidden, self.n_output)
    self.output_activation = np.zeros(n_output)
    self.hidden_activation = np.zeros(n_hidden)
    self.input_activation = np.zeros(n_input)

def feed_forward(self):
    """
    Update output vector created by feed-forward propagation of input activations
    """
    self.hidden_activation = self._sigmoid(np.dot(self.input_activation, self.w1))
    self.output_activation = self._sigmoid(np.dot(self.hidden_activation, self.w2))

def back_propagate(self, target, alpha):
    # output-layer error and delta (derivative taken w.r.t. the activation)
    output_error = target - self.output_activation
    output_delta = output_error * self._dsigmoid(self.output_activation)

    # propagate the delta back through w2 to the hidden layer
    hidden_error = np.dot(output_delta, self.w2.T)
    hidden_delta = hidden_error * self._dsigmoid(self.hidden_activation)

    # gradient-descent updates on the squared error
    self.w2 += alpha * (np.dot(self.hidden_activation.T, output_delta))
    self.w1 += alpha * (np.dot(self.input_activation.T, hidden_delta))

def train(self, data, target, alpha, epochs=50):
    m = data.shape[0]

    # add bias to input
    X = np.ones((m, self.n_input))
    X[:, 1:] = data

    # turn target into a column vector
    target = target[:, np.newaxis]

    for epoch in range(epochs):
        self.input_activation = X
        self.feed_forward()
        self.back_propagate(target, alpha)

def predict(self, data):
    m = data.shape[0]
    # prepend the bias column, as in train(), then forward-propagate
    self.input_activation = np.ones((m, self.n_input))
    self.input_activation[:, 1:] = data
    self.feed_forward()
    return self.output_activation
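
For context, a minimal sketch of how this class would be exercised on XOR, the case the question says does work (the class name NN is taken from the answer below; the hyperparameters are illustrative):

X_xor = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=float)
y_xor = np.array([0.0, 1.0, 1.0, 0.0])

net = NN(n_input=2, n_hidden=4, n_output=1)
net.train(X_xor, y_xor, alpha=0.5, epochs=5000)
print(net.predict(X_xor))  # ideally approaches [0, 1, 1, 0]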

1 Answer:

Answer 0 (score: 0):

This works for me:

import numpy as np
import sklearn.datasets
import math

class NN():
    def __init__(self, n_input, n_hidden, n_output):
        self.n_input = n_input + 1
        self.n_hidden = n_hidden
        self.n_output = n_output
        self.w1 = np.random.normal(scale=0.7, size=(self.n_input*self.n_hidden)).reshape(self.n_input, self.n_hidden)
        self.w2 = np.random.normal(scale=0.7, size=(self.n_hidden*self.n_output)).reshape(self.n_hidden, self.n_output)
        self.output_activation = np.zeros(n_output)
        self.hidden_activation = np.zeros(n_hidden)
        self.input_activation = np.zeros(n_input)

    def _sigmoid(self,x):
        return(1/(1+math.e**(-x))) #sigmoid
        #return(np.tanh(x.astype(float))) #tanh

    def _dsigmoid(self,x):
        return(x*(1-x)) #sigmoid
        #return(1-x**2) #tanh

    def feed_forward(self):
        """
        Update output vector created by feed-forward propagation of input activations
        """
        self.hidden_activation = self._sigmoid(np.dot(self.input_activation, self.w1))
        self.output_activation = self._sigmoid(np.dot(self.hidden_activation, self.w2))

    def back_propagate(self, target, alpha):        
        output_error = (target - self.output_activation)
        output_delta = output_error * self._dsigmoid(self.output_activation)

        hidden_error = np.dot(output_delta, self.w2.T)
        hidden_delta = hidden_error * self._dsigmoid(self.hidden_activation)

        self.w2 += alpha * (np.dot(self.hidden_activation.T, output_delta))
        self.w1 += alpha * (np.dot(self.input_activation.T, hidden_delta))

    def train(self, data, target, alpha, epochs=50):
        m = data.shape[0]

        # add bias to input
        X = np.ones((m, self.n_input))
        X[:, 1:] = data

        # turn target into a column vector
        target = target[:, np.newaxis]

        for epoch in range(epochs):
            self.input_activation = X
            self.feed_forward()
            self.back_propagate(target, alpha)

    def predict(self, data):
        m = data.shape[0]
        self.input_activation = np.ones((m, self.n_input))
        self.input_activation[:, 1:] = data
        self.feed_forward()
        return self.output_activation

iris = sklearn.datasets.load_iris()
data = iris['data']
targets = iris['target']
for i,t in enumerate(targets):
    if t!=1:
        targets[i] = 0
network = NN(4,3,1)
network.train(data,targets,0.01,epochs=10000)
print(network.predict(data))
print(targets)

I changed the activation function to the sigmoid, because it makes more sense when the targets lie between 0 and 1. The problem may be in your data preparation, or in something else you haven't shared. That seems unlikely, though, since I couldn't get it to work with tanh either, and the results varied a lot with the number of hidden neurons. I think you should go over your back-propagation code and try gradient checking.
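
As a starting point for that gradient check, here is a minimal sketch assuming a squared-error loss (the helper names are illustrative, not part of the code above): estimate each partial derivative by central differences and compare it against what back_propagate computes.

def numerical_gradient(loss_fn, w, eps=1e-5):
    """Central-difference estimate of d(loss)/d(w) for every entry of w."""
    grad = np.zeros_like(w)
    it = np.nditer(w, flags=['multi_index'])
    while not it.finished:
        idx = it.multi_index
        orig = w[idx]
        w[idx] = orig + eps
        loss_plus = loss_fn()
        w[idx] = orig - eps
        loss_minus = loss_fn()
        w[idx] = orig  # restore the weight
        grad[idx] = (loss_plus - loss_minus) / (2 * eps)
        it.iternext()
    return grad

# Illustrative usage against the network trained above:
def loss_fn():
    network.feed_forward()
    return 0.5 * np.sum((targets[:, np.newaxis] - network.output_activation) ** 2)

network.input_activation = np.hstack([np.ones((data.shape[0], 1)), data])
num_grad = numerical_gradient(loss_fn, network.w2)
# back_propagate's update direction for w2 is np.dot(hidden_activation.T, output_delta),
# which should match -num_grad entry for entry (to within roughly 1e-6 relative error).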