神经网络收敛到最小值,但仅在提供所有训练数据时有效

时间:2017-07-22 05:11:31

标签: r neural-network backpropagation perceptron feed-forward

我是神经网络的新手,我编写了以下前馈神经网络代码,以执行3位二进制计数器。我在代码输出中发现的是当我尝试使用任何一个3位输入进行训练时,几乎总是预测下一个状态错误,但是当我将cntr_inp(包含所有输入的矩阵)作为输入传递时,它会预测下一个国家对相应的国家的权利。我现在无法找出发生了什么并且停留了一段时间。 如果有人能够发现我正在做的错误,我会有所帮助。

感谢。

    #Sigmoid Function
    sigmoid <- function(z){
      1.0/(1.0+exp(-z))  
    }

    #Derivative of Sigmoid
    sigmoid.derivative <- function(z){
      z*(1-z)
    }

    #Randomly Generates Weights
    create.weights <- function(layers){
      weights <- vector("list",length(layers)-1)
      for(i in 1:length(weights)){
        weights[[i]] <- matrix(runif(layers[i]*layers[i+1]),nrow = layers[i],ncol = layers[i+1])
      }
      return(weights)
    }

    #Randomly Generates Biases
    create.biases <- function(layers){
      biases <- vector("list",length(layers)-1)
      for(i in 1:length(biases)){
        biases[[i]] <- runif(layers[i+1])
      }
      return(biases)
    }

    #Feedforward / Forward Propogation
    feedforward <- function(inp,weights,biases){
      layer <- vector("list",length(layers)-1)
      for(i in 1:(length(layers)-1)){
        if(i==1){
          layer[[i]] <- inp %*% weights[[i]] + biases[[i]]
          layer_dim <- dim(layer[[i]])
          layer[[i]] <- matrix(sigmoid(as.numeric(layer[[i]])),nrow = layer_dim[1],ncol = layer_dim[2],byrow = FALSE)
        }else{
          layer[[i]] <- layer[[i-1]] %*% weights[[i]] + biases[[i]]
          layer_dim <- dim(layer[[i]])
          layer[[i]] <- matrix(sigmoid(as.numeric(layer[[i]])),nrow = layer_dim[1],ncol = layer_dim[2],byrow = FALSE)
        }
      }
      return(layer)
    }

    #Calculating Delta Values
    calculate_delta <- function(net_out,out,weights){
      Slope <- lapply(net_out,sigmoid.derivative)
      delta <- vector("list",length(Slope))
      for(i in length(Slope):1){
        if(i == length(Slope)){
          delta[[i]] <-  (out-net_out[[i]]) * Slope[[i]]
        }else{
          Error_hidden <- delta[[i+1]] %*% t(weights[[i+1]])
          delta[[i]] <- Error_hidden * Slope[[i]]
        }
      }
      return(delta)
    }

    #Updating Weights
    updating_weights <- function(weights,inp,delta,net_out,step.size){
      for(i in length(weights):1){
        if(i != 1){
          weights[[i]] <- weights[[i]] + t(net_out[[i-1]]) %*% delta[[i]] * step.size
        }else{
          weights[[i]] <- weights[[i]] + t(inp) %*% delta[[i]] * step.size
        }

      }
      return(weights)
    }

    #Updating Biases
    updating_biases <- function(biases,delta,step.size){
      for(i in 1:length(biases)){
        biases[[i]] <- biases[[i]] + colSums(delta[[i]]) * step.size
      }
      return(biases)
    }


    #Complete Neural Net operations (Forward and Backward Propogation)
    Neural.Net <- function(layers = NULL,inp,out,epoch,step.size = 0.01,error.threshold = 0.01){
      weights <- create.weights(layers)
      biases <- create.biases(layers)
      for(i in 1:epoch){
        ##Back Propogation
        net_out <- feedforward(inp,weights,biases)
        delta <- calculate_delta(net_out,out,weights)
        weights <- updating_weights(weights,inp,delta,net_out,step.size)
        biases <- updating_biases(biases,delta,step.size)
        avg_error <- mean(abs(out - net_out[[length(net_out)]]))

        ##Printing Output for every epoch
        cat("\014")
        cat("------- Feed Forward Neural Nets -------\n")
        cat("Inputs: ",layers[1],"\n")
        cat("Outputs: ",layers[length(layers)],"\n")
        cat("Hidden Layers:", length(layers)-2,"\n")
        cat(paste("Epoch :",i," Avg_error = ",avg_error,"\n"))
        cat("Output Values:\n\n")
        print(net_out[[length(net_out)]])
        Sys.sleep(0.002)
        ##

        if(avg_error <= error.threshold){
          message("Optimum values found")
          break
        }
      }
      return(list(weights = weights,biases = biases,Net_out = net_out))
    }

    Neural.Net.Predict <- function(Model,Test.inp){
      layer <- feedforward(inp = Test.inp,weights = Model$weights,biases = Model$biases)
      return(layer[length(layer)])
    }


    #Input and Hyper parameters to Neural Network
    layers <- c(3,7,3)
    cntr_inp <- matrix(c(0,0,0,
                         0,0,1,
                         0,1,0,
                         0,1,1,
                         1,0,0,
                         1,0,1,
                         1,1,0,
                         1,1,1),byrow = T,nrow = 8,ncol = 3)

    cntr_out <- matrix(c(0,0,1,
                         0,1,0,
                         0,1,1,
                         1,0,0,
                         1,0,1,
                         1,1,0,
                         1,1,1,
                         0,0,0),byrow = T,nrow = 8,ncol = 3)

    Output <- Neural.Net(layers = layers,inp = cntr_inp,out = cntr_out,epoch  = 100000,step.size = 0.8,error.threshold = 0.01)

    ###Predicts wrong when one input point is provided
    # I don't understand why this is predicting wrong. Need help understanding here
    Neural.Net.Predict(Output,matrix(c(1,0,0),ncol = 3,byrow = TRUE))

    ###Predicts right for all inputs
    Neural.Net.Predict(Output,cntr_inp)

0 个答案:

没有答案