I have been trying to implement backpropagation in R, but I'm getting some strange results. It seems that after 1000 iterations of backprop the program predicts 1 for every value. I was hoping it was a problem in the test function, but testing on smaller numbers of iterations shows that 0 is predicted as the output value in some cases. It seems that somewhere, in iterating through the dataset, the weight updates tend to increase the error, when they should be tending to decrease it.
I'm sorry the code is difficult to read. I'm working on this with a partner and I don't like the way he named the variables. It also isn't as fully commented as I would like. Any help is appreciated.
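For reference, here is my understanding of the textbook update rule for a single sigmoid unit, which is the direction I expect each weight to move in. This is just a minimal sketch, separate from the actual network code below; the name updateSketch and its arguments are only for illustration.

# minimal sketch of the standard delta rule for one sigmoid unit (illustration only,
# not called anywhere in the code below)
# out: sigmoid output of the unit, target: desired output (0 or 1),
# inputs/weights: numeric vectors of equal length, learningRate: step size
updateSketch <- function(weights, inputs, out, target, learningRate) {
  delta <- out * (1 - out) * (target - out)   # sigmoid derivative times the error
  weights + learningRate * delta * inputs     # step that should reduce the error
}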
# initialize a global output vector and a global vector of data frames
createNeuralNet <- function(numberOfInputNodes,hiddenLayers,nodesInHiddenLayer){
L <<- initializeWeightDataFrames(numberOfInputNodes,nodesInHiddenLayer,hiddenLayers)
# print(L)
OutputList <<- initializeOutputVectors(hiddenLayers)
}
# creates a list of weight data frames
# each weight data frame uses the row as an index of the "tail" for a connection
# the "head" of the connection (where the arrow points) is in the column index
# the value in the weight data frame is the weight of that connection
# the last row is the weight between the bias and a particular node
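# for example (hypothetical sizes): with 2 nodes in the "tail" layer feeding 3 nodes in the
# "head" layer, the data frame would have columns c1..c3 and 3 rows, where rows 1-2 hold the
# weights from the two tail nodes and row 3 holds the bias weights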
initializeWeightDataFrames <- function(numberOfInputNodes, nodesPerHiddenLayer, numberOfHiddenLayers) {
weights <- vector("list", numberOfHiddenLayers + 1)
# this code simply creates empty data frames of the proper size so that they may be filled with random weights below
first <- read.csv(text=generateColumnNamesCSV(nodesPerHiddenLayer))
middle <- read.csv(text=generateColumnNamesCSV(nodesPerHiddenLayer))
# assume binary classifier, so output layer has 1 node
last <- read.csv(text=generateColumnNamesCSV(1))
first <- assignWeights(first, numberOfInputNodes + 1)
weights[[1]] <- first
# assign random weights to each row
if (numberOfHiddenLayers != 1) {
for (i in 1:(numberOfHiddenLayers - 1)) {
middle <- assignWeights(middle, nodesPerHiddenLayer + 1)
weights[[i+1]] <- middle
}
}
last <- assignWeights(last, nodesPerHiddenLayer + 1)
weights[[length(weights)]] <- last
return(weights)
}
# generate a comma-separated string of column names c1 thru cn for creating arbitrary size data frame
generateColumnNamesCSV <- function(n) {
namesCSV <- ""
if (n==1) {
return("c1")
}
for (i in 1:(n-1)) {
namesCSV <- paste0(namesCSV, "c", i, ",")
}
namesCSV <- paste0(namesCSV, "c", n)
return(namesCSV)
}
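# e.g. generateColumnNamesCSV(3) returns "c1,c2,c3"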
assignWeights <- function(weightDF, numRows) {
modifiedweightDF <- weightDF
for (rowNum in 1:numRows) {
# creates a bunch of random numbers from -1 to 1, used to populate a row
rowVector <- runif(length(weightDF))
for (i in 1:length(rowVector)) {
sign <- (-1)^round(runif(1))
rowVector[i] <- sign * rowVector[i]
}
modifiedweightDF[rowNum,] <- rowVector
}
return(modifiedweightDF)
}
# create an empty list of the right size, will hold vectors of node outputs in the future
initializeOutputVectors <- function(numberOfHiddenLayers) {
numberOfLayers <- numberOfHiddenLayers + 1
outputVectors <- vector("list", numberOfLayers)
return(outputVectors)
}
# this is the main loop that does feed-forward and back prop
trainNeuralNet <- function(trainingData,target,iterations){
count <- 0
# iterations is a constant for how many times the dataset should be iterated through
while(count<iterations){
print(count)
for(row in 1:nrow(trainingData)) { # for each row in the data set
#Feed Forward
# instance is the current row that's being looked at
instance <- trainingData[row,]
# print(instance)
for (l in 1:length(L)) { # for each weight data frame
# w is the current weights
w <- L[[l]]
#print(w)
Output <- rep(NA, length(w))
if (l!=1) {
# x is the values in the previous layer
# can't access the previous layer if you're on the first layer
x <- OutputList[[l-1]]
#print(x)
}
for (j in 1:ncol(w)) { # for each node j in the "head" layer
s <- 0
for (i in 1:(nrow(w)-1)) {
# calculate the weighted sum s of connection weights and node values
# this is used to calculate a node in the next layer
# check the instance if on the first layer
if (l==1) {
# print(i)
# print(instance[1,i])
# i+1 skips over the target column
s <- s + instance[1,i+1]*w[i,j]
# print(s)
# if the layer is 2 or more
}else{
# print(i)
#print(j)
# print(w)
# print(w[i,j])
s <- s + x[i]*w[i,j] # weighted sum
# sigmoid activation function value for node j
}
}
#print(s)
s <- s + w[nrow(w),j] # add weighted bias
# print("s")
# print(s)
# print("sigmoid s")
# print(sigmoid(s))
Output[j] <- sigmoid(s)
}
OutputList[[l]] <- Output
}
# print(OutputList)
# print("w")
# print(L)
# print("BAck prop Time")
#Back Propagation
out <- OutputList[length(OutputList)]
#print(OutputList)
outputError <- rep(NA, length(w))
outputErrorPresent <- rep(NA, length(w))
outputError[1] <- out[[1]]*(1-out[[1]])*(out[[1]]-target[row])
for (h in (length(L)):1) { # for each weight matrix in hidden area h (going backwards)
hiddenOutput <- OutputList[h]
#print("hiddenOutput")
#print(h)
if (row==1||row==2) {
# print("h")
# print(h)
# print("output error Present")
# print(outputErrorPresent)
}
if (h!=(length(L))) {
outputError <- outputErrorPresent
}
w <- L[[h]]
for (j in 1:(nrow(w))) { # for each node j in hidden layer h
#print("length w")
#print(length(w))
if (row==1||row==2) {
# print("j")
# print(j)
}
errSum <- 0
nextLayerNodes <- L[[h]]
# print(nextLayerNodes)
#print(class(nextLayerNodes))
for (k in 1:ncol(nextLayerNodes)) {
errSum <- errSum + outputError[k]*nextLayerNodes[j,k]
}
m <- 0
if (h == 1) {
m <- as.numeric(instance)
m <- m[-1]
} else {
m <- OutputList[h-1][[1]]
}
deltaWeight <- 0
for (k in 1:ncol(nextLayerNodes)) {
hiddenNodeError <- hiddenOutput[[1]][k]*(1- hiddenOutput[[1]][k])*errSum
if (j == nrow(w)) {
deltaWeight <- learningRate*hiddenNodeError
} else {
deltaWeight <- learningRate*hiddenNodeError*m[j]
}
# print(deltaWeight)
w[j,k] <- w[j,k] + deltaWeight
}
if (j != nrow(w)) {
outputErrorPresent[j] <- hiddenNodeError
}
}
L[[h]] <<- w
}
# print(OutputList)
}
count <- count +1
# print(L)
#calculate global error
}
########################repeat
# print("w")
}
sigmoid <- function(s){
sig <- 1/(1+exp(-s))
return(sig)
}
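# A minimal vectorized sketch of the same forward pass, which I use only to sanity-check the
# loop version above (the name feedForwardSketch and its arguments are my own; nothing else in
# this file calls it). It assumes `weights` is the list of weight data frames built by
# initializeWeightDataFrames and `inputs` is a numeric vector of input-node values with the
# target column already removed.
feedForwardSketch <- function(weights, inputs) {
  activation <- inputs
  for (w in weights) {
    W <- as.matrix(w)
    nTail <- nrow(W) - 1                                 # last row holds the bias weights
    s <- activation %*% W[1:nTail, , drop = FALSE] + W[nrow(W), ]
    activation <- sigmoid(as.vector(s))
  }
  return(activation)
}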
testNeuralNetwork <- function(testingData,testTarget){
correctCount <- 0
# run the same code as feed forward
# this time run it on testing examples and compare the outputs
for(row in 1:nrow(testingData)) { # for each test instance
#Feed Forward
instance <- testingData[row,]
#print(instance)
for (l in 1:length(L)) { # for each layer l
w <- L[[l]]
#print(w)
Output <- rep(NA, length(w))
if (l!=1) {
x <- OutputList[[l-1]]
#print(x)
}
for (j in 1:ncol(w)) { # for each node j in layer l
s <- 0
for (i in 1:(nrow(w)-1)) {
if (l==1) {
# i+1 skips over the target column
s <- s + instance[1,i+1]*w[i,j]
# print(s)
}else{
# print(i)
#print(j)
# print(w)
# print(w[i,j])
s <- s + x[i]*w[i,j] # weighted sum
# sigmoid activation function value for node j
}
}
#print(s)
s <- s + w[nrow(w),j] # add weighted bias
Output[j] <- sigmoid(s)
#print(sigmoid(s))
}
OutputList[[l]] <- Output
}
# print(OutputList)
outputVal <- threshold(OutputList[[length(OutputList)]])
if (outputVal==testTarget[row]) {
print(paste0(" ", outputVal, " Correct!"))
correctCount <- correctCount + 1
}else{
print(paste0(" ", outputVal, " Wrong."))
}
#print()
#print(paste0("s2 ",str))
}
}
# convert real-valued output to a binary classification
threshold <- function(value){
if (value>=0.5) {
return(1)
}else{
return(0)
}
}
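# e.g. threshold(0.73) returns 1 and threshold(0.21) returns 0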
# picks `size` random row indices from df to use as a test set
# note: this does not modify df itself; the caller uses the returned indices
# to split the data into test rows (Data[randRows,]) and training rows (Data[-randRows,])
makeTestSet <- function(df, size) {
len <- 1:length(df[,1])
randRows <- sample(len, size, replace=F)
return(randRows)
}
Data <- read.csv(file = "Downloads/numericHouse-votes-84.csv", head = TRUE, sep = ",")
learningRate <<- 0.1
# assume that the first column of the data is the column that is to be predicted
# thus the number of inputs is 1 less than the number of columns
numberOfInputNodes <- ncol(Data) - 1
randRows <- makeTestSet(Data,30) #change this to 30
testData <- Data[randRows,]
trainingData <- Data[-randRows,]
testTarget <- testData[,1]
#trainingData <- Data[,1:numberOfInputNodes]
trainingTarget <- trainingData[,1]
createNeuralNet(numberOfInputNodes,1,numberOfInputNodes)
iterations <- 100
trainNeuralNet(trainingData,trainingTarget,iterations)
testNeuralNetwork(testData,testTarget)
L