如何用R反向传播更新多层NN中的权重和偏差?

时间:2018-01-26 20:19:26

标签: r neural-network deep-learning

我正在尝试用R创建一个简单的神经网络 我想让我的代码告诉我,如果这个虹膜是彩色或者是virginica这个类型不是线性可分的,在单层NN中它很简单但在多层NN中我不知道如何更新重量和偏差。 /> 这是我做的代码,它给了我所有数据的相同预测。 我正在使用4输入一个隐藏层,有3个节点和一个输出。

data = matrix(
  c(iris[1,1:4],
  iris[2,1:4],
  iris[3,1:4],
  iris[4,1:4],
  iris[5,1:4],

  iris[51,1:4],   
  iris[52,1:4],
  iris[53,1:4],
  iris[54,1:4],
  iris[55,1:4],

  iris[101,1:4],
  iris[102,1:4],
  iris[103,1:4],
  iris[104,1:4],
  iris[105,1:4]),
  ncol = 4,
  byrow = TRUE
)

dataOut <- c(0,0,0,0,0,1,1,1,1,1,0,0,0,0,0) # 1 => versicolor

sigmoid <- function(x){
  1/(1+exp(-x))
}

sigmoid_p <- function(x){
  sigmoid(x) * (1 - sigmoid(-x))
}

l_rate = 0.01

w1 <- runif(12)
w2 <- runif(3)

b1 <- runif(3)
b2 <- runif(1)

sH <- c(0,0,0)
outH <- c(0,0,0)

for(i in 1:5000){ 
  ri = as.integer(runif(1,1,16))#ri => random number
  p <- c(as.double(data[ri,]))

  #value of 1st node in hidden layer
  sH[1] = p[1]*w1[1] + p[2]*w1[2]  + p[3]*w1[3] + p[4]*w1[4]  + b1[1]
  outH[1] = sigmoid(sH[1])

  #value of 2nd node in hidden layer
  sH[2] = p[1]*w1[5] + p[2]*w1[6]  + p[3]*w1[7] + p[4]*w1[8]  + b1[2]
  outH[2] = sigmoid(sH[2])

  #value of 3th node in hidden layer
  sH[3] = p[1]*w1[9] + p[2]*w1[10]  + p[3]*w1[11] + p[4]*w1[12]  + b1[3]
  outH[3] = sigmoid(sH[3])

  #value of the output
  s = outH[1]*w2[1] + outH[2]*w2[2]  + outH[3]*w2[3] + b2
  out = sigmoid(s)

  error_P = out - dataOut[ri]
  out_P = sigmoid_p(s)

  w2_p <- c(0,0,0)
  w2_p[1] = error_P * out_P * outH[1]
  w2_p[2] = error_P * out_P * outH[2]
  w2_p[3] = error_P * out_P * outH[3]

  w1_p <- c(0,0,0,0,0,0,0,0,0,0,0,0)
  w1_p[1] = error_P * out_P * w2[1] * sigmoid_p(sH[1]) * p[1]
  w1_p[2] = error_P * out_P * w2[1] * sigmoid_p(sH[1]) * p[2]
  w1_p[3] = error_P * out_P * w2[1] * sigmoid_p(sH[1]) * p[3]
  w1_p[4] = error_P * out_P * w2[1] * sigmoid_p(sH[1]) * p[4]

  w1_p[5] = error_P * out_P * w2[2] * sigmoid_p(sH[2]) * p[1]
  w1_p[6] = error_P * out_P * w2[2] * sigmoid_p(sH[2]) * p[2]
  w1_p[7] = error_P * out_P * w2[2] * sigmoid_p(sH[2]) * p[3]
  w1_p[8] = error_P * out_P * w2[2] * sigmoid_p(sH[2]) * p[4] 

  w1_p[9] = error_P * out_P * w2[3] * sigmoid_p(sH[3]) * p[1]
  w1_p[10] = error_P * out_P * w2[3] * sigmoid_p(sH[3]) * p[2]
  w1_p[11] = error_P * out_P * w2[3] * sigmoid_p(sH[3]) * p[3]
  w1_p[12] = error_P * out_P * w2[3] * sigmoid_p(sH[3]) * p[4]

  for(j in 1: 3){
    w2[j] = w2[j] - l_rate * w2_p[j]
  }

  b2 = b2 - l_rate * error_P * out_P

  for(j in 1: 12){
    w1[j] = w1[j] - l_rate * w1_p[j]
  }
  for(j in 1: 3){
    b1[j] = b1[j] - l_rate  * error_P * sigmoid_p(sH[j]) 
  }

}



for(i in 1:15){
  p <- c(as.double(data[i,]))

  sH[1] = p[1]*w1[1] + p[2]*w1[2]  + p[3]*w1[3] + p[4]*w1[4]  + b1[1]
  outH[1] = sigmoid(sH[1])

  sH[2] = p[1]*w1[5] + p[2]*w1[6]  + p[3]*w1[7] + p[4]*w1[8]  + b1[2]
  outH[2] = sigmoid(sH[2])

  sH[3] = p[1]*w1[9] + p[2]*w1[10]  + p[3]*w1[11] + p[4]*w1[12]  + b1[3]
  outH[3] = sigmoid(sH[3])

  s = outH[1]*w2[1] + outH[2]*w2[2]  + outH[3]*w2[3] + b2
  out = sigmoid(s)

  if(out < 0.5)
    print(c(out,"Not Vericolor"))
  else
    print(c(out,"Vericolor"))
}

0 个答案:

没有答案