I implemented this simple neural network, but even after running all the iterations it never converges and the MSE stays high.
I have tried changing the number of iterations and the learning rate, but nothing works.
rm(list = ls())
library(ggplot2)  # qplot() is used to plot the loss at the end

data <- read.csv("C:/Users/Mikele/Documents/Uni/IA AI & Machine Learning/R/11_23_2018/wine.csv", sep = ',', header = FALSE)
x <- data[, 1:11]           # the 11 input features
y <- as.matrix(data[, 12])  # the quality score
# one-hot encode y into 6 columns: the scores run from 3 to 8, so y - 2 maps each one to a column index 1..6
y_matrix <- matrix(0, nrow = length(y), ncol = 6)
for (w in 1:length(y))
{
  y_matrix[w, y[w] - 2] <- 1
}
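As an aside, the encoding loop can be collapsed into one vectorized assignment; a minimal equivalent sketch, assuming as above that the scores run from 3 to 8 so that y - 2 indexes columns 1 through 6:

# same one-hot encoding without the loop, via two-column matrix indexing
y_matrix <- matrix(0, nrow = length(y), ncol = 6)
y_matrix[cbind(seq_along(y), y - 2)] <- 1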
hl <- c(40, 30, 20)  # sizes of the three hidden layers
iter <- 1000         # number of training iterations
lr <- 0.1            # learning rate
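sigm and d.sigm are called below but never defined in the post; here is a standard pair consistent with how the code uses them. Note the assumption: the backprop lines pass already-activated values, so the derivative must be written in terms of the activation itself.

sigm <- function(z) 1 / (1 + exp(-z))   # logistic activation
d.sigm <- function(a) a * (1 - a)       # sigmoid derivative, expressed in terms of a = sigm(z)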
## add in intercept
x_1 <- as.matrix(cbind(rep(1, nrow(x)),x))
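One common reason a sigmoid net like this stalls is unscaled input: the wine features span very different ranges, which saturates the first hidden layer. Standardizing before adding the intercept usually helps; a sketch replacing the line above:

# standardize each feature to mean 0 / sd 1, then prepend the intercept column
x_1 <- as.matrix(cbind(rep(1, nrow(x)), scale(x)))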
## set error array
error <- rep(0, iter)
## set up weights
## the +1 is to add in the intercept/bias parameter
W1 <- matrix(runif(ncol(x_1)*hl[1], -1, 1), nrow = ncol(x_1))
W2 <- matrix(runif((hl[1]+1)*hl[2], -1, 1), nrow = hl[1]+1)
W3 <- matrix(runif((hl[2]+1)*hl[3], -1, 1), nrow = hl[2]+1)
W4 <- matrix(runif((hl[3]+1)*ncol(y_matrix), -1, 1), nrow = hl[3]+1)  # output width must match the 6 one-hot columns, not ncol(y)
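Uniform(-1, 1) weights can also saturate a three-hidden-layer sigmoid stack. A Xavier/Glorot-style alternative is sketched below; the xavier helper is mine, not part of the original code:

# hypothetical helper: Xavier-scaled uniform init for an n_in x n_out weight matrix
xavier <- function(n_in, n_out) {
  r <- sqrt(6 / (n_in + n_out))
  matrix(runif(n_in * n_out, -r, r), nrow = n_in)
}
# e.g. W1 <- xavier(ncol(x_1), hl[1]); W2 <- xavier(hl[1] + 1, hl[2]); ...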
for(k in 1:iter)
{
# calculate the hidden and output layers using X and hidden layer as inputs
# hidden layer 1 and 2 have a column of ones appended for the bias term
hidden1 <- cbind(matrix(1, nrow = nrow(x_1)), sigm(x_1 %*% W1))
hidden2 <- cbind(matrix(1, nrow = nrow(x_1)), sigm(hidden1 %*% W2))
hidden3 <- cbind(matrix(1, nrow = nrow(x_1)), sigm(hidden2 %*% W3))
y_hat <- sigm(hidden3 %*% W4)
# calculate the gradient and back-propagate the errors
y_hat_del <- (y_matrix - y_hat)*(d.sigm(y_hat))  # train against the one-hot targets, not the raw scores
hidden3_del <- y_hat_del %*% t(W4)*d.sigm(hidden3)
hidden2_del <- hidden3_del[,-1] %*% t(W3)*d.sigm(hidden2)
hidden1_del <- hidden2_del[,-1] %*% t(W2)*d.sigm(hidden1)
# update the weights; dividing by the row count averages the gradient so lr does not scale with the dataset size
W4 <- W4 + lr*t(hidden3) %*% y_hat_del / nrow(x_1)
W3 <- W3 + lr*t(hidden2) %*% hidden3_del[,-1] / nrow(x_1)
W2 <- W2 + lr*t(hidden1) %*% hidden2_del[,-1] / nrow(x_1)
W1 <- W1 + lr*t(x_1) %*% hidden1_del[,-1] / nrow(x_1)
error[k] <- sum((y_matrix - y_hat)^2) / nrow(y_matrix)
if (k %% 100 == 0) cat("iteration:", k, "mse:", error[k], "\n")  # the original condition k %% (10^4 + 1) never fired with iter = 1000
}
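Since the target is one of six classes, a softmax output with cross-entropy loss is the more standard choice than a sigmoid-MSE output and tends to converge faster. A sketch of the output-layer change, under the assumption that everything else in the loop stays the same:

# sketch: swap the sigmoid output for a softmax (replaces the y_hat and y_hat_del lines)
logits <- hidden3 %*% W4
logits <- logits - apply(logits, 1, max)      # subtract each row's max for numerical stability
y_hat  <- exp(logits) / rowSums(exp(logits))  # row-wise softmax over the 6 classes
y_hat_del <- (y_matrix - y_hat)               # cross-entropy gradient w.r.t. the logits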
# plot the loss curve (round the sampled indices; fractional indices silently truncate)
xvals <- round(seq(1, iter, length.out = 100))
print(qplot(xvals, error[xvals], geom = "line", main = "MSE", xlab = "Iteration", ylab = "MSE"))
There are no error messages, but I do not understand how to set this up as multivariate regression. Also, I split the ys into a 6-column matrix (one column per value between the minimum and the maximum in the original dataset). Can someone help me understand why it does not converge, and why in any case the final output concentrates on column 4?
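Regarding the collapse onto column 4: it is worth checking the label distribution first, since red-wine quality scores usually cluster around 5 and 6 (columns 3 and 4 of y_matrix), and an MSE-trained sigmoid output can lower its loss by always predicting the majority column. A quick check:

table(y)  # if one quality level dominates, the net can minimize MSE by always predicting it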