我实现了这个简单的NN,但是即使进行所有交互,它也无法收敛并且MSE仍然很高
我尝试更改迭代次数和学习率,但不起作用
rm(list=ls())
data <- read.csv("C:/Users/Mikele/Documents/Uni/IA AI & Machine Learning/R/11_23_2018/wine.csv",sep = ',',header = FALSE)
x <- data[,1:11]
y <- as.matrix(data[,12])
y_matrix <- matrix(rep(0,length(y)),nrow = length(y), ncol = 6)
k <-1
for (w in 1:length(y))
{
temp <- y[k] - 2
y_matrix[k,temp] <-1
k <- k + 1
}
hl <- c(40, 30, 20)
iter <- 1000
lr <- 0.1
## add in intercept
x_1 <- as.matrix(cbind(rep(1, nrow(x)),x))
## set error array
error <- rep(0, iter)
## set up weights
## the +1 is to add in the intercept/bias parameter
W1 <- matrix(runif(ncol(x_1)*hl[1], -1, 1), nrow = ncol(x_1))
W2 <- matrix(runif((hl[1]+1)*hl[2], -1, 1), nrow = hl[1]+1)
W3 <- matrix(runif((hl[2]+1)*hl[3], -1, 1), nrow = hl[2]+1)
W4 <- matrix(runif((hl[3]+1)*ncol(y), -1, 1), nrow = hl[3]+1)
for(k in 1:iter)
{
# calculate the hidden and output layers using X and hidden layer as inputs
# hidden layer 1 and 2 have a column of ones appended for the bias term
hidden1 <- cbind(matrix(1, nrow = nrow(x_1)), sigm(x_1 %*% W1))
hidden2 <- cbind(matrix(1, nrow = nrow(x_1)), sigm(hidden1 %*% W2))
hidden3 <- cbind(matrix(1, nrow = nrow(x_1)), sigm(hidden2 %*% W3))
y_hat <- sigm(hidden3 %*% W4)
# calculate the gradient and back prop the errors
# see theory above
y_hat_del <- (y-y_hat)*(d.sigm(y_hat))
hidden3_del <- y_hat_del %*% t(W4)*d.sigm(hidden3)
hidden2_del <- hidden3_del[,-1] %*% t(W3)*d.sigm(hidden2)
hidden1_del <- hidden2_del[,-1] %*% t(W2)*d.sigm(hidden1)
# update the weights
W4 <- W4 + lr*t(hidden3) %*% y_hat_del
W3 <- W3 + lr*t(hidden2) %*% hidden3_del[,-1]
W2 <- W2 + lr*t(hidden1) %*% hidden2_del[,-1]
W1 <- W1 + lr*t(x_1) %*% hidden1_del[,-1]
error[k] <- 1/nrow(y)*sum((y-y_hat)^2)
if((k %% (10^4+1)) == 0) cat("mse:", error[k], "\n")
}
# plot loss
xvals <- seq(1, iter, length = 100)
print(qplot(xvals, error[xvals], geom = "line", main = "MSE", xlab = "Iteration"))
没有错误消息,但我不明白如何为 多元线性回归 另外,我将ys分成了一个6列的矩阵(初始数据集的最大值和最小值),现在有人可以帮助我理解为什么不进行覆盖,无论如何最终结果都集中在第4列?>