我正在使用R的neuralnet
包来运行神经网络。当我在nn中使用多个因变量时,我遇到了问题。
让我们从名为df
的示例开始。
df <- structure(list(DC1 = c(4, 5, 6, 8, 11, 11, 11, 6, 8, 5, 9, 10,
6, 11, 15, 13, 17, 6, 7, 9, 9, 9, 10, 10, 13, 15), DC2 = c(10,
7, 11, 11, 12, 8, 10, 5, 8, 5, 9, 7, 6, 13, 16, 13, 16, 7, 8,
13, 12, 9, 13, 13, 15, 13), DC3 = c(9, 8, 10, 10, 9, 11, 9, 6,
9, 8, 9, 7, 7, 13, 14, 11, 9, 11, 12, 13, 12, 10, 12, 11, 11,
11), DC4 = c(12, 9, 11, 11, 8, 13, 12, 7, 9, 10, 8, 8, 10, 12,
12, 10, 10, 13, 11, 12, 11, 14, 14, 11, 13, 14), DC5 = c(14,
12, 9, 14, 10, 14, 13, 11, 10, 10, 10, 12, 14, 14, 12, 14, 16,
14, 12, 14, 11, 16, 15, 11, 19, 17), DC6 = c(10, 12, 10, 18,
11, 15, 16, 12, 12, 11, 13, 13, 14, 16, 19, 19, 22, 15, 14, 15,
14, 14, 16, 15, 20, 23), DC7 = c(20, 21, 12, 17, 15, 19, 17,
11, 16, 15, 17, 15, 15, 25, 22, 20, 18, 15, 13, 20, 20, 19, 24,
17, 29, 24), DC8 = c(21, 15, 20, 17, 21, 17, 19, 15, 21, 14,
20, 19, 15, 30, 26, 24, 22, 16, 19, 21, 32, 27, 23, 28, 33, 30
), DC9 = c(28, 26, 22, 17, 23, 16, 18, 18, 18, 22, 25, 25, 23,
32, 27, 32, 27, 22, 24, 22, 29, 33, 26, 21, 31, 30), DC10 = c(30,
33, 23, 43, 26, 30, 22, 34, 23, 23, 36, 37, 29, 29, 30, 50, 31,
25, 27, 23, 30, 36, 35, 38, 18, 33)), .Names = c("DC1", "DC2",
"DC3", "DC4", "DC5", "DC6", "DC7", "DC8", "DC9", "DC10"), row.names = c(165L,
167L, 168L, 172L, 174L, 176L, 177L, 236L, 246L, 260L, 263L, 277L,
280L, 281L, 282L, 302L, 315L, 321L, 322L, 331L, 332L, 333L, 335L,
336L, 339L, 340L), class = "data.frame")
测试序列数据集中的I子集(截止值为0.8),并使用最小-最大方法对数据进行归一化。
index <- sample(1:nrow(df), round(0.8 * nrow(df)))
train <- df[index, ]
test <- df[-index, ]
# Normalization (min-max)
maxs = apply(df, 2, max)
mins = apply(df, 2, min)
scaled = as.data.frame(scale(df, center = mins, scale = maxs - mins))
# train and test samples
train.cv <- scaled[index, ]
test.cv <- scaled[-index, ]
现在,我想预测三个变量(DC1
,DC2
,DC3
)。因此这些变量将成为神经网络中的因变量。因为它们是数字,所以我将运行linear.output = T
。
nn <- neuralnet(DC1 + DC2 + DC3 ~ ., data=train.cv, hidden = 3, linear.output = T)
到目前为止,我还没有遇到任何问题。现在是我遇到麻烦了。
由于我有3个因变量,因此我需要在测试集中预测它们的值,对所有变量进行缩放,绘制cv误差并计算准确性。
如果我仅使用DC1
作为因变量运行nn,则可以计算上述所有问题。但是,有了三个因变量,我遇到了很多问题。
仅复制一个效果良好的因变量(DC1
)的代码,然后复制。任何人都可以改编这段代码以考虑三个因变量吗?
pr.nn <- neuralnet::compute(nn, test.cv[, 2:10]) # prediction of the neural network (DC4:DC10)
# unscale
pr.nn <- pr.nn$net.result*(max(df$DC1) - min(df$DC1)) + min(df$DC1)
test.cv.r <- (test.cv$DC1)*(max(df$DC1) - min(df$DC1)) + min(df$DC1)
cv.error[i] <- sum((test.cv.r - pr.nn)^2)/nrow(test.cv)
# Summary and plot of the neural network
nn$result.matrix
nnplot <- plot(nn)
tr.mean.cv.error <- mean(cv.error, trim = .1) # trimmed mean
# Boxplot of the cv.error values
bx_cv.error <- ggplot(data.frame(cv.error), aes(y = cv.error, x = 0)) +
geom_boxplot() +
ggtitle("CV error (MSE) for NN") +
ylab("CV error (MSE)") + xlab("") +
geom_point(alpha = .5) +
geom_point(aes(y = mean(data.frame(cv.error)$cv.error)),
pch = 4, size = 3, color = "red") +
theme_minimal()
# Vemos los valores predichos y los reales
results <- data.frame(real = test$DC1, prediction = pr.nn)
round(results)
# Accuracy
predicted <- results$Irediction * abs(diff(range(df$DC1))) + min(df$DC1)
real <- results$real * abs(diff(range(df$DC1))) + min(df$DC1)
comparison <- data.frame(predicted,real)
deviation <- ((real - predicted) / real)
comparison <- data.frame(predicted, real, deviation)
accuracy <- 1 - abs(mean(deviation))