我有4个数据集(dat,dat2,dat3,dat4),我想构建所有数据集的多个线性回归。最后,我需要一个表来根据RMSE,r²,RPD和平均误差来比较模型。对于每个数据集中的每个特征,我正在使用的代码能够用于单变量模型。在这里:
dat <- structure(list(TILLER1 = c(43, 23, 46, 30, 30, 45),
LAI1 = c(3.545, 1.5, 1.76, 1.92, 1.36, 1.27),
CHLOR1 = c(447.2, 432.8, 457.6, 449, 486.8, 455),
HEIGHT1 = c(34.8, 31.5, 26.1, 26, 40.5, 35.2 ),
DIAM1 = c(25.23, 23.9, 21.97, 20.99, 23.92, 24.01),
NDRE1 = c(0.2579, 0.1911, 0.1643, 0.2072, 0.2233, 0.2009),
NDVI1 = c(0.6495, 0.4502, 0.3643, 0.4904, 0.5625, 0.4725),
TCH = c(127.55, 142.33, 127.19, 86.64, 144.36, 155.95)),
.Names = c("TILLER1", "LAI1", "CHLOR1", "HEIGHT1", "DIAM1", "NDRE1", "NDVI1", "TCH"),
row.names = c(NA, 6L), class = "data.frame")
### RMSE
rmse <- function(error)
{
sqrt(mean(error^2))
}
# tabel of R², Erro médio, RMSE and RPD
tabel = NULL
for (i in 3:(ncol(dat)-1)) {
## Train control
fitControl <- trainControl (
method = "repeatedcv",
number = 10,
savePredictions = "final")
## Creating all models
set.seed(62433)
reg = train(TCH ~ ., data = dat[, c(i, which(colnames(dat) == "TCH"))],
method = 'lm',
trControl = fitControl,
verbose = TRUE,
importance = TRUE)
mean.error = mean(dat$TCH - data.frame(reg$pred$pred)[, 1])
rpd = sd(dat[, which(colnames(dat) == "TCH")][[1]]) / rmse(residuals(reg))
tmp = data.frame(variable = names(dat[,i]), r2 = summary(reg)$r.squared,
mean_error = mean.error, rmse = rmse(residuals(reg)), rpd = rpd)
if (is.null(tabel)) {
tabel = tmp
} else {
tabel = rbind(tabel, tmp)
}
}
tabel
答案 0 :(得分:0)
好的,你走了。首先,我修复了代码中的两个错误:
在
rpd = sd(dat[, which(colnames(dat) == "TCH")][[1]]) / rmse(residuals(reg))
您尝试计算单个值的标准偏差,返回NA
。我已移除[[1]]
以更正此问题。
在
tmp = data.frame(variable = names(dat[,i]), r2 = summary(reg)$r.squared,
mean_error = mean.error, rmse = rmse(residuals(reg)), rpd = rpd)
names(dat[,i])
返回NULL
,我已将其更改为names(dat)[i]
。
然后我将你的代码包装在一个函数中:
foo <- function(dat){
for (i in 3:(ncol(dat)-1)) {
## Train control
fitControl <- trainControl (
method = "repeatedcv",
number = 10,
savePredictions = "final")
## Creating all models
set.seed(62433)
reg = train(TCH ~ ., data = dat[, c(i, which(colnames(dat) == "TCH"))],
method = 'lm',
trControl = fitControl,
verbose = TRUE,
importance = TRUE)
mean.error = mean(dat$TCH - data.frame(reg$pred$pred)[, 1])
rpd = sd(dat[, which(colnames(dat) == "TCH")]) / rmse(residuals(reg))
tmp = data.frame(variable = names(dat)[i], r2 = summary(reg)$r.squared,
mean_error = mean.error, rmse = rmse(residuals(reg)), rpd = rpd)
if (is.null(tabel)) {
tabel = tmp
} else {
tabel = rbind(tabel, tmp)
}
}
return(tabel)
}
现在,您可以将数据框放在列表中,lapply
将函数foo
放在列表中,然后将rbind
输出中的表放在一起:
dat <- structure(list(TILLER1 = c(43, 23, 46, 30, 30, 45),
LAI1 = c(3.545, 1.5, 1.76, 1.92, 1.36, 1.27),
CHLOR1 = c(447.2, 432.8, 457.6, 449, 486.8, 455),
HEIGHT1 = c(34.8, 31.5, 26.1, 26, 40.5, 35.2 ),
DIAM1 = c(25.23, 23.9, 21.97, 20.99, 23.92, 24.01),
NDRE1 = c(0.2579, 0.1911, 0.1643, 0.2072, 0.2233, 0.2009),
NDVI1 = c(0.6495, 0.4502, 0.3643, 0.4904, 0.5625, 0.4725),
TCH = c(127.55, 142.33, 127.19, 86.64, 144.36, 155.95)),
.Names = c("TILLER1", "LAI1", "CHLOR1", "HEIGHT1", "DIAM1", "NDRE1", "NDVI1", "TCH"),
row.names = c(NA, 6L), class = "data.frame")
dat2 <- dat-1
dat3 <- dat-2
dat4 <- dat-3
datlist <- list(dat, dat2, dat3, dat4)
tablist <- lapply(datlist, foo)
tabel <- do.call(rbind, tablist)
我的示例的输出如下所示:
> tab
variable r2 mean_error rmse rpd
1 CHLOR1 4.425334e-02 4.8136686 21.57771 1.120519
2 HEIGHT1 4.652398e-01 -2.8263214 16.14037 1.497998
3 DIAM1 5.070381e-01 -6.2447449 15.49675 1.560213
4 NDRE1 1.263715e-03 -0.9279537 22.05766 1.096138
5 NDVI1 4.842547e-07 -0.8747074 22.07160 1.095445
6 CHLOR1 4.425334e-02 4.8136686 21.57771 1.120519
7 HEIGHT1 4.652398e-01 -2.8263214 16.14037 1.497998
8 DIAM1 5.070381e-01 -6.2447449 15.49675 1.560213
9 NDRE1 1.263715e-03 -0.9279537 22.05766 1.096138
10 NDVI1 4.842547e-07 -0.8747074 22.07160 1.095445
11 CHLOR1 4.425334e-02 4.8136686 21.57771 1.120519
12 HEIGHT1 4.652398e-01 -2.8263214 16.14037 1.497998
13 DIAM1 5.070381e-01 -6.2447449 15.49675 1.560213
14 NDRE1 1.263715e-03 -0.9279537 22.05766 1.096138
15 NDVI1 4.842547e-07 -0.8747074 22.07160 1.095445
16 CHLOR1 4.425334e-02 4.8136686 21.57771 1.120519
17 HEIGHT1 4.652398e-01 -2.8263214 16.14037 1.497998
18 DIAM1 5.070381e-01 -6.2447449 15.49675 1.560213
19 NDRE1 1.263715e-03 -0.9279537 22.05766 1.096138
20 NDVI1 4.842547e-07 -0.8747074 22.07160 1.095445
21 CHLOR1 4.425334e-02 4.8136686 21.57771 1.120519
22 HEIGHT1 4.652398e-01 -2.8263214 16.14037 1.497998
23 DIAM1 5.070381e-01 -6.2447449 15.49675 1.560213
24 NDRE1 1.263715e-03 -0.9279537 22.05766 1.096138
25 NDVI1 4.842547e-07 -0.8747074 22.07160 1.095445
26 CHLOR1 4.425334e-02 4.8136686 21.57771 1.120519
27 HEIGHT1 4.652398e-01 -2.8263214 16.14037 1.497998
28 DIAM1 5.070381e-01 -6.2447449 15.49675 1.560213
29 NDRE1 1.263715e-03 -0.9279537 22.05766 1.096138
30 NDVI1 4.842547e-07 -0.8747074 22.07160 1.095445
31 CHLOR1 4.425334e-02 4.8136686 21.57771 1.120519
32 HEIGHT1 4.652398e-01 -2.8263214 16.14037 1.497998
33 DIAM1 5.070381e-01 -6.2447449 15.49675 1.560213
34 NDRE1 1.263715e-03 -0.9279537 22.05766 1.096138
35 NDVI1 4.842547e-07 -0.8747074 22.07160 1.095445
36 CHLOR1 4.425334e-02 4.8136686 21.57771 1.120519
37 HEIGHT1 4.652398e-01 -2.8263214 16.14037 1.497998
38 DIAM1 5.070381e-01 -6.2447449 15.49675 1.560213
39 NDRE1 1.263715e-03 -0.9279537 22.05766 1.096138
40 NDVI1 4.842547e-07 -0.8747074 22.07160 1.095445