如何在数据帧上循环模型?

时间:2017-08-10 08:32:52

标签: r loops linear-regression

我有4个数据集(dat,dat2,dat3,dat4),我想构建所有数据集的多个线性回归。最后,我需要一个表来根据RMSE,r²,RPD和平均误差来比较模型。对于每个数据集中的每个特征,我正在使用的代码能够用于单变量模型。在这里:

dat <- structure(list(TILLER1 = c(43, 23, 46, 30, 30, 45), 
                      LAI1 = c(3.545, 1.5, 1.76, 1.92, 1.36, 1.27), 
                      CHLOR1 = c(447.2, 432.8, 457.6, 449, 486.8, 455), 
                      HEIGHT1 = c(34.8, 31.5, 26.1, 26, 40.5, 35.2 ), 
                      DIAM1 = c(25.23, 23.9, 21.97, 20.99, 23.92, 24.01), 
                      NDRE1 = c(0.2579, 0.1911, 0.1643, 0.2072, 0.2233, 0.2009), 
                      NDVI1 = c(0.6495, 0.4502, 0.3643, 0.4904, 0.5625, 0.4725), 
                      TCH = c(127.55, 142.33, 127.19, 86.64, 144.36, 155.95)), 
                      .Names = c("TILLER1", "LAI1", "CHLOR1", "HEIGHT1", "DIAM1", "NDRE1", "NDVI1", "TCH"), 
                      row.names = c(NA, 6L), class = "data.frame")

 ### RMSE
 rmse <- function(error)
 {
   sqrt(mean(error^2))
 }

 # tabel of R², Erro médio, RMSE and RPD
 tabel = NULL
 for (i in 3:(ncol(dat)-1)) {

   ## Train control
   fitControl <- trainControl (
     method = "repeatedcv",
     number = 10,
     savePredictions = "final")

   ## Creating all models
   set.seed(62433)
   reg = train(TCH ~ ., data = dat[, c(i, which(colnames(dat) == "TCH"))], 
          method = 'lm', 
          trControl = fitControl, 
          verbose = TRUE,
          importance = TRUE)

   mean.error = mean(dat$TCH - data.frame(reg$pred$pred)[, 1])

   rpd = sd(dat[, which(colnames(dat) == "TCH")][[1]]) / rmse(residuals(reg))

   tmp = data.frame(variable = names(dat[,i]), r2 = summary(reg)$r.squared, 
               mean_error = mean.error, rmse = rmse(residuals(reg)), rpd = rpd)

   if (is.null(tabel)) {
     tabel = tmp
   } else {
     tabel = rbind(tabel, tmp)
   }

 }

 tabel

1 个答案:

答案 0 :(得分:0)

好的,你走了。首先,我修复了代码中的两个错误:

rpd = sd(dat[, which(colnames(dat) == "TCH")][[1]]) / rmse(residuals(reg))

您尝试计算单个值的标准偏差,返回NA。我已移除[[1]]以更正此问题。

tmp = data.frame(variable = names(dat[,i]), r2 = summary(reg)$r.squared, 
           mean_error = mean.error, rmse = rmse(residuals(reg)), rpd = rpd)

names(dat[,i])返回NULL,我已将其更改为names(dat)[i]

然后我将你的代码包装在一个函数中:

foo <- function(dat){

  for (i in 3:(ncol(dat)-1)) {

   ## Train control
   fitControl <- trainControl (
     method = "repeatedcv",
     number = 10,
     savePredictions = "final")

   ## Creating all models
   set.seed(62433)
   reg = train(TCH ~ ., data = dat[, c(i, which(colnames(dat) == "TCH"))], 
          method = 'lm', 
          trControl = fitControl, 
          verbose = TRUE,
          importance = TRUE)

   mean.error = mean(dat$TCH - data.frame(reg$pred$pred)[, 1])

   rpd = sd(dat[, which(colnames(dat) == "TCH")]) / rmse(residuals(reg))

   tmp = data.frame(variable = names(dat)[i], r2 = summary(reg)$r.squared, 
               mean_error = mean.error, rmse = rmse(residuals(reg)), rpd = rpd)

   if (is.null(tabel)) {
     tabel = tmp
   } else {
     tabel = rbind(tabel, tmp)
   }

 }

 return(tabel)
}

现在,您可以将数据框放在列表中,lapply将函数foo放在列表中,然后将rbind输出中的表放在一起:

dat <- structure(list(TILLER1 = c(43, 23, 46, 30, 30, 45), 
                      LAI1 = c(3.545, 1.5, 1.76, 1.92, 1.36, 1.27), 
                      CHLOR1 = c(447.2, 432.8, 457.6, 449, 486.8, 455), 
                      HEIGHT1 = c(34.8, 31.5, 26.1, 26, 40.5, 35.2 ), 
                      DIAM1 = c(25.23, 23.9, 21.97, 20.99, 23.92, 24.01), 
                      NDRE1 = c(0.2579, 0.1911, 0.1643, 0.2072, 0.2233, 0.2009), 
                      NDVI1 = c(0.6495, 0.4502, 0.3643, 0.4904, 0.5625, 0.4725), 
                      TCH = c(127.55, 142.33, 127.19, 86.64, 144.36, 155.95)), 
                      .Names = c("TILLER1", "LAI1", "CHLOR1", "HEIGHT1", "DIAM1", "NDRE1", "NDVI1", "TCH"), 
                      row.names = c(NA, 6L), class = "data.frame")

dat2 <- dat-1
dat3 <- dat-2
dat4 <- dat-3

datlist <- list(dat, dat2, dat3, dat4)
tablist <- lapply(datlist, foo)
tabel <- do.call(rbind, tablist)

我的示例的输出如下所示:

> tab
   variable           r2 mean_error     rmse      rpd
1    CHLOR1 4.425334e-02  4.8136686 21.57771 1.120519
2   HEIGHT1 4.652398e-01 -2.8263214 16.14037 1.497998
3     DIAM1 5.070381e-01 -6.2447449 15.49675 1.560213
4     NDRE1 1.263715e-03 -0.9279537 22.05766 1.096138
5     NDVI1 4.842547e-07 -0.8747074 22.07160 1.095445
6    CHLOR1 4.425334e-02  4.8136686 21.57771 1.120519
7   HEIGHT1 4.652398e-01 -2.8263214 16.14037 1.497998
8     DIAM1 5.070381e-01 -6.2447449 15.49675 1.560213
9     NDRE1 1.263715e-03 -0.9279537 22.05766 1.096138
10    NDVI1 4.842547e-07 -0.8747074 22.07160 1.095445
11   CHLOR1 4.425334e-02  4.8136686 21.57771 1.120519
12  HEIGHT1 4.652398e-01 -2.8263214 16.14037 1.497998
13    DIAM1 5.070381e-01 -6.2447449 15.49675 1.560213
14    NDRE1 1.263715e-03 -0.9279537 22.05766 1.096138
15    NDVI1 4.842547e-07 -0.8747074 22.07160 1.095445
16   CHLOR1 4.425334e-02  4.8136686 21.57771 1.120519
17  HEIGHT1 4.652398e-01 -2.8263214 16.14037 1.497998
18    DIAM1 5.070381e-01 -6.2447449 15.49675 1.560213
19    NDRE1 1.263715e-03 -0.9279537 22.05766 1.096138
20    NDVI1 4.842547e-07 -0.8747074 22.07160 1.095445
21   CHLOR1 4.425334e-02  4.8136686 21.57771 1.120519
22  HEIGHT1 4.652398e-01 -2.8263214 16.14037 1.497998
23    DIAM1 5.070381e-01 -6.2447449 15.49675 1.560213
24    NDRE1 1.263715e-03 -0.9279537 22.05766 1.096138
25    NDVI1 4.842547e-07 -0.8747074 22.07160 1.095445
26   CHLOR1 4.425334e-02  4.8136686 21.57771 1.120519
27  HEIGHT1 4.652398e-01 -2.8263214 16.14037 1.497998
28    DIAM1 5.070381e-01 -6.2447449 15.49675 1.560213
29    NDRE1 1.263715e-03 -0.9279537 22.05766 1.096138
30    NDVI1 4.842547e-07 -0.8747074 22.07160 1.095445
31   CHLOR1 4.425334e-02  4.8136686 21.57771 1.120519
32  HEIGHT1 4.652398e-01 -2.8263214 16.14037 1.497998
33    DIAM1 5.070381e-01 -6.2447449 15.49675 1.560213
34    NDRE1 1.263715e-03 -0.9279537 22.05766 1.096138
35    NDVI1 4.842547e-07 -0.8747074 22.07160 1.095445
36   CHLOR1 4.425334e-02  4.8136686 21.57771 1.120519
37  HEIGHT1 4.652398e-01 -2.8263214 16.14037 1.497998
38    DIAM1 5.070381e-01 -6.2447449 15.49675 1.560213
39    NDRE1 1.263715e-03 -0.9279537 22.05766 1.096138
40    NDVI1 4.842547e-07 -0.8747074 22.07160 1.095445