Question

我正在尝试存储不同模型的系数估计值。为了说明我的问题，下面是一个例子。

library(fpp)

# Modelling data: the raw score plus log(x + 1) versions of four predictors.
# The "+ 1" keeps log() finite when a value is 0.
creditlog <- with(credit, data.frame(
  score        = score,
  log.savings  = log(savings + 1),
  log.income   = log(income + 1),
  log.address  = log(time.address + 1),
  log.employed = log(time.employed + 1)
))

# Four linear models of score, each using a different subset of predictors.
fit_1 <- lm(score ~ log.income + log.address + log.employed, data = creditlog)
fit_2 <- lm(score ~ log.savings + log.employed, data = creditlog)
fit_3 <- lm(score ~ log.address + log.employed, data = creditlog)
fit_4 <- lm(score ~ log.income + log.address, data = creditlog)


# Named vectors of point estimates, one per model. The summary component is
# spelled out as `$coefficients` (`$coef` only works through partial matching)
# and the column is picked by name instead of by position.
coef_1 <- summary(fit_1)$coefficients[, "Estimate"]
coef_2 <- summary(fit_2)$coefficients[, "Estimate"]
coef_3 <- summary(fit_3)$coefficients[, "Estimate"]
coef_4 <- summary(fit_4)$coefficients[, "Estimate"]
> coef_1
 (Intercept)   log.income  log.address log.employed 
  -14.957037    10.082396     3.353521     1.049130 
> coef_2
 (Intercept)  log.savings log.employed 
    24.34323     11.28698      1.92655 
> coef_3
 (Intercept)  log.address log.employed 
   26.115064     3.438382     1.213017 
> coef_4
(Intercept)  log.income log.address 
  -13.38037    10.23459     3.58023

如果我尝试使用 rbind 合并它们，会得到如下结果：

       (Intercept) log.income log.address log.employed
coef_1   -14.95704  10.082396    3.353521      1.04913
coef_2    24.34323  11.286978    1.926550     24.34323
coef_3    26.11506   3.438382    1.213017     26.11506
coef_4   -13.38037  10.234590    3.580230    -13.38037
Warning message:
In rbind(coef_1, coef_2, coef_3, coef_4) :
  number of columns of result is not a multiple of vector length (arg 2)
>

这不是我想要的结果（较短的向量被循环补齐，数值被放进了错误的列）。我需要的是类似下面这样的结果：

     (Intercept)    log.savings  log.income  log.address    log.employed 
fit_1   -14.957037  NA           10.082396   3.353521       1.04913
fit_2   24.34323    11.28698     NA          NA             1.92655
fit_3   26.115064   NA           NA          3.438382       1.213017
fit_4   -13.38037   NA           10.23459    3.58023        NA

提前致谢。

Answer 1

这是一种使用相对较短的代码完成此操作的方法。它使用coef直接从模型对象中提取系数，并使用lapply以避免为每个模型对象重复相同的代码。 rbind.fill负责将每个系数值放在正确的列中：

library(plyr) # For the rbind.fill function

# Each model becomes a one-row data frame whose columns are its coefficient
# names; rbind.fill() then stacks the rows by column name and fills the
# coefficients a model does not have with NA.
fits <- rbind.fill(
  lapply(
    list(fit_1, fit_2, fit_3, fit_4),
    function(model) as.data.frame(t(coef(model)))
  )
)

fits
  (Intercept) log.income log.address log.employed log.savings
1   -14.95704   10.08240    3.353521     1.049130          NA
2    24.34323         NA          NA     1.926550    11.28698
3    26.11506         NA    3.438382     1.213017          NA
4   -13.38037   10.23459    3.580230           NA          NA

如果您有超过四个模型对象并且不想键入所有名称，则可以以编程方式引用对象名称。例如，如果您有模型对象fit_1到fit_20，则将list(fit_1, fit_2, fit_3, fit_4)替换为mget(paste0("fit_", 1:20))。 mget采用文本字符串向量并返回带有这些名称的对象。

Answer 2

您可以先把这些系数向量转换为单行的 data.frame，然后使用 dplyr 的 rbind_all（在较新版本的 dplyr 中已被 bind_rows 取代）把它们按列名合并：

library(dplyr)
# Turn each model's estimates into a one-row data frame whose column names are
# the coefficient names. `$coefficients` is spelled out in full because `$coef`
# only works through partial matching of list names.
coef_1 <- as.data.frame(t(summary(fit_1)$coefficients[, "Estimate"]))
coef_2 <- as.data.frame(t(summary(fit_2)$coefficients[, "Estimate"]))
coef_3 <- as.data.frame(t(summary(fit_3)$coefficients[, "Estimate"]))
coef_4 <- as.data.frame(t(summary(fit_4)$coefficients[, "Estimate"]))

# Stack the rows. bind_rows() matches columns by name and fills the gaps with
# NA; it replaces rbind_all(), which was deprecated in dplyr 0.5.0.
coefs <- bind_rows(list(coef_1, coef_2, coef_3, coef_4))
row.names(coefs) <- c("fit_1", "fit_2", "fit_3", "fit_4")
coefs


      (Intercept) log.income log.address log.employed log.savings
fit_1   -14.95704   10.08240    3.353521     1.049130          NA
fit_2    24.34323         NA          NA     1.926550    11.28698
fit_3    26.11506         NA    3.438382     1.213017          NA
fit_4   -13.38037   10.23459    3.580230           NA          NA

Answer 3

也可以只用基础 R 来完成：下面的代码把若干各自只包含部分列的数据框逐行合并到同一个数据框中，缺失的列用 NA 填充：

# Three one-row inputs, each covering only some of the columns a, b, c, d.
c1 <- data.frame(a = 1, b = 2, d = 3)
c2 <- data.frame(b = 2, c = 3)
c3 <- data.frame(a = 4, d = 5)

# Empty accumulator that declares the full set of (numeric) columns.
cc <- data.frame(
  a = numeric(),
  b = numeric(),
  c = numeric(),
  d = numeric()
)
# Append the named values in `vect` as one new row of `cc`.
#
# vect: a named vector or a one-row data frame; each element is written to the
#       column of `cc` that has the same name.
# cc:   the data frame accumulated so far.
#
# Returns `cc` with one extra row; columns not mentioned in `vect` are NA in
# that row. An empty `vect` returns `cc` unchanged.
ff <- function(vect, cc) {
  if (length(vect) > 0 && is.null(names(vect))) {
    stop("`vect` must be named", call. = FALSE)
  }
  n <- nrow(cc) + 1
  col_names <- names(vect)
  # seq_along() instead of 1:length(): the latter yields c(1, 0) for empty
  # input and would index elements that do not exist.
  for (i in seq_along(vect)) {
    cc[n, col_names[i]] <- vect[[i]]
  }
  cc
}

# Fold the three inputs into the accumulator in order: c1, then c2, then c3.
cc <- Reduce(function(acc, piece) ff(piece, acc), list(c1, c2, c3), cc)
cc
   a  b  c  d
1  1  2 NA  3
2 NA  2  3 NA
3  4 NA NA  5

存储具有不同系数的模型的系数估计

3 个答案: