我正在尝试存储不同模型的系数估计值。为了说明我的问题,下面是一个例子。
# The `credit` data set used below is presumably supplied by fpp.
library(fpp)

# Keep the response `score` as-is and log-transform each predictor after
# adding 1 (presumably so that zero values stay finite).
creditlog <- with(
  credit,
  data.frame(
    score = score,
    log.savings = log(savings + 1),
    log.income = log(income + 1),
    log.address = log(time.address + 1),
    log.employed = log(time.employed + 1)
  )
)

# Four linear models for `score`, each using a different subset of the
# log-transformed predictors.
fit_1 <- lm(score ~ log.income + log.address + log.employed, data = creditlog)
fit_2 <- lm(score ~ log.savings + log.employed, data = creditlog)
fit_3 <- lm(score ~ log.address + log.employed, data = creditlog)
fit_4 <- lm(score ~ log.income + log.address, data = creditlog)

# Pull the point estimates (first column of the summary coefficient table)
# out of each model as a named numeric vector.
coef_1 <- coef(summary(fit_1))[, "Estimate"]
coef_2 <- coef(summary(fit_2))[, "Estimate"]
coef_3 <- coef(summary(fit_3))[, "Estimate"]
coef_4 <- coef(summary(fit_4))[, "Estimate"]
> coef_1
(Intercept) log.income log.address log.employed
-14.957037 10.082396 3.353521 1.049130
> coef_2
(Intercept) log.savings log.employed
24.34323 11.28698 1.92655
> coef_3
(Intercept) log.address log.employed
26.115064 3.438382 1.213017
> coef_4
(Intercept) log.income log.address
-13.38037 10.23459 3.58023
如果我尝试用 rbind 把它们合并，会得到如下结果：
(Intercept) log.income log.address log.employed
coef_1 -14.95704 10.082396 3.353521 1.04913
coef_2 24.34323 11.286978 1.926550 24.34323
coef_3 26.11506 3.438382 1.213017 26.11506
coef_4 -13.38037 10.234590 3.580230 -13.38037
Warning message:
In rbind(coef_1, coef_2, coef_3, coef_4) :
number of columns of result is not a multiple of vector length (arg 2)
>
这不是我想要的结果。我需要的是类似下面这样的输出：
(Intercept) log.savings log.income log.address log.employed
fit_1 -14.957037 NA 10.082396 3.353521 1.04913
fit_2 24.34323 11.28698 NA NA 1.92655
fit_3 26.115064 NA NA 3.438382 1.213017
fit_4 -13.38037 NA 10.23459 3.58023 NA
提前致谢。
答案 0 :(得分:1)
这是一种使用相对较短的代码完成此操作的方法。它使用 coef 直接从模型对象中提取系数，并使用 lapply 以避免为每个模型对象重复相同的代码。rbind.fill 负责将每个系数值放在正确的列中：
library(plyr)  # supplies rbind.fill()

# Turn each model's named coefficient vector into a one-row data frame, then
# let rbind.fill() stack the rows, aligning columns by name and padding the
# coefficients a model does not have with NA.
fits <- rbind.fill(
  lapply(
    list(fit_1, fit_2, fit_3, fit_4),
    function(model) {
      as.data.frame(t(coef(model)))
    }
  )
)
fits
(Intercept) log.income log.address log.employed log.savings
1 -14.95704 10.08240 3.353521 1.049130 NA
2 24.34323 NA NA 1.926550 11.28698
3 26.11506 NA 3.438382 1.213017 NA
4 -13.38037 10.23459 3.580230 NA NA
如果您有超过四个模型对象并且不想逐个键入所有名称，则可以以编程方式引用对象名称。例如，如果您有模型对象 fit_1 到 fit_20，则将 list(fit_1, fit_2, fit_3, fit_4) 替换为 mget(paste0("fit_", 1:20))。mget 接受一个字符串向量，并以列表形式返回具有这些名称的对象。
答案 1 :(得分:0)
您可以先把这些向量转换为 data.frame，然后使用 dplyr 的 rbind_all（在较新版本的 dplyr 中已被 bind_rows 取代）把它们合并：
library(dplyr)

# Convert each model's estimates (a named numeric vector) into a one-row
# data frame whose column names are the coefficient names. The full element
# name `coefficients` and the column label "Estimate" are spelled out instead
# of relying on `$` partial matching and a positional index.
coef_1 <- as.data.frame(t(summary(fit_1)$coefficients[, "Estimate"]))
coef_2 <- as.data.frame(t(summary(fit_2)$coefficients[, "Estimate"]))
coef_3 <- as.data.frame(t(summary(fit_3)$coefficients[, "Estimate"]))
coef_4 <- as.data.frame(t(summary(fit_4)$coefficients[, "Estimate"]))

# Stack the rows, matching columns by name and filling gaps with NA.
# bind_rows() is used because rbind_all() was deprecated and later removed
# from dplyr.
coefs <- bind_rows(list(coef_1, coef_2, coef_3, coef_4))

# Label each row with the model it came from.
row.names(coefs) <- c("fit_1", "fit_2", "fit_3", "fit_4")
coefs
(Intercept) log.income log.address log.employed log.savings
fit_1 -14.95704 10.08240 3.353521 1.049130 NA
fit_2 24.34323 NA NA 1.926550 11.28698
fit_3 26.11506 NA 3.438382 1.213017 NA
fit_4 -13.38037 10.23459 3.580230 NA NA
答案 2 :(得分:0)
可以使用以下 R 代码，把只包含部分列的数据逐行合并到同一个数据框中，缺失的列用 NA 填充：
# Three one-row inputs, each covering only some of the columns a, b, c, d.
c1 <- data.frame(a = 1, b = 2, d = 3)
c2 <- data.frame(b = 2, c = 3)
c3 <- data.frame(a = 4, d = 5)

# Empty accumulator that declares the full set of (numeric) columns up front.
cc <- data.frame(
  a = numeric(0),
  b = numeric(0),
  c = numeric(0),
  d = numeric(0)
)
# Append one row to the accumulator data frame `cc`, filling only the
# columns named in `vect`; the remaining columns of the new row stay NA.
#
# Args:
#   vect: a named vector, list, or one-row data frame of values.
#   cc:   the data frame collected so far.
#
# Returns:
#   `cc` with one extra row. If `vect` is empty, `cc` is returned unchanged.
ff <- function(vect, cc) {
  n <- nrow(cc) + 1
  # seq_along() rather than 1:length(): for an empty `vect` the loop must be
  # skipped instead of iterating over c(1, 0).
  for (i in seq_along(vect)) {
    # `[[` extracts the bare value, so a scalar (not a one-column data
    # frame) is written into the cell.
    cc[n, names(vect)[i]] <- vect[[i]]
  }
  cc
}
# Fold the three partial rows into the accumulator, in order: c1, c2, c3.
cc <- Reduce(
  function(acc, piece) ff(piece, acc),
  list(c1, c2, c3),
  cc
)
cc
a b c d
1 1 2 NA 3
2 NA 2 3 NA
3 4 NA NA 5