我有一个大约200列的数据框,看起来像这样:
d1 <- structure(list(Date=c(2012, 2012, 2013, 2013, 2014, 2014),
x1=c(NA, NA, 17L, 29L, 27L, 10L), x2=c(30L, 19L, 22L, 20L, 11L,
24L), x3=c(NA, 23L, 22L, 27L, 21L, 26L), x4=c(30L, 28L, 23L,
24L, 10L, 17L), x5=c(12L, 18L, 17L, 16L, 30L, 26L)),
row.names=c(NA, 6L), class="data.frame")
输出:
Date x1 x2 x3 x4 x5
1 2012 NA 30 NA 30 12
2 2012 NA 19 23 28 18
3 2013 17 22 22 23 17
4 2013 29 20 27 24 16
5 2014 27 11 21 10 30
6 2014 10 24 26 17 26
我现在想分别对每年进行线性回归,并仅对每年每个变量x1至x4的截距创建新数据框。我的自变量是x5。
像这样:
Time x1 x2 x3 x4
1 2012 Interceptx1 Interceptx2 Interceptx3 Interceptx4
2 2013 Interceptx1 Interceptx2 Interceptx3 Interceptx4
3 2014 Interceptx1 Interceptx2 Interceptx3 Interceptx4
我尝试了lms <- lapply(2:5, function(x) lm(d1[,x] ~ d1$x5))
和df <- data.frame(sapply(lms, coef))
但这会在整个时间段内进行回归。我的数据框包含200列,因此我正在寻找创建此新数据框的有效方法。
非常感谢您!
答案 0 :(得分:0)
这是一个基于我已经完成的其他工作的解决方案。我敢肯定有可能将其清理成一个纯粹的purrr解决方案,并欢迎您提出任何建议。
由于NA
值导致数据中断,我不得不对您的数据进行一些更改。
library(purrr)
library(dplyr)
library(tidyr)
library(broom)
d1 <- structure(list(cyear=c(2012, 2012, 2013, 2013, 2014, 2014),
x1=c(5L, 5L, 17L, 29L, 27L, 10L),
x2=c(30L, 19L, 22L, 20L, 11L,24L),
x3=c(5L, 23L, 22L, 27L, 21L, 26L),
x4=c(30L, 28L, 23L,24L, 10L, 17L),
x5=c(12L, 18L, 17L, 16L, 30L, 26L)),
row.names=c(NA, 6L), class="data.frame")
models <- nest(d1, -cyear)
str(models)
reg_vars <- c("x1", "x2", "x3", "x4")
# The following loops through each of the independent
for(i in 1:length(reg_vars)){
var_mdl <- rlang::sym(paste0(reg_vars[i], "_mdl")) # create the name of a model
var_res <- rlang::sym(paste0(reg_vars[i], "_res")) # create the name of the results
formula = as.formula(paste0(reg_vars[i], " ~ x5")) # create the regression formula
print(formula)
models <- models %>%
mutate(
# create the model as an element in the nested data
!!var_mdl := map(data, ~ lm(formula, data = ., na.action = "na.omit")),
# tidy the model results into an element
!!var_res := map(!!var_mdl, tidy)
)
}
models
reg_vars2 <- paste0(reg_vars, "_res")
reg_vars2
# clean up ####
# this will extract the regression results into a new data frame
for(i in 1:length(reg_vars2)){
if(i == 1){
results <- rlang::sym(reg_vars2[i])
out_df <- models %>%
select(cyear, !!results) %>%
unnest(!!results)
}
results <- rlang::sym(reg_vars2[i])
temp_df <- models %>%
select(cyear, !!results) %>%
unnest(!!results)
out_df <- bind_rows(out_df, temp_df)
}
head(out_df)