我有一个类似 out 的列表。列表中的每个元素都是一个数据框,它们结构相同,即维度和变量名称(id/period/pred_dif)都相同:
# Key columns shared by every data frame: ids 1-5 repeated twice, each row
# paired with a period value.
id <- rep(c(1, 2, 3, 4, 5), times = 2)
period <- c(1, 9, 12, 1, 8, 2, 8, 12, 11, 12)

# Four data frames with identical structure; only pred_dif differs.
# pred_dif is reassigned before each data.frame() call, so after this block
# it holds the values used for list_4.
pred_dif <- c(0.5, 0.1, 0.15, 0.23, 0.75, 0.6, 0.49, 0.81, 0.37, 0.14)
list_1 <- data.frame(id = id, period = period, pred_dif = pred_dif)

pred_dif <- c(0.45, 0.18, 0.35, 0.63, 0.25, 0.63, 0.29, 0.11, 0.17, 0.24)
list_2 <- data.frame(id = id, period = period, pred_dif = pred_dif)

pred_dif <- c(0.58, 0.13, 0.55, 0.13, 0.76, 0.3, 0.29, 0.81, 0.27, 0.04)
list_3 <- data.frame(id = id, period = period, pred_dif = pred_dif)

pred_dif <- c(0.3, 0.61, 0.18, 0.29, 0.85, 0.76, 0.56, 0.91, 0.48, 0.91)
list_4 <- data.frame(id = id, period = period, pred_dif = pred_dif)

# Unnamed list holding the four data frames in order.
out <- list(list_1, list_2, list_3, list_4)
我想将列表中的每个数据框与下面的 df 合并,然后对合并后的结果做回归:
# Second prediction series, stored next to the same id/period key vectors
# that the data frames in `out` were built from.
pred_second <- c(0.4, 0.71, 0.28, 0.39, 0.95, 0.86, 0.66, 0.81, 0.58, 0.81)
df <- data.frame(id = id, period = period, pred_second = pred_second)
我将继续(在 dplyr 环境中)如下:
# Step the question wants to repeat for each data frame: left-merge `df` on
# id/period (all.x keeps every row of the left table), then regress pred_dif
# on period dummies with the intercept removed (`- 1`) and keep the summary.
# NOTE(review): `out` is the list of data frames built earlier, and this line
# overwrites it - presumably this is meant to run on a single element of the
# list; confirm before reusing.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
out <- merge(out, df, by = c("id", "period"), all.x = TRUE)
ols <- summary(lm(formula = pred_dif ~ as.factor(period) - 1, data = out))
知道如何以迭代和快速的方式为所有列表解决这个问题吗?
答案 0 :(得分:2)
我们可以在 tidyverse 中完成:用 `map` 循环遍历 `list`,对每个元素执行 `left_join`,在 `summarise` 中用 `lm` 构建模型,用 `tidy` 整理模型输出,再用 `unnest` 展开数据集,并将结果存储在由 `tibble` 组成的 `list` 中(或使用 `map` 的 `_dfr` 变体,返回一个用 `.id` 指定标识符列的单一数据集)。
library(purrr)
library(dplyr)
library(broom)
library(tidyr)
# For every data frame in `out`: left-join `df` on id/period, fit the
# no-intercept regression of pred_dif on period dummies, and return the tidied
# coefficient table. The result is a list with one table per input data frame.
lmout_lst <- map(out, function(dat) {
  joined <- left_join(dat, df, by = c("id", "period"))
  # lm() is called inside summarise() so that pred_dif and period resolve to
  # the columns of `joined`; the tidied fit is wrapped in a list-column.
  fitted <- summarise(
    joined,
    new = list(broom::tidy(lm(pred_dif ~ as.factor(period) - 1)))
  )
  # Expand the list-column into one row per model term.
  unnest(fitted, new)
})
也可以用 `bind_rows` 将其转换为单个数据集:
-输出
# Stack the per-data-frame coefficient tables into one data set; the `categ`
# column records which list element each row came from (the list is unnamed,
# so bind_rows() labels the elements by position).
lmout <- dplyr::bind_rows(lmout_lst, .id = "categ")