我想将3个线性模型应用于我的数据,并为每个模型提取残差。我想知道是否有办法使用dplyr和purrr的组合为每个模型应用相同的步骤:
我想保留:
lm
对象augment
输出这是一个分析mpg
数据集的工作示例:
library(dplyr)
library(tidyr)
library(purrr)
library(broom)
library(ggplot2)
以下是我想用于我的lm的三种不同公式
f1 = hwy ~ cyl
f2 = hwy ~ displ
f3 = hwy ~ cyl + displ
lin_mod = function(formula) {
function(data) {
lm(formula, data = data)
}
}
这是我为单个公式提取残差的方法:
mpg %>%
group_by(manufacturer) %>%
nest() %>%
mutate(model = map(data, lin_mod(f1)),
aug = map(model, augment),
res = map(aug, ".resid"))
但是,对于所有公式来说,这种技术似乎是一种不好的方法,因为我重写了很多代码:
mpg %>%
group_by(manufacturer) %>%
nest() %>%
mutate(model1 = map(data, lin_mod(f1)),
aug1 = map(model1, augment),
res1 = map(aug1, ".resid"),
model2 = map(data, lin_mod(f2)),
aug2 = map(model2, augment),
res2 = map(aug2, ".resid"),
model3 = map(data, lin_mod(f3)),
aug3 = map(model3, augment),
res3 = map(aug3, ".resid"))
如何以优雅的方式将此功能应用于每个公式?我当时认为mutate_all,或者将公式放入列表可能会有所帮助,但唉,我被卡住了。
答案 0 :(得分:1)
您可以使用mutate_at
(或mutate_if
)改变列表列的位置。这样可以节省多次迭代,并使代码可管理且更紧凑。
library(dplyr)
library(tidyr)
library(purrr)
library(broom)
lin_mod = function(formula) {
function(data,...){
map(data,~lm(formula, data = .x))
}
}
list_model <- list(cyl_model= hwy ~ cyl,
displ_model= hwy ~ displ,
full_model= hwy ~ cyl + displ) %>%
lapply(lin_mod)
ggplot2::mpg %>%
group_by(manufacturer) %>% nest() %>%
mutate_at(.vars=("data"),.funs=list_model) %>%
mutate_at(.vars=vars(ends_with("model")), .funs=~map(.x, augment)) %>%
mutate_at(.vars=vars(ends_with("model")), .funs=~map(.x, ".resid")) %>% unnest()
答案 1 :(得分:0)
这是我能够提出的最接近的例子here
library(dplyr)
library(tidyr)
library(purrr)
library(broom)
library(ggplot2)
# Here are the three different formulas I want to use for my lm
f1 = hwy ~ cyl
f2 = hwy ~ displ
f3 = hwy ~ cyl + displ
formulas = c(f1,f2,f3)
lin_mod = function(formula) {
function(data) {
lm(formula, data = data)
}
}
list_model = lapply(formulas, lin_mod)
names(list_model) = c('cyl_model', 'displ_model', 'full_model')
fn_model <- function(.model, df){
df$model <- map(df$data, possibly(.model, NULL))
df
}
mpg_nested = mpg %>%
group_by(manufacturer) %>%
nest()
mpg_nested_new = list_model %>%
map_df(fn_model, mpg_nested, .id = 'id_model') %>%
arrange(manufacturer) %>%
mutate(aug = map(model, augment),
res = map(aug, ".resid"))
output = mpg_nested_new %>%
gather(Var, val, c('model', 'aug', 'res')) %>%
unite(desc, id_model, Var)%>%
spread(desc, val)