我有一个按组划分的数据集,并对每组数据分别做了回归。我使用dplyr进行回归,得到一个包含所有结果的tbl_df对象。然后我想提取每组回归的拟合值向量,并把它们放进一个数据框。以前我用summarize()可以方便地提取相关信息,但它只适用于标量。下面是我用lapply提取信息的示例代码,感觉有点繁琐:
library(dplyr)
library(reshape2)
# Example data: two groups ("a" and "b") of five rows each.
df1 <- data.frame(
  type1 = rep(c("a", "b"), each = 5),
  x = 1:10,
  y = 11:20
)
# Fit one linear model per `type1` group; do() keeps each fit in the
# list-column `model`.
model1 <- df1 %>%
  group_by(type1) %>%
  do(model = lm(y ~ x, data = .))
# Name each stored fit after its group.
names(model1$model) <- model1$type1
# Extract the fitted values of every model and melt them into one data frame.
# fitted() replaces `mod$fit`, which only reached `fitted.values` through
# `$` partial matching.
lapply(model1$model, fitted) %>%
  melt()
答案 0 :(得分:3)
library(broom)
# Append per-observation model columns (.fitted, .resid, ...) for the fits
# stored in the `model` list-column of `model1`.
# NOTE(review): newer broom releases appear to have dropped the tidiers for
# rowwise data frames that this call relies on -- confirm for your version.
augment(model1, model)
# A tibble: 10 x 10
# Groups: type1 [2]
#     type1     y     x .fitted      .se.fit        .resid  .hat       .sigma      .cooksd   .std.resid
#    <fctr> <int> <int>   <dbl>        <dbl>         <dbl> <dbl>        <dbl>        <dbl>        <dbl>
#  1      a    11     1      11 2.482534e-16  3.567051e-19   0.6 3.925229e-16 2.322633e-06  0.001759785
#  2      a    12     2      12 1.755417e-16  3.026750e-16   0.3 2.977199e-16 2.730293e-01  1.128776594
#  3      a    13     3      13 1.433292e-16 -3.857170e-16   0.2 2.471607e-16 2.263176e-01 -1.345563357
#  4      a    14     4      14 1.755417e-16 -1.380180e-16   0.3 3.747906e-16 5.677113e-02 -0.514715401
#  5      a    15     5      15 2.482534e-16  2.207032e-16   0.6 3.052655e-16 8.891591e-01  1.088827560
#  6      b    16     6      16 1.709167e-15 -2.416065e-15   0.6 8.008132e-17 2.248024e+00 -1.731290167
#  7      b    17     7      17 1.208563e-15  2.359219e-15   0.3 1.824137e-15 3.499565e-01  1.277939838
#  8      b    18     8      18 9.867878e-16  1.265324e-15   0.2 2.510473e-15 5.138141e-02  0.641132787
#  9      b    19     9      19 1.208563e-15  5.595623e-17   0.3 2.702016e-15 1.968677e-04  0.030310330
# 10      b    20    10      20 1.709167e-15 -1.264434e-15   0.6 2.303179e-15 6.157097e-01 -0.906060815
答案 1 :(得分:2)
使用tidyverse包,其中包含dplyr、purrr、tidyr:
library(tidyverse)
使用nest和map:
# Fit one lm per `type1` group and expand the fitted values back into rows.
df1 %>%
  group_by(type1) %>%
  nest() %>%
  # Replace each group's data with its fitted values. fitted() is used because
  # `$fit` only reached `fitted.values` through `$` partial matching.
  mutate(data = map(data, ~ fitted(lm(y ~ x, data = .x)))) %>%
  # Name the list-column explicitly: a bare unnest() warns in tidyr >= 1.0.
  unnest(data)
type1 data
1 a 11
2 a 12
3 a 13
4 a 14
5 a 15
6 b 16
# etc
答案 2 :(得分:2)
我们可以将modelr与tidyverse一起使用。add_predictions函数很方便。这是一个例子。
# Load package
library(tidyverse)
library(modelr)
# Build the example data frame: two groups ("a" and "b") of five rows each.
df1 <- data.frame(
  type1 = rep(c("a", "b"), each = 5),
  x = 1:10,
  y = 11:20
)
# Create the nested data frame: group by `type1`, then pack each group's rows
# into a list-column.
df2 <- nest(group_by(df1, type1))
# Fit the linear regression y ~ x on one data frame.
#   df: data frame supplying the columns `x` and `y`.
# Returns the fitted `lm` object.
lm_model <- function(df) lm(formula = y ~ x, data = df)
# Fit model: one lm per group, stored in the list-column `model`
df3 <- df2 %>%
  mutate(model = map(data, lm_model))
# Add prediction: pair each group's data with that group's own model
df4 <- df3 %>%
  mutate(Pred = map2(data, model, add_predictions))
# Unnest the data frame
# Keep only the group key and the predictions before unnesting: tidyr >= 1.0
# no longer drops the other list-columns (`data`, `model`), so without this
# step they would be carried into the result.
df5 <- df4 %>%
  select(type1, Pred) %>%
  unnest(Pred)
df5
# A tibble: 10 x 4
type1 x y pred
<fctr> <int> <int> <dbl>
1 a 1 11 11
2 a 2 12 12
3 a 3 13 13
4 a 4 14 14
5 a 5 15 15
6 b 6 16 16
7 b 7 17 17
8 b 8 18 18
9 b 9 19 19
10 b 10 20 20
答案 3 :(得分:0)
我想使用data.table包提供一个灵活的答案:
library(data.table)
# Per-group regression with data.table: for each `type1`, keep x and y and add
# the fitted values of lm(y ~ x) computed on that group's rows.
# Plain `[` indexing replaces the pipe into `.[`, which needed magrittr to be
# attached. fitted() replaces `$fit`, which only reached `fitted.values`
# through `$` partial matching.
data.table(df1)[
  ,
  .(x, y, fit = fitted(lm(y ~ x))),
  by = type1
]