使用并保存多个lm拟合的lm摘要

时间:2014-05-21 20:42:22

标签: r dataframe subset lm

我参与了一项动物试验,试图获取几组动物的运动信息(通常是4组,每组12只,但并不总是如此)。每次试验最终得到的数据框都类似下面这样。

> dput(aa)
structure(list(Tiempo = c(618.4, 618.6, 618.8, 619, 619.2, 619.4, 
619.6, 619.8, 620, 620.2, 620.4), UT1 = c(0, 0, 15, 19, 26, 27, 
29, 37, 42, 44, 45), UT2 = c(0, 0, 0, 0, 0, 1, 18, 19, 21, 21, 
21), UT3 = c(0, 2, 3, 3, 3, 3, 16, 19, 20, 20, 20), UT4 = c(0, 
0, 0, 0, 0, 0, 5, 17, 29, 34, 39), UT5 = c(0, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1), UT6 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), UT7 = c(0, 
0, 1, 2, 2, 3, 4, 6, 7, 7, 8), UT8 = c(0, 19, 20, 23, 24, 25, 
33, 80, 119, 122, 130), UT9 = c(0, 1, 1, 1, 1, 3, 6, 9, 19, 19, 
19), UT10 = c(0, 0, 0, 0, 0, 1, 2, 3, 10, 12, 14), TR1 = c(0, 
0, 0, 0, 0, 0, 0, 1, 2, 2, 2), TR2 = c(0, 0, 0, 0, 0, 0, 2, 19, 
32, 37, 43), TR3 = c(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), TR4 = c(0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), TR5 = c(0, 0, 0, 0, 0, 0, 13, 
18, 20, 22, 26), TR6 = c(0, 2, 11, 20, 25, 29, 37, 40, 41, 42, 
43), TR7 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), TR8 = c(0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0), TR9 = c(0, 0, 4, 9, 16, 19, 23, 27, 
31, 33, 34), TR10 = c(0, 1, 9, 25, 32, 41, 49, 49, 51, 57, 60
), UT1.1 = c(0, 10, 15, 17, 23, 31, 37, 48, 53, 57, 58), UT2.1 = c(0, 
1, 1, 1, 1, 2, 2, 4, 4, 4, 4), UT3.1 = c(0, 2, 11, 14, 20, 22, 
24, 25, 26, 26, 26), UT4.1 = c(0, 0, 0, 0, 0, 0, 0, 11, 13, 13, 
14), UT5.1 = c(0, 3, 5, 7, 18, 19, 19, 27, 37, 39, 42), UT6.1 = c(0, 
0, 0, 0, 0, 0, 2, 2, 3, 4, 4), UT7.1 = c(0, 0, 2, 8, 9, 9, 12, 
16, 18, 18, 18), UT8.1 = c(0, 0, 1, 8, 13, 15, 44, 68, 80, 89, 
94), UT9.1 = c(0, 1, 1, 1, 1, 2, 3, 5, 9, 10, 10), UT10.1 = c(0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), UT11 = c(0, 12, 17, 17, 18, 34, 
74, 116, 131, 145, 170), UT12 = c(0, 1, 2, 3, 3, 3, 5, 14, 21, 
22, 24), TR1.1 = c(0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1), TR2.1 = c(0, 
0, 0, 11, 16, 19, 40, 94, 121, 134, 145), TR3.1 = c(0, 0, 0, 
2, 3, 5, 6, 6, 6, 7, 7), TR4.1 = c(0, 0, 0, 1, 1, 1, 1, 1, 4, 
4, 5), TR5.1 = c(0, 24, 27, 28, 29, 37, 86, 151, 212, 258, 288
), TR6.1 = c(0, 0, 1, 1, 1, 2, 5, 9, 12, 12, 13), TR7.1 = c(0, 
4, 7, 28, 47, 70, 108, 125, 127, 127, 127), TR8.1 = c(0, 1, 2, 
2, 2, 2, 3, 3, 4, 4, 4), TR9.1 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0), TR10.1 = c(0, 1, 1, 1, 1, 1, 13, 40, 41, 45, 49), TR11 = c(0, 
0, 0, 1, 4, 8, 10, 11, 17, 23, 25), TR12 = c(0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0)), .Names = c("Tiempo", "UT1", "UT2", "UT3", "UT4", 
"UT5", "UT6", "UT7", "UT8", "UT9", "UT10", "TR1", "TR2", "TR3", 
"TR4", "TR5", "TR6", "TR7", "TR8", "TR9", "TR10", "UT1.1", "UT2.1", 
"UT3.1", "UT4.1", "UT5.1", "UT6.1", "UT7.1", "UT8.1", "UT9.1", 
"UT10.1", "UT11", "UT12", "TR1.1", "TR2.1", "TR3.1", "TR4.1", 
"TR5.1", "TR6.1", "TR7.1", "TR8.1", "TR9.1", "TR10.1", "TR11", 
"TR12"), row.names = c(NA, -11L), class = "data.frame")

我的目标是以Tiempo变量作为x,为每一列所代表的个体分别拟合一个线性模型(lm),所以我这样做:

# Fit one simple linear regression per column of `aa` (MARGIN = 2), using
# aa$Tiempo as the predictor. The result is a list of `lm` objects named after
# the columns. Note that Tiempo itself is also a column of `aa`, so it gets a
# (trivial) fit too. `apply()` coerces the data frame to a matrix first.
fit<-apply(aa,2,function(x) lm(x~aa$Tiempo))

它运行得很好,但问题是所有有价值的(以及无用的)信息都存储在这些lm对象中,我无法高效地把数据提取出来。我的对象看起来像这样:

summary(fit)
       Length Class Mode
Tiempo 12     lm    list
UT1    12     lm    list
UT2    12     lm    list
UT3    12     lm    list
UT4    12     lm    list
UT5    12     lm    list
UT6    12     lm    list
UT7    12     lm    list
UT8    12     lm    list
UT9    12     lm    list
UT10   12     lm    list
TR1    12     lm    list
TR2    12     lm    list
TR3    12     lm    list
TR4    12     lm    list
TR5    12     lm    list
TR6    12     lm    list
TR7    12     lm    list
TR8    12     lm    list
TR9    12     lm    list
TR10   12     lm    list
UT1.1  12     lm    list
UT2.1  12     lm    list
UT3.1  12     lm    list
UT4.1  12     lm    list
UT5.1  12     lm    list
UT6.1  12     lm    list
UT7.1  12     lm    list
UT8.1  12     lm    list
UT9.1  12     lm    list
UT10.1 12     lm    list
UT11   12     lm    list
UT12   12     lm    list
TR1.1  12     lm    list
TR2.1  12     lm    list
TR3.1  12     lm    list
TR4.1  12     lm    list
TR5.1  12     lm    list
TR6.1  12     lm    list
TR7.1  12     lm    list
TR8.1  12     lm    list
TR9.1  12     lm    list
TR10.1 12     lm    list
TR11   12     lm    list
TR12   12     lm    list 

每只动物都是这样的

 summary(fit$UT1)

Call:
lm(formula = x ~ aa$Tiempo)

Residuals:
   Min     1Q Median     3Q    Max 
-6.873 -1.845  1.182  2.314  4.918 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) -14642.700   1104.825  -13.25 3.29e-07 ***
aa$Tiempo       23.682      1.784   13.28 3.24e-07 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 3.742 on 9 degrees of freedom
Multiple R-squared:  0.9514,    Adjusted R-squared:  0.946 
F-statistic: 176.3 on 1 and 9 DF,  p-value: 3.24e-07

我希望把所有动物的摘要信息(至少是系数和R平方)整理到一个数据框中,以便继续进行统计分析。有了这些信息,我还可以考虑编写一个函数,判断R平方是否低于某个固定阈值:若低于阈值,我就应该检查该拟合是否合理(如果该动物确实表现不佳,则将其剔除)。此外,我需要找到一种可重复使用的方法,因为我现在用的是

# Manual approach from the question: build a data frame with one column per
# animal, each column holding that model's coefficient vector. The trailing
# `...` is the asker's elision of the remaining animals, not runnable code.
FIT<-data.frame(UT1=fit$UT1$coefficients,
              UT2=fit$UT2$coefficients,
              UT3=fit$UT3$coefficients,...)

这种方法远远达不到我想要的效果,而且非常不稳健。我搜索了一下,找到了coef函数,但是

coef(fit)
NULL

1 个答案:

答案 0 :(得分:1)

使用您的fit列表,您可以用下面的代码提取系数和r平方值:

# Fit one linear model per column of `aa`, with aa$Tiempo as the predictor;
# `fit` is a list of lm objects named after the columns.
fit<-apply(aa,2,function(x) lm(x~aa$Tiempo))
# For every model, collect its coefficients together with the R-squared and
# adjusted R-squared taken from summary(). sapply() binds the per-model vectors
# as columns, so t() transposes the result to one row per model.
mysummary <- t(sapply(fit, function(x) {
    # summary() is computed once per model and reused for both R-squared values.
    ss<-summary(x); c(coef(x), 
        r.square=ss$r.squared, adj.r.squared=ss$adj.r.squared)
}))

我们使用sapply遍历您创建的列表,并从模型中提取系数,并从摘要中提取r平方值。输出是

> mysummary
       (Intercept)   aa$Tiempo  r.square adj.r.squared
Tiempo      0.0000   1.0000000 1.0000000     1.0000000
UT1    -14642.7000  23.6818182 0.9514231     0.9460256
UT2     -8662.4182  14.0000000 0.7973105     0.7747894
UT3     -7535.5091  12.1818182 0.8404400     0.8227111
...