手动计算R中的F统计量:mtcars mpg~cyl ANOVA

时间:2016-10-04 16:09:52

标签: r anova

enter image description here

sumsqwithgrp   <- 0
ybar_j_mean    <- tapply(mtcars$mpg, mtcars$cyl, mean)
mtcars_ordered <- mtcars[order(mtcars$cyl), ]
count          <- 1

# Number of levels.
m <- length(ybar_j_mean)

for(j in 1:m) {

    # The corresponding 'cyl'-value of 'ybar_j_mean',
    # i.e. the value between '*':
    # > ybar_j_mean
    #       *4*        6        8 
    #  26.66364 19.74286 15.10000 
    ybar_j_index <- as.numeric(names(ybar_j_mean)[j])

    # Relevant subset of observations at 'j' level.
    mtcars_j     <- mtcars[mtcars$cyl==ybar_j_index, ]

    # The number of observations of the subset.
    nj           <- nrow(mtcars_j)

    sum_j <- 0
    for(i in 1:nj) {
        sum_j <- sum_j + (mtcars_j[i, "mpg"] - ybar_j_mean[j])^2 
    }
    sumsqwithgrp <- sumsqwithgrp + sum_j
} 

sumsqbtwgrp <- 0
total_mean  <- mean(mtcars$mpg)
count <- 1

for(j in 1:m) {
    ybar_j_index <- as.numeric(names(ybar_j_mean)[j])
    mtcars_j     <- mtcars[mtcars$cyl==ybar_j_index, ]
    nj           <- nrow(mtcars_j)

    sumsqbtwgrp  <- sumsqbtwgrp + nj*(ybar_j_mean[j] - total_mean)^2
}

现在:

> df_1 <- m-1
> df_1 <- nrow(mtcars)-m
> (sumsqbtwgrp/df_1)/(sumsqwithgrp/df_2)
39.69752 

可是:

> summary(aov(mpg~cyl, data=mtcars))
            Df Sum Sq Mean Sq F value   Pr(>F)    
cyl          1  817.7   817.7   79.56 6.11e-10 ***
Residuals   30  308.3    10.3                     
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

$ 39.7 \ neq 79.6 $。

我做错了什么?

1 个答案:

答案 0 :(得分:2)

您需要将cyl设置为因子:

summary(aov(mpg~as.factor(cyl), data=mtcars))

请点击此处了解详情:https://stats.stackexchange.com/questions/112552/how-to-correct-the-degrees-of-freedom