计算data.frame的摘要统计信息

时间:2015-04-20 08:37:26

标签: r

有没有一种快速的方法,可以计算数值向量的均值(mean)和标准差(sd),并把结果放进一个data.frame?假设我有几个像下面这样的变量:

# Three example data frames: 10 random normal scores each, with status 0 for
# the first six rows and status 1 for the last four.
test1 <- data.frame(score = rnorm(10), status = rep(c(0, 1), times = c(6, 4)))
test2 <- data.frame(score = rnorm(10), status = rep(c(0, 1), times = c(6, 4)))
test3 <- data.frame(score = rnorm(10), status = rep(c(0, 1), times = c(6, 4)))

> test1
     score status
1   0.6648      0
2  -0.5158      0
3  -0.0297      0
4  -0.1086      0
5  -1.8708      0
6   0.7908      0
7   0.4760      1
8  -0.4841      1
9  -0.3451      1
10 -0.0772      1

如何构造一个每个单元格都是"mean(sd)"形式的data.frame?我依稀记得曾经用某个命令做过这件事,但想不起来是哪个了。我不想对每个变量逐一调用mean()和sd(),再把结果一个个放进data.frame。下面就是我希望最终data.frame呈现的样子;实际会是很长的一张表:

                  0           1
 test1     0.9(0.1)   0.1(0.03)
 test2     0.2(0.1)   0.2(0.03)
 test3     0.1(0.2)   0.1(0.04)

2 个答案:

答案 0 :(得分:2)

以下是我的尝试。

myfun()对两列(或任意数量的列)分别计算mean(sd)。各个数据框被放入一个列表,便于用sapply()这类函数对每个数据框逐一调用myfun()。

# Fix the RNG seed so the three simulated data frames are reproducible.
set.seed(1237)
test1 <- data.frame(score = rnorm(10), status = rep(c(0, 1), times = c(6, 4)))
test2 <- data.frame(score = rnorm(10), status = rep(c(0, 1), times = c(6, 4)))
test3 <- data.frame(score = rnorm(10), status = rep(c(0, 1), times = c(6, 4)))

# Collect the data frames in a list so one function can be applied to each.
tests <- list(test1, test2, test3)

#' Format each column of a data frame as "mean(sd)"
#'
#' @param x A data frame (or list) whose elements are numeric vectors.
#' @param digits Number of decimal places that both the mean and the
#'   standard deviation are rounded to. Defaults to 1.
#' @return A character vector with one "mean(sd)" string per column of `x`,
#'   named after the columns. A zero-column input gives an empty character
#'   vector.
myfun <- function(x, digits = 1) {
  # vapply() pins every result to a single string, so the return type is
  # always character; sapply() would return list() for zero-column input.
  vapply(
    x,
    function(column) {
      paste0(round(mean(column), digits), "(", round(sd(column), digits), ")")
    },
    character(1)
  )
}

# One row per data frame in `tests`, one column per summarised variable.
do.call(rbind, lapply(tests, myfun))

     score       status    
[1,] "-0.2(1.1)" "0.4(0.5)"
[2,] "0.3(1.2)"  "0.4(0.5)"
[3,] "0.1(0.9)"  "0.4(0.5)"

答案 1 :(得分:0)

#' Summarise one measure by group: N, mean, sd, standard error and CI
#'
#' Needs the plyr package. plyr is called through its namespace (`plyr::`)
#' instead of being attached, so the result does not depend on which other
#' attached packages define `rename()` or `ddply()`.
#'
#' @param data A data frame.
#' @param measurevar Name (a single string) of the column to summarise.
#' @param groupvars Grouping column names, passed to `plyr::ddply()` as the
#'   splitting variables.
#' @param na.rm Passed to `mean()` and `sd()`; when `TRUE`, `N` counts only
#'   the non-missing values.
#' @param conf.interval Confidence level used for the half-width `ci`.
#' @param .drop Passed on to `plyr::ddply()`.
#' @param dec Number of decimal places the mean, `sd`, `se` and `ci` are
#'   rounded to.
#' @return A data frame with the grouping columns, `N`, the group mean (in a
#'   column named after `measurevar`), `sd`, `se` and `ci`.
summarySE <- function(data = NULL, measurevar, groupvars = NULL, na.rm = TRUE,
                      conf.interval = 0.95, .drop = TRUE, dec = 2) {
  # Fail early with a clear message instead of deep inside ddply().
  stopifnot(
    is.data.frame(data),
    is.character(measurevar),
    length(measurevar) == 1L,
    measurevar %in% names(data)
  )
  # require() only returns FALSE when the package is missing; stop instead.
  if (!requireNamespace("plyr", quietly = TRUE)) {
    stop("summarySE() requires the 'plyr' package.", call. = FALSE)
  }

  # Number of observations; missing values are skipped when na.rm is TRUE.
  count_values <- function(x) {
    if (na.rm) sum(!is.na(x)) else length(x)
  }

  datac <- plyr::ddply(
    data, groupvars,
    .drop = .drop,
    .fun = function(group_df) {
      values <- group_df[[measurevar]]
      c(
        N = count_values(values),
        mean = mean(values, na.rm = na.rm),
        sd = sd(values, na.rm = na.rm)
      )
    }
  )
  # The mean column carries the name of the measured variable.
  datac <- plyr::rename(datac, c(mean = measurevar))

  datac$se <- datac$sd / sqrt(datac$N)
  # Two-sided t quantile with N - 1 degrees of freedom.
  ci_mult <- qt(conf.interval / 2 + 0.5, datac$N - 1)
  datac$ci <- datac$se * ci_mult

  # Round last, so se and ci are derived from the unrounded values.
  datac[, measurevar] <- round(datac[, measurevar], dec)
  datac$sd <- round(datac$sd, dec)
  datac$se <- round(datac$se, dec)
  datac$ci <- round(datac$ci, dec)
  datac
}

该函数来自here(原文中"here"是一个超链接,此处链接已丢失),已作修改。

现在用rbind()把所有数据框合并成一个,再调用summarySE:

# Stack the three data frames into one long data frame.
dat <- rbind(test1, test2, test3)

# Label every row with the data frame it came from. The per-frame row counts
# keep the labels aligned even when the data frames differ in length.
dat$ID <- rep(
  c("test1", "test2", "test3"),
  times = c(nrow(test1), nrow(test2), nrow(test3))
)

# Summarise score for every combination of source data frame and status.
summarySE(dat, "score", c("ID", "status"))

     ID status N score   sd   se   ci
1 test1      0 6 -0.59 0.56 0.23 0.59
2 test1      1 4  0.36 2.10 1.05 3.34
3 test2      0 6 -0.13 0.81 0.33 0.85
4 test2      1 4  0.95 1.32 0.66 2.11
5 test3      0 6 -0.27 0.55 0.23 0.58
6 test3      1 4  0.05 0.99 0.50 1.58