有没有快速的方法来计算数值向量的均值(mean)和标准差(sd),然后把它们放进一个 data.frame 中?假设我有几个像下面这样的变量:
# Three example data frames: ten random normal scores each, with the first
# six rows labelled status 0 and the last four labelled status 1.
test1 <- data.frame(score = rnorm(10), status = rep(c(0, 1), times = c(6, 4)))
test2 <- data.frame(score = rnorm(10), status = rep(c(0, 1), times = c(6, 4)))
test3 <- data.frame(score = rnorm(10), status = rep(c(0, 1), times = c(6, 4)))
> test1
score status
1 0.6648 0
2 -0.5158 0
3 -0.0297 0
4 -0.1086 0
5 -1.8708 0
6 0.7908 0
7 0.4760 1
8 -0.4841 1
9 -0.3451 1
10 -0.0772 1
如何构造一个每个单元格都是 mean(sd) 形式的 data.frame?我依稀记得曾经用某个命令做过这件事,但想不起来是哪个了。我并不想对每个变量分别调用 mean() 和 sd(),然后再把结果逐个放进 data.frame 中。下面就是我希望最终得到的 data.frame 的样子(实际上会是很长的一个列表):
0 1
test1 0.9(0.1) 0.1(0.03)
test2 0.2(0.1) 0.2(0.03)
test3 0.1(0.2) 0.1(0.04)
答案 0 :(得分:2)
以下是我的尝试。myfun() 用来计算两列(或任意多列)的 mean(sd)。各个数据框被放进一个列表中,以便在 sapply() 中使用。
# Fix the RNG seed so the example output below is reproducible.
set.seed(1237)

# Three example data frames: ten random normal scores each, with the first
# six rows labelled status 0 and the last four labelled status 1.
test1 <- data.frame(score = rnorm(10), status = rep(c(0, 1), times = c(6, 4)))
test2 <- data.frame(score = rnorm(10), status = rep(c(0, 1), times = c(6, 4)))
test3 <- data.frame(score = rnorm(10), status = rep(c(0, 1), times = c(6, 4)))

# Collect the data frames in an (unnamed) list so they can be iterated over.
tests <- list(test1, test2, test3)
#' Format every column of a data frame as "mean(sd)".
#'
#' @param x A data frame (or list) whose elements are numeric vectors.
#' @return A character vector with one "mean(sd)" string per column, named
#'   after the columns. Both statistics are rounded to one decimal place.
myfun <- function(x) {
  # vapply() pins the result to exactly one string per column, so an empty
  # input gives a zero-length character vector instead of the empty list
  # that sapply() would return.
  vapply(
    x,
    function(col) paste0(round(mean(col), 1), "(", round(sd(col), 1), ")"),
    character(1)
  )
}
# Apply myfun() to every data frame in `tests`, then transpose the result so
# that each row corresponds to one data frame and each column to one variable.
t(sapply(tests, myfun))
score status
[1,] "-0.2(1.1)" "0.4(0.5)"
[2,] "0.3(1.2)" "0.4(0.5)"
[3,] "0.1(0.9)" "0.4(0.5)"
答案 1 :(得分:0)
#' Summarise one measurement column by group.
#'
#' For every combination of the grouping variables this computes the count,
#' mean, standard deviation, standard error of the mean and the half-width
#' of a t-based confidence interval, all rounded to `dec` decimal places.
#'
#' @param data A data frame.
#' @param measurevar Name (string) of the column to summarise.
#' @param groupvars Character vector with the names of the grouping columns.
#' @param na.rm If `TRUE`, `NA`s are ignored when computing N, mean and sd.
#' @param conf.interval Confidence level used for the `ci` column.
#' @param .drop Passed on to `plyr::ddply()`.
#' @param dec Number of decimal places the mean, sd, se and ci are rounded to.
#' @return A data frame with the grouping columns, `N`, the group mean (in a
#'   column named after `measurevar`), `sd`, `se` and `ci`.
summarySE <- function(data = NULL, measurevar, groupvars = NULL, na.rm = TRUE,
                      conf.interval = 0.95, .drop = TRUE, dec = 2) {
  # Fail with a clear message when plyr is missing; require() would only warn
  # and the function would then die on an unrelated "could not find function".
  if (!requireNamespace("plyr", quietly = TRUE)) {
    stop("Package 'plyr' is required by summarySE().", call. = FALSE)
  }

  # Like length(), but able to ignore missing values.
  count_values <- function(x, na.rm = FALSE) {
    if (na.rm) sum(!is.na(x)) else length(x)
  }

  datac <- plyr::ddply(
    data, groupvars,
    .fun = function(xx) {
      values <- xx[[measurevar]]
      c(
        N = count_values(values, na.rm = na.rm),
        mean = mean(values, na.rm = na.rm),
        sd = sd(values, na.rm = na.rm)
      )
    },
    .drop = .drop
  )

  # Namespace-qualified on purpose: an attached dplyr masks rename() with an
  # incompatible function.
  datac <- plyr::rename(datac, c(mean = measurevar))

  # Standard error of the mean and the t-multiplier for the requested level
  # (e.g. conf.interval = 0.95 uses the 0.975 quantile with N - 1 df).
  datac$se <- datac$sd / sqrt(datac$N)
  ci_mult <- qt(conf.interval / 2 + 0.5, datac$N - 1)
  datac$ci <- datac$se * ci_mult

  # Round only after all derived quantities have been computed.
  datac[[measurevar]] <- round(datac[[measurevar]], dec)
  datac$sd <- round(datac$sd, dec)
  datac$se <- round(datac$se, dec)
  datac$ci <- round(datac$ci, dec)

  datac
}
上述函数来自 here,已做修改。
现在用 rbind 合并所有数据框,并调用 summarySE:
# Stack the three data frames into one long data frame.
dat <- rbind(test1, test2, test3)

# Tag every row with the data frame it came from. The label counts are taken
# from each frame's own row count, so the frames may differ in size.
dat$ID <- rep(
  c("test1", "test2", "test3"),
  times = c(nrow(test1), nrow(test2), nrow(test3))
)

# Mean, sd, se and CI of `score` for every ID x status combination.
summarySE(dat, "score", c("ID", "status"))
ID status N score sd se ci
1 test1 0 6 -0.59 0.56 0.23 0.59
2 test1 1 4 0.36 2.10 1.05 3.34
3 test2 0 6 -0.13 0.81 0.33 0.85
4 test2 1 4 0.95 1.32 0.66 2.11
5 test3 0 6 -0.27 0.55 0.23 0.58
6 test3 1 4 0.05 0.99 0.50 1.58