我想统计 Col1 在每个 Name 分组下出现的次数,并忽略缺失值(NA)。每个分组的结果应该是 2,但是无论使用 length 还是 count 函数,返回的总是 3。
# Reproducible example: six rows in two Name groups, with NAs in Col1 and Col2.
M <- data.frame(
  Name = c("name", "name1", "name", "name1", "name", "name1"),
  Col1 = c(NA, 1, 3, 4, 5, NA),
  Col2 = c(1, 1, NA, 5, 8, 4)
)

# Per-group mean, sd and n of Col1 (the second column of M). mean() and sd()
# drop NAs through na.rm, but length() counts every element, NAs included.
myData <- aggregate(
  M[, 2],
  by = list(VAR = M$Name),
  FUN = function(values) {
    c(
      mean = mean(values, na.rm = TRUE),
      sd = sd(values, na.rm = TRUE),
      n = length(values)
    )
  }
)
# aggregate() returns the three statistics as a single matrix column;
# do.call(data.frame, ...) spreads them into x.mean, x.sd and x.n.
myData <- do.call(data.frame, myData)
myData
# Actual output: n is 3 for every group.
#> x.n
# 3
# Desired output: n should be 2, because each Name group has only two
# non-missing Col1 values.
#> x.n
# 2
答案 0 :(得分:1)
尝试修改您的函数,用 length(na.omit(x)) 代替 length(x),如下所示:
# Example data: six rows in two Name groups, with NAs in Col1 and Col2.
M <- data.frame(
  Name = c("name", "name1", "name", "name1", "name", "name1"),
  Col1 = c(NA, 1, 3, 4, 5, NA),
  Col2 = c(1, 1, NA, 5, 8, 4)
)

# Per-group mean, sd and count of Col1 (the second column of M).
# na.omit() drops the NAs before length() is taken, so n counts only the
# non-missing values. TRUE is spelled out because T is an ordinary variable
# that can be reassigned.
myData <- aggregate(
  M[, 2],
  by = list(VAR = M$Name),
  FUN = function(x) {
    c(
      mean = mean(x, na.rm = TRUE),
      sd = sd(x, na.rm = TRUE),
      n = length(na.omit(x))
    )
  }
)
# aggregate() returns the three statistics as a single matrix column;
# do.call(data.frame, ...) spreads them into x.mean, x.sd and x.n.
myData <- do.call(data.frame, myData)
# VAR x.mean x.sd x.n
# 1 name 4.0 1.414214 2
# 2 name1 2.5 2.121320 2
答案 1 :(得分:1)
这是一个 tidyverse 解决方案:
library(tidyverse)

# Summarise Col1 per Name, counting only non-missing values.
# M is the example data frame built in the earlier snippet.
M %>%
  # Wide to long: one row per (Name, column, value). pivot_longer() replaces
  # the superseded gather().
  pivot_longer(-Name, names_to = "k", values_to = "v") %>%
  # Keep only the Col1 rows that contain no NA in any column.
  filter(complete.cases(.) & k == "Col1") %>%
  group_by(Name) %>%
  # NAs are already gone, so n() is the number of non-missing values.
  summarise(mean = mean(v), sd = sd(v), n = n())
## A tibble: 2 x 4
# Name mean sd n
# <fct> <dbl> <dbl> <int>
#1 name 4.00 1.41 2
#2 name1 2.50 2.12 2
说明:我们先将数据从宽格式转换为长格式,用 complete.cases 删除包含 NA 的行(并只保留 Col1 的记录),然后按 Name 分组,计算各组的摘要统计信息。