我已经尝试过多次调用 quantile,但由于我需要以 0.05 为步长的分位数,而且数据量非常大,这样做效率极低。
#' Summarise every "value" column per group (mean, median, quantiles).
#'
#' @param table A data.table; it is indexed with the `[, j, by, .SDcols]` form.
#' @param IDs Character vector of grouping column names, passed to `by`.
#' @param q.increment Step between quantile probabilities on [0, 1].
#' @return The grouped result: a `summary` column holding the statistic names
#'   plus one list column per column whose name contains "value". The whole
#'   quantile vector sits in a single cell (it is not expanded to one row per
#'   quantile).
ff <- function(table, IDs, q.increment = 0.05) {
  # Every entry is named so that `summary` never contains an empty label.
  fun <- list(
    mean = function(x) mean(x, na.rm = TRUE),
    median = function(x) median(x, na.rm = TRUE),
    quantile = function(x) {
      stats::quantile(x, probs = seq(0, 1, q.increment), na.rm = TRUE)
    }
  )
  fun.names <- names(fun)
  # I() wraps the per-statistic list for each column; presumably this is what
  # keeps the results as list cells alongside `summary`.
  table[, c(summary = list(fun.names),
            lapply(.SD, function(col) I(lapply(fun, function(f) f(col))))),
        by = IDs, .SDcols = grep("value", names(table))]
}
# Load data.table explicitly so the example runs in a fresh session.
library(data.table)

# Toy data: three countries, two random numeric columns (unseeded, so the
# values differ between runs).
dt <- data.table(
  country = c("FR", "US", "HU", "HU", "FR", "FR", "US", "US", "US", "HU"),
  value1 = rnorm(10),
  value2 = rnorm(10)
)
abc <- ff(dt, c("country"))
我不希望所有分位数都挤在同一个单元格里,而是希望每个分位数各占一行。
答案 0 :(得分:1)
一种方法是先用 as.list 将 quantile 的结果转换为 list,然后以 recursive = FALSE 调用 unlist:
# Wide per-country summary: mean and median of each column first, followed by
# the quantiles at 5% steps. unlist(recursive = FALSE) flattens one level so
# every statistic becomes its own column.
dt[, {
  centre_stats <- function(x) list(mean = mean(x), median = median(x))
  quantile_stats <- function(x) as.list(quantile(x, probs = seq(0, 1, 0.05)))
  c(
    unlist(lapply(.SD, centre_stats), recursive = FALSE),
    unlist(lapply(.SD, quantile_stats), recursive = FALSE)
  )
}, by = country]
也可以将其封装成一个函数:
#' Per-group mean, median and quantiles of every "value" column, wide format.
#'
#' @param data A data.table; it is indexed with the `[, j, by, .SDcols]` form.
#' @param IDs Character vector of grouping column names, passed to `by`.
#' @param q.increment Step between quantile probabilities on [0, 1].
#' @return One row per group with one column per (column, statistic) pair,
#'   named like `value1.mean`, `value1.median`, `value1.quantile.5%`.
ff1 <- function(data, IDs, q.increment = 0.05) {
  # The probability grid does not depend on the group, so build it once.
  probs <- seq(0, 1, q.increment)
  f1 <- function(x) {
    list(
      mean = mean(x, na.rm = TRUE),
      median = median(x, na.rm = TRUE),
      # na.rm = TRUE matches mean/median; without it quantile() stops with an
      # error as soon as a column contains NA.
      quantile = as.list(quantile(x, probs = probs, na.rm = TRUE))
    )
  }
  # Two unlist(recursive = FALSE) passes: the first flattens columns into
  # statistics, the second expands the nested quantile list into columns.
  data[, unlist(unlist(lapply(.SD, f1), recursive = FALSE), recursive = FALSE),
       by = IDs, .SDcols = grep("value", names(data))]
}
# Wide summary of dt grouped by country (default 0.05 quantile step).
out <- ff1(data = dt, IDs = "country")
如果需要长格式,可以使用 melt:
# Statistic labels in column order: strip the "<column>." prefix and the
# optional "quantile." infix. Anchoring at the start keeps fractional labels
# such as "2.5%" intact (a greedy ".*\\." would cut them down to "5%").
# names(out)[-1] skips the first column, i.e. this assumes a single ID column.
nm1 <- unique(sub("^[^.]*\\.(quantile\\.)?", "", names(out)[-1]))
# One measure group per original column; the trailing "\\." stops "^value1"
# from also matching columns such as "value10.*". melt() numbers the
# statistics in `summary`, which is then mapped back to the labels in nm1.
melt(
  out,
  measure.vars = patterns("^value1\\.", "^value2\\."),
  variable.name = "summary"
)[, summary := nm1[summary]][]
# country summary value1 value2
# 1: FR mean -0.70362861 -0.37004727
# 2: US mean -0.17024421 -0.10986835
# 3: HU mean 0.35754440 0.43067053
# 4: FR median -0.25453398 -0.72539656
# 5: US median -0.08068703 0.15472558
# 6: HU median 0.61732639 0.30846369
# 7: FR 0% -1.60473855 -1.34258692
# 8: US 0% -0.87641285 -2.04386860
# 9: HU 0% -0.37871048 0.08147549
#10: FR 5% -1.46971809 -1.28086789
#11: US 5% -0.80765939 -1.72964937
#...