我正在尝试计算一个(或多个)变量按另一个变量分组后的中位数,使用的是 svyby。问题是某些组的非 NA 值可能少于 2 个。
我编写了下面这个函数,希望对非 NA 值少于 2 个的组返回 NA,但它不起作用:
# Load the survey package. library() stops with an error when the package is
# missing, whereas require() only warns and returns FALSE, which would defer
# the failure to the first svydesign() call.
library(survey)

# Toy data: 10 rows in four groups (A: 4, B: 3, C: 1, D: 2) with two random
# variables and a random weight column.
exemplo <- data.frame(
  ID = 1:10,
  var = rnorm(10),
  var2 = rnorm(10, 5),
  grupo = factor(c('A', 'A', 'A', 'A', 'B', 'B', 'B', 'C', 'D', 'D')),
  peso = rchisq(10, 5)
)

# Set `var` (column 2) to NA in the last row, so group D keeps only one
# non-NA value of `var`.
exemplo[10, 2] <- NA

# Survey design with ids taken from `ID` and weights from `peso`.
amostra <- svydesign(ids = ~ ID, data = exemplo, weights = ~ peso)
# Wrapper around survey::svyquantile() that falls back to an all-NA matrix
# when the underlying call raises an error.
#
# Arguments:
#   x          One-sided formula naming the variable(s); its term labels are
#              used as the row names of the result.
#   design     Design object, forwarded to survey::svyquantile().
#   quantiles  Vector of probabilities; used as the column names.
#   ...        Further arguments forwarded to survey::svyquantile().
#
# Returns the value of survey::svyquantile() on success, or a
# length(terms) x length(quantiles) matrix of NA on error. In both cases the
# rows are labelled by term and the columns by quantile.
svyquantile <- function(x, design, quantiles, ...) {
  out <- try(
    survey::svyquantile(x = x, design = design, quantiles = quantiles, ...)
  )
  termos <- attr(terms(x), "term.labels")

  # inherits() instead of class(out) == "try-error": an object with more than
  # one class makes `==` return a vector, which `if` rejects on R >= 4.2.
  if (inherits(out, "try-error")) {
    # Size the placeholder by the `quantiles` argument. The previous
    # length(quantile) measured the stats::quantile function itself (always
    # 1), so naming the columns failed for more than one quantile.
    out <- matrix(NA_real_, nrow = length(termos), ncol = length(quantiles))
  }

  colnames(out) <- quantiles
  rownames(out) <- termos
  out
}
# Group-wise medians of var and var2 by grupo, computed with the svyquantile
# wrapper defined in this script.
# Does not work (original author's note).
svyby(
  ~ var + var2,
  ~ grupo,
  amostra,
  svyquantile,
  quantile = .5,
  na.rm = TRUE
)
有人有任何想法吗?
答案 0 :(得分:1)
首先,我不建议在 survey 包中使用 ~ var + var2 这种写法:只要其中任何一个变量有缺失值,两个变量的结果都会缺失这些观测。比较:
# Means requested jointly for var and var2 ...
svymean(x = ~var + var2, design = amostra, na.rm = TRUE)
# ... compared with the mean of var2 requested on its own.
svymean(x = ~var2, design = amostra, na.rm = TRUE)
与其覆盖 svyquantile,不如另写一个函数,只在条件合适时才调用它?
# Quantile helper intended as a drop-in for svyquantile(): returns a one-row
# NA matrix when the variable has fewer than two complete cases, and otherwise
# forwards every argument to svyquantile() untouched.
#
# Arguments arrive through `...`:
#   1st  one-sided formula naming a single variable
#   2nd  design object with a `variables` element indexable by column name
#   quantiles, either named `quantile` or given as the 3rd positional argument
myqt <- function(...) {
  args <- list(...)

  # A named `quantile` takes precedence; otherwise use the third argument.
  probs <- args$quantile
  if (is.null(probs)) {
    probs <- args[[3]]
  }

  design_data <- args[[2]]$variables
  # as.character() on a one-sided formula gives c("~", "<right-hand side>").
  var_name <- as.character(args[[1]])[2]
  n_complete <- sum(complete.cases(design_data[var_name]))

  # Enough data: pass everything to svyquantile() as normal.
  if (n_complete >= 2) {
    return(svyquantile(...))
  }

  # Too few complete cases (the condition where svyquantile breaks): build an
  # empty result with one row for the variable and one column per quantile.
  na_row <- matrix(rep(NA_real_, length(probs)), nrow = 1)
  dimnames(na_row) <- list(var_name, probs)
  na_row
}
另外请注意,您可以在 svyby 中用它来代替 svyquantile:
# Test cases: direct calls on single-group subsets of the design.
# Group A has four observations of var, so these are passed to svyquantile().
myqt(~ var, subset(amostra, grupo %in% 'A'), 0.5)
myqt(~ var, subset(amostra, grupo %in% 'A'), c(0.25, 0.5))
# Group C has a single observation, so these take the NA branch of myqt.
myqt(~ var, subset(amostra, grupo %in% 'C'), 0.5)
myqt(~ var, subset(amostra, grupo %in% 'C'), c(0.25, 0.5))

# myqt used as the function argument of svyby. keep.var is written as FALSE
# because `F` is an ordinary variable that can be reassigned.
svyby(
  ~ var, ~ grupo, amostra, myqt,
  quantile = 0.5, na.rm = TRUE, keep.var = FALSE
)
svyby(
  ~ var, ~ grupo, amostra, myqt,
  quantile = c(.25, .5, .75), na.rm = TRUE, keep.var = FALSE
)