R上具有自举置信区间的相关矩阵

时间:2018-07-30 13:10:37

标签: r matrix correlation bootstrapping

我想获得具有自举置信区间的相关矩阵。

我发现jmv软件包提供了具有p值和置信区间的相关矩阵;但在这种情况下,它们是通过Fisher转换计算的。

我找到了如何获取引导置信区间:

library(boot)
Brep = 5000
pearson <- function(d,i=c(1:n)){
d2 <- d[i,]
return(cor(d2$var1,d2$var2))}
bootcorr <- boot(data = data.frame(cbind(var1, var2)), statistic = 
pearson  ,R=Brep)
ciu <- boot.ci(bootcorr,conf=.95, type="basic")$basic[5]
cil <- boot.ci(bootcorr,conf=.95, type="basic")$basic[4]

如何获得具有自举置信区间的CorrMatrix类型的矩阵?

下面是一个示例:

DAT = matrix (data = c (rnorm (10), rnorm (10),rnorm (10)), nrow = 
10, byrow = F)
DAT = as.data.frame (DAT)
library('jmv')
corrMatrix(data= DAT, ci=T, vars = c("V1", "V2", "V3"))

非常感谢您的帮助!

1 个答案:

答案 0 :(得分:1)

下面是完成这项工作的函数,我已经写好了,所以如果它对其他人有用...:

# Rounding functions for HTML printing

printDec<-function(value,dec=1){
  ifelse (nchar(trunc(value))<4,sprintf(paste0("%.",dec,"f"),trunc(value*10^dec+sign(value*10^dec)*0.5)/10^dec),formatC(value, format = "e", digits = 1))
}

# Check if packages are installed

packages<-function(x){ 
  x<-as.character(match.call()[[2]])
  if (!require(x,character.only=TRUE)){
    install.packages(pkgs=x,repos="http://cran.r-project.org")
    require(x,character.only=TRUE)
  }
}

 # Prepare data frame + names

corrMat = function( data, method = c('pearson', 'kendall', 'spearman'), Brep = 5000, coeffBoot = T, pvalueBoot = T, limfactor = 5)
  # method : just for tests on categorical variables / 1 categorical + 1 numerical variables 
  # Brep :  bootstrap numbers
  # coeffBoot : is the printed coefficient the one estimated by bootstrap ?
  # pvalueBoot : is the printed pvalue the one estimated by bootstrap ?
  # limfactor : max number of levels for a variable to be considered as categorical

{ 
   packages(Kendall)

   data = as.data.frame(data)

   # Everything is made as numerical. Be careful with factor with string non supported by R like mathematical symbol
   # In this case, do as.numerci(as.factor())

   oldw <- getOption("warn")
   options(warn = -1) # temporary suppresion of warnings print
   for (i in 1:ncol(data)){ if ( (is.factor(data[,i]) |is.character(data[,i]))  == T & sum(is.na(as.numeric(as.character(data[,i]))))!= length(data[,i])){data[,i] = as.numeric(as.character(data[,i]))}
                            if ( (is.factor(data[,i]) |is.character(data[,i]))  == T  & sum(is.na(as.numeric(as.numeric(as.character(data[,i])))))== length(data[,i])){data[,i] = as.numeric(as.factor(data[,i]))} } 
   options(warn = oldw)

   listVar = names(data)
   if(is.null(listVar) == T) {listVar = colnames(data)}

   permut = combn(listVar, m=2)

   coeffcorr = matrix(NA, nrow = dim(permut)[2], ncol = 8)

   for (i in 1:dim(permut)[2] ) {

      if (length(unique(data[,t(permut)[i,1]])) > limfactor | length(unique(data[,t(permut)[i,2]])) > limfactor) # verifier que l'une des veux variables est numérique

        {



     z1  = replicate( Brep, sample(1:dim(data)[1], dim(data)[1], replace = T))
     res = do.call  ( rbind, apply(z1, 2, function(x){ res=cor.test(data[,t(permut)[i,1]][x], data[,t(permut)[i,2]][x],  method = method ) ; return ((list(res$p.value,res$estimate))) }))

      if (coeffBoot == T){ # if the user wants the bootstrapped value
     coeffcorr[i,1] = printDec ( mean(unlist(res[,2]), na.rm = T)) # na.rm : if the calculation of the coefficient does not work
     } else {
     coeffcorr[i,1] = printDec( cor.test(data[,t(permut)[i,1]], data[,t(permut)[i,2]])$estimate)
     }

     coeffcorr[i,2] = printDec( quantile(unlist(res[,2]), c(0.025, 0.975), na.rm = T)[1] ) # limit 1 of CI
     coeffcorr[i,3] = printDec( quantile(unlist(res[,2]), c(0.025, 0.975), na.rm = T)[2] ) # limit 2 of CI
     if (pvalueBoot == T){
     coeffcorr[i,4] = pdec    ( mean    (unlist(res[,1]), na.rm = T ) ) # bootstrapped pvalue 
                        }else{
     coeffcorr[i,4] = pdec    ( cor.test(data[,t(permut)[i,1]], data[,t(permut)[i,2]])$p.value ) } # "classical" pvalue 

     } 
     else  # if the two variables to compare are categorical
      {

     z1  = replicate(Brep, sample(1:dim(data)[1], dim(data)[1], replace = T))
     res = do.call  ( rbind, apply(z1, 2, function(x){ res=Kendall(data[,t(permut)[i,1]][x], data[,t(permut)[i,2]][x] ) ; return ((list(res$sl,res$tau))) }))
     if (coeffBoot == T){
       coeffcorr[i,1] = printDec(mean(unlist(res[,2]), na.rm = T))
     } else {
       coeffcorr[i,1] = printDec( Kendall(data[,t(permut)[i,1]], data[,t(permut)[i,2]])$tau)
     }
     coeffcorr[i,2] = printDec( quantile(unlist(res[,2]), c(0.025, 0.975), na.rm = T)[1] ) # limit 1 of CI
     coeffcorr[i,3] = printDec( quantile(unlist(res[,2]), c(0.025, 0.975), na.rm = T)[2] ) # limit 2 of CI
     if (pvalueBoot == T){
       coeffcorr[i,4] = paste("Kendall",pdec    ( mean    (unlist(res[,1]), na.rm = T ) )) # bootstrapped pvalue 
     }else{
       coeffcorr[i,4] = pdec    ( cor.test(data[,t(permut)[i,1]], data[,t(permut)[i,2]])$p.value ) } # classical pvalue

     }}


   df = as.data.frame(matrix(NA, nrow = length(listVar), ncol = length(listVar)))
   m = 1

   for (i in 1:length(listVar)){
      for (j in 1:length(listVar)){

      if (i == j) {df[i,j]=paste("---")} # diagonale 
      if (i < j){ # partie supérieure de la matrice
        df[i,j]=paste(coeffcorr[m,1], "[", coeffcorr[m,2],";", coeffcorr[m,3], "] p =",coeffcorr[m,4])
        m = m +1
      }
      if (i > j){ df[i,j]=paste("")}
   }
}
   df = data.frame(lapply(df, function(x) {gsub("-0.0", "0.0", x)})) # replacement of -0.0 in 0.0
   df = rbind (t(listVar), df); df = cbind (c(paste(""),listVar), df )

    return(df) 
}