在偏最小二乘法(PLS)(非正态分布)

时间:2016-01-18 02:29:49

标签: r imputation

我正在使用IPW和MI与PLS的组合,在每个MI模型中,我计算了95%CI。我的问题是如何将95%CI的结果合并到最终结果中。下面是我正在使用的示例脚本。假设感兴趣的参数不遵循正态分布。

coeftable [[i]]包含每个MI模型中的系数和95%CI。

library(Hmisc)
library(dplyr)
library(nlme)
library(reshape)

library(plsRglm)
library(xlsx)
library(boot)

set.seed(123)
id <- c(1:1000)
y <-  sample(c(1:5,NA), 1000, replace=T)
x1 <-  sample(c(1:2,NA), 1000, replace=T)
x2 <-  sample(c(1:3,NA), 1000, replace=T)
x3 <-  sample(c(1:4,NA), 1000, replace=T)

df <- data.frame(id,y,x1,x2,x3)


df.nomiss <- subset(df, !is.na(df$y))

# obs==1: with any missing data of x
df.nomiss[,"obs"] <- 0
df.nomiss$obs[is.na(df.nomiss$x1)==TRUE | 
                       is.na(df.nomiss$x2)==TRUE| 
                       is.na(df.nomiss$x3)==TRUE ] <- 1


# only include obs==1 into the imputation
include<-df.nomiss[df.nomiss$obs==1,]
exclude<-anti_join(df.nomiss,include,by="id")


# imputation
m=10

include.i <- aregImpute(~factor(y) + factor(x1) + factor(x2) +factor(x3) ,
                            data=include,n.impute=m)
    include.nomiss <- list(include, include, include, include, include, include,include, include, include, include)

    # if a variable is coded as 0, use "include.i$imputed$x1[,i]-1 "
for(i in 1:m){
  include.nomiss[[i]]$y[is.na(include.nomiss[[i]]$y)] <-
    include.i$imputed$y[,i]

  include.nomiss[[i]]$x1[is.na(include.nomiss[[i]]$x1)] <-
include.i$imputed$x1[,i]

  include.nomiss[[i]]$x2[is.na(include.nomiss[[i]]$x2)] <-
include.i$imputed$x2[,i]

  include.nomiss[[i]]$x3[is.na(include.nomiss[[i]]$x3)] <-
    include.i$imputed$x3[,i]

}



missingmodel <-  list(NA)
analysismodel<-list(NA)
all<- rep(list(NA), m)
modplsglm <- rep(list(NA), m)
coeftable <- rep(list(NA), m)
rawci <- rep(list(NA), m)
loading <- rep(list(NA), m)
temp.bootplsRglm <- rep(list(NA), m)


# PLSRGLM
R <- 1000
ncomp <- 3


# IPW with PLS 
for(i in 1:m){
  all[[i]]<-rbind(exclude,include.nomiss[[i]])

  # IPW
  missingmodel[[i]] <- glm(obs ~y + x1  + x2 +x3 ,
                       data=all[[i]], family=binomial)

  all[[i]]$pw<-(1/missingmodel[[i]]$fitted.values)


  # PLSRGLM
  modplsglm[[i]] <- plsRglm(y~ factor(x1) + factor(x2) + factor(x3) ,
                        nt=ncomp,data=all[[i]], modele="pls", weights=all[[i]]$pw)

  # bootstrap 95%CI
  temp.bootplsRglm[[i]] <- bootplsglm(modplsglm[[i]], typeboot="plsmodel", R=R , statistic=coefs.plsRglmnp, sim="balanced", stype="i", stabvalue=1e6, verbose=TRUE)

  indices.temp.bootplsRglm <- !is.na(temp.bootplsRglm[[i]]$t[,1])
  temp.bootplsRglm[[i]]$t=temp.bootplsRglm[[i]]$t[indices.temp.bootplsRglm,]
  temp.bootplsRglm[[i]]$R=sum(indices.temp.bootplsRglm)
  temp.bootplsRglm[[i]]$call$R<-sum(indices.temp.bootplsRglm)
  Cornell.bootYX.raw <- temp.bootplsRglm[[i]]

  # generate coeftable
  options(scipen=999)

  coeftable[[i]] <- as.data.frame(modplsglm[[i]]$Coeffs)
  colnames(coeftable[[i]]) <- "coef"

  rawci[[i]] <- confints.bootpls(Cornell.bootYX.raw, typeBCa=FALSE)
  rawci[[i]] <- as.data.frame(rawci[[i]])
  col <- c("Normal lower","Normal upper","Basic lower","Basic upper","Percentile lower","Percentile upper")
  colnames(rawci[[i]]) <- col
  rawci[[i]] <- rawci[[i]][,c("Percentile lower","Percentile upper")]

  coeftable[[i]] <- cbind(variable=0,coeftable[[i]],rawci[[i]])
  coeftable[[i]][,"variable"]<-rownames(coeftable[[i]])

}

下面是一个MI模型的系数,我的预期输出应该是这样的,但是会显示所有MI模型的总体估计值。

coeftable[[1]]
               variable        coef Percentile lower Percentile upper
Intercept     Intercept  2.96021462      0.000000000       0.00000000
factor.x1.1 factor.x1.1  0.04540381     -0.019860282       0.04854000
factor.x1.2 factor.x1.2 -0.04540381     -0.048540000       0.01986028
factor.x2.2 factor.x2.2  0.23350314     -0.002184034       0.15478083
factor.x2.3 factor.x2.3  0.04506754     -0.063760940       0.09520172
factor.x3.2 factor.x3.2  0.08297860     -0.057287056       0.09292398
factor.x3.3 factor.x3.3 -0.15542543     -0.124509722       0.02694244
factor.x3.4 factor.x3.4 -0.05176159     -0.092618253       0.05736522

0 个答案:

没有答案