我正在尝试使用 Hmisc::aregImpute 对我的数据集进行多重插补。我希望能够将计算分散到多个核心上,因此我试图创建一个函数,将两次插补的输出合并起来,就好像它们来自同一次插补一样。查看 aregImpute 的输出对象,似乎只有少数几个组成元素需要更改。
#' Combine two aregImpute fits into a single aregImpute-classed object
#'
#' Every component except `n.impute`, `imputed`, `rsq` and the `n.impute`
#' entry of `call` is copied unchanged from `areg1`, so both fits are expected
#' to come from the same data and the same model specification (this is not
#' checked here).
#'
#' @param areg1 First fit; supplies the shared components.
#' @param areg2 Second fit; its imputed columns are appended after those of
#'   `areg1`, and its `rsq` is the one kept in the result.
#' @return A list of class "aregImpute" whose `n.impute` is the sum of the two
#'   inputs and whose `imputed` element is a list holding, per variable, the
#'   column-wise concatenation of both inputs' imputed values.
c_aregImpute <- function(areg1, areg2) {
  n_total <- areg1$n.impute + areg2$n.impute

  # Keep the recorded call consistent with the pooled number of imputations.
  call <- areg1$call
  call[["n.impute"]] <- n_total

  # Map() never simplifies its result. mapply() with the default
  # SIMPLIFY = TRUE collapses the per-variable list into one matrix whenever
  # every variable's combined block has the same size, which breaks
  # `$imputed$<variable>` access.
  imputed <- Map(cbind, areg1$imputed, areg2$imputed)

  structure(
    list(
      call = call,
      formula = areg1$formula,
      match = areg1$match,
      fweighted = areg1$fweighted,
      pmmtype = areg1$pmmtype,
      n = areg1$n,
      p = areg1$p,
      na = areg1$na,
      nna = areg1$nna,
      type = areg1$type,
      tlinear = areg1$tlinear,
      nk = areg1$nk,
      cat.levels = areg1$cat.levels,
      df = areg1$df,
      n.impute = n_total,
      imputed = imputed,
      x = areg1$x,
      # NOTE: rsq is copied from the second fit only; it is not recomputed
      # for the pooled imputations.
      rsq = areg2$rsq,
      resampacc = areg1$resampacc
    ),
    class = "aregImpute"
  )
}
我的问题是:这是否是组合 aregImpute 对象的有效方法?如果不是,这种方法的缺陷是什么?是否有其他使用 Hmisc::aregImpute 的解决方案?
library("Hmisc")
#> Loading required package: lattice
#> Loading required package: survival
#> Loading required package: Formula
#> Loading required package: ggplot2
#>
#> Attaching package: 'Hmisc'
#> The following objects are masked from 'package:base':
#>
#> format.pval, round.POSIXt, trunc.POSIXt, units
#code from https://www.rdocumentation.org/packages/Hmisc/versions/4.0-3/topics/aregImpute
# Fix the RNG seed so the simulated data below are reproducible.
set.seed(3)
x1 <- rnorm(200)
# x2 is an exact function of x1, so the true value of every entry that gets
# blanked out below is known (x1^2).
x2 <- x1^2
x3 <- runif(200)
# Number of leading x2 entries to set to missing.
m <- 30
x2[1:m] <- NA
# Reference fit: all 50 imputations requested in a single aregImpute call.
a <- aregImpute(~x1+x2+I(x3), n.impute=50, nk=4, match='closest') # output clipped for SO
#>
Iteration 50
a
#>
#> Multiple Imputation using Bootstrap and PMM
#>
#> aregImpute(formula = ~x1 + x2 + I(x3), n.impute = 50, nk = 4,
#> match = "closest")
#>
#> n: 200 p: 3 Imputations: 50 nk: 4
#>
#> Number of NAs:
#> x1 x2 x3
#> 0 30 0
#>
#> type d.f.
#> x1 s 3
#> x2 s 1
#> x3 l 1
#>
#> Transformation of Target Variables Forced to be Linear
#>
#> R-squares for Predicting Non-Missing Values for Each Variable
#> Using Last Imputations of Predictors
#> x2
#> 0.984
#' Combine two aregImpute fits into a single aregImpute-classed object
#'
#' Every component except `n.impute`, `imputed`, `rsq` and the `n.impute`
#' entry of `call` is copied unchanged from `areg1`, so both fits are expected
#' to come from the same data and the same model specification (this is not
#' checked here).
#'
#' @param areg1 First fit; supplies the shared components.
#' @param areg2 Second fit; its imputed columns are appended after those of
#'   `areg1`, and its `rsq` is the one kept in the result.
#' @return A list of class "aregImpute" whose `n.impute` is the sum of the two
#'   inputs and whose `imputed` element is a list holding, per variable, the
#'   column-wise concatenation of both inputs' imputed values.
c_aregImpute <- function(areg1, areg2) {
  n_total <- areg1$n.impute + areg2$n.impute

  # Keep the recorded call consistent with the pooled number of imputations.
  call <- areg1$call
  call[["n.impute"]] <- n_total

  # Map() never simplifies its result. mapply() with the default
  # SIMPLIFY = TRUE collapses the per-variable list into one matrix whenever
  # every variable's combined block has the same size, which breaks
  # `$imputed$<variable>` access.
  imputed <- Map(cbind, areg1$imputed, areg2$imputed)

  structure(
    list(
      call = call,
      formula = areg1$formula,
      match = areg1$match,
      fweighted = areg1$fweighted,
      pmmtype = areg1$pmmtype,
      n = areg1$n,
      p = areg1$p,
      na = areg1$na,
      nna = areg1$nna,
      type = areg1$type,
      tlinear = areg1$tlinear,
      nk = areg1$nk,
      cat.levels = areg1$cat.levels,
      df = areg1$df,
      n.impute = n_total,
      imputed = imputed,
      x = areg1$x,
      # NOTE: rsq is copied from the second fit only; it is not recomputed
      # for the pooled imputations.
      rsq = areg2$rsq,
      resampacc = areg1$resampacc
    ),
    class = "aregImpute"
  )
}
# Two separate fits of 25 imputations each on the same data and formula as `a`
# (presumably standing in for runs that would be sent to different cores).
a1 <- aregImpute(~x1+x2+I(x3), n.impute=25, nk=4, match='closest') # output clipped for SO
#> Iteration 25
a2 <- aregImpute(~x1+x2+I(x3), n.impute=25, nk=4, match='closest') # output clipped for SO
#> Iteration 25
# Merge the two 25-imputation fits into one object with n.impute = 25 + 25.
ca <- c_aregImpute(a1, a2)
# Compare the stored R-squared of the single 50-imputation fit, the two
# 25-imputation fits, and the combined object.
a$rsq
#> x2
#> 0.9841747
a1$rsq
#> x2
#> 0.9721295
a2$rsq
#> x2
#> 0.9787526
# c_aregImpute copies rsq from its second argument, so this equals a2$rsq
# rather than a value recomputed over all 50 imputations.
ca$rsq
#> x2
#> 0.9787526
a
#>
#> Multiple Imputation using Bootstrap and PMM
#>
#> aregImpute(formula = ~x1 + x2 + I(x3), n.impute = 50, nk = 4,
#> match = "closest")
#>
#> n: 200 p: 3 Imputations: 50 nk: 4
#>
#> Number of NAs:
#> x1 x2 x3
#> 0 30 0
#>
#> type d.f.
#> x1 s 3
#> x2 s 1
#> x3 l 1
#>
#> Transformation of Target Variables Forced to be Linear
#>
#> R-squares for Predicting Non-Missing Values for Each Variable
#> Using Last Imputations of Predictors
#> x2
#> 0.984
ca
#>
#> Multiple Imputation using Bootstrap and PMM
#>
#> aregImpute(formula = ~x1 + x2 + I(x3), n.impute = 50, nk = 4,
#> match = "closest")
#>
#> n: 200 p: 3 Imputations: 50 nk: 4
#>
#> Number of NAs:
#> x1 x2 x3
#> 0 30 0
#>
#> type d.f.
#> x1 s 3
#> x2 s 1
#> x3 l 1
#>
#> Transformation of Target Variables Forced to be Linear
#>
#> R-squares for Predicting Non-Missing Values for Each Variable
#> Using Last Imputations of Predictors
#> x2
#> 0.979
# Plot the imputed x2 values against the known true values (x1^2) for the m
# rows that were set to NA: first for the combined object, then for the
# single 50-imputation fit (presumably one plotted series per imputation).
matplot(x1[1:m]^2, ca$imputed$x2)
matplot(x1[1:m]^2, a$imputed$x2)
# Record the R version and package versions used for this run.
devtools::session_info()
#> Session info --------------------------------------------------------------
#> setting value
#> version R version 3.3.2 (2016-10-31)
#> system x86_64, linux-gnu
#> ui X11
#> language (EN)
#> collate en_US.UTF-8
#> tz America/New_York
#> date 2017-08-08
#> Packages ------------------------------------------------------------------
#> package * version date source
#> acepack 1.4.1 2016-10-29 CRAN (R 3.3.2)
#> backports 1.0.5 2017-01-18 CRAN (R 3.3.2)
#> base64enc 0.1-3 2015-07-28 CRAN (R 3.3.2)
#> checkmate 1.8.2 2016-11-02 CRAN (R 3.3.2)
#> cluster 2.0.5 2016-10-08 CRAN (R 3.3.2)
#> colorspace 1.3-2 2016-12-14 CRAN (R 3.3.2)
#> data.table 1.10.4 2017-02-01 CRAN (R 3.3.2)
#> devtools 1.12.0 2016-12-05 CRAN (R 3.3.2)
#> digest 0.6.12 2017-01-27 CRAN (R 3.3.2)
#> evaluate 0.10 2016-10-11 CRAN (R 3.3.2)
#> foreign 0.8-67 2016-09-13 CRAN (R 3.3.2)
#> Formula * 1.2-1 2015-04-07 CRAN (R 3.3.2)
#> ggplot2 * 2.2.1 2016-12-30 CRAN (R 3.3.2)
#> gridExtra 2.2.1 2016-02-29 CRAN (R 3.3.2)
#> gtable 0.2.0 2016-02-26 CRAN (R 3.3.2)
#> Hmisc * 4.0-2 2016-12-31 CRAN (R 3.3.2)
#> htmlTable 1.9 2017-01-26 CRAN (R 3.3.2)
#> htmltools 0.3.6 2017-04-28 cran (@0.3.6)
#> htmlwidgets 0.8 2016-11-09 CRAN (R 3.3.2)
#> knitr 1.15.1 2016-11-22 CRAN (R 3.3.2)
#> lattice * 0.20-34 2016-09-06 CRAN (R 3.3.2)
#> latticeExtra 0.6-28 2016-02-09 CRAN (R 3.3.2)
#> lazyeval 0.2.0 2016-06-12 CRAN (R 3.3.2)
#> magrittr 1.5 2014-11-22 CRAN (R 3.3.2)
#> Matrix 1.2-8 2017-01-20 CRAN (R 3.3.2)
#> memoise 1.0.0 2016-01-29 CRAN (R 3.3.2)
#> munsell 0.4.3 2016-02-13 CRAN (R 3.3.2)
#> nnet 7.3-12 2016-02-02 CRAN (R 3.3.2)
#> plyr 1.8.4 2016-06-08 CRAN (R 3.3.2)
#> RColorBrewer 1.1-2 2014-12-07 CRAN (R 3.3.2)
#> Rcpp 0.12.12 2017-07-15 cran (@0.12.12)
#> rlang 0.1.1 2017-05-18 CRAN (R 3.3.2)
#> rmarkdown 1.3 2016-12-21 CRAN (R 3.3.2)
#> rpart 4.1-10 2015-06-29 CRAN (R 3.3.2)
#> rprojroot 1.2 2017-01-16 CRAN (R 3.3.2)
#> scales 0.4.1 2016-11-09 CRAN (R 3.3.2)
#> stringi 1.1.5 2017-04-07 cran (@1.1.5)
#> stringr 1.2.0 2017-02-18 CRAN (R 3.3.2)
#> survival * 2.40-1 2016-10-30 CRAN (R 3.3.2)
#> tibble 1.3.3 2017-05-28 cran (@1.3.3)
#> withr 2.0.0 2017-07-28 cran (@2.0.0)
#> yaml 2.1.14 2016-11-12 CRAN (R 3.3.2)