并行化 Hmisc::aregImpute 是否有效?

时间:2017-08-08 17:03:42

标签: r parallel-processing hmisc

我正在尝试使用 Hmisc::aregImpute 对我的数据集进行多重插补。我希望能够将计算分散到多个核心上,因此我试图创建一个函数,将两次插补的输出合并起来,就好像它们来自同一次插补一样。查看输出的 aregImpute 对象,似乎其中只有少数几个元素需要更改。

# Combine two aregImpute fits into one object that presents itself as a single
# fit with areg1$n.impute + areg2$n.impute imputations.
#
# Args:
#   areg1, areg2: objects returned by Hmisc::aregImpute. They are expected to
#     come from the same formula and data, so that the per-variable matrices
#     in $imputed have matching rows.
#
# Returns:
#   A list of class "aregImpute". `call` and `n.impute` report the summed
#   number of imputations, `imputed` holds the column-bound imputations of
#   both fits, `rsq` is taken from areg2, and every other element is copied
#   from areg1.
c_aregImpute <- function(areg1, areg2){
  # Column-binding is only meaningful if both fits imputed the same variables.
  stopifnot(identical(names(areg1$imputed), names(areg2$imputed)))

  n_total <- areg1$n.impute + areg2$n.impute
  call <- areg1$call
  call[["n.impute"]] <- n_total

  # Map() never simplifies its result. mapply() with its default
  # SIMPLIFY = TRUE collapses the per-variable matrices into one matrix
  # whenever they all have the same size (e.g. every variable has the same
  # number of NAs), which would destroy the list structure of `imputed`.
  imputed <- Map(cbind, areg1$imputed, areg2$imputed)

  structure(list(call = call,
                 formula = areg1$formula,
                 match = areg1$match,
                 fweighted = areg1$fweighted,
                 pmmtype = areg1$pmmtype,
                 n = areg1$n,
                 p = areg1$p,
                 na = areg1$na,
                 nna = areg1$nna,
                 type = areg1$type,
                 tlinear = areg1$tlinear,
                 nk = areg1$nk,
                 cat.levels = areg1$cat.levels,
                 df = areg1$df,
                 n.impute = n_total,
                 imputed = imputed,
                 x = areg1$x,
                 # rsq comes from the second fit, i.e. the last block of
                 # imputations in the combined object.
                 rsq = areg2$rsq,
                 resampacc = areg1$resampacc),
            class = "aregImpute")
}

我的问题是:这是否是组合 aregImpute 对象的有效方法?如果不是,这种方法的缺陷是什么?是否有其他使用 Hmisc::aregImpute 的解决方案?


library("Hmisc")
#> Loading required package: lattice
#> Loading required package: survival
#> Loading required package: Formula
#> Loading required package: ggplot2
#> 
#> Attaching package: 'Hmisc'
#> The following objects are masked from 'package:base':
#> 
#>     format.pval, round.POSIXt, trunc.POSIXt, units


#code from https://www.rdocumentation.org/packages/Hmisc/versions/4.0-3/topics/aregImpute
# Simulate 200 observations: x2 is exactly x1^2, x3 is a set of uniform draws.
set.seed(3)
x1 <- rnorm(200)
x2 <- x1^2
x3 <- runif(200)
# Blank out the first m values of x2 so that they have to be imputed.
m <- 30
x2[1:m] <- NA
# Reference fit: all 50 imputations produced by a single aregImpute call.
a <- aregImpute(~x1+x2+I(x3), n.impute=50, nk=4, match='closest') # output clipped for SO
#> 
Iteration 50 
a
#> 
#> Multiple Imputation using Bootstrap and PMM
#> 
#> aregImpute(formula = ~x1 + x2 + I(x3), n.impute = 50, nk = 4, 
#>     match = "closest")
#> 
#> n: 200   p: 3    Imputations: 50     nk: 4 
#> 
#> Number of NAs:
#> x1 x2 x3 
#>  0 30  0 
#> 
#>    type d.f.
#> x1    s    3
#> x2    s    1
#> x3    l    1
#> 
#> Transformation of Target Variables Forced to be Linear
#> 
#> R-squares for Predicting Non-Missing Values for Each Variable
#> Using Last Imputations of Predictors
#>    x2 
#> 0.984


# Combine two aregImpute fits into one object that presents itself as a single
# fit with areg1$n.impute + areg2$n.impute imputations.
#
# Args:
#   areg1, areg2: objects returned by Hmisc::aregImpute. They are expected to
#     come from the same formula and data, so that the per-variable matrices
#     in $imputed have matching rows.
#
# Returns:
#   A list of class "aregImpute". `call` and `n.impute` report the summed
#   number of imputations, `imputed` holds the column-bound imputations of
#   both fits, `rsq` is taken from areg2, and every other element is copied
#   from areg1.
c_aregImpute <- function(areg1, areg2){
  # Column-binding is only meaningful if both fits imputed the same variables.
  stopifnot(identical(names(areg1$imputed), names(areg2$imputed)))

  n_total <- areg1$n.impute + areg2$n.impute
  call <- areg1$call
  call[["n.impute"]] <- n_total

  # Map() never simplifies its result. mapply() with its default
  # SIMPLIFY = TRUE collapses the per-variable matrices into one matrix
  # whenever they all have the same size (e.g. every variable has the same
  # number of NAs), which would destroy the list structure of `imputed`.
  imputed <- Map(cbind, areg1$imputed, areg2$imputed)

  structure(list(call = call,
                 formula = areg1$formula,
                 match = areg1$match,
                 fweighted = areg1$fweighted,
                 pmmtype = areg1$pmmtype,
                 n = areg1$n,
                 p = areg1$p,
                 na = areg1$na,
                 nna = areg1$nna,
                 type = areg1$type,
                 tlinear = areg1$tlinear,
                 nk = areg1$nk,
                 cat.levels = areg1$cat.levels,
                 df = areg1$df,
                 n.impute = n_total,
                 imputed = imputed,
                 x = areg1$x,
                 # rsq comes from the second fit, i.e. the last block of
                 # imputations in the combined object.
                 rsq = areg2$rsq,
                 resampacc = areg1$resampacc),
            class = "aregImpute")
}
# Two separate fits of 25 imputations each, standing in for fits that would be
# run on different cores.
a1 <- aregImpute(~x1+x2+I(x3), n.impute=25, nk=4, match='closest') # output clipped for SO
#> Iteration 25 
a2 <- aregImpute(~x1+x2+I(x3), n.impute=25, nk=4, match='closest') # output clipped for SO
#> Iteration 25 
# Merge the two fits into one object that reports 50 imputations.
ca <- c_aregImpute(a1, a2)
# Compare R-squared across the single 50-imputation fit, the two 25-imputation
# fits and the combined object. c_aregImpute takes rsq from its second
# argument, so ca$rsq equals a2$rsq rather than a$rsq.
a$rsq 
#>        x2 
#> 0.9841747
a1$rsq
#>        x2 
#> 0.9721295
a2$rsq
#>        x2 
#> 0.9787526
ca$rsq
#>        x2 
#> 0.9787526
a
#> 
#> Multiple Imputation using Bootstrap and PMM
#> 
#> aregImpute(formula = ~x1 + x2 + I(x3), n.impute = 50, nk = 4, 
#>     match = "closest")
#> 
#> n: 200   p: 3    Imputations: 50     nk: 4 
#> 
#> Number of NAs:
#> x1 x2 x3 
#>  0 30  0 
#> 
#>    type d.f.
#> x1    s    3
#> x2    s    1
#> x3    l    1
#> 
#> Transformation of Target Variables Forced to be Linear
#> 
#> R-squares for Predicting Non-Missing Values for Each Variable
#> Using Last Imputations of Predictors
#>    x2 
#> 0.984
ca
#> 
#> Multiple Imputation using Bootstrap and PMM
#> 
#> aregImpute(formula = ~x1 + x2 + I(x3), n.impute = 50, nk = 4, 
#>     match = "closest")
#> 
#> n: 200   p: 3    Imputations: 50     nk: 4 
#> 
#> Number of NAs:
#> x1 x2 x3 
#>  0 30  0 
#> 
#>    type d.f.
#> x1    s    3
#> x2    s    1
#> x3    l    1
#> 
#> Transformation of Target Variables Forced to be Linear
#> 
#> R-squares for Predicting Non-Missing Values for Each Variable
#> Using Last Imputations of Predictors
#>    x2 
#> 0.979
# Plot the imputed x2 values of the combined object against the true values
# (x1^2) of the m observations whose x2 was set to NA.
matplot(x1[1:m]^2, ca$imputed$x2)

# The same plot for the single 50-imputation fit, for visual comparison.
matplot(x1[1:m]^2, a$imputed$x2)

devtools::session_info()
#> Session info --------------------------------------------------------------
#>  setting  value                       
#>  version  R version 3.3.2 (2016-10-31)
#>  system   x86_64, linux-gnu           
#>  ui       X11                         
#>  language (EN)                        
#>  collate  en_US.UTF-8                 
#>  tz       America/New_York            
#>  date     2017-08-08
#> Packages ------------------------------------------------------------------
#>  package      * version date       source         
#>  acepack        1.4.1   2016-10-29 CRAN (R 3.3.2) 
#>  backports      1.0.5   2017-01-18 CRAN (R 3.3.2) 
#>  base64enc      0.1-3   2015-07-28 CRAN (R 3.3.2) 
#>  checkmate      1.8.2   2016-11-02 CRAN (R 3.3.2) 
#>  cluster        2.0.5   2016-10-08 CRAN (R 3.3.2) 
#>  colorspace     1.3-2   2016-12-14 CRAN (R 3.3.2) 
#>  data.table     1.10.4  2017-02-01 CRAN (R 3.3.2) 
#>  devtools       1.12.0  2016-12-05 CRAN (R 3.3.2) 
#>  digest         0.6.12  2017-01-27 CRAN (R 3.3.2) 
#>  evaluate       0.10    2016-10-11 CRAN (R 3.3.2) 
#>  foreign        0.8-67  2016-09-13 CRAN (R 3.3.2) 
#>  Formula      * 1.2-1   2015-04-07 CRAN (R 3.3.2) 
#>  ggplot2      * 2.2.1   2016-12-30 CRAN (R 3.3.2) 
#>  gridExtra      2.2.1   2016-02-29 CRAN (R 3.3.2) 
#>  gtable         0.2.0   2016-02-26 CRAN (R 3.3.2) 
#>  Hmisc        * 4.0-2   2016-12-31 CRAN (R 3.3.2) 
#>  htmlTable      1.9     2017-01-26 CRAN (R 3.3.2) 
#>  htmltools      0.3.6   2017-04-28 cran (@0.3.6)  
#>  htmlwidgets    0.8     2016-11-09 CRAN (R 3.3.2) 
#>  knitr          1.15.1  2016-11-22 CRAN (R 3.3.2) 
#>  lattice      * 0.20-34 2016-09-06 CRAN (R 3.3.2) 
#>  latticeExtra   0.6-28  2016-02-09 CRAN (R 3.3.2) 
#>  lazyeval       0.2.0   2016-06-12 CRAN (R 3.3.2) 
#>  magrittr       1.5     2014-11-22 CRAN (R 3.3.2) 
#>  Matrix         1.2-8   2017-01-20 CRAN (R 3.3.2) 
#>  memoise        1.0.0   2016-01-29 CRAN (R 3.3.2) 
#>  munsell        0.4.3   2016-02-13 CRAN (R 3.3.2) 
#>  nnet           7.3-12  2016-02-02 CRAN (R 3.3.2) 
#>  plyr           1.8.4   2016-06-08 CRAN (R 3.3.2) 
#>  RColorBrewer   1.1-2   2014-12-07 CRAN (R 3.3.2) 
#>  Rcpp           0.12.12 2017-07-15 cran (@0.12.12)
#>  rlang          0.1.1   2017-05-18 CRAN (R 3.3.2) 
#>  rmarkdown      1.3     2016-12-21 CRAN (R 3.3.2) 
#>  rpart          4.1-10  2015-06-29 CRAN (R 3.3.2) 
#>  rprojroot      1.2     2017-01-16 CRAN (R 3.3.2) 
#>  scales         0.4.1   2016-11-09 CRAN (R 3.3.2) 
#>  stringi        1.1.5   2017-04-07 cran (@1.1.5)  
#>  stringr        1.2.0   2017-02-18 CRAN (R 3.3.2) 
#>  survival     * 2.40-1  2016-10-30 CRAN (R 3.3.2) 
#>  tibble         1.3.3   2017-05-28 cran (@1.3.3)  
#>  withr          2.0.0   2017-07-28 cran (@2.0.0)  
#>  yaml           2.1.14  2016-11-12 CRAN (R 3.3.2)

0 个答案:

没有答案