library(boot)
set.seed(3)
run2 <- structure(list(X = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L),
beta1 = c(1.40597500087168, 1.41848914211916, 1.42962301367006,
1.42029917323066, 1.4289966153492, 1.42596562840805, 1.42680678397731,
1.41425063435813, 1.40927087463386, 1.41364428128251)), .Names = c("X",
"beta1"), class = "data.frame", row.names = 11:20)
#my boostrap mean
> b11 = mean(sample(run2$beta1, 10000, replace = TRUE)); b11
[1] 1.419424
boot.beta1 <- function(d, i){
d2 <- d[i, ]
return(d2$beta1)
}
> bootbeta1 <- boot(run2, boot.beta1, R = 10000)
> boot.ci(bootbeta1, type = c("norm"))
BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
Based on 10000 bootstrap replicates
CALL :
boot.ci(boot.out = bootbeta1, type = c("norm"))
Intervals :
Level Normal
95% ( 1.377, 1.408 )
Calculations and Intervals on Original Scale
我的增强平均值是1.419,但这超出了95%CI?我该如何解决这个问题?
答案 0 :(得分:3)
bootbeta1
只是返回值的向量,而不是平均值,而非boot
版本实际上并不是自举,因为它不会创建用于计算统计量的重复项,而只是采样a很多。
在基础R中,它看起来像
run2 <- data.frame(
X = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L),
beta1 = c(1.40597500087168, 1.41848914211916, 1.42962301367006, 1.42029917323066, 1.4289966153492, 1.42596562840805, 1.42680678397731, 1.41425063435813, 1.40927087463386, 1.41364428128251)
)
set.seed(3)
straps <- replicate(10000, mean(sample(run2$beta1, nrow(run2), replace = TRUE)))
mean(straps)
#> [1] 1.419342
# 95% confidence interval
qnorm(c(.025, .975), mean(straps), sd(straps))
#> [1] 1.414387 1.424298
使用boot
,它看起来像
library(boot)
boot_mean <- boot(run2, function(d, i){mean(d[i, 'beta1'])}, R = 10000)
boot_mean
#>
#> ORDINARY NONPARAMETRIC BOOTSTRAP
#>
#>
#> Call:
#> boot(data = run2, statistic = function(d, i) {
#> mean(d[i, "beta1"])
#> }, R = 10000)
#>
#>
#> Bootstrap Statistics :
#> original bias std. error
#> t1* 1.419332 -1.286626e-05 0.002550844
boot_ci <- boot.ci(boot_mean, type = 'norm')
boot_ci
#> BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
#> Based on 10000 bootstrap replicates
#>
#> CALL :
#> boot.ci(boot.out = boot_mean, type = "norm")
#>
#> Intervals :
#> Level Normal
#> 95% ( 1.414, 1.424 )
#> Calculations and Intervals on Original Scale
现在都包含均值。