我对模型预测的偏差校正自举置信区间感兴趣。我已经编写了一些代码来执行引导,如下所示:
library(tidyverse)
#> Warning: package 'ggplot2' was built under R version 4.0.2
#> Warning: package 'tibble' was built under R version 4.0.2
#> Warning: package 'tidyr' was built under R version 4.0.2
#> Warning: package 'dplyr' was built under R version 4.0.2
library(boot)
x = rnorm(25)
eta = -0.5 + 0.2*x
y = rpois(25, exp(eta))
d = tibble(x, y)
predictions<-function(data, ind){
model = glm(y ~ x, data = data[ind,])
newdata = tibble(x = seq(-2,2, 0.01))
eta = predict(model, newdata = newdata)
exp(eta)
}
bt = boot(d, predictions, 1000)
boot.ci(bt, type = 'bca')
#> BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
#> Based on 1000 bootstrap replicates
#>
#> CALL :
#> boot.ci(boot.out = bt, type = "bca")
#>
#> Intervals :
#> Level BCa
#> 95% ( 0.654, 6.095 )
#> Calculations and Intervals on Original Scale
#> Some BCa intervals may be unstable
引导程序按预期工作,但是boot.ci
调用仅计算单个置信区间。
我想要的输出将是一个具有以下内容的数据框:
100*(1-a)
的一列,100*(1-a)
的一列。如何使用boot
获得所需的输出?
答案 0 :(得分:1)
您可以使用replicate
作为引导程序。
predictions <- function(X) {
model <- glm(y ~ x, data=X)
newdata <- data.frame(x=seq(-2, 2, 0.01))
eta <- predict(model, newdata=newdata)
exp(eta)
}
set.seed(42)
b <- replicate(200, predictions(d[sample(1:nrow(d), nrow(d), replace=T), ]))
res <- cbind(estimate=colMeans(b), matrixStats::colQuantiles(b, probs=c(.025, .975)))
head(res)
# estimate 2.5% 97.5%
# [1,] 2.913660 1.324121 5.363724
# [2,] 2.432935 1.327170 3.990526
# [3,] 2.608683 1.221961 4.715235
# [4,] 2.681802 1.395430 4.537477
# [5,] 2.592316 1.947494 3.357226
# [6,] 3.705157 1.606632 7.012669
数据:
d <- structure(list(x = c(-0.733089270649064, 0.843270552215953, -0.860309462865639,
1.57992224055388, 0.606460750363077, -1.60987545170912, 0.116723009723847,
-1.35220761859743, -0.721654356711471, -0.614137997795221, -0.0900374904672829,
1.20655915421524, 1.46948958927721, -0.210496187862193, -1.21763978187878,
-0.566774409165728, 2.49263604822452, 1.37169581528637, -0.407181458046919,
-0.247944038851957, -0.703358091631417, -0.311797310571164, -0.990496073627748,
0.515047211912022, 0.355009786755435), y = c(0L, 1L, 0L, 0L,
1L, 2L, 0L, 1L, 1L, 2L, 3L, 1L, 0L, 2L, 1L, 0L, 0L, 0L, 1L, 2L,
0L, 1L, 1L, 1L, 2L)), class = "data.frame", row.names = c(NA,
-25L))
答案 1 :(得分:0)
您可以传递index=
参数以获得感兴趣的列:
res = lapply(1:ncol(bt$t),function(i){
ci = boot.ci(bt,index=i,type = 'bca')
data.frame(
var = i,
mean = mean(bt$t[,i]),
lower = ci$bca[4],
upper = ci$bca[5]
)
})
res = do.call(rbind,res)
head(res)
var mean lower upper
1 1 2.736563 0.9131014 5.768826
2 2 2.729717 0.9155394 5.743819
3 3 2.722903 0.9174191 5.709087
4 4 2.716120 0.9215527 5.706216
在您的示例中不太容易看到,我们可以看到它与虹膜兼容:
bo = boot(iris,function(d,i)colMeans(d[i,-5]),1000)
data.frame(
var=colnames(iris)[1:4],
mean = colMeans(bo$t),
t(sapply(1:4,function(i)boot.ci(bo,index=i,type="bca")$bca[4:5]))
)
var mean X1 X2
1 Sepal.Length 5.847009 5.712667 5.969333
2 Sepal.Width 3.055873 2.982667 3.128428
3 Petal.Length 3.767644 3.487473 4.029977
4 Petal.Width 1.203107 1.083937 1.313888