Question

我已经编写了下面的代码来获得平均值的bootstrap估计值。我的目标是通过boot包中的函数boot查看从数据集中选择的数字，最好按照选择的顺序查看。

数据集只包含三个数字：1,10和100，我只使用两个bootstrap样本。

估算的平均值为23.5，下面的R代码表明这六个数字必定包括一个'1'、四个'10'和一个'100'。但是，这些数字有30种可能的排列，其平均值都是23.5。

我有办法确定这30种可能组合中的哪一种是两个自举样本中实际出现的组合吗？

library(boot)

set.seed(1234)

dat <- c(1, 10, 100)

# Statistic handed to boot(): the mean of the observations picked out by the
# resampled index vector `i`.
av <- function(dat, i) {
    mean(dat[i])
}

# Two bootstrap replicates of the mean.
av.boot <- boot(dat, av, R = 2)
av.boot
# NOTE(review): the printout below is the original author's recorded run.
# The default sampler behind sample.int() changed in R 3.6.0, so a current R
# will most likely print different numbers for the same seed -- confirm.
#
# ORDINARY NONPARAMETRIC BOOTSTRAP
#
#
# Call:
# boot(data = dat, statistic = av, R = 2)
#
#
# Bootstrap Statistics :
#     original  bias    std. error
# t1*       37   -13.5    19.09188
#

# Bootstrap estimate of the mean = original statistic + estimated bias.
# The bias is taken from the boot object (mean of the replicates minus the
# original statistic) instead of being retyped from the printout, so this
# line stays consistent with whatever samples were actually drawn.
boot.bias <- mean(av.boot$t) - av.boot$t0
av.boot$t0 + boot.bias
# [1] 23.5

# The two samples must have contained one '1', four '10' and one '100',
# but there are 30 possibilities.
# Which of these 30 possible sequences actually occurred?

# This code shows there must have been one '1', four '10' and one '100'
# and shows the 30 possible combinations.

# Every ordered way of drawing six values from the data: V1-V3 make up the
# first bootstrap sample, V4-V6 the second.
sample.values <- c(1, 10, 100)
my.combos <- expand.grid(setNames(rep(list(sample.values), 6),
                                  paste0("V", 1:6)))

# Average of the two per-sample means, computed column-wise for all rows at
# once (no apply() over a data frame, which would coerce it to a matrix).
my.means <- with(my.combos,
                 ((V1 + V2 + V3) / 3 + (V4 + V5 + V6) / 3) / 2)

# Compare with a tolerance: the means are computed floating-point values, so
# exact `==` is fragile.
target.mean <- 23.5
is.match <- abs(my.means - target.mean) < sqrt(.Machine$double.eps)

possible.samples <- my.combos[is.match, ]
dim(possible.samples)

# How often each data value occurs in every candidate sequence.
n.1   <- rowSums(possible.samples == 1)
n.10  <- rowSums(possible.samples == 10)
n.100 <- rowSums(possible.samples == 100)

n.1[1]
n.10[1]
n.100[1]

# TRUE for each line below means every candidate sequence uses the same
# number of 1s, 10s and 100s.
length(unique(n.1))   == 1
length(unique(n.10))  == 1
length(unique(n.100)) == 1

Answer 1

我认为您可以使用以下代码确定被抽中的数字及其抽样顺序。您必须从boot包中提取函数ordinary.array，并将该函数粘贴到您的R代码中。然后指定n、R和strata的值，其中n是数据集中的观察数，R是您想要的重复样本的数量。

我不知道这种方法有多普遍，但它适用于我尝试的几个简单示例，包括下面的示例。

library(boot)

set.seed(1234)

dat <- c(1, 10, 100, 1000)

# Statistic handed to boot(): the mean of the observations picked out by the
# resampled index vector `i`.
av <- function(dat, i) {
    mean(dat[i])
}

# Three bootstrap replicates of the mean.
av.boot <- boot(dat, av, R = 3)
av.boot
# NOTE(review): the printout below is the original author's recorded run.
# The default sampler behind sample.int() changed in R 3.6.0, so a current R
# will most likely print different numbers for the same seed -- confirm.
#
# ORDINARY NONPARAMETRIC BOOTSTRAP
#
#
# Call:
# boot(data = dat, statistic = av, R = 3)
#
#
# Bootstrap Statistics :
#     original  bias    std. error
# t1*   277.75  -127.5    132.2405
# 
# 

# Bootstrap estimate of the mean = original statistic + estimated bias.
# The bias is taken from the boot object (mean of the replicates minus the
# original statistic) instead of being retyped from the printout, so this
# line stays consistent with whatever samples were actually drawn.
boot.bias <- mean(av.boot$t) - av.boot$t0
av.boot$t0 + boot.bias
# [1] 150.25

# boot:::ordinary.array

# Stand-alone copy of boot:::ordinary.array: draws the resampling indices for
# an ordinary nonparametric bootstrap.
#
# Arguments:
#   n       number of observations in the data set.
#   R       number of bootstrap replicates.
#   strata  stratum label(s) of the observations; labels must be readable as
#           integers because they are recovered via
#           as.integer(names(table(strata))). A single distinct label means
#           "no stratification".
#
# Value:
#   An R x n integer matrix. Row r holds the indices into the data that make
#   up bootstrap sample r. With several strata, the columns belonging to a
#   stratum only ever contain indices from that same stratum.
ordinary.array <- function(n, R, strata) {
    inds <- as.integer(names(table(strata)))
    if (length(inds) == 1L) {
        # One stratum: draw all n * R indices in a single call, then fold
        # them into an R x n matrix (filled column by column).
        output <- sample.int(n, n * R, replace = TRUE)
        dim(output) <- c(R, n)
    } else {
        output <- matrix(0L, R, n)
        for (is in inds) {
            # Positions of the observations that belong to stratum `is`.
            gp <- seq_len(n)[strata == is]
            output[, gp] <- if (length(gp) == 1L) {
                # A stratum with a single member can only resample itself.
                rep(gp, R)
            } else {
                # The packaged version calls the helper bsample() here. That
                # helper lives inside the boot namespace and is not visible
                # to a pasted copy, so the equivalent base-R draw with
                # replacement is written out directly.
                gp[sample.int(length(gp), R * length(gp), replace = TRUE)]
            }
        }
    }
    output
}

# I think the function ordinary.array determines which elements 
# of the data are sampled in each of the R samples

# Re-seed with the same value used before the boot() call, then draw the
# index matrix directly: one row per bootstrap sample, one column per draw.
set.seed(1234)
boot.idx <- ordinary.array(n = 4, R = 3, strata = 1)
boot.idx

# NOTE(review): recorded output from the original author's run; the exact
# indices depend on the R version's default sampler -- confirm on current R.
#      [,1] [,2] [,3] [,4]
# [1,]    1    3    1    3
# [2,]    3    4    1    3
# [3,]    3    3    3    3
#
# which equals:

# Replace each index by the data value it points at, average within each
# bootstrap sample (row), then average those sample means. For the indices
# shown above this is
#   ((1+100+1+100) / 4  +  (100+1000+1+100) / 4  +  (100+100+100+100) / 4) / 3
# but computing it from boot.idx avoids retyping the draws by hand.
boot.values <- matrix(dat[boot.idx], nrow = nrow(boot.idx))
mean(rowMeans(boot.values))

# [1] 150.25

查看函数boot用于bootstrap估计的值

1 个答案: