从R中的二项式检验中提取置信区间

时间:2018-06-08 20:13:22

标签: r loops

我有26个比例,26个样本大小。像这样:

# Observed proportions, one per region (26 regions)
crude2admit_rate <- c(
  0.18715084, 0.00000000, 0.11111111, 0.03333333, 0.17500000,
  0.10810811, 0.10080645, 0.14388489, 0.19029374, 0.15268456,
  0.18329278, 0.10614525, 0.06896552, 0.25000000, 0.10294118,
  0.21000981, 0.16950998, 0.18333333, 0.14355231, 0.26595745,
  0.18317890, 0.13636364, 0.20556227, 0.21547800, 0.20924574,
  0.23748669
)
# Sample size (N) for each of the same 26 regions, in the same order
count_pat <- c(
  358, 7, 18, 30, 40, 37, 496, 973, 4698, 596,
  1233, 179, 29, 12, 68, 1019, 2755, 60, 411, 94,
  1403, 352, 827, 659, 411, 939
)

我写了一个循环,对26个区域中的每个区域运行二项式检验,如下所示:

# Run binom.test() once per region and keep every "htest" result in `ci`.
# The number of successes is recovered by rounding rate * N.
# `ci` is preallocated here so the loop also works in a fresh session
# (assigning into ci[[i]] fails when `ci` does not exist yet), and the
# loop length comes from the data instead of a hard-coded 26.
ci <- vector("list", length(count_pat))
for (i in seq_along(count_pat)) {
  ci[[i]] <- binom.test(
    x = round(crude2admit_rate[i] * count_pat[i]),
    n = count_pat[i]
  )
}

我想从每个测试中提取26个置信区间,如下所示:

# Region 1: the two limits of the confidence interval
ci[[1]][["conf.int"]][1]  # lower bound
ci[[1]][["conf.int"]][2]  # upper bound

# Region 2: same two limits
ci[[2]][["conf.int"]][1]
ci[[2]][["conf.int"]][2]

如何编写loop来提取26个下限和上限,并将它们保存为列表或数据框?

谢谢!

1 个答案:

答案 0 :(得分:1)

这是一种tidyverse方法:使用嵌套的tibble对每一行运行binom.test,并使用broom::glance提取每个检验的置信区间:

library(tidyverse)
library(broom)

# Nest each region's row, run binom.test() on it, and attach the confidence
# bounds reported by broom::glance(). Yields one row per region with the
# input columns plus conf.low / conf.high.
# Requires tidyr >= 1.0 for the `cols` argument of unnest().
binom_df <- data.frame(crude2admit_rate, count_pat) %>%
  group_by(region = row_number()) %>%
  nest() %>%
  # nest() keeps the grouping; drop it so the result is a plain tibble
  ungroup() %>%
  mutate(
    # Refer to columns by name rather than by position inside each nested row
    model = map(data, ~ binom.test(
      x = round(.x$crude2admit_rate * .x$count_pat),
      n = .x$count_pat
    )),
    glance = map(model, ~ glance(.x)[, c("conf.low", "conf.high")])
  ) %>%
  select(-model) %>%
  # Name the list-columns explicitly; a bare unnest() is deprecated and warns
  unnest(cols = c(data, glance))

或者使用 do:

# Same computation via do(): one binom.test() per region group, summarised
# with broom::glance(), keeping only the region id and the interval bounds.
# The result stays grouped by region.
binom_df2 <- data.frame(crude2admit_rate, count_pat) %>%
  group_by(region = row_number()) %>%
  do(
    glance(
      binom.test(round(.$crude2admit_rate * .$count_pat), n = .$count_pat)
    )
  ) %>%
  select(region, conf.low, conf.high)

结果:

> binom_df
# A tibble: 26 x 5
   region crude2admit_rate count_pat     conf.low conf.high
    <int>            <dbl>     <dbl>        <dbl>     <dbl>
 1      1       0.18715084       358 0.1480846783 0.2314813
 2      2       0.00000000         7 0.0000000000 0.4096164
 3      3       0.11111111        18 0.0137512157 0.3471204
 4      4       0.03333333        30 0.0008435709 0.1721695
 5      5       0.17500000        40 0.0733827294 0.3277901
 6      6       0.10810811        37 0.0302519651 0.2541759
 7      7       0.10080645       496 0.0757494508 0.1307376
 8      8       0.14388489       973 0.1224219683 0.1675278
 9      9       0.19029374      4698 0.1791579215 0.2018158
10     10       0.15268456       596 0.1247488289 0.1841129
# ... with 16 more rows

> binom_df2
# A tibble: 26 x 3
# Groups:   region [26]
   region     conf.low conf.high
    <int>        <dbl>     <dbl>
 1      1 0.1480846783 0.2314813
 2      2 0.0000000000 0.4096164
 3      3 0.0137512157 0.3471204
 4      4 0.0008435709 0.1721695
 5      5 0.0733827294 0.3277901
 6      6 0.0302519651 0.2541759
 7      7 0.0757494508 0.1307376
 8      8 0.1224219683 0.1675278
 9      9 0.1791579215 0.2018158
10     10 0.1247488289 0.1841129
# ... with 16 more rows