按组随机化观察(块)

时间:2016-04-13 13:50:31

标签: r dplyr

我有一个包含 I 个观察值的数据框,每个观察值都属于 g 个类别(组)之一。

library(dplyr)

# Fix the RNG state so the example data are reproducible.
set.seed(9782)

I <- 500  # number of observations
g <- 10   # number of groups (blocks)

# Generate random alphanumeric identifiers.
#
# Args:
#   n:      number of identifiers to generate (0 yields an empty vector).
#   lenght: number of characters in each identifier (spelling kept for
#           compatibility with existing callers).
#
# Returns: a character vector of length `n`; each element is `lenght`
#   characters drawn with replacement from 0-9, a-z and A-Z.
anon_id <- function(n = 1, lenght = 12) {
  # Build the alphabet once instead of on every iteration.
  alphabet <- c(0:9, letters, LETTERS)
  # seq_len() gives an empty sequence for n = 0, whereas 1:n would yield
  # c(1, 0) and produce a bogus result.
  vapply(
    seq_len(n),
    function(i) paste(sample(alphabet, lenght, replace = TRUE), collapse = ""),
    character(1)
  )
}

# Example data: one random 16-character id per observation, each placed in one
# of the g groups uniformly at random (ids are drawn before groups).
df <- data.frame(
  id = anon_id(n = I, lenght = 16),
  group = sample(seq_len(g), size = I, replace = TRUE)
)

我希望在给定概率向量 p 的情况下,将每个观察值随机分配到 J 个“urn”之一。也就是说,被分配到第 1 个 urn(j = 1)的概率是 p[1]。增加的复杂性在于,我想按块(block)逐块进行分配。

如果我忽略了这些块,我可以轻松地做到这一点:

J <- 3                    # number of urns
p <- c(0.25, 0.5, 0.25)   # assignment probability for each urn

# Unblocked assignment: draw all I urns in one call, ignoring `group`.
df1 <- mutate(
  df,
  urn = sample(x = seq_len(J), size = I, replace = TRUE, prob = p)
)

我想按“块”(block)来实现这种分配方法:

# Block randomization
#
# Randomly assign every observation of one block to an urn.
#
# Args:
#   g:      value of the `group` column identifying the block to randomize.
#   data:   data frame with a `group` column; defaults to the global `df`.
#   n_urns: number of urns; defaults to the global `J`.
#   prob:   assignment probabilities, one per urn; defaults to the global `p`.
#
# Returns: the rows of `data` belonging to block `g`, with an integer `urn`
#   column added.
randomize_block <- function(g, data = df, n_urns = J, prob = p) {
  # `!!g` forces the function argument, so a column named `g` cannot mask it.
  block <- data %>% filter(.data$group == !!g)
  # Draw outside of mutate() so columns named like the arguments cannot
  # shadow them.
  block$urn <- sample(
    x = seq_len(n_urns),
    size = nrow(block),
    replace = TRUE,
    prob = prob
  )
  block
}

# Randomize each block separately, then stack the per-block results.
df2 <- data.table::rbindlist(lapply(seq_len(g), randomize_block))

有更好的方法吗?

3 个答案:

答案 0 :(得分:2)

不确定这是否更好,但这里有一种基础 R 的做法:假设 data.frame df 中的分组变量名为 "group",urn 的取值为 1:J,分配概率由长度为 J 的向量 p 给出。

# Blocks, in order of first appearance.
blocks <- unique(df$group)

# get a list that collects position of observations (one element per block)
obsOrder <- lapply(blocks, function(i) which(df$group == i))

# get urn assignment: one draw per observation, block by block
urnAssignment <- lapply(
  obsOrder,
  function(idx) sample(seq_len(J), length(idx), replace = TRUE, prob = p)
)

# Write each block's draws back to that block's own rows. Indexing the
# left-hand side by position keeps every draw inside its block.
df$urnAssignment <- NA_integer_
df$urnAssignment[unlist(obsOrder)] <- unlist(urnAssignment)

答案 1 :(得分:2)

randomizr::block_ra完全可以满足您的需求。

library(randomizr)
library(janitor) #just for the tabyl function

# Blocked assignment with randomizr: within each block of `df$group`, units
# are allocated to the three urns using the assignment probabilities `p`
# (passed as `prob_each`, which must sum to 1).
block_rand <- tibble(
  value = randomizr::block_ra(
    blocks = df$group,
    prob_each = p,
    conditions = c("urn_1", "urn_2", "urn_3")
  )
)

df2 <- as_tibble(bind_cols(df, block_rand))

# Cross-tabulate block against assigned urn to inspect the allocation.
df2 %>% janitor::tabyl(group, value)

答案 2 :(得分:0)

这可以使用 dplyr 来完成:

# Randomly assign observations to treatment groups, optionally within blocks.
#
# Args:
#   data:     data frame of observations.
#   groups:   number of treatment groups; treatments are labelled 1..groups.
#   block_id: name(s) of the blocking column(s) as a character vector, or NULL
#             to randomize over the whole data frame at once.
#   p:        assignment probabilities, one per treatment group; NULL means
#             equal probabilities.
#   seed:     RNG seed applied before drawing so the assignment is
#             reproducible; pass NULL to leave the current RNG state untouched.
#
# Returns: `data` with an integer `Treatment` column added. The result is
#   ungrouped, so block grouping does not leak into later operations.
randomize <- function(data, groups = 2, block_id = NULL, p = NULL,
                      seed = 9782) {
  if (is.null(p)) {
    p <- rep(1 / groups, groups)
  }
  # Note: this reseeds the session-wide RNG.
  if (!is.null(seed)) {
    set.seed(seed)
  }
  if (!is.null(block_id)) {
    # all_of() selects the columns named in the character vector `block_id`.
    data <- data %>% group_by(across(all_of(block_id)))
  }
  # With grouped data, n() is the block size, so sampling happens per block.
  data %>%
    mutate(Treatment = sample(x = seq_len(groups),
                              size = n(),
                              replace = TRUE,
                              prob = p)) %>%
    ungroup()
}

# Blocked assignment of `df` to J urns with probabilities `p`, blocking on
# the "group" column.
df1 <- randomize(
  data = df,
  groups = J,
  block_id = "group",
  p = p,
  seed = 9782
)