这是follow up question。上一个问题的答案是进行随机抽样和替换。如何更改代码,以便将每个观察分配到J
" urn"没有将观察结果放回'彩票'
这是我现在的代码:
set.seed(9782)
I <- 500
g <- 10
library(dplyr)
anon_id <- function(n = 1, lenght = 12) {
randomString <- c(1:n)
for (i in 1:n)
{
randomString[i] <- paste(sample(c(0:9, letters, LETTERS),
lenght, replace = TRUE),
collapse = "")
}
return(randomString)
}
df <- data.frame(id = anon_id(n = I, lenght = 16),
group = sample(1:g, I, T))
J <- 3
p <- c(0.25, 0.5, 0.25)
randomize <- function(data, urns=2, block_id = NULL, p=NULL, seed=9782) {
if(is.null(p)) p <- rep(1/urns, urns)
if(is.null(block_id)){
df1 <- data %>%
mutate(Treatment = sample(x = c(1:urns),
size = n(),
replace = T,
prob = p))
return(df1)
}else{
df1 <- data %>% group_by_(block_id) %>%
mutate(Treatment = sample(x = c(1:urns),
size = n(),
replace = T,
prob = p))
}
}
df1 <- randomize(data = df, urns = J, block_id = "group", p = p, seed = 9782)
如果我将replace = T
更改为replace = F
,我会收到以下错误:
Error: cannot take a sample larger than the population when 'replace = FALSE'
澄清我的目标:
假设我有10个教室(或村庄,或类似的东西)。为了简单起见,假设每个教室有20名学生(实际上他们将有N_j)。每个教室的课堂,我想将每个学生分配给J个小组中的一个,例如J=3
。 P表示将分配给每个组的分数。例如25%到1组40%到组2,35%到组3。
答案 0 :(得分:1)
此解决方案基于@Frank的评论。我创建了一个函数来执行块j
的随机化,另一个函数为每个块调用该函数。
randomize_block <- function(data, block=NULL, block_name=NULL, urns, p, seed=9782) {
set.seed(seed)
if(!is.null(block)) {
condition <- paste0(block_name,"==",block)
df <- data %>% filter_(condition)
} else df <- data
if(is.null(p)) p <- rep(1/urns, urns)
N <- nrow(df)
Np <- round(N*p,0)
if(sum(Np)!=N) Np[1] <- N - sum(Np[2:length(Np)])
Urns = rep(seq_along(p), Np)
Urns = sample(Urns)
df$urn <- Urns
return(df)
}
randomize <- function(data, block_name=NULL, urns, p, seed=9782) {
if(is.null(p)) p <- rep(1/urns, urns)
if(!is.null(block_name)){
blocks <- unique(data[,block_name])
df <- lapply(blocks, randomize_block,
data = data,
block_name=block_name,
urns = urns,
p = p,
seed=seed)
return(data.table::rbindlist(df))
}else {
df <- randomize_block(data = data,
urns = urns, p = p,
seed=seed)
}
}
test <- randomize(data = df, block_name = "group",
urns = 3, p = c(0.25, 0.5, 0.25),
seed=4222016)
我试图弄清楚是否可以使用dplyr
来执行此操作,实施该功能的替代解决方案非常受欢迎!
答案 1 :(得分:1)
My answer to your other question无法替换,如下所示:
block_rand <- as.tibble(randomizr::block_ra(blocks = df$group, conditions = c("urn_1","urn_2","urn_3")))
df2 <- as.tibble(bind_cols(df, block_rand))
df2 %>% janitor::tabyl(group, value)
df2 %>%
group_by(id) %>%
filter(n()>1) %>%
str()