Question

我有一个长格式的数据框，看起来像这样：

我想做的是（1）将条件1的试验随机分为4组，然后计算每个ID的Y平均值，（2）对条件2的试验执行相同的过程。这是新输出的样子：

是否有使用tidyverse的简洁方法？我才刚刚起步，很难过！

Answer 1

假设您的数据框不是那么小，这应该可以满足您的需求。

set.seed(2)
df <- tibble(ID = c(rep(1,50),rep(2,50)),
       Condition = rep(c(1,1,2,2,2,1,1,2,2,2,1,1,1,2,2,1,1,1,2,2),5),
       Y = rnorm(100,100,20)) ##Creates some fake data



result_df <- df %>% mutate(randomizer = sapply(1:length(row_number()), function(i)sample(c(1,2,3,4),1))) %>%
  group_by(Condition) %>%
  group_by(ID, Condition, randomizer) %>%
  summarize(mean_y = mean(Y, na.rm = TRUE)) %>%
  mutate(condition_status = paste0("Condition",Condition,"M",randomizer)) %>%
  ungroup() %>% 
  dplyr::select(-Condition, -randomizer) %>%
  spread(condition_status, mean_y)


result_df

Answer 2

这不是最好的方法，但是确实有效

x <- data.frame(c(rep(1,10), rep(2,10)), c(1,1,2,2,2,1,1,2,2,2,1,1,1,2,2,1,1,1,2,2), c(100,200,80,58,89,100,200,80,58,89,95,72,99,120,130,95,72,99,120,130))
colnames(x) <- c("ID", "Condition", "Y")
id <- unique(x[,1])
con <- unique(x[,2])
#multiple by the number of groups to split into
y <- data.frame(matrix(, ncol = (length(id) * 4) + 1, nrow = length(id)))
y[,1] <- id
colnames(y) <- c("ID", "Condition1M1", "Condition1M2", "Condition1M3", "Condition1M4", "Condition2M1", "Condition2M2", "Condition2M3", "Condition2M4")
x1 <- x[which(x[,2] == con[1]),]
x2 <- x[which(x[,2] == con[2]),]
#specify sample size
n <- 5
x11 <- x1[sample(nrow(x1), n, replace = FALSE),]
x12 <- x1[sample(nrow(x1), n, replace = FALSE),]
x13 <- x1[sample(nrow(x1), n, replace = FALSE),]
x14 <- x1[sample(nrow(x1), n, replace = FALSE),]
x21 <- x2[sample(nrow(x2), n, replace = FALSE),]
x22 <- x2[sample(nrow(x2), n, replace = FALSE),]
x23 <- x2[sample(nrow(x2), n, replace = FALSE),]
x24 <- x2[sample(nrow(x2), n, replace = FALSE),]

y[1,2] <- mean(x11[which(x11[,1] == id[1]),3])
y[1,3] <- mean(x12[which(x12[,1] == id[1]),3])
y[1,4] <- mean(x13[which(x13[,1] == id[1]),3])
y[1,5] <- mean(x14[which(x14[,1] == id[1]),3])
y[2,2] <- mean(x21[which(x21[,1] == id[2]),3])
y[2,3] <- mean(x22[which(x22[,1] == id[2]),3])
y[2,4] <- mean(x23[which(x23[,1] == id[2]),3])
y[2,5] <- mean(x24[which(x24[,1] == id[2]),3])
y[1,6] <- mean(x11[which(x11[,1] == id[1]),3])
y[1,7] <- mean(x12[which(x12[,1] == id[1]),3])
y[1,8] <- mean(x13[which(x13[,1] == id[1]),3])
y[1,9] <- mean(x14[which(x14[,1] == id[1]),3])
y[2,6] <- mean(x21[which(x21[,1] == id[2]),3])
y[2,7] <- mean(x22[which(x22[,1] == id[2]),3])
y[2,8] <- mean(x23[which(x23[,1] == id[2]),3])
y[2,9] <- mean(x24[which(x24[,1] == id[2]),3])

结果：

  ID Condition1M1 Condition1M2 Condition1M3 Condition1M4 Condition2M1 Condition2M2 Condition2M3 Condition2M4
1  1     133.3333          100          150     133.3333     133.3333          100          150     133.3333
2  2     125.0000          125          125     120.0000     125.0000          125          125     120.0000

使用tidyverse从条件中随机获取平均值

2 个答案: