我目前正在处理一个相当大的二分(0/1 计分)数据集(共 50 道题),想拼凑一个 R 脚本,使其在连续四次回答错误之后停止对作答结果求和,但遇到了困难。
我在想可能需要一个循环,但不太确定从哪里开始。
我附上了一个示例工作簿,以(希望)阐明问题/解决方案。
Sample Workbook with Discontinue/Ceiling Rule
这是我的数据框(data frame)的 dput() 输出:
# dput()-style literal of the example data frame: 4 rows, an integer ID column
# (1..4) followed by 50 item columns var1..var50 holding 0L/1L values.
# The expression is not assigned here; assign its value to a name to use it.
structure(list(ID = 1:4, var1 = c(1L, 1L, 1L, 1L), var2 = c(1L,
1L, 0L, 0L), var3 = c(1L, 0L, 1L, 0L), var4 = c(1L, 0L, 1L, 0L
), var5 = c(1L, 0L, 0L, 0L), var6 = c(1L, 1L, 0L, 0L), var7 = c(1L,
1L, 0L, 0L), var8 = c(1L, 0L, 0L, 0L), var9 = c(1L, 1L, 0L, 0L
), var10 = c(1L, 1L, 0L, 0L), var11 = c(1L, 1L, 0L, 0L), var12 = c(1L,
0L, 1L, 0L), var13 = c(1L, 1L, 0L, 0L), var14 = c(1L, 1L, 0L,
0L), var15 = c(1L, 0L, 0L, 0L), var16 = c(1L, 0L, 0L, 0L), var17 = c(1L,
1L, 1L, 1L), var18 = c(1L, 1L, 1L, 0L), var19 = c(1L, 0L, 0L,
0L), var20 = c(1L, 0L, 1L, 1L), var21 = c(1L, 0L, 0L, 0L), var22 = c(1L,
0L, 0L, 0L), var23 = c(1L, 0L, 1L, 0L), var24 = c(1L, 0L, 0L,
1L), var25 = c(1L, 1L, 0L, 0L), var26 = c(1L, 0L, 0L, 0L), var27 = c(1L,
1L, 1L, 1L), var28 = c(1L, 0L, 0L, 0L), var29 = c(1L, 0L, 0L,
1L), var30 = c(1L, 0L, 0L, 0L), var31 = c(1L, 0L, 0L, 0L), var32 = c(1L,
1L, 0L, 1L), var33 = c(1L, 0L, 0L, 0L), var34 = c(1L, 1L, 0L,
1L), var35 = c(1L, 0L, 0L, 1L), var36 = c(1L, 0L, 0L, 0L), var37 = c(1L,
0L, 0L, 1L), var38 = c(1L, 0L, 0L, 0L), var39 = c(1L, 1L, 0L,
0L), var40 = c(1L, 1L, 1L, 1L), var41 = c(1L, 0L, 0L, 1L), var42 = c(1L,
0L, 0L, 0L), var43 = c(1L, 1L, 1L, 0L), var44 = c(1L, 0L, 0L,
0L), var45 = c(1L, 0L, 0L, 0L), var46 = c(1L, 0L, 0L, 0L), var47 = c(1L,
0L, 0L, 0L), var48 = c(1L, 0L, 0L, 1L), var49 = c(1L, 0L, 0L,
0L), var50 = c(0L, 0L, 0L, 1L)), row.names = c(NA, 4L), class =
"data.frame")
答案 0 :(得分:1)
下面是一种虽然丑陋、但应该能解决您问题的方法(前提是我正确理解了您的问题和数据)。如果这正是您想要的,我会在今晚晚些时候整理代码并加上注释。
# Discontinue/ceiling rule, applied to each response column of `df1`:
# sum the values from the first element up to and including the first run of
# at least `min_run` consecutive zeros; everything after that run is ignored.
# NOTE(review): this scores each column (var1..var50) down the rows. If the
# rule should run across a respondent's items instead, apply the same logic to
# each row - confirm against the intended data layout.
min_run <- 4L
responses <- df1[-1] # drop the leading id column

# Run-length encode every column once so runs of equal values can be inspected.
rle_list <- lapply(responses, rle)

# For each column: index of the last element of the first qualifying zero-run.
# If a column never reaches `min_run` consecutive zeros, keep the whole column;
# the previous `1:which(...)[1]` failed with "NA/NaN argument" in that case.
four_plus_idx <- vapply(
  seq_along(rle_list),
  function(i) {
    runs <- rle_list[[i]]
    stop_run <- which(runs$lengths >= min_run & runs$values == 0)[1]
    if (is.na(stop_run)) {
      return(sum(runs$lengths))
    }
    sum(runs$lengths[seq_len(stop_run)])
  },
  integer(1)
)

# Keep only the part of each column that counts toward the score.
# seq_len() (rather than 1:idx) stays correct for zero-length columns.
valid_sums <- Map(
  function(column, last_idx) column[seq_len(last_idx)],
  responses,
  four_plus_idx
)

# Named numeric vector with one ceiling-limited sum per response column.
vapply(valid_sums, sum, numeric(1))
var1 var2 var3 var4 var5 var6 var7 var8 var9 var10 var11 var12 var13 var14 var15 var16 var17 var18 var19 var20 var21 var22 var23 var24
13 11 0 24 14 89 3 9 13 51 11 0 4 12 11 20 7 6 9 124 26 16 8 3
var25 var26 var27 var28 var29 var30 var31 var32 var33 var34 var35 var36 var37 var38 var39 var40 var41 var42 var43 var44 var45 var46 var47 var48
7 55 3 21 32 14 9 2 54 9 51 16 45 3 8 9 0 28 35 3 7 25 20 17
var49 var50
11 4
样本数据:
# Reproducible sample data: 10,000 rows with an `id` column plus 50 columns
# (var1..var50) of values drawn uniformly from {0, 1}.
set.seed(1)
df1 <- data.frame(id = 1:1e4)
# replicate() returns a 10000 x 50 matrix; assigning it to 50 new column names
# adds one data-frame column per matrix column.
# Use TRUE rather than T: T is an ordinary variable that can be reassigned.
df1[paste0("var", 1:50)] <- replicate(
  50,
  sample(c(0, 1), 1e4, replace = TRUE)
)