在 R 中,连续四次回答错误后停止对作答求和

时间:2019-05-07 19:35:49

标签: r

我目前正在处理一个相当大的二分(0/1 计分)数据集(共 50 道题),想拼凑出一个 R 脚本:一旦连续四次回答错误,就停止对后续作答进行求和。但我在这里遇到了困难。

我在想可能需要一个循环,但不太确定从哪里开始。

我附上了一个示例工作簿,以(希望)阐明问题/解决方案。

Sample Workbook with Discontinue/Ceiling Rule

这是我的数据框(data frame)的 dput() 输出:

# Example data frame from the question (looks like dput() output): 4 rows,
# an integer ID column (1:4) and 50 integer item columns var1..var50 that
# hold only 0L/1L values. The expression is not assigned to a name here.
structure(list(ID = 1:4, var1 = c(1L, 1L, 1L, 1L), var2 = c(1L, 
1L, 0L, 0L), var3 = c(1L, 0L, 1L, 0L), var4 = c(1L, 0L, 1L, 0L
), var5 = c(1L, 0L, 0L, 0L), var6 = c(1L, 1L, 0L, 0L), var7 = c(1L, 
1L, 0L, 0L), var8 = c(1L, 0L, 0L, 0L), var9 = c(1L, 1L, 0L, 0L
), var10 = c(1L, 1L, 0L, 0L), var11 = c(1L, 1L, 0L, 0L), var12 = c(1L, 
0L, 1L, 0L), var13 = c(1L, 1L, 0L, 0L), var14 = c(1L, 1L, 0L, 
0L), var15 = c(1L, 0L, 0L, 0L), var16 = c(1L, 0L, 0L, 0L), var17 = c(1L, 
1L, 1L, 1L), var18 = c(1L, 1L, 1L, 0L), var19 = c(1L, 0L, 0L, 
0L), var20 = c(1L, 0L, 1L, 1L), var21 = c(1L, 0L, 0L, 0L), var22 = c(1L, 
0L, 0L, 0L), var23 = c(1L, 0L, 1L, 0L), var24 = c(1L, 0L, 0L, 
1L), var25 = c(1L, 1L, 0L, 0L), var26 = c(1L, 0L, 0L, 0L), var27 = c(1L, 
1L, 1L, 1L), var28 = c(1L, 0L, 0L, 0L), var29 = c(1L, 0L, 0L, 
1L), var30 = c(1L, 0L, 0L, 0L), var31 = c(1L, 0L, 0L, 0L), var32 = c(1L, 
1L, 0L, 1L), var33 = c(1L, 0L, 0L, 0L), var34 = c(1L, 1L, 0L, 
1L), var35 = c(1L, 0L, 0L, 1L), var36 = c(1L, 0L, 0L, 0L), var37 = c(1L, 
0L, 0L, 1L), var38 = c(1L, 0L, 0L, 0L), var39 = c(1L, 1L, 0L, 
0L), var40 = c(1L, 1L, 1L, 1L), var41 = c(1L, 0L, 0L, 1L), var42 = c(1L, 
0L, 0L, 0L), var43 = c(1L, 1L, 1L, 0L), var44 = c(1L, 0L, 0L, 
0L), var45 = c(1L, 0L, 0L, 0L), var46 = c(1L, 0L, 0L, 0L), var47 = c(1L, 
0L, 0L, 0L), var48 = c(1L, 0L, 0L, 1L), var49 = c(1L, 0L, 0L, 
0L), var50 = c(0L, 0L, 0L, 1L)), row.names = c(NA, 4L), class = 
"data.frame")

1 个答案:

答案 0 :(得分:1)

下面是一种不太优雅、但应该能解决您问题的做法(前提是我正确理解了您的问题和数据)。如果这正是您需要的,我会在今晚晚些时候整理代码并补充注释。

# Discontinue (ceiling) rule, applied to every column of `df1` except the
# first one: sum the responses up to the end of the first run of at least
# `min_zero_run` consecutive 0s and ignore everything after it.
# NOTE: each *column* is scored as one response sequence. To score each row
# instead, run the same steps on as.data.frame(t(df1[-1])).
min_zero_run <- 4L

# Run-length encoding of each response column.
rle_list <- lapply(df1[-1], rle)

# Index of the last response to keep in each column: the end of the first
# qualifying run of 0s, or the full column length when no such run exists.
# Without the fallback, `which(...)[1]` is NA for such a column and building
# the index range from it stops the script with an error.
four_plus_idx <- vapply(rle_list, function(runs) {
  stop_run <- which(runs$lengths >= min_zero_run & runs$values == 0)[1]
  if (is.na(stop_run)) {
    sum(runs$lengths)
  } else {
    sum(runs$lengths[seq_len(stop_run)])
  }
}, integer(1), USE.NAMES = FALSE)

# Keep only the responses up to the cut-off in each column; seq_len() also
# copes with a cut-off of 0 (empty column), where 1:0 would count backwards.
valid_sums <- Map(
  function(responses, keep_n) responses[seq_len(keep_n)],
  df1[-1],
  four_plus_idx
)

# Named vector with one score per column.
vapply(valid_sums, sum, numeric(1))
 var1  var2  var3  var4  var5  var6  var7  var8  var9 var10 var11 var12 var13 var14 var15 var16 var17 var18 var19 var20 var21 var22 var23 var24 
   13    11     0    24    14    89     3     9    13    51    11     0     4    12    11    20     7     6     9   124    26    16     8     3 
var25 var26 var27 var28 var29 var30 var31 var32 var33 var34 var35 var36 var37 var38 var39 var40 var41 var42 var43 var44 var45 var46 var47 var48 
    7    55     3    21    32    14     9     2    54     9    51    16    45     3     8     9     0    28    35     3     7    25    20    17 
var49 var50 
   11     4 

样本数据

# Reproducible sample data: 10,000 rows with an `id` column plus 50 columns
# var1..var50, each filled with random draws from c(0, 1).
set.seed(1)

df1 <- data.frame(id = 1:1e4)
# replicate() returns a 1e4 x 50 matrix; each matrix column becomes one
# data-frame column. TRUE is spelled out because `T` can be reassigned.
df1[paste0("var", 1:50)] <- replicate(
  50,
  sample(c(0, 1), 1e4, replace = TRUE)
)