每组相同值的最长连续计数

时间:2018-12-07 14:54:05

标签: r group-by dplyr data.table plyr

我有一个如下所示的 data.frame,我想添加一个变量,用来表示每个组内 VALUE 变量中连续出现 1 的最长次数(即每组中 VALUE 连续为 1 的最长行数)。

GROUP_ID    VALUE
    1         0
    1         1
    1         1
    1         1
    1         1
    1         0
    2         1
    2         1
    2         0
    2         1
    2         1
    2         1
    3         1
    3         0
    3         1
    3         0

所以输出看起来像这样:

GROUP_ID    VALUE    CONSECUTIVE
    1         0           4
    1         1           4
    1         1           4
    1         1           4
    1         1           4
    1         0           4
    2         1           3
    2         1           3
    2         0           3
    2         1           3
    2         1           3
    2         1           3
    3         1           1
    3         0           1
    3         1           1
    3         0           1

任何帮助将不胜感激!

3 个答案:

答案 0 :(得分:5)

使用 dplyr:

library(dplyr)

# For each GROUP_ID, compute the length of the longest run of consecutive 1s
# in VALUE and store it on every row of that group as CONSECUTIVE.
dat %>%
  group_by(GROUP_ID) %>%
  mutate(CONSECUTIVE = {
    rl <- rle(VALUE)
    # `%in% 1` keeps NA runs out of the subscript, and the leading 0L makes a
    # group without any 1s yield 0 instead of -Inf plus a warning from max().
    max(0L, rl$lengths[rl$values %in% 1])
  })

给出:

# A tibble: 16 x 3
# Groups:   GROUP_ID [3]
   GROUP_ID VALUE CONSECUTIVE
      <int> <int>       <int>
 1        1     0           4
 2        1     1           4
 3        1     1           4
 4        1     1           4
 5        1     1           4
 6        1     0           4
 7        2     1           3
 8        2     1           3
 9        2     0           3
10        2     1           3
11        2     1           3
12        2     1           3
13        3     1           1
14        3     0           1
15        3     1           1
16        3     0           1

或使用 data.table:

library(data.table)
setDT(dat)            # convert 'dat' to a data.table by reference

# Add CONSECUTIVE by reference: per GROUP_ID, the length of the longest run of
# consecutive 1s in VALUE. The trailing [] prints the updated table.
dat[, CONSECUTIVE := {
  rl <- rle(VALUE)
  # `%in% 1` keeps NA runs out of the subscript, and the leading 0L makes a
  # group without any 1s yield 0 instead of -Inf plus a warning from max().
  max(0L, rl$lengths[rl$values %in% 1])
}, by = GROUP_ID][]

答案 1 :(得分:4)

我们可以将 `ave` 与 `rle` 一起使用,得到每个组(`GROUP_ID`)中连续出现 1 的最大次数。

# For each GROUP_ID, compute the length of the longest run of consecutive 1s
# in VALUE; ave() repeats that per-group value on every row of the group.
df$Consecutive <- ave(df$VALUE, df$GROUP_ID, FUN = function(x) {
  runs <- rle(x == 1)
  # which() drops NA runs from the subscript, and the leading 0L makes a group
  # without any 1s yield 0 instead of -Inf plus a warning from max().
  max(0L, runs$lengths[which(runs$values)])
})

df
#   GROUP_ID VALUE Consecutive
#1         1     0           4
#2         1     1           4
#3         1     1           4
#4         1     1           4
#5         1     1           4
#6         1     0           4
#7         2     1           3
#8         2     1           3
#9         2     0           3
#10        2     1           3
#11        2     1           3
#12        2     1           3
#13        3     1           1
#14        3     0           1
#15        3     1           1
#16        3     0           1

答案 2 :(得分:0)

这里是使用 data.table 的另一个选项:

library(data.table)
library(dplyr)  # used here only for na_if()

# Per GROUP_ID, count the rows in each run of 1s and keep the largest count.
# rleid(VALUE) * VALUE leaves the run id where VALUE is 1 and gives 0
# elsewhere (this relies on VALUE holding only 0/1); na_if() turns those 0s
# into NA so table() ignores them. The leading 0L makes a group without any
# 1s yield 0 instead of -Inf plus a warning from max() on an empty table.
setDT(df1)[, CONSECUTIVE := max(0L, table(na_if(rleid(VALUE) * VALUE, 0))),
           by = .(GROUP_ID)]
df1
#    GROUP_ID VALUE  CONSECUTIVE
# 1:        1     0            4
# 2:        1     1            4
# 3:        1     1            4
# 4:        1     1            4
# 5:        1     1            4
# 6:        1     0            4
# 7:        2     1            3
# 8:        2     1            3
# 9:        2     0            3
#10:        2     1            3
#11:        2     1            3
#12:        2     1            3
#13:        3     1            1
#14:        3     0            1
#15:        3     1            1
#16:        3     0            1

数据

# Example data: three groups (6, 6 and 4 rows) with an integer 0/1 VALUE
# column.
df1 <- data.frame(
  GROUP_ID = rep(1:3, times = c(6L, 6L, 4L)),
  VALUE = c(
    0L, 1L, 1L, 1L, 1L, 0L,
    1L, 1L, 0L, 1L, 1L, 1L,
    1L, 0L, 1L, 0L
  )
)