按计数值复制观测值簇,并生成一个能唯一标识每个副本的新 ID

时间:2020-06-03 09:32:18

标签: r dplyr count tidyr unnest

我想按 count 列给出的次数复制每个观测值簇(ID),并生成一个新变量(new_ID)来唯一标识每个簇的副本。例如,对于下面的数据框 df1,我希望得到 df2 那样的结果:

# Example input: one row per observation, grouped into clusters by `ID`.
# IDs "1", "2" and "3" contribute 3, 2 and 1 rows respectively.
df1 <- data.frame(
  ID = rep(c("1", "2", "3"), times = c(3, 2, 1)),
  sex = rep(c("M", "F", "M"), times = c(3, 2, 1)),
  count = rep(c(4, 3, 2), times = c(3, 2, 1))
)
df1
#>   ID sex count
#> 1  1   M     4
#> 2  1   M     4
#> 3  1   M     4
#> 4  2   F     3
#> 5  2   F     3
#> 6  3   M     2
# Expected result, written out by hand: 12, 6 and 2 rows for IDs "1", "2"
# and "3", with `new_ID` running consecutively from "1" to "9".
df2 <- data.frame(
  ID = rep(c("1", "2", "3"), times = c(12, 6, 2)),
  new_ID = rep(as.character(1:9), times = c(3, 3, 3, 3, 2, 2, 2, 1, 1)),
  sex = rep(c("M", "F", "M"), times = c(12, 6, 2)),
  count = rep(c(4, 3, 2), times = c(12, 6, 2))
)
df2
#>    ID new_ID sex count
#> 1   1      1   M     4
#> 2   1      1   M     4
#> 3   1      1   M     4
#> 4   1      2   M     4
#> 5   1      2   M     4
#> 6   1      2   M     4
#> 7   1      3   M     4
#> 8   1      3   M     4
#> 9   1      3   M     4
#> 10  1      4   M     4
#> 11  1      4   M     4
#> 12  1      4   M     4
#> 13  2      5   F     3
#> 14  2      5   F     3
#> 15  2      6   F     3
#> 16  2      6   F     3
#> 17  2      7   F     3
#> 18  2      7   F     3
#> 19  3      8   M     2
#> 20  3      9   M     2

谢谢您的帮助。

1 个答案:

答案 0 :(得分:0)

如果我理解正确的话,可以这样做:

library(dplyr)

# Replicate every ID cluster `count` times and label each replicate with a
# globally unique, consecutive `new_ID`.
df1 %>%
  # Repeat each row `count` times; keep `count` because the next step reads it.
  tidyr::uncount(count, .remove = FALSE) %>%
  group_by(ID) %>%
  # Within an ID, cut the n() expanded rows into `count` equal-sized
  # replicates numbered 1..count. Assumes `count` is constant within an ID,
  # so that n() / first(count) is a whole number.
  # NOTE(review): copies of one original row sit next to each other after
  # uncount(), so this grouping is only meaningful when the rows of an ID are
  # interchangeable, as they are in df1.
  mutate(new_ID = rep(seq_len(first(count)), each = n() / first(count))) %>%
  ungroup() %>%
  # Number the runs over (ID, new_ID) rather than new_ID alone: an ID with
  # count == 1 ends on replicate 1, which would otherwise be merged with
  # replicate 1 of the following ID.
  mutate(new_ID = data.table::rleid(ID, new_ID))


# A tibble: 20 x 4
#   ID    sex   count new_ID
#   <chr> <chr> <dbl>  <int>
# 1 1     M         4      1
# 2 1     M         4      1
# 3 1     M         4      1
# 4 1     M         4      2
# 5 1     M         4      2
# 6 1     M         4      2
# 7 1     M         4      3
# 8 1     M         4      3
# 9 1     M         4      3
#10 1     M         4      4
#11 1     M         4      4
#12 1     M         4      4
#13 2     F         3      5
#14 2     F         3      5
#15 2     F         3      6
#16 2     F         3      6
#17 2     F         3      7
#18 2     F         3      7
#19 3     M         2      8
#20 3     M         2      9