分组数据中的记录编号

时间:2018-06-24 06:41:20

标签: r dplyr

当满足特定条件时，我需要为记录编号。例如，在分组数据中，当 sw1=='q' 且 type=='good' 时，该行记为 record_1，并且 record_1 需要一直重复，直到下一次出现 sw1=='q' 且 type=='good' 为止（此时变为 record_2，依此类推）。

这就是我的意思

# Example data: two groups ("a" and "b") with eight rows each.
# A new record starts on every row where sw1 == "q" and type == "good".
df <- data.frame(
  gr = rep(letters[1:2], each = 8),
  # The same eight-letter pattern (q r s t q u q w) is used in both groups.
  # The pieces inside c() are deliberately unnamed: a name there would only
  # label vector elements, it would not create another column.
  sw1 = rep(c(letters[17:20], letters[c(17, 21, 17, 23)]), 2),
  type = c(
    "good", "bad", "Visky", "Wine", "good", "good", "Visky", "bad",  # group a
    "good", "Wine", "Visky", "bad", "good", "Visky", "good", "bad"   # group b
  )
)

    gr sw1  type
1   a   q  good   #record 1
2   a   r   bad
3   a   s Visky
4   a   t  Wine
5   a   q  good   #record 2
6   a   u  good
7   a   q Visky
8   a   w   bad
9   b   q  good   #record 1
10  b   r  Wine
11  b   s Visky
12  b   t   bad
13  b   q  good   #record 2
14  b   u Visky
15  b   q  good   #record 3
16  b   w   bad

所以我尝试了下面的代码

library(dplyr)


# Attempt from the question: within each `gr` group, flag the rows at or
# after the first row where sw1 == "q" and type == "good".
# cummax() over a logical vector can only yield 0 or 1, so the label stays
# at 1 after the first hit instead of increasing on later hits.
df %>%
  group_by(gr) %>%
  mutate(label = cummax(sw1 == "q" & type == "good"))

输出

# A tibble: 16 x 4
# Groups:   gr [2]
   gr    sw1   type  label
   <fct> <fct> <fct> <int>
 1 a     q     good      1
 2 a     r     bad       1
 3 a     s     Visky     1
 4 a     t     Wine      1
 5 a     q     good      1
 6 a     u     good      1
 7 a     q     Visky     1
 8 a     w     bad       1
 9 b     q     good      1
10 b     r     Wine      1
11 b     s     Visky     1
12 b     t     bad       1
13 b     q     good      1
14 b     u     Visky     1
15 b     q     good      1
16 b     w     bad       1

但预期输出是

   gr    sw1   type  label   record
 1 a     q     good      1   record_1
 2 a     r     bad       1   record_1 
 3 a     s     Visky     1   record_1
 4 a     t     Wine      1   record_1
 5 a     q     good      2   record_2 
 6 a     u     good      2   record_2
 7 a     q     Visky     2   record_2
 8 a     w     bad       2   record_2
 9 b     q     good      1   record_1
10 b     r     Wine      1   record_1
11 b     s     Visky     1   record_1
12 b     t     bad       1   record_1
13 b     q     good      2   record_2
14 b     u     Visky     2   record_2
15 b     q     good      3   record_3
16 b     w     bad       3   record_3

忘了提起我也尝试过dense_rank,但是输出甚至没有接近预期的输出。

2 个答案:

答案 0 :(得分:1)

您可以尝试:

library(dplyr)


# Number the records within each `gr` group: cumsum() adds 1 on every row
# where type == "good" and sw1 == "q", so the running total is the record id
# and it is repeated until the next matching row.
df %>%
  group_by(gr) %>%
  mutate(label = cumsum(type == "good" & sw1 == "q")) %>%
  # Drop the grouping so later verbs do not silently operate per group.
  ungroup()

答案 1 :(得分:1)

您应该像下面这样使用cumsum

library(tidyverse)

# Number the records within each `gr` group and build a text label from it.
# cumsum() over a logical vector already returns integers, so no extra
# integer coercion is needed.
df %>%
  group_by(gr) %>%
  mutate(
    # Running count of rows where type == "good" and sw1 == "q".
    label = cumsum(type == "good" & sw1 == "q"),
    # "record_1", "record_2", ... derived from the running count.
    record = paste0("record_", label)
  ) %>%
  # Drop the grouping so later verbs do not silently operate per group.
  ungroup()