找到多个组的最后一行

时间:2018-01-03 14:58:38

标签: r dplyr

| id | msgid | source | value |
|----|-------|--------|-------|
| 1  | 1     | B      | 0     |
| 1  | 2     | A      | 1     |
| 1  | 3     | B      | 0     |
| 2  | 1     | B      | 0     |
| 2  | 2     | A      | 0     |
| 2  | 3     | A      | 1     |
| 2  | 4     | B      | 0     |

在上面的数据中,我想根据其他列创建 value 列。id 表示一个对话,msgid 是每个对话中的消息编号。

我希望识别来自source=A的最后一条消息的行号。

我试图解决它。但是,我只能识别对话中的最后一行。

# Asker's attempt. For each conversation (id), keep the row with the largest
# msgid -- that is the final message of the conversation whatever its source,
# which is why this does not yet find the last message from source "A".
last_values <- dat %>% group_by(id) %>% 
   slice(which.max(msgid)) %>%
   ungroup %>%
   # Running total of the per-conversation maximum msgid values; it is used
   # below as a row index into dat.
   # NOTE(review): this presumably relies on msgid counting 1..n inside each
   # id and on dat being ordered by id, then msgid -- confirm.
   mutate(value = cumsum(msgid))

# Start with every row unflagged ...
dat$final_val <- 0    
# ... then write 1 into column 5 on the indexed rows.
# NOTE(review): column 5 is final_val only if dat had exactly the four columns
# shown in the table before final_val was added -- confirm.
dat[last_values$value,5] <- 1

4 个答案:

答案 0 :(得分:3)

我们可以通过以下方式创建 'value' 列(下面的代码中命名为 value1):

# Within each conversation (id), flag the last row whose source is "A".
dat %>%
  group_by(id) %>%
  mutate(
    value1 = {
      from_a <- source == "A"
      # duplicated(..., fromLast = TRUE) is FALSE only for the last occurrence
      # of each distinct value, so its negation marks the final TRUE and the
      # final FALSE of the group; requiring from_a keeps only the final "A".
      as.integer(from_a & !duplicated(from_a, fromLast = TRUE))
    }
  )
# A tibble: 7 x 5
# Groups: id [2]
#     id msgid source value value1
#  <int> <int> <chr>  <int>  <int>
#1     1     1 B          0      0
#2     1     2 A          1      1
#3     1     3 B          0      0
#4     2     1 B          0      0
#5     2     2 A          0      0
#6     2     3 A          1      1
#7     2     4 B          0      0

答案 1 :(得分:2)

另一个dplyr解决方案:

library(dplyr)

# Rebuild the example data from the question.
df <- data.frame(
  id = rep(c(1, 2), times = c(3, 4)),
  msgid = c(1, 2, 3, 1, 2, 3, 4),
  source = c("B", "A", "B", "B", "A", "A", "B")
)

# Group by conversation and source so that n() is the size of each
# (id, source) group. The row where row_number() == n() is the last row of its
# group; it is flagged with 1 only when that group's source is "A".
df <- df %>%
  group_by(id, source) %>%
  mutate(value = as.integer(row_number() == n() & source == "A"))

> df
# A tibble: 7 x 4
# Groups:   id, source [4]
     id msgid source value
  <dbl> <dbl> <fctr> <dbl>
1     1     1      B     0
2     1     2      A     1
3     1     3      B     0
4     2     1      B     0
5     2     2      A     0
6     2     3      A     1
7     2     4      B     0

答案 2 :(得分:1)

我提出了以下解决方案

library(tidyverse)

# The question did not supply the data, so rebuild it here.
df <- tibble(
  id = rep(c(1, 2), times = c(3, 4)),
  msgid = c(1, 2, 3, 1, 2, 3, 4),
  source = c("B", "A", "B", "B", "A", "A", "B")
)

# Within each (id, source) group, the row holding the largest msgid is flagged
# with 1L when the source is "A"; every other row gets 0L.
df %>%
  group_by(id, source) %>%
  mutate(value = as.integer(source == "A" & msgid == max(msgid)))

输出:

# A tibble: 7 x 4
# Groups:   id, source [4]
     id msgid source value
  <dbl> <dbl>  <chr> <int>
1     1     1      B     0
2     1     2      A     1
3     1     3      B     0
4     2     1      B     0
5     2     2      A     0
6     2     3      A     1
7     2     4      B     0

答案 3 :(得分:1)

我使用索引来查找每个对话中 A 最后一次出现的位置,并检查该位置是否与行号匹配;匹配时将 value 设为 1,否则设为 0。

library(dplyr)

# Example data from the question.
mydf <- data.frame(
  id = c(1, 1, 1, 2, 2, 2, 2),
  msgid = c(1, 2, 3, 1, 2, 3, 4),
  source = c("B", "A", "B", "B", "A", "A", "B")
)

# Per conversation (id): grep() returns the positions of the rows whose source
# matches the pattern "A", last() picks the final one, and the row whose
# row_number() equals that position gets 1; all other rows get 0.
group_by(mydf, id) %>%
  mutate(
    value = if_else(
      last(grep(source, pattern = "A")) == row_number(),
      1, 0,
      # A conversation with no "A" leaves nothing for last() to pick, so the
      # comparison is NA; report 0 for those rows rather than NA.
      missing = 0
    )
  )

     id msgid source value
  <dbl> <dbl> <fctr> <dbl>
1  1.00  1.00 B       0   
2  1.00  2.00 A       1.00
3  1.00  3.00 B       0   
4  2.00  1.00 B       0   
5  2.00  2.00 A       0   
6  2.00  3.00 A       1.00
7  2.00  4.00 B       0