无法弄清楚这个……我在R中有一个像这样的数据集:
# Example data, one row per observation; the rows are laid out by z group.
# x: category label, z: grouping key, y: measured value.
x <- c(
  "A", "B", "C", "A",
  "A", "A", "C", "B", "A", "A", "B",
  "B", "A", "A", "C", "B", "A"
)
# Group 1 has 4 rows, group 2 has 7 rows, group 3 has 6 rows.
z <- rep(c(1, 2, 3), times = c(4, 7, 6))
y <- c(
  43, 32, 12, 32,
  22, 42, 45, 13, 15, 61, 71,
  13, 32, 24, 11, 15, 33
)
df <- data.frame(x, z, y)
像这样:
x z y
A 1 43
B 1 32
C 1 12
A 1 32
A 2 22
A 2 42
C 2 45
B 2 13
A 2 15
A 2 61
B 2 71
B 3 13
A 3 32
A 3 24
C 3 11
B 3 15
A 3 33
现在,我想在z的每个取值内标记A的首次出现;如果首次出现的A是连续的多行,则把这些连续的行全部标记。
理想情况下,最终结果应如下所示:
x z y tag
A 1 43 1
B 1 32 0
C 1 12 0
A 1 32 0
A 2 22 1
A 2 42 1
C 2 45 0
B 2 13 0
A 2 15 0
A 2 61 0
B 2 71 0
B 3 13 0
A 3 32 1
A 3 24 1
C 3 11 0
B 3 15 0
A 3 33 0
另外,如果有人对如何改写这个问题的标题、使其更直截了当有建议,我也很乐意采纳。 谢谢!
答案 0 :(得分:1)
library(dplyr)
library(data.table) # provides rleid()

# Example data: x is the category label, z the grouping key, y a value column.
x <- c(
  "A", "B", "C", "A",
  "A", "A", "C", "B", "A", "A", "B",
  "B", "A", "A", "C", "B", "A"
)
z <- c(1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3)
y <- c(43, 32, 12, 32, 22, 42, 45, 13, 15, 61, 71, 13, 32, 24, 11, 15, 33)
df <- data.frame(x, z, y)

# Within each z, tag the rows that belong to the first run of consecutive "A"s.
df %>%
  group_by(z) %>% # for each z
  mutate(
    id = rleid(x), # run id: increments whenever x changes
    tag = cumsum(x == "A") # running count of "A" rows seen so far
  ) %>%
  group_by(z, id) %>% # for each run inside each z
  # A run is the first "A" run exactly when it consists of "A"s and the
  # running count at its start is 1. as.numeric() on the logical keeps the
  # column a double in every group; ifelse() would return integer or double
  # depending on which branches a given group happens to hit.
  mutate(tag = as.numeric(x == "A" & min(tag) == 1)) %>%
  ungroup() %>% # forget the grouping
  select(-id) # remove the helper column
# # A tibble: 17 x 4
# x z y tag
# <fct> <dbl> <dbl> <dbl>
# 1 A 1 43 1
# 2 B 1 32 0
# 3 C 1 12 0
# 4 A 1 32 0
# 5 A 2 22 1
# 6 A 2 42 1
# 7 C 2 45 0
# 8 B 2 13 0
# 9 A 2 15 0
#10 A 2 61 0
#11 B 2 71 0
#12 B 3 13 0
#13 A 3 32 1
#14 A 3 24 1
#15 C 3 11 0
#16 B 3 15 0
#17 A 3 33 0
答案 1 :(得分:0)
使用 base R 的解决方案
# Example data: x is the category label, z the grouping key, y a value column.
x <- c(
  "A", "B", "C", "A",
  "A", "A", "C", "B", "A", "A", "B",
  "B", "A", "A", "C", "B", "A"
)
z <- c(1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3)
y <- c(43, 32, 12, 32, 22, 42, 45, 13, 15, 61, 71, 13, 32, 24, 11, 15, 33)
df <- data.frame(x, z, y)

# Flag the first run of consecutive `target` values within each group.
#
# x:      vector of labels (character or factor).
# group:  grouping vector of the same length as x.
# target: label whose first run should be flagged (default "A").
#
# Returns an integer vector the same length as x: 1 for rows in the first
# run of `target` inside their group, 0 for every other row.
tag_first_run <- function(x, group, target = "A") {
  keep_first_run <- function(hit) {
    runs <- rle(hit)
    # cumsum() over the run values numbers the TRUE runs 1, 2, ...; every
    # run from the second TRUE run onwards is switched off.
    runs$values[cumsum(runs$values) > 1] <- FALSE
    inverse.rle(runs)
  }
  # ave() applies the helper per group and puts the results back in the
  # original row order; working on a logical vector avoids any detour
  # through character strings.
  as.integer(ave(x %in% target, group, FUN = keep_first_run))
}

# Inside within() the columns x and z are directly in scope.
within(df, {
  tag <- tag_first_run(x, z)
})
# x z y tag
# 1 A 1 43 1
# 2 B 1 32 0
# 3 C 1 12 0
# 4 A 1 32 0
# 5 A 2 22 1
# 6 A 2 42 1
# 7 C 2 45 0
# 8 B 2 13 0
# 9 A 2 15 0
# 10 A 2 61 0
# 11 B 2 71 0
# 12 B 3 13 0
# 13 A 3 32 1
# 14 A 3 24 1
# 15 C 3 11 0
# 16 B 3 15 0
# 17 A 3 33 0