我有以下数据:
# Attach the tidyverse packages.
library(tidyverse)
# Print a reproducible representation of `test2`. The structure(...) call
# below appears to be that dput() output pasted verbatim; it is not assigned
# to any name here.
dput(test2)
# The literal describes a 38-row tibble with three columns:
#   Cat2  - factor with levels "A", "B", "NADA"
#   start - numeric interval start
#   end   - numeric interval end
structure(list(Cat2 = structure(c(3L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("A",
"B", "NADA"), class = "factor"), start = c(0, 94, 95, 98, 98,
98, 101, 104, 105, 106, 108, 108, 112, 112, 114, 114, 117, 119,
119, 120, 120, 123, 123, 125, 128, 128, 131, 131, 134, 134, 137,
137, 139, 141, 141, 153, 155, 156), end = c(94, 95, 98, 98, 98,
101, 104, 105, 106, 108, 108, 112, 112, 114, 114, 117, 119, 119,
120, 120, 123, 123, 125, 128, 128, 131, 131, 134, 134, 137, 137,
139, 141, 141, 153, 155, 156, 157)), .Names = c("Cat2", "start",
"end"), row.names = c(NA, -38L), class = c("tbl_df", "tbl", "data.frame"
))
如何转换此时间序列，以便得到如下结果？
预期输出:
NADA 0 94
A 94 98
A,B 98 98
B 98 114
NADA 114 119
B 119 141
NADA 141 153 ###because of long B duration
B 153 157
答案 0 :(得分:0)
下面是一种不算简洁的做法:
# Categories that may combine into the joint "A,B" state.
ab <- c("A", "B")

# Collapse consecutive intervals that share a state into one row each.
df %>%
  # Stage 1: work on plain strings, and relabel every interval whose
  # inclusive length (end - start + 1) exceeds 6 as "NADA".
  mutate(
    Cat2 = as.character(Cat2),
    Cat2 = ifelse(end - start + 1 > 6, "NADA", Cat2)
  ) %>%
  # Stage 2: a row becomes "A,B" when it and the previous row are both in
  # `ab`, carry different labels, and the row starts exactly where the
  # previous one ended.
  mutate(
    Cat2 = ifelse(
      Cat2 %in% ab & lag(Cat2) %in% ab & Cat2 != lag(Cat2) & lag(end) == start,
      "A,B",
      Cat2
    )
  ) %>%
  # Stage 3: run id that increases each time the label changes; the first
  # row is compared with itself so it never counts as a change.
  mutate(g = cumsum(Cat2 != lag(Cat2, default = Cat2[1]))) %>%
  group_by(g) %>%
  # One row per run: its label, the first start and the last end.
  summarise(
    Cat2 = first(Cat2),
    start = first(start),
    end = last(end)
  ) %>%
  select(-g) # the run id was only needed for grouping
# # A tibble: 8 × 3
# Cat2 start end
# <chr> <dbl> <dbl>
# 1 NADA 0 94
# 2 A 94 98
# 3 A,B 98 98
# 4 B 98 114
# 5 NADA 114 119
# 6 B 119 141
# 7 NADA 141 153
# 8 B 153 157