我想创建一个索引列:它在 colA 中查找 "start" 和 "end" 这样的关键字,为每个从 "start" 到 "end" 的区段分配一个唯一的ID,并用一个计数器在遇到下一个 "start" 时分配序列中的下一个数字;不属于任何区段的行为 NA。
library(data.table)
# Sample input for the question: `colA` holds the marker strings and
# `wanted.column` is the hand-written expected result. Rows from each
# "start" through the following "end" share one ID; all other rows are NA.
in.data <- data.table(
  colA = c(
    rep("random", 5),
    "start", "random", "random", "end",
    "start", "random", "end",
    "start", "end",
    "random"
  ),
  wanted.column = c(rep(NA, 5), rep(1, 4), rep(2, 3), rep(3, 2), NA)
)
# Print the table so the expected layout is visible.
in.data
colA wanted.column
1: random NA
2: random NA
3: random NA
4: random NA
5: random NA
6: start 1
7: random 1
8: random 1
9: end 1
10: start 2
11: random 2
12: end 2
13: start 3
14: end 3
15: random NA
答案 0 :(得分:0)
数据:
# Recreate the question's sample table: the marker column is written in
# run-length form (each value repeated `times` times, 15 rows in total).
in.data <- data.table(
  colA = rep(
    c(
      "random", "start", "random", "end", "start",
      "random", "end", "start", "end", "random"
    ),
    times = c(5, 1, 2, 1, 1, 1, 1, 1, 1, 1)
  )
)
# Expected IDs: 5 leading NA rows, spans of length 4, 3 and 2, one trailing NA.
in.data$wanted.column <- rep(c(NA, 1, 2, 3, NA), times = c(5, 4, 3, 2, 1))
代码:
# Number every "start".."end" span in `colA` with a running ID (1, 2, 3, ...)
# and leave rows outside all spans as NA.
#
# The previous version filled the whole range from the first "start" to the
# last "end" with cumsum(start). That also labelled rows sitting between an
# "end" and the next "start" with the preceding span's ID, and it failed when
# either keyword was absent (min()/max() of an empty which() is Inf/-Inf).
#
# Assumes markers alternate start, end, start, end, ... (no nested spans).
# NOTE: a "start" with no later "end" keeps its ID through the last row.
in.data$wanted.column2 <- local({
  is_start <- in.data$colA %in% "start"
  is_end <- in.data$colA %in% "end"

  # Number of spans opened up to and including each row.
  starts_seen <- cumsum(is_start)
  # Number of spans closed strictly above each row, so that an "end" row
  # still counts as part of the span it closes.
  ends_above <- cumsum(is_end) - is_end

  # A row is inside a span only while more spans were opened than closed.
  span_id <- starts_seen
  span_id[starts_seen <= ends_above] <- NA_integer_
  span_id
})
结果:
# colA wanted.column wanted.column2
# 1: random NA NA
# 2: random NA NA
# 3: random NA NA
# 4: random NA NA
# 5: random NA NA
# 6: start 1 1
# 7: random 1 1
# 8: random 1 1
# 9: end 1 1
#10: start 2 2
#11: random 2 2
#12: end 2 2
#13: start 3 3
#14: end 3 3
#15: random NA NA