基于开始和结束值(所有可能的值)的行的索引块

时间:2018-10-22 18:36:16

标签: r

我想创建一个索引列,该索引列会查找诸如“开始”和“结束”之类的某些关键字以分配唯一的ID,并具有一个计数器/循环以在映射下一个“开始”时分配序列中的下一个数字< / p>

library(data.table)

in.data <- data.table(colA=c("random", "random", "random", "random", "random", "start", "random", "random", 
                             "end", "start", "random", "end", "start", "end", "random", "random", "random",
                             "start","end","random","random", "start","start", "end", "end"))
in.data$wanted.column <- c(NA, NA, NA, NA, NA, 1, 1, 1,
                           1, 2, 2, 2, 3, 3, NA, NA, NA,
                           4, 4, NA, NA, 5, 6, 6, 6)

in.data$wanted.column2 = NA
IND <- min(which(in.data$colA %in% "start")):max(which(in.data$colA %in% "end"))
in.data$wanted.column2[IND] = cumsum(in.data$colA %in% "start")[IND]

in.data$wanted.column3 = NA
IND <- min(which(in.data$colA =="start")):max(which(in.data$colA == "end"))
in.data$wanted.column3[IND] = cumsum(in.data$colA == "start")[IND]

in.data

      colA wanted.column wanted.column2 wanted.column3
 1: random            NA             NA             NA
 2: random            NA             NA             NA
 3: random            NA             NA             NA
 4: random            NA             NA             NA
 5: random            NA             NA             NA
 6:  start             1              1              1
 7: random             1              1              1
 8: random             1              1              1
 9:    end             1              1              1
10:  start             2              2              2
11: random             2              2              2
12:    end             2              2              2
13:  start             3              3              3
14:    end             3              3              3
15: random            NA              3              3
16: random            NA              3              3
17: random            NA              3              3
18:  start             4              4              4
19:    end             4              4              4
20: random            NA              4              4
21: random            NA              4              4
22:  start             5              5              5
23:  start             6              6              6
24:    end             6              6              6
25:    end             6              6              6
      colA wanted.column wanted.column2 wanted.column3

0 个答案:

没有答案