在R中按时间区间进行连接(join)的高效方法

时间:2017-06-15 17:33:04

标签: r

我有一张这样的表格(1):

START;END;CATEGORY
20.05.2017 19:23:00;20.05.2017 19:27:00;A
20.05.2017 19:27:00;20.05.2017 19:32:00;B
20.05.2017 19:32:00;20.05.2017 19:38:00;A

和这样的表格(2):

TIMESTAMP;VALUES
20.05.2017 19:24:09;323
20.05.2017 19:23:12;2322
20.05.2017 19:27:55;23333
20.05.2017 19:36:12;123123

现在我想把表1中的类别连接(join)到表2中。连接键是时间戳(timestamp):如果表2中的TIMESTAMP位于表1的START和END之间,则添加对应的类别。我基本上想得到一张这样的表:

TIMESTAMP;VALUES;CATEGORY
20.05.2017 19:24:09;323;A
20.05.2017 19:23:12;2322;A
20.05.2017 19:27:55;23333;B
20.05.2017 19:36:12;123123;A

这些是我的尝试,但它们并不高效:

I)

# Attempt I: label every row of table2 with the CATEGORY of the table1
# interval that contains its TIMESTAMP (both bounds inclusive).
#
# The comparison is vectorised over all rows of table2, so only one loop
# over the intervals is needed. When a timestamp sits on a boundary shared
# by two intervals, the later interval wins because it is assigned last.
# Rows that fall in no interval keep NA.
table2$CATEGORY <- NA_character_
for (j in seq_len(nrow(table1))) {
  in_interval <- table2$TIMESTAMP >= table1$START[j] &
    table2$TIMESTAMP <= table1$END[j]
  # as.character() keeps the label text even if CATEGORY is stored as a factor.
  table2$CATEGORY[in_interval] <- as.character(table1$CATEGORY[j])
}

II)

# Attempt II: expand every table1 interval into one row per second, then
# equi-join that lookup table to table2 on TIMESTAMP.
#
# Each interval is expanded independently and all pieces are bound once at
# the end, instead of re-copying mapped_df with rbind() on every iteration.
# seq_len() makes the expansion a no-op when table1 has zero rows.
interval_rows <- lapply(seq_len(nrow(table1)), function(i) {
  data.frame(
    TIMESTAMP = seq(
      from = as.POSIXct(table1$START[i]),
      to = as.POSIXct(table1$END[i]),
      by = 1  # step of one second
    ),
    CATEGORY = table1$CATEGORY[i]
  )
})

# Seeding the bind with an empty data.frame keeps mapped_df a data.frame
# even when there are no intervals to expand.
mapped_df <- do.call(rbind, c(list(data.frame()), interval_rows))

# merge() joins on the only shared column, TIMESTAMP. Both endpoints of each
# interval are expanded, so a timestamp on a boundary shared by two
# intervals yields one output row per interval.
merge(table2, mapped_df)

提前致谢!

1 个答案:

答案 0 :(得分:3)

我倾向于使用SQL来执行此操作。 sqldf包非常方便。

# Example interval table: one row per [START, END] interval with its label.
# The numbers are seconds since 1970-01-01 00:00:00 UTC; tz = "" means they
# are displayed in the session's local time zone (the first START is
# 2017-05-20 23:23:00 UTC, i.e. 19:23:00 in a UTC-4 session).
Table1 <- data.frame(
  START = .POSIXct(c(1495322580, 1495322820, 1495323120), tz = ""),
  END = .POSIXct(c(1495322820, 1495323120, 1495323480), tz = ""),
  CATEGORY = c("A", "B", "A"),
  stringsAsFactors = FALSE
)

# Example measurement table: one value per timestamp.
# The numbers are seconds since 1970-01-01 00:00:00 UTC; tz = "" means they
# are displayed in the session's local time zone.
Table2 <- data.frame(
  TIMESTAMP = .POSIXct(
    c(1495322649, 1495322592, 1495322875, 1495323372),
    tz = ""
  ),
  VALUES = c(323L, 2322L, 23333L, 123123L),
  stringsAsFactors = FALSE
)

library(sqldf)

# Interval (non-equi) join: attach to every Table2 row the CATEGORY of the
# Table1 interval that contains its TIMESTAMP.
#
# The bounds are half-open, [START, END): the intervals are contiguous (one
# row's END is the next row's START), so a timestamp exactly on a boundary
# matches exactly one interval. Strict > / < would leave it unmatched, and
# >= / <= would match it twice.
# LEFT JOIN keeps Table2 rows that fall in no interval (CATEGORY is NA).
# VALUES is an SQL keyword, hence the [VALUES] quoting.
sqldf("SELECT T2.TIMESTAMP, T2.[VALUES], T1.CATEGORY
       FROM Table2 T2
          LEFT JOIN Table1 T1
            ON T2.TIMESTAMP >= T1.START AND T2.TIMESTAMP < T1.END")