我有一张这样的表格(1):
START;END;CATEGORY
20.05.2017 19:23:00;20.05.2017 19:27:00;A
20.05.2017 19:27:00;20.05.2017 19:32:00;B
20.05.2017 19:32:00;20.05.2017 19:38:00;A
和这样的表格(2):
TIMESTAMP;VALUES
20.05.2017 19:24:09;323
20.05.2017 19:23:12;2322
20.05.2017 19:27:55;23333
20.05.2017 19:36:12;123123
现在我想把表1中的类别(CATEGORY)合并到表2中,连接的依据是时间戳(timestamp):如果表2中的TIMESTAMP位于表1某一行的START和END之间,则添加该行的类别。我基本上想要一张这样的表:
TIMESTAMP;VALUES;CATEGORY
20.05.2017 19:24:09;323;A
20.05.2017 19:23:12;2322;A
20.05.2017 19:27:55;23333;B
20.05.2017 19:36:12;123123;A
这些是我的尝试,但它们并不高效:
I)
# Attempt I: label every row of table2 with the CATEGORY of the table1
# interval that contains its TIMESTAMP (both bounds inclusive, as in the
# original comparison). Rows that fall in no interval keep NA.
# If a timestamp sits on a boundary shared by two intervals, the later
# table1 row wins because it is processed last.
table2$CATEGORY <- rep(NA_character_, nrow(table2))
for (j in seq_len(nrow(table1))) {
  # The comparison is vectorised over all of table2, so no inner loop over
  # its rows is needed.
  in_interval <- table2$TIMESTAMP >= table1$START[j] &
    table2$TIMESTAMP <= table1$END[j]
  # as.character() keeps the label text even if CATEGORY is a factor.
  table2$CATEGORY[in_interval] <- as.character(table1$CATEGORY[j])
}
II)
# Attempt II: expand every table1 interval into one row per second, then
# join the expanded lookup table to table2 on the shared TIMESTAMP column.
# NOTE(review): as.POSIXct() is called without `format`, so START/END are
# assumed to be date-times already (or ISO-style strings) — confirm for
# "dd.mm.yyyy HH:MM:SS" text input.
expand_interval <- function(i) {
  data.frame(
    TIMESTAMP = seq(
      from = as.POSIXct(table1$START[i]),
      to = as.POSIXct(table1$END[i]),
      by = 1
    ),
    CATEGORY = table1$CATEGORY[i]
  )
}
# Build all pieces first and bind them once: calling rbind() inside a loop
# re-copies the accumulated rows on every iteration.
pieces <- lapply(seq_len(nrow(table1)), expand_interval)
# The leading empty data frame keeps the result a data frame even when
# table1 has no rows.
mapped_df <- do.call(rbind, c(list(data.frame()), pieces))
merge(table2, mapped_df)
提前致谢!
答案 0 :(得分:3)
我倾向于使用 SQL 来执行此操作,sqldf 包非常方便。
# Sample interval table: one row per (START, END, CATEGORY) interval.
# START/END are POSIXct values given as seconds since the Unix epoch;
# tz = "" means they print in the session's current time zone.
Table1 <- data.frame(
  START = .POSIXct(c(1495322580, 1495322820, 1495323120), tz = ""),
  END = .POSIXct(c(1495322820, 1495323120, 1495323480), tz = ""),
  CATEGORY = c("A", "B", "A"),
  stringsAsFactors = FALSE
)
# Sample measurement table: one row per (TIMESTAMP, VALUES) observation.
# TIMESTAMP is POSIXct given as seconds since the Unix epoch; tz = "" means
# it prints in the session's current time zone. VALUES is integer.
Table2 <- data.frame(
  TIMESTAMP = .POSIXct(
    c(1495322649, 1495322592, 1495322875, 1495323372),
    tz = ""
  ),
  VALUES = c(323L, 2322L, 23333L, 123123L),
  stringsAsFactors = FALSE
)
library(sqldf)
# Attach to each Table2 row the CATEGORY of the Table1 interval containing
# its TIMESTAMP. The interval is half-open, [START, END): a timestamp equal
# to START is matched, while one equal to END belongs to the next interval,
# so touching intervals neither drop boundary timestamps nor duplicate rows.
# LEFT JOIN keeps Table2 rows that match no interval (CATEGORY is then NA).
# [VALUES] is bracket-quoted because VALUES is an SQL keyword.
sqldf("SELECT T2.TIMESTAMP, T2.[VALUES], T1.CATEGORY
      FROM Table2 T2
      LEFT JOIN Table1 T1
      ON T2.TIMESTAMP >= T1.START AND T2.TIMESTAMP < T1.END")