我对 R 比较陌生,正在尝试在多个数据框之间进行查找。df1 记录了每辆车何时加入/退出我们的车队,df2 记录了每辆车处于维修状态的时间段。我想创建 df3,用来显示一组特定的汽车(每辆车对应一个列标题)在给定的时间步是否可用(结果为 是/否 或 1/0)。
示例数据如下。df1 和 df2 中会有许多汽车,但并非所有汽车都会出现在 df3 中。
我试过使用 between 和 foverlaps,但都没有成功。我开始怀疑这是否真的能在 R 中完成,还是只能退回到 Excel 里写一大堆 IF 语句。
df1
CarID Entry Exit
Car1 1 100
Car2 5 95
等
df2(显示服务时间表)
CarID ServiceType Start End
Car1 TypeA 10 20
Car1 TypeA 30 40
Car1 TypeB 45 46
Car2 TypeA 20 30
等
df3(想要创建/填充这个表)
Date Car1 Car2
1
2
答案 0 :(得分:3)
这是另一种选择:
library(data.table)

# Expand each car's fleet window into one row per time step and flag whether
# the car is free (TRUE) or inside a service window (FALSE).
# setDT() converts df1/df2 to data.tables by reference (no copy is made).
setDT(df1)
setDT(df2)

# One row per (CarID, Date) for every time step between Entry and Exit.
df3 <- df1[, .(Date = seq(Entry, Exit)), by = CarID]

# Every time step starts out as available ...
df3[, avail := TRUE]

# ... and the non-equi update join flips the rows whose Date falls inside any
# service window of the same car. Writing FALSE directly, instead of deriving
# the flag from is.na(ServiceType), keeps a service row with a missing
# ServiceType from being reported as "available".
df3[df2, avail := FALSE, on = .(CarID, Date >= Start, Date <= End)]

# Print the long-format result.
df3[]
# CarID Date avail
# <char> <int> <lgcl>
# 1: Car1 1 TRUE
# 2: Car1 2 TRUE
# 3: Car1 3 TRUE
# 4: Car1 4 TRUE
# 5: Car1 5 TRUE
# 6: Car1 6 TRUE
# 7: Car1 7 TRUE
# 8: Car1 8 TRUE
# 9: Car1 9 TRUE
# 10: Car1 10 FALSE
# ---
# 182: Car2 86 TRUE
# 183: Car2 87 TRUE
# 184: Car2 88 TRUE
# 185: Car2 89 TRUE
# 186: Car2 90 TRUE
# 187: Car2 91 TRUE
# 188: Car2 92 TRUE
# 189: Car2 93 TRUE
# 190: Car2 94 TRUE
# 191: Car2 95 TRUE
最后转换为宽格式:
# Reshape the long availability table to wide form: one row per Date and one
# column per CarID, with the `avail` flag as the cell value.
dcast(data = df3, formula = Date ~ CarID, value.var = "avail")
# Date Car1 Car2
# 1 1 TRUE NA
# 2 2 TRUE NA
# 3 3 TRUE NA
# 4 4 TRUE NA
# 5 5 TRUE TRUE
# 6 6 TRUE TRUE
# 7 7 TRUE TRUE
# 8 8 TRUE TRUE
# 9 9 TRUE TRUE
# 10 10 FALSE TRUE
# 11 11 FALSE TRUE
### ...
如果您需要将那些 NA 改为 FALSE(因为这些时间步上该车根本不可用),可以使用 dcast(..., fill = FALSE)。
答案 1 :(得分:-1)
使用 data.table:
library(data.table)

# Build df3: a Date x CarID table whose cells are "Yes" (in the fleet and not
# in service), "No" (inside a service window) or NA (no record for that car
# at that time step). Inputs: df1 (CarID, Entry, Exit) and df2 (CarID,
# ServiceType, Start, End).

# Work on data.table copies so that plain data.frames are accepted and the
# caller's df1/df2 are not overwritten (the snippet can be re-run as is).
fleet <- as.data.table(df1)
service <- as.data.table(df2)

# Cars to report, in order of first appearance in df1.
car_ids <- as.character(unique(fleet$CarID))

# One "Yes" row per car per time step between Entry and Exit.
# as.numeric() pins Date to double in every group, so the per-group results
# always share one column type.
in_fleet <- fleet[,
  .(Date = as.numeric(seq(Entry, Exit, by = 1)), Available = "Yes"),
  by = .(CarID)
]

# One row per car per time step inside a service window ...
in_service <- service[,
  .(Date = as.numeric(seq(Start, End, by = 1))),
  by = .(CarID, ServiceType, Start)
]
# ... restricted to the cars listed in df1 and reduced to the key columns.
in_service <- in_service[CarID %in% car_ids, .(CarID, Date)]
in_service[, Available := "No"]

# Stack the service rows first: unique() keeps the first row of each
# (CarID, Date) pair, so "No" overrides "Yes" and overlapping service
# windows collapse to a single row (dcast() then never has to aggregate).
status <- unique(
  rbind(in_service, in_fleet, use.names = TRUE),
  by = c("CarID", "Date")
)

# Pivot to wide form and put the car columns in df1 order. Cars without any
# service record need no special handling: they simply have no "No" rows.
df3 <- dcast(status, Date ~ CarID, value.var = "Available")
setcolorder(df3, c("Date", car_ids))
结果:
> dput(df3)
structure(list(Date = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92,
93, 94, 95, 96, 97, 98, 99, 100), Car1 = c("Yes", "Yes", "Yes",
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "No", "No", "No", "No",
"No", "No", "No", "No", "No", "No", "No", "Yes", "Yes", "Yes",
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "No", "No", "No", "No",
"No", "No", "No", "No", "No", "No", "No", "Yes", "Yes", "Yes",
"Yes", "No", "No", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes",
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes",
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes",
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes",
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes",
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes",
"Yes", "Yes", "Yes"), Car2 = c(NA, NA, NA, NA, "Yes", "Yes",
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes",
"Yes", "Yes", "Yes", "Yes", "No", "No", "No", "No", "No", "No",
"No", "No", "No", "No", "No", "Yes", "Yes", "Yes", "Yes", "Yes",
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes",
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes",
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes",
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes",
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes",
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes",
"Yes", "Yes", "Yes", "Yes", "Yes", "Yes", NA, NA, NA, NA, NA)), sorted = "Date", class = c("data.table",
"data.frame"), row.names = c(NA, -100L), .internal.selfref = <pointer: 0x0000017d31e21ef0>)