我有一个名为dayfile
的数据框,其中包含以下变量:
Period SubCode EchoTime PosX PosY PosZ
1 8753 13 2016-06-19 00:06:27 16.81 39.21 12.66
2 8753 13 2016-06-19 00:06:35 16.67 39.08 12.66
3 8753 13 2016-06-19 00:06:36 16.33 39.60 13.03
4 8753 13 2016-06-19 00:06:45 17.14 38.14 12.23
5 8753 13 2016-06-19 00:06:53 16.95 38.21 12.38
6 8753 13 2016-06-19 00:06:53 17.44 37.67 11.95
我还有另一个名为probe.DOT1
的数据框,如下所示:
DO.time.1m DO.1m Temp.1m
1 2016-06-18 10:24:50 7.69 18.04
2 2016-06-18 11:24:50 7.54 19.12
3 2016-06-18 11:54:50 7.57 18.98
4 2016-06-18 12:24:50 9.51 19.88
5 2016-06-18 12:54:50 9.30 11.62
6 2016-06-18 13:24:50 8.81 11.54
我想在dayfile中创建一个名为O2的新列,其值取自probe.DOT1$DO.1m:对每一行,取dayfile$EchoTime所落入的、由probe.DOT1$DO.time.1m的两个连续值构成的时间范围所对应的读数,即它应该看起来像这样:
Period SubCode EchoTime PosX PosY PosZ O2
1 8753 13 2016-06-19 00:06:27 16.81 39.21 12.66 7.54
2 8753 13 2016-06-19 00:06:35 16.67 39.08 12.66 7.54
3 8753 13 2016-06-19 00:06:36 16.33 39.60 13.03 7.59
4 8753 13 2016-06-19 00:06:45 17.14 38.14 12.23 7.59
5 8753 13 2016-06-19 00:06:53 16.95 38.21 12.38 7.59
6 8753 13 2016-06-19 00:06:53 17.44 37.67 11.95 7.59
我试过这个:
# NOTE(review): this is the asker's original attempt, kept unchanged.
# Both comparisons pair dayfile$EchoTime with probe.DOT1 values element by
# element, although the two columns come from different data frames, and
# diff() returns the gaps between consecutive probe times (one element fewer
# than the column) rather than timestamps, so the logical index does not
# describe "EchoTime lies between two consecutive probe times".
dayfile$O2 <- probe.DOT1[dayfile$EchoTime < probe.DOT1$DO.time.1m &
dayfile$EchoTime > diff(probe.DOT1$DO.time.1m, lag = 1) , 'DO.1m']
但它给了我错误。有没有人有任何建议?
感谢。
编辑:
下面是一些dput输出,您可以用它们重建我的文件的一小部分:
dayfile
:
structure(list(Period = c(7017, 7017, 7017, 7017, 7017, 7017,
7017, 7017, 7017, 7017, 7017, 7017, 7017, 7017, 7017, 7017, 7017,
7017, 7017, 7017, 7017, 7017, 7017, 7017, 7017, 7017, 7017, 7017,
7017, 7017, 7017, 7017, 7017, 7017, 7017, 7017, 7017, 7017, 7017,
7017, 7017), SubCode = c(20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
), EchoTime = structure(c(1466249003, 1466249010, 1466249017,
1466249025, 1466249032, 1466249039, 1466249046, 1466249053, 1466249060,
1466249067, 1466249074, 1466249081, 1466249088, 1466249095, 1466249102,
1466249109, 1466249116, 1466249123, 1466249130, 1466249137, 1466249144,
1466249151, 1466249158, 1466249165, 1466249172, 1466249179, 1466249186,
1466249193, 1466249200, 1466249207, 1466249214, 1466249221, 1466249228,
1466249235, 1466249242, 1466249249, 1466249256, 1466249263, 1466249270,
1466249277, 1466249284), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
PosX = c(14.69, 14.69, 14.69, 16.31, 14.94, 14.94, 14.85,
14.73, 16.64, 16.65, 16.7, 16.36, 16.38, 16.37, 16.41, 16.39,
16.12, 15.8, 16.33, 16.17, 16.32, 15.61, 15.5, 15.82, 16.68,
16.68, 16.68, 16.61, 16.68, 16.68, 11.39, 11.39, 16.68, 16.6,
16.6, 15.08, 15.08, 14.9, 14.81, 15.08, 15.15), PosY = c(36.98,
36.98, 36.98, 37.68, 36.9, 36.9, 36.96, 37.09, 37.19, 37.19,
37.01, 37.49, 37.47, 37.47, 37.45, 37.46, 37.75, 38.08, 38.06,
38.49, 38.06, 38.54, 38.39, 38.06, 37.16, 37.16, 37.16, 37.36,
37.16, 37.16, 40.09, 40.09, 37.16, 37.37, 37.37, 36.79, 36.79,
36.98, 36.94, 36.79, 36.73), PosZ = c(14.68, 14.68, 14.68,
15.67, 15.03, 15.03, 14.66, 14.88, 15.12, 15.22, 14.84, 15.28,
15.58, 15.48, 15.88, 15.68, 16.05, 15.91, 15.37, 15.64, 15.27,
16.1, 16.3, 16.33, 15.61, 15.61, 15.61, 15.8, 15.61, 15.61,
18.06, 18.06, 15.61, 15.7, 15.7, 15.13, 15.13, 15.3, 15.38,
15.13, 15.19)), .Names = c("Period", "SubCode", "EchoTime",
"PosX", "PosY", "PosZ"), row.names = c(213387L, 213389L, 213391L,
213393L, 213395L, 213397L, 213399L, 213401L, 213403L, 213405L,
213407L, 213409L, 213411L, 213413L, 213415L, 213417L, 213419L,
213421L, 213423L, 213425L, 213427L, 213429L, 213431L, 213433L,
213435L, 213437L, 213439L, 213441L, 213443L, 213445L, 213447L,
213449L, 213450L, 213452L, 213454L, 213456L, 213458L, 213460L,
213462L, 213464L, 213466L), class = "data.frame")
probe.DOT1
:
structure(list(DO.time.1m = structure(c(1466245490, 1466249090,
1466250890, 1466252690, 1466254490, 1466256290, 1466258090, 1466259890,
1466261690, 1466263490, 1466265290, 1466267090, 1466268890, 1466270690,
1466272490, 1466274290, 1466276090, 1466277890, 1466279690, 1466281490
), class = c("POSIXct", "POSIXt"), tzone = "UTC"), DO.1m = c(7.69,
7.54, 7.57, 9.51, 9.3, 8.81, 8.71, 8.66, 8.19, 8.52, 8.79, 9.01,
9.19, 9.39, 9.79, 9.82, 9.48, 9.5, 9.29, 9.38), Temp.1m = c(18.04,
19.12, 18.98, 19.88, 11.62, 11.54, 12.24, 12.22, 12.32, 12.2,
12.18, 12.16, 12.14, 11.8, 11.62, 11.68, 11.52, 11.32, 11.34,
11.14)), .Names = c("DO.time.1m", "DO.1m", "Temp.1m"), row.names = c(NA,
20L), class = "data.frame")
答案 0 :(得分:1)
我会为此使用双循环:
dayfile$O2 <- NA
# For every echo, find the pair of consecutive probe timestamps that brackets
# it and store the DO.1m reading taken at the end of that interval.
# An echo in (DO.time.1m[j - 1], DO.time.1m[j]] receives DO.1m[j]; the upper
# bound is inclusive so an echo that coincides with a probe timestamp is not
# left without a value. Echoes outside every interval stay NA.
echo_times <- dayfile$EchoTime
probe_times <- probe.DOT1$DO.time.1m
probe_do <- probe.DOT1$DO.1m
o2 <- rep(NA_real_, length(echo_times))

for (i in seq_along(echo_times)) {
  # seq_along(x)[-1] is empty when there are fewer than two probe readings,
  # whereas 2:nrow(...) would count backwards (2, 1) and index element 0.
  for (j in seq_along(probe_times)[-1]) {
    in_interval <- echo_times[i] > probe_times[j - 1] &&
      echo_times[i] <= probe_times[j]
    # isTRUE() treats a missing timestamp as "no match" instead of letting
    # if() fail on an NA condition.
    if (isTRUE(in_interval)) {
      o2[i] <- probe_do[j]
    }
  }
}

dayfile$O2 <- o2
答案 1 :(得分:1)
唉……fuzzyjoin几乎可以解决这个问题(那样会“优雅”得多),不过算了。
下面是我不用循环、改用管道来完成这件事的写法……不过用你给出的示例数据得到的输出不太理想,因为落在各时间范围之间的值只有1个……还是看看它在你的真实数据上效果如何吧。
library(dplyr)
library(tidyr)

## Master table: one row per distinct timestamp found in either data frame.
all <- data.frame(
  ts = unique(unlist(c(dayfile$EchoTime, probe.DOT1$DO.time.1m)))
)

## Join both tables onto the master timestamps, drop Temp.1m, carry the
## Period/SubCode/PosX/PosY/PosZ values down into the blank rows, rename
## DO.1m to O2 and keep only the rows that have an O2 reading.
all %>%
  left_join(dayfile, by = c(ts = "EchoTime")) %>%
  left_join(probe.DOT1, by = c(ts = "DO.time.1m")) %>%
  select(-Temp.1m) %>%
  arrange(ts) %>%
  fill(Period, SubCode, PosX, PosY, PosZ) %>%
  rename(O2 = DO.1m) %>%
  filter(!is.na(O2))
# ts Period SubCode PosX PosY PosZ O2
# 1 2016-06-18 10:24:50 NA NA NA NA NA 7.69
# 2 2016-06-18 11:24:50 7017 20 16.38 37.47 15.58 7.54
# 3 2016-06-18 11:54:50 7017 20 15.15 36.73 15.19 7.57
# 4 2016-06-18 12:24:50 7017 20 15.15 36.73 15.19 9.51
# 5 2016-06-18 12:54:50 7017 20 15.15 36.73 15.19 9.30
# 6 2016-06-18 13:24:50 7017 20 15.15 36.73 15.19 8.81
# 7 2016-06-18 13:54:50 7017 20 15.15 36.73 15.19 8.71
# 8 2016-06-18 14:24:50 7017 20 15.15 36.73 15.19 8.66
# 9 2016-06-18 14:54:50 7017 20 15.15 36.73 15.19 8.19
# 10 2016-06-18 15:24:50 7017 20 15.15 36.73 15.19 8.52
# 11 2016-06-18 15:54:50 7017 20 15.15 36.73 15.19 8.79
# 12 2016-06-18 16:24:50 7017 20 15.15 36.73 15.19 9.01
# 13 2016-06-18 16:54:50 7017 20 15.15 36.73 15.19 9.19
# 14 2016-06-18 17:24:50 7017 20 15.15 36.73 15.19 9.39
# 15 2016-06-18 17:54:50 7017 20 15.15 36.73 15.19 9.79
# 16 2016-06-18 18:24:50 7017 20 15.15 36.73 15.19 9.82
# 17 2016-06-18 18:54:50 7017 20 15.15 36.73 15.19 9.48
# 18 2016-06-18 19:24:50 7017 20 15.15 36.73 15.19 9.50
# 19 2016-06-18 19:54:50 7017 20 15.15 36.73 15.19 9.29
# 20 2016-06-18 20:24:50 7017 20 15.15 36.73 15.19 9.38
要查看包含所有数据的整个数据集,请使用:
# Only the two joins: every timestamp row is shown, nothing is filled,
# dropped or renamed yet.
all %>%
  left_join(dayfile, by = c(ts = "EchoTime")) %>%
  left_join(probe.DOT1, by = c(ts = "DO.time.1m"))
在R中,您可以(并且应该)一次只运行一部分代码……以便查看每一步的作用。做法是从all开始选中一段“文本块”再运行。注意到我上面那次运行只显示了到那一步为止的结果了吗?管道中的其他各行也可以这样做……所以只需选中直到fill(含)为止的所有内容,然后运行:
# The pipeline up to and including fill(): joins, time ordering, then the
# Period/SubCode/PosX/PosY/PosZ values carried down into the blank rows.
all %>%
  left_join(dayfile, by = c(ts = "EchoTime")) %>%
  left_join(probe.DOT1, by = c(ts = "DO.time.1m")) %>%
  arrange(ts) %>%
  fill(Period, SubCode, PosX, PosY, PosZ)
答案 2 :(得分:0)
以下是基于@Amit的代码得到的解决方案:
all <- data.frame(ts = unique(unlist(c(dayfile$EchoTime, probe.DOT1$DO.time.1m))))
library(dplyr)
library(tidyr)

# Carry each probe reading forward in time: after sorting by timestamp,
# fill() copies the most recent non-missing DO.1m down onto the rows that
# follow it. Filtering on PosX then removes the probe-only rows.
all <- all %>%
  left_join(dayfile, by = c("ts" = "EchoTime")) %>%
  left_join(probe.DOT1, by = c("ts" = "DO.time.1m")) %>%
  arrange(ts) %>%
  fill(DO.1m) %>%
  select(-Temp.1m) %>%
  filter(!is.na(PosX)) %>%
  rename(O2 = DO.1m) %>%
  arrange(Period, ts)

# Look the value up by timestamp instead of copying the column by position:
# a positional copy is only right when dayfile is already sorted by Period
# and EchoTime, and it fails outright when the PosX filter drops a row.
# Rows of dayfile with a missing PosX were filtered out above and get NA.
dayfile$O2 <- all$O2[
  match(as.numeric(dayfile$EchoTime), as.numeric(all$ts))
]
我修改了@Amit的代码:改为填充DO.1m而不是dayfile的各列,然后按PosX(而不是DO.1m)进行过滤。效果非常好!
这是输出:
Period SubCode EchoTime PosX PosY PosZ O2
213387 7017 20 2016-06-18 11:23:23 14.69 36.98 14.68 7.69
213389 7017 20 2016-06-18 11:23:30 14.69 36.98 14.68 7.69
213391 7017 20 2016-06-18 11:23:37 14.69 36.98 14.68 7.69
213393 7017 20 2016-06-18 11:23:45 16.31 37.68 15.67 7.69
213395 7017 20 2016-06-18 11:23:52 14.94 36.90 15.03 7.69
213397 7017 20 2016-06-18 11:23:59 14.94 36.90 15.03 7.69
213399 7017 20 2016-06-18 11:24:06 14.85 36.96 14.66 7.69
213401 7017 20 2016-06-18 11:24:13 14.73 37.09 14.88 7.69
213403 7017 20 2016-06-18 11:24:20 16.64 37.19 15.12 7.69
213405 7017 20 2016-06-18 11:24:27 16.65 37.19 15.22 7.69
213407 7017 20 2016-06-18 11:24:34 16.70 37.01 14.84 7.69
213409 7017 20 2016-06-18 11:24:41 16.36 37.49 15.28 7.69
213411 7017 20 2016-06-18 11:24:48 16.38 37.47 15.58 7.69
213413 7017 20 2016-06-18 11:24:55 16.37 37.47 15.48 7.54
213415 7017 20 2016-06-18 11:25:02 16.41 37.45 15.88 7.54
213417 7017 20 2016-06-18 11:25:09 16.39 37.46 15.68 7.54
213419 7017 20 2016-06-18 11:25:16 16.12 37.75 16.05 7.54
213421 7017 20 2016-06-18 11:25:23 15.80 38.08 15.91 7.54
213423 7017 20 2016-06-18 11:25:30 16.33 38.06 15.37 7.54
213425 7017 20 2016-06-18 11:25:37 16.17 38.49 15.64 7.54