如何根据时间戳从查找表中选择值?

时间:2016-11-16 12:33:32

标签: r

我有一个名为dayfile的数据框,其中包含以下变量:

Period   SubCode            EchoTime  PosX  PosY  PosZ
1   8753      13 2016-06-19 00:06:27 16.81 39.21 12.66
2   8753      13 2016-06-19 00:06:35 16.67 39.08 12.66
3   8753      13 2016-06-19 00:06:36 16.33 39.60 13.03
4   8753      13 2016-06-19 00:06:45 17.14 38.14 12.23
5   8753      13 2016-06-19 00:06:53 16.95 38.21 12.38
6   8753      13 2016-06-19 00:06:53 17.44 37.67 11.95

我还有另一个名为probe.DOT1的数据框,如下所示:

  DO.time.1m           DO.1m Temp.1m
1 2016-06-18 10:24:50  7.69   18.04
2 2016-06-18 11:24:50  7.54   19.12
3 2016-06-18 11:54:50  7.57   18.98
4 2016-06-18 12:24:50  9.51   19.88
5 2016-06-18 12:54:50  9.30   11.62
6 2016-06-18 13:24:50  8.81   11.54

我想在dayfile中创建一个名为O2的新列,其值取自probe.DOT1$DO.1m:当dayfile$EchoTime落在probe.DOT1$DO.time.1m的两个连续值之间的时间范围内时,取对应的DO.1m值,即它应该看起来像这样:

Period   SubCode            EchoTime  PosX  PosY  PosZ  O2
1   8753      13 2016-06-19 00:06:27 16.81 39.21 12.66  7.54
2   8753      13 2016-06-19 00:06:35 16.67 39.08 12.66  7.54
3   8753      13 2016-06-19 00:06:36 16.33 39.60 13.03  7.59
4   8753      13 2016-06-19 00:06:45 17.14 38.14 12.23  7.59
5   8753      13 2016-06-19 00:06:53 16.95 38.21 12.38  7.59
6   8753      13 2016-06-19 00:06:53 17.44 37.67 11.95  7.59

我试过这个:

# Attempt: select DO.1m from the rows of probe.DOT1 picked out by a logical
# index built from two element-wise comparisons on dayfile$EchoTime.
# NOTE(review): the index has one element per dayfile row but is used to
# subset probe.DOT1 rows, so it only lines up if both tables have matching
# row counts -- confirm; a per-row interval lookup is probably what is wanted.
# NOTE(review): diff() yields the gaps between consecutive probe times, not
# timestamps, so the second comparison is presumably not the intended
# "after the previous reading" bound.
dayfile$O2 <- probe.DOT1[dayfile$EchoTime < probe.DOT1$DO.time.1m &
dayfile$EchoTime > diff(probe.DOT1$DO.time.1m, lag = 1) , 'DO.1m']

但它给了我错误。有没有人有任何建议?

感谢。

修改

下面是一些dput输出,您可以用它们重建我的数据的一小部分:

dayfile

structure(list(Period = c(7017, 7017, 7017, 7017, 7017, 7017, 
7017, 7017, 7017, 7017, 7017, 7017, 7017, 7017, 7017, 7017, 7017, 
7017, 7017, 7017, 7017, 7017, 7017, 7017, 7017, 7017, 7017, 7017, 
7017, 7017, 7017, 7017, 7017, 7017, 7017, 7017, 7017, 7017, 7017, 
7017, 7017), SubCode = c(20, 20, 20, 20, 20, 20, 20, 20, 20, 
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
), EchoTime = structure(c(1466249003, 1466249010, 1466249017, 
1466249025, 1466249032, 1466249039, 1466249046, 1466249053, 1466249060, 
1466249067, 1466249074, 1466249081, 1466249088, 1466249095, 1466249102, 
1466249109, 1466249116, 1466249123, 1466249130, 1466249137, 1466249144, 
1466249151, 1466249158, 1466249165, 1466249172, 1466249179, 1466249186, 
1466249193, 1466249200, 1466249207, 1466249214, 1466249221, 1466249228, 
1466249235, 1466249242, 1466249249, 1466249256, 1466249263, 1466249270, 
1466249277, 1466249284), class = c("POSIXct", "POSIXt"), tzone = "UTC"), 
    PosX = c(14.69, 14.69, 14.69, 16.31, 14.94, 14.94, 14.85, 
    14.73, 16.64, 16.65, 16.7, 16.36, 16.38, 16.37, 16.41, 16.39, 
    16.12, 15.8, 16.33, 16.17, 16.32, 15.61, 15.5, 15.82, 16.68, 
    16.68, 16.68, 16.61, 16.68, 16.68, 11.39, 11.39, 16.68, 16.6, 
    16.6, 15.08, 15.08, 14.9, 14.81, 15.08, 15.15), PosY = c(36.98, 
    36.98, 36.98, 37.68, 36.9, 36.9, 36.96, 37.09, 37.19, 37.19, 
    37.01, 37.49, 37.47, 37.47, 37.45, 37.46, 37.75, 38.08, 38.06, 
    38.49, 38.06, 38.54, 38.39, 38.06, 37.16, 37.16, 37.16, 37.36, 
    37.16, 37.16, 40.09, 40.09, 37.16, 37.37, 37.37, 36.79, 36.79, 
    36.98, 36.94, 36.79, 36.73), PosZ = c(14.68, 14.68, 14.68, 
    15.67, 15.03, 15.03, 14.66, 14.88, 15.12, 15.22, 14.84, 15.28, 
    15.58, 15.48, 15.88, 15.68, 16.05, 15.91, 15.37, 15.64, 15.27, 
    16.1, 16.3, 16.33, 15.61, 15.61, 15.61, 15.8, 15.61, 15.61, 
    18.06, 18.06, 15.61, 15.7, 15.7, 15.13, 15.13, 15.3, 15.38, 
    15.13, 15.19)), .Names = c("Period", "SubCode", "EchoTime", 
"PosX", "PosY", "PosZ"), row.names = c(213387L, 213389L, 213391L, 
213393L, 213395L, 213397L, 213399L, 213401L, 213403L, 213405L, 
213407L, 213409L, 213411L, 213413L, 213415L, 213417L, 213419L, 
213421L, 213423L, 213425L, 213427L, 213429L, 213431L, 213433L, 
213435L, 213437L, 213439L, 213441L, 213443L, 213445L, 213447L, 
213449L, 213450L, 213452L, 213454L, 213456L, 213458L, 213460L, 
213462L, 213464L, 213466L), class = "data.frame")

probe.DOT1

structure(list(DO.time.1m = structure(c(1466245490, 1466249090, 
1466250890, 1466252690, 1466254490, 1466256290, 1466258090, 1466259890, 
1466261690, 1466263490, 1466265290, 1466267090, 1466268890, 1466270690, 
1466272490, 1466274290, 1466276090, 1466277890, 1466279690, 1466281490
), class = c("POSIXct", "POSIXt"), tzone = "UTC"), DO.1m = c(7.69, 
7.54, 7.57, 9.51, 9.3, 8.81, 8.71, 8.66, 8.19, 8.52, 8.79, 9.01, 
9.19, 9.39, 9.79, 9.82, 9.48, 9.5, 9.29, 9.38), Temp.1m = c(18.04, 
19.12, 18.98, 19.88, 11.62, 11.54, 12.24, 12.22, 12.32, 12.2, 
12.18, 12.16, 12.14, 11.8, 11.62, 11.68, 11.52, 11.32, 11.34, 
11.14)), .Names = c("DO.time.1m", "DO.1m", "Temp.1m"), row.names = c(NA, 
20L), class = "data.frame")

3 个答案:

答案 0 :(得分:1)

我会为此使用双循环:

# Give every dayfile row the DO.1m reading that closes the probe interval
# containing its EchoTime, i.e. the first probe reading taken at or after
# the echo. Rows at or before the first probe reading, or after the last
# one, get NA because no enclosing interval exists.
#
# This is a vectorised replacement for the nested loop: findInterval() does
# one binary search per echo instead of scanning every probe row, and it
# also works when either table is empty.
dayfile$O2 <- local({
  probe_times <- as.numeric(probe.DOT1$DO.time.1m)
  echo_times <- as.numeric(dayfile$EchoTime)

  # findInterval() requires non-decreasing breakpoints; fail loudly rather
  # than return a silently wrong lookup.
  stopifnot(!is.unsorted(probe_times))

  # With left.open = TRUE, interval k is (probe_times[k], probe_times[k + 1]].
  # A result of 0 means the echo is at or before the first probe reading.
  interval_id <- findInterval(echo_times, probe_times, left.open = TRUE)

  # The reading at the upper end of interval k sits at position k + 1.
  # For echoes past the last reading, k + 1 is out of range and R's
  # indexing returns NA, which is the wanted result.
  reading_id <- ifelse(interval_id == 0L, NA_integer_, interval_id + 1L)

  probe.DOT1$DO.1m[reading_id]
})

答案 1 :(得分:1)

唉……fuzzyjoin几乎就能解决这个问题(那样会“优雅”得多),不过算了。

下面是我不用循环(w/o loop)完成这件事的管道写法……不过用你给的数据得到的输出不太理想,因为在各时间范围之间只有1个值……我们还是看看它在你的真实数据上效果如何。

library(dplyr)
library(tidyr)

# Master timeline: one row per distinct timestamp found in either table.
all <- data.frame(
  ts = unique(unlist(c(dayfile$EchoTime, probe.DOT1$DO.time.1m)))
)

# Attach both tables to the timeline, order it chronologically, carry the
# dayfile columns (Period, SubCode, PosX, PosY, PosZ) down into the probe
# rows, keep only the rows that have a probe reading, drop Temp.1m and
# expose the reading as O2.
all %>%
  left_join(dayfile, by = c(ts = "EchoTime")) %>%
  left_join(probe.DOT1, by = c(ts = "DO.time.1m")) %>%
  arrange(ts) %>%
  fill(Period, SubCode, PosX, PosY, PosZ) %>%
  filter(!is.na(DO.1m)) %>%
  select(-Temp.1m) %>%
  rename(O2 = DO.1m)

# ts Period SubCode  PosX  PosY  PosZ   O2
# 1  2016-06-18 10:24:50     NA      NA    NA    NA    NA 7.69
# 2  2016-06-18 11:24:50   7017      20 16.38 37.47 15.58 7.54
# 3  2016-06-18 11:54:50   7017      20 15.15 36.73 15.19 7.57
# 4  2016-06-18 12:24:50   7017      20 15.15 36.73 15.19 9.51
# 5  2016-06-18 12:54:50   7017      20 15.15 36.73 15.19 9.30
# 6  2016-06-18 13:24:50   7017      20 15.15 36.73 15.19 8.81
# 7  2016-06-18 13:54:50   7017      20 15.15 36.73 15.19 8.71
# 8  2016-06-18 14:24:50   7017      20 15.15 36.73 15.19 8.66
# 9  2016-06-18 14:54:50   7017      20 15.15 36.73 15.19 8.19
# 10 2016-06-18 15:24:50   7017      20 15.15 36.73 15.19 8.52
# 11 2016-06-18 15:54:50   7017      20 15.15 36.73 15.19 8.79
# 12 2016-06-18 16:24:50   7017      20 15.15 36.73 15.19 9.01
# 13 2016-06-18 16:54:50   7017      20 15.15 36.73 15.19 9.19
# 14 2016-06-18 17:24:50   7017      20 15.15 36.73 15.19 9.39
# 15 2016-06-18 17:54:50   7017      20 15.15 36.73 15.19 9.79
# 16 2016-06-18 18:24:50   7017      20 15.15 36.73 15.19 9.82
# 17 2016-06-18 18:54:50   7017      20 15.15 36.73 15.19 9.48
# 18 2016-06-18 19:24:50   7017      20 15.15 36.73 15.19 9.50
# 19 2016-06-18 19:54:50   7017      20 15.15 36.73 15.19 9.29
# 20 2016-06-18 20:24:50   7017      20 15.15 36.73 15.19 9.38

编辑:

要查看包含所有数据的整个数据集,请使用:

# Only the two joins: the timeline with the raw columns of both tables
# attached, before any sorting, filling or filtering.
all %>%
  left_join(dayfile, by = c(ts = "EchoTime")) %>%
  left_join(probe.DOT1, by = c(ts = "DO.time.1m"))

在R中,您可以(并且应该)一次运行一行,以便查看每一行的作用。做法是从all开始选中一段“文本块”再运行。注意到上面那次运行显示了到该行为止所有步骤的结果吗?管道中的其他每一行也可以这样做……所以只需选中直到fill(含)为止的所有内容,然后运行:

# The pipeline up to and including fill(): joined, sorted by time, with
# the dayfile columns carried downwards.
all %>%
  left_join(dayfile, by = c(ts = "EchoTime")) %>%
  left_join(probe.DOT1, by = c(ts = "DO.time.1m")) %>%
  arrange(ts) %>%
  fill(Period, SubCode, PosX, PosY, PosZ)

答案 2 :(得分:0)

以下是基于@Amit代码的问题解决方案:

library(dplyr)
library(tidyr)

# Master timeline: one row per distinct timestamp found in either table.
all <- data.frame(ts = unique(unlist(c(dayfile$EchoTime, probe.DOT1$DO.time.1m))))

all <- all %>%
  # Tag each dayfile row with its original position so the result can be
  # put back in exactly dayfile's row order, whatever that order is.
  left_join(mutate(dayfile, .row_id = row_number()), by = c("ts" = "EchoTime")) %>%
  left_join(probe.DOT1, by = c("ts" = "DO.time.1m")) %>%
  arrange(ts) %>%
  # Carry the most recent probe reading forward onto the dayfile rows.
  fill(DO.1m) %>%
  select(-Temp.1m) %>%
  # Keep only the rows that came from dayfile (assumes PosX is never
  # missing in dayfile; the row-count check below catches violations).
  filter(!is.na(PosX)) %>%
  rename(O2 = DO.1m) %>%
  arrange(.row_id) %>%
  select(-.row_id)

# The assignment below is positional: refuse to continue if rows were lost
# or duplicated, instead of letting R recycle or misalign the values.
stopifnot(nrow(all) == nrow(dayfile))
dayfile$O2 <- all$O2

我修改了@Amit的代码:填充(fill)的是DO.1m而不是dayfile的各列,过滤时依据的是PosX而不是DO.1m。效果非常好!

这是输出:

       Period SubCode            EchoTime  PosX  PosY  PosZ   O2
213387   7017      20 2016-06-18 11:23:23 14.69 36.98 14.68 7.69
213389   7017      20 2016-06-18 11:23:30 14.69 36.98 14.68 7.69
213391   7017      20 2016-06-18 11:23:37 14.69 36.98 14.68 7.69
213393   7017      20 2016-06-18 11:23:45 16.31 37.68 15.67 7.69
213395   7017      20 2016-06-18 11:23:52 14.94 36.90 15.03 7.69
213397   7017      20 2016-06-18 11:23:59 14.94 36.90 15.03 7.69
213399   7017      20 2016-06-18 11:24:06 14.85 36.96 14.66 7.69
213401   7017      20 2016-06-18 11:24:13 14.73 37.09 14.88 7.69
213403   7017      20 2016-06-18 11:24:20 16.64 37.19 15.12 7.69
213405   7017      20 2016-06-18 11:24:27 16.65 37.19 15.22 7.69
213407   7017      20 2016-06-18 11:24:34 16.70 37.01 14.84 7.69
213409   7017      20 2016-06-18 11:24:41 16.36 37.49 15.28 7.69
213411   7017      20 2016-06-18 11:24:48 16.38 37.47 15.58 7.69
213413   7017      20 2016-06-18 11:24:55 16.37 37.47 15.48 7.54
213415   7017      20 2016-06-18 11:25:02 16.41 37.45 15.88 7.54
213417   7017      20 2016-06-18 11:25:09 16.39 37.46 15.68 7.54
213419   7017      20 2016-06-18 11:25:16 16.12 37.75 16.05 7.54
213421   7017      20 2016-06-18 11:25:23 15.80 38.08 15.91 7.54
213423   7017      20 2016-06-18 11:25:30 16.33 38.06 15.37 7.54
213425   7017      20 2016-06-18 11:25:37 16.17 38.49 15.64 7.54