R,data.table,滚动连接不匹配最接近的值

时间:2018-01-16 05:57:39

标签: r join merge data.table

我绝不是R专家,非常感谢任何帮助!

我有两个 data.table。下面的示例只包含某一个团队第一个区块(block)中第一次试验(trial)的部分数据;实际数据包含多个团队、区块和试验。每次试验的 "time" 列始终从 0 开始。完整的 eyeDatP1 和 eyeDatP2 表可以在这里和这里找到(原文链接已丢失)。

'eyeDatP1':

+--------+---------+---------+----------+----------+--------------------+--------------------+-------------+
| "team" | "block" | "trial" | "x.pos1" | "y.pos1" |       "time"       |      "time1"       | "condition" |
+--------+---------+---------+----------+----------+--------------------+--------------------+-------------+
|    910 |       1 |       1 |    7.012 |   19.034 |                  0 |                  0 | "normal"    |
|    910 |       1 |       1 |   13.342 |  134.199 | 0.0900001525878906 | 0.0900001525878906 | "normal"    |
|    910 |       1 |       1 |   18.525 |  127.041 |  0.180000066757202 |  0.180000066757202 | "normal"    |
|    910 |       1 |       1 |   16.098 |  119.385 |  0.279999971389771 |  0.279999971389771 | "normal"    |
|    910 |       1 |       1 |   17.218 |  118.263 |  0.370000123977661 |  0.370000123977661 | "normal"    |
|    910 |       1 |       1 |   18.385 |  116.526 |  0.470000028610229 |  0.470000028610229 | "normal"    |
+--------+---------+---------+----------+----------+--------------------+--------------------+-------------+

和'eyeDatP2':

+--------+---------+---------+----------+----------+-------------------+-------------------+-------------+
| "team" | "block" | "trial" | "x.pos2" | "y.pos2" |      "time"       |      "time2"      | "condition" |
+--------+---------+---------+----------+----------+-------------------+-------------------+-------------+
|    910 |       1 |       1 |   72.656 | -136.216 |                 0 |                 0 | "normal"    |
|    910 |       1 |       1 |   71.398 | -139.887 | 0.070000171661377 | 0.070000171661377 | "normal"    |
|    910 |       1 |       1 |   68.219 | -146.002 | 0.170000076293945 | 0.170000076293945 | "normal"    |
|    910 |       1 |       1 | -166.067 |  125.859 | 0.259999990463257 | 0.259999990463257 | "normal"    |
|    910 |       1 |       1 | -164.175 |  112.627 | 0.360000133514404 | 0.360000133514404 | "normal"    |
|    910 |       1 |       1 |   27.089 |   96.251 | 0.450000047683716 | 0.450000047683716 | "normal"    |
+--------+---------+---------+----------+----------+-------------------+-------------------+-------------+

我希望按最接近的时间戳(以 'time' 列为连接键)把这两个表连接(join)起来。

修改

我为了让它工作而最终写出的代码(它仍然不起作用)相当混乱,还用了一堆循环,所以……先说声抱歉。下面是我目前的代码:

# Empty template that fixes the column order and types of the merged result.
# The first nine columns describe the participant-1 sample and the remaining
# ones the participant-2 sample; names carrying the "i." prefix are the
# participant-2 versions of columns that exist in both input tables.
# `condition` and `i.condition` are character because the input tables store
# text labels such as "normal" and "individual" in that column.
eyeDat <- data.frame(
  team = numeric(),
  block = numeric(),
  trial = numeric(),
  x.pos1 = numeric(),
  y.pos1 = numeric(),
  time = numeric(),
  time1 = numeric(),
  condition = character(),
  time.match = numeric(),
  i.team = numeric(),
  i.block = numeric(),
  i.trial = numeric(),
  x.pos2 = numeric(),
  y.pos2 = numeric(),
  i.time = numeric(),
  time2 = numeric(),
  i.condition = character(),
  # Keep the label columns as plain character on R versions before 4.0.
  stringsAsFactors = FALSE
)

# Within one trial several samples can carry the same timestamp. For every
# (team, block, trial, time) combination keep the first row and drop the rest.
first_seen_p1 <- !duplicated(eyeDatP1[, c("team", "block", "trial", "time")])
first_seen_p2 <- !duplicated(eyeDatP2[, c("team", "block", "trial", "time")])
eyeDatP1 <- subset(eyeDatP1, first_seen_p1)
eyeDatP2 <- subset(eyeDatP2, first_seen_p2)

# Re-zero every trial's clock, then merge the two participants' samples one
# trial at a time with a rolling join on the re-zeroed time.
#
# For each eyeDatP2 sample the join picks the eyeDatP1 sample whose time.match
# is nearest. "nearest" has no upper bound on the distance, so when one
# participant's recording of a trial is shorter than the other's, all surplus
# samples are paired with the end row of the shorter recording; inspect
# time2 - time1 afterwards to find such pairs.
#
# Trials are isolated by team, block, condition AND trial, so equal trial
# numbers that occur in different blocks are never pooled into one merge.
# The per-trial results are collected in a list and bound once at the end
# instead of growing eyeDat with rbind() on every iteration.
trial_merges <- list()
for (team.num in unique(eyeDatP1$team)) { # separate at team level
  teamP1 <- subset(eyeDatP1, eyeDatP1$team == team.num)
  teamP2 <- subset(eyeDatP2, eyeDatP2$team == team.num)
  for (block.num in unique(teamP1$block)) { # separate at block level
    blockP1 <- subset(teamP1, teamP1$block == block.num)
    blockP2 <- subset(teamP2, teamP2$block == block.num)
    for (condition.num in unique(blockP1$condition)) { # separate at condition level
      condP1 <- subset(blockP1, blockP1$condition == condition.num)
      condP2 <- subset(blockP2, blockP2$condition == condition.num)
      for (trial.num in unique(condP1$trial)) { # separate at trial level
        trialP1 <- subset(condP1, condP1$trial == trial.num)
        trialP2 <- subset(condP2, condP2$trial == trial.num)

        # Subtract the trial's first timestamp so each trial starts at 0.
        # This relies on the rows being in chronological order.
        trialP1$time1 <- trialP1$time - trialP1$time[1]
        trialP2$time2 <- trialP2$time - trialP2$time[1]

        # Shared join column holding the re-zeroed time of either table.
        trialP1$time.match <- trialP1$time1
        trialP2$time.match <- trialP2$time2

        # Merge by closest timestamp: one output row per eyeDatP2 sample.
        setDT(trialP1)
        setDT(trialP2)
        setkey(trialP1, time.match)
        setkey(trialP2, time.match)
        trial_merges[[length(trial_merges) + 1L]] <-
          trialP1[trialP2, roll = "nearest"]
      }
    }
  }
}

# Bind everything in one pass; the existing eyeDat goes first so rows it
# already holds (normally none) stay at the top. The result is a data.table.
eyeDat <- data.table::rbindlist(
  c(list(eyeDat), trial_merges),
  use.names = TRUE
)

上面的代码得到的“匹配”时间戳之间存在一些非常大的差距:
# Signed gap between the two matched samples (participant 2 minus participant 1).
eyeDat$time.diff <- eyeDat$time2 - eyeDat$time1
# Discard rows for which no gap could be computed.
eyeDat <- eyeDat[!is.na(eyeDat$time.diff), ]
# Matches that are at least one time unit apart in either direction.
bad <- eyeDat[abs(eyeDat$time.diff) >= 1, ]

这输出data.frame'bad',下面是'bad'的最后n行:

+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|  "team" "block" "trial" "x.pos1" "y.pos1" "time" "time1" "condition" "time.match" "i.team" "i.block" "i.trial" "x.pos2" "y.pos2" "i.time" "time2" "i.condition" "time.diff"   |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| 910 2 69 413.955 -392.938 1499754525.32 21.9099998474121 "individual" 28.2599999904633 910 2 69 -168.622 -21.305 1499754632.32 28.2599999904633 "individual" 6.35000014305115 |
| 910 2 69 413.955 -392.938 1499754525.32 21.9099998474121 "individual" 28.2699999809265 910 2 69 -167.567 -19.674 1499754632.33 28.2699999809265 "individual" 6.3600001335144  |
| 910 2 69 413.955 -392.938 1499754525.32 21.9099998474121 "individual" 28.2799999713898 910 2 69 -167.567 -19.674 1499754632.34 28.2799999713898 "individual" 6.37000012397766 |
| 910 2 69 413.955 -392.938 1499754525.32 21.9099998474121 "individual" 28.289999961853 910 2 69 -166.493 -17.976 1499754632.35 28.289999961853 "individual" 6.38000011444092   |
| 910 2 69 413.955 -392.938 1499754525.32 21.9099998474121 "individual" 28.2999999523163 910 2 69 -164.281 -17.984 1499754632.36 28.2999999523163 "individual" 6.39000010490417 |
| 910 2 69 413.955 -392.938 1499754525.32 21.9099998474121 "individual" 28.3099999427795 910 2 69 -164.281 -17.984 1499754632.37 28.3099999427795 "individual" 6.40000009536743 |
| 910 2 69 413.955 -392.938 1499754525.32 21.9099998474121 "individual" 28.3200001716614 910 2 69 -162.836 -16.612 1499754632.38 28.3200001716614 "individual" 6.41000032424927 |
| 910 2 69 413.955 -392.938 1499754525.32 21.9099998474121 "individual" 28.3300001621246 910 2 69 -163.784 -15.401 1499754632.39 28.3300001621246 "individual" 6.42000031471252 |
| 910 2 69 413.955 -392.938 1499754525.32 21.9099998474121 "individual" 28.3400001525879 910 2 69 -163.784 -15.401 1499754632.4 28.3400001525879 "individual" 6.43000030517578  |
| 910 2 69 413.955 -392.938 1499754525.32 21.9099998474121 "individual" 28.3500001430511 910 2 69 -164.593 -14.366 1499754632.41 28.3500001430511 "individual" 6.44000029563904 |
| 910 2 69 413.955 -392.938 1499754525.32 21.9099998474121 "individual" 28.3600001335144 910 2 69 -164.593 -14.366 1499754632.42 28.3600001335144 "individual" 6.45000028610229 |
| 910 2 69 413.955 -392.938 1499754525.32 21.9099998474121 "individual" 28.3700001239777 910 2 69 -165.06 -13.378 1499754632.43 28.3700001239777 "individual" 6.46000027656555  |
| 910 2 69 413.955 -392.938 1499754525.32 21.9099998474121 "individual" 28.3800001144409 910 2 69 -165.06 -13.378 1499754632.44 28.3800001144409 "individual" 6.47000026702881  |
| 910 2 69 413.955 -392.938 1499754525.32 21.9099998474121 "individual" 28.3900001049042 910 2 69 -165.737 -14.238 1499754632.45 28.3900001049042 "individual" 6.48000025749207 |
| 910 2 69 413.955 -392.938 1499754525.32 21.9099998474121 "individual" 28.4000000953674 910 2 69 -166.492 -15.478 1499754632.46 28.4000000953674 "individual" 6.49000024795532 |
| 910 2 69 413.955 -392.938 1499754525.32 21.9099998474121 "individual" 28.4100000858307 910 2 69 -166.492 -15.478 1499754632.47 28.4100000858307 "individual" 6.50000023841858 |
| 910 2 69 413.955 -392.938 1499754525.32 21.9099998474121 "individual" 28.4200000762939 910 2 69 -165.439 -17.974 1499754632.48 28.4200000762939 "individual" 6.51000022888184 |
| 910 2 69 413.955 -392.938 1499754525.32 21.9099998474121 "individual" 28.4300000667572 910 2 69 -165.439 -17.974 1499754632.49 28.4300000667572 "individual" 6.52000021934509 |
| 910 2 69 413.955 -392.938 1499754525.32 21.9099998474121 "individual" 28.4400000572205 910 2 69 -164.605 -19.655 1499754632.5 28.4400000572205 "individual" 6.53000020980835  |
| 910 2 69 413.955 -392.938 1499754525.32 21.9099998474121 "individual" 28.4500000476837 910 2 69 -163.724 -20.565 1499754632.51 28.4500000476837 "individual" 6.54000020027161 |
| 910 2 69 413.955 -392.938 1499754525.32 21.9099998474121 "individual" 28.460000038147 910 2 69 -163.724 -20.565 1499754632.52 28.460000038147 "individual" 6.55000019073486   |
| 910 2 69 413.955 -392.938 1499754525.32 21.9099998474121 "individual" 28.4700000286102 910 2 69 -163.435 -23.499 1499754632.53 28.4700000286102 "individual" 6.56000018119812 |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+

1 个答案:

答案 0 :(得分:0)

这是你要找的吗?

eyeDatP1 <- fread("eyedatp1.txt")
eyeDatP2 <- fread("eyedatp2.txt")

# Key on the trial identifiers first and on `time` last. In a keyed rolling
# join only the LAST key column is rolled; the preceding ones must match
# exactly. Keying on `time` alone would let a sample be paired with a sample
# from a different team, block or trial that happens to have a close time.
setkey(eyeDatP1, team, block, trial, time)
setkey(eyeDatP2, team, block, trial, time)

# One row per eyeDatP2 sample, joined to the eyeDatP1 sample of the same
# team/block/trial whose `time` is nearest.
eyeDat <- eyeDatP1[eyeDatP2, roll = "nearest"]