这是我的数据框:
# Sample sensor readings: 10 rows x 11 columns, carrying tibble classes.
# Runs of repeated values are spelled with rep() so each column's shape is
# visible at a glance. press_id is -1 on every row here; it is the column the
# question wants to fill in from pin_press_time_range.
# NOTE(review): -1 in the grav_*/gyro_* columns presumably means "no reading
# yet" -- confirm with the data source.
sensors_data <- structure(
  list(
    timestamp = c(
      164424852623558, 164424852623558,
      164424855959288, 164424855959288,
      164424857931288, 164424857931288,
      164424859915288, 164424859915288,
      164424861778558, 164424861903788
    ),
    acc_x = rep(c(5.4230323, 5.4086666, 5.3895125), times = c(2, 4, 4)),
    acc_y = rep(c(7.9681463, 7.9777236, 7.987301), times = c(2, 4, 4)),
    acc_z = rep(c(2.6097596, 2.6337023, 2.6528566), times = c(2, 4, 4)),
    grav_x = rep(c(-1, 5.82316), times = c(8, 2)),
    grav_y = rep(c(-1, 7.7164946), times = c(8, 2)),
    grav_z = rep(c(-1, 1.6482342), times = c(8, 2)),
    gyro_x = rep(c(-1, -0.3230286, -0.13915816), times = c(1, 8, 1)),
    gyro_y = rep(c(-1, 0.77723867, 1.0466303), times = c(1, 8, 1)),
    gyro_z = rep(c(-1, -0.365287, -0.57420295), times = c(1, 8, 1)),
    press_id = rep(-1, 10)
  ),
  row.names = c(NA, -10L),
  class = c("tbl_df", "tbl", "data.frame")
)
我想根据另一个数据框,将press_id修改(mutate)为特定的值:
# Lookup table of press windows: one row per press_id (1..4), each with the
# start_time and end_time that bound it. The surrounding text treats both
# bounds as inclusive when matching sensor timestamps.
pin_press_time_range <- structure(
  list(
    press_id = seq_len(4),
    start_time = c(
      164429106370978, 164429411618824,
      164429837271939, 164430399454284
    ),
    end_time = c(
      164429182443824, 164429512525747,
      164429903243169, 164430465927554
    )
  ),
  row.names = c(NA, -4L),
  class = c("tbl_df", "tbl", "data.frame")
)
也就是说,对于落在press_id 1、2、……各自的start_time和end_time之间的所有时间戳,请用对应的press_id对其进行标记。
我正在尝试mutate_if
或mutate_at
来做到这一点,但没有成功。
这就是我手动执行的操作,但是我需要自动执行:
# Manual version, handling only the first press window: keep the readings
# whose timestamp lies inside [start_time, end_time] of row 1 (both bounds
# inclusive), then stamp those rows with row 1's press_id. Rows outside the
# window are dropped by filter(), which is why this needs automating.
sensors_data %>%
  filter(
    timestamp >= pin_press_time_range$start_time[1],
    timestamp <= pin_press_time_range$end_time[1]
  ) %>%
  mutate(press_id = pin_press_time_range$press_id[1])
请告知。
答案 0 :(得分:2)
使用sqldf
library(sqldf)
# Range (non-equi) left join done in SQL: every sensors_data row is kept and
# paired with the press window whose [start_time, end_time] contains its
# timestamp, if any. Because both a.* and b.* are selected, the result carries
# the press_id column from each table.
sqldf(paste(
  "select a.*, b.*",
  "from sensors_data a",
  "left join pin_press_time_range b",
  "on a.timestamp >= b.start_time",
  "AND a.timestamp <= b.end_time",
  sep = "\n"
))
答案 1 :(得分:2)
使用Fuzzyjoin
library(fuzzyjoin)
# Left join on an inequality pair instead of equality: a sensors_data row
# matches a press window when timestamp >= start_time and
# timestamp <= end_time. The i-th entry of `by` is compared using the i-th
# function in `match_fun`.
fuzzy_left_join(
  sensors_data,
  pin_press_time_range,
  by = c("timestamp" = "start_time", "timestamp" = "end_time"),
  match_fun = list(`>=`, `<=`)
)
答案 2 :(得分:2)
使用data.table::foverlaps
library(data.table)
# foverlaps() joins intervals to intervals, so each reading is turned into a
# zero-width interval by copying timestamp into both start_time and end_time.
# Note: setDT() and := modify sensors_data in place (it becomes a data.table
# with the two helper columns added).
setDT(sensors_data)
sensors_data[, c("start_time", "end_time") := list(timestamp, timestamp)]
# The lookup table must be keyed on its interval columns for foverlaps().
setDT(pin_press_time_range)
setkeyv(pin_press_time_range, c("start_time", "end_time"))
# Overlap join: each reading is matched to the press window containing it.
dt <- foverlaps(sensors_data, pin_press_time_range)
说明:foverlaps
基于两个data.table
之间的间隔执行重叠连接;由于foverlaps
需要起点和终点,因此我们选择timestamp
作为sensors_data
的起点和终点。然后,我们基于两个公共键start_time
和end_time
,对sensors_data
和pin_press_time_range
进行重叠连接(相当于左连接:未匹配到区间的行仍会保留)。