我有一个按时间顺序排列的数据框,其中一列用 0/1 表示是否存在活动。我想得到每次活动(每一段连续的 1)开始和结束的时间,以及对应的位置数据。
例如,对于以下数据集:
library(chron)

# Reproducible example data: 200 observations spaced 1/144 of a day
# (10 minutes) apart, all dated 1/1/09, with a 0/1 activity flag and random
# coordinates.
set.seed(42) # fixed seed so the random coordinates are identical on every run

tt <- times(1:200 / 144)
# `length.out` is spelled out in full; `length =` only worked through partial
# argument matching.
time <- chron(rep("1/1/09", length.out = length(tt)), tt)

# Each 20-step cycle is 7 inactive steps, 12 active steps, then 1 inactive
# step; the cycle repeats 10 times to fill the 200 observations.
activity <- rep(c(rep(0, 7), rep(1, 12), 0), times = 10)

lat <- runif(200, -10.8544921875, 2.021484375)
lon <- runif(200, 56.82380908513249, 62.478568831926395)

df <- data.frame(time, activity, lat, lon)
# Desired output: one row per activity bout (a run of consecutive 1s), giving
# the time and the position at which the bout starts and ends.
# Each timestamp is written on a single line: a string literal that wraps
# across lines would contain a newline instead of the separating space.
start <- c(
  "01/01/09 01:20:00", "01/01/09 04:40:00", "01/01/09 08:00:00",
  "01/01/09 11:20:00", "01/01/09 14:40:00"
)
end <- c(
  "01/01/09 03:10:00", "01/01/09 06:30:00", "01/01/09 09:50:00",
  "01/01/09 13:10:00", "01/01/09 16:30:00"
)

# Placeholders standing in for the coordinates recorded at each start and end
# time.
startLat <- rep("appropriate_value", 5)
startLon <- rep("appropriate_value", 5)
endLat <- rep("appropriate_value", 5)
endLon <- rep("appropriate_value", 5)

result <- data.frame(start, end, startLat, startLon, endLat, endLon)
答案 0 :(得分:1)
使用 dplyr 的 `lag` 和 `lead` 函数,很容易检查 `activity` 的值何时发生变化:
library(dplyr)

# Store each row's neighbouring activity values. `default = 0` treats the
# positions before the first and after the last observation as inactive.
# Without it lag()/lead() pad with NA, the comparisons below become NA for a
# bout that touches either end of the series, and indexing with NA returns an
# NA entry instead of the real time.
df$next_activity <- lead(df$activity, default = 0)
df$prev_activity <- lag(df$activity, default = 0)

# A bout starts on an active row whose predecessor is inactive and ends on an
# active row whose successor is inactive.
start <- df$time[df$activity == 1 & df$prev_activity == 0]
end <- df$time[df$activity == 1 & df$next_activity == 0]
要得到同时包含每次活动的开始/结束时间及对应坐标的数据框:
# First row of every activity bout: an active row whose predecessor is
# inactive. `default = 0` counts the position before the first observation as
# inactive; with the NA padding lag() uses otherwise, filter() would drop a
# bout that begins on the very first row.
df_start <- df %>%
  filter(activity == 1 & lag(activity, default = 0) == 0) %>%
  select(
    start_time = time,
    start_lat = lat,
    start_lon = lon
  )

# Last row of every activity bout: an active row whose successor is inactive.
# `default = 0` likewise keeps a bout that is still running on the last row.
df_end <- df %>%
  filter(activity == 1 & lead(activity, default = 0) == 0) %>%
  select(
    end_time = time,
    end_lat = lat,
    end_lon = lon
  )

# Every bout now contributes exactly one start row and one end row, in the same
# order, so the two tables can be placed side by side.
result <- cbind(df_start, df_end)