查找列序列开始和结束的相应行值

时间:2019-02-01 12:00:34

标签: r match sequence

我有一个按时间顺序排列的数据框,其中一列用 0/1 表示是否存在活动。我想得到每次活动(即每一段连续的 1)开始和结束的时间,以及这些时刻对应的位置数据(经纬度)。

例如,有如下数据集:

library(chron) 

# Toy track: 200 observations spaced 1/144 of a day (10 minutes) apart.
tt <- times(1:200 / 144)

# Pair every time offset with the same calendar date.
# `length.out` is spelled out in full rather than relying on partial
# matching of `length`.
time <- chron(rep("1/1/09", length.out = length(tt)), tt)

# Activity flag per observation: 7 inactive, 12 active, 1 inactive,
# repeated 10 times (200 values in total).
activity <- rep(
  c(0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0),
  times = 10
)

# Random coordinates, one pair per observation. No seed is set, so the
# values differ from run to run.
lat <- runif(200, -10.8544921875, 2.021484375)
lon <- runif(200, 56.82380908513249, 62.478568831926395)

df <- data.frame(time, activity, lat, lon)

# Desired output: one row per activity bout, holding the time at which the
# bout starts and ends plus the coordinates recorded at those two moments.
# Each timestamp is kept on a single line so that no literal contains an
# embedded line break.
start <- c(
  "01/01/09 01:20:00", "01/01/09 04:40:00", "01/01/09 08:00:00",
  "01/01/09 11:20:00", "01/01/09 14:40:00"
)

end <- c(
  "01/01/09 03:10:00", "01/01/09 06:30:00", "01/01/09 09:50:00",
  "01/01/09 13:10:00", "01/01/09 16:30:00"
)

# Placeholders for the coordinates to be looked up at each start/end row.
startLat <- rep("appropriate_value", 5)

startLon <- rep("appropriate_value", 5)

endLat <- rep("appropriate_value", 5)

endLon <- rep("appropriate_value", 5)


result <- data.frame(start, end, startLat, startLon, endLat, endLon)

1 个答案:

答案 0 :(得分:1)

使用 dplyr 的 lag() 和 lead() 函数,可以很容易地检查 activity 的值何时发生变化:

library(dplyr)

# Shifted copies of the activity flag: the value on the following row and on
# the preceding row. `default = 0` treats the positions before the first row
# and after the last row as inactive; without it lag()/lead() pad with NA,
# and a bout touching either edge of the data would yield an NA index below.
df$next_activity <- lead(df$activity, default = 0)
df$prev_activity <- lag(df$activity, default = 0)

# A bout starts on an active row whose predecessor is inactive, and ends on
# an active row whose successor is inactive.
start <- df$time[df$activity == 1 & df$prev_activity == 0]
end   <- df$time[df$activity == 1 & df$next_activity == 0]

要获取具有开始时间和结束时间/坐标的数据帧:

# Rows on which a bout begins: active now, inactive on the previous row.
# `default = 0` makes a bout that begins on the very first row count as a
# start; otherwise the NA comparison would make filter() drop that row.
df_start <- df %>%
  filter(activity == 1 & lag(activity, default = 0) == 0) %>%
  select(
    start_time = time,
    start_lat = lat,
    start_lon = lon
  )

# Rows on which a bout finishes: active now, inactive on the next row.
# `default = 0` likewise closes a bout that runs to the last row, so every
# start has a matching end and both tables have the same number of rows.
df_end <- df %>%
  filter(activity == 1 & lead(activity, default = 0) == 0) %>%
  select(
    end_time = time,
    end_lat = lat,
    end_lon = lon
  )

# Bouts appear in the same order in both tables, so they pair up row by row.
result <- cbind(df_start, df_end)