具有给定顺序模式的行之间的时差

时间:2017-06-26 16:47:42

标签: r datetime pattern-matching data.table difference

我有一个数据表,记录了两个不同设备处于 ON 和 OFF 状态的时间。ON 由 -1 或 1 表示,OFF 由 0 表示。

# Example event log: every row is a single state report from one device, so
# the other device's column is NA on that row.
# device1 uses 1 for ON, device2 uses -1 for ON; both use 0 for OFF.
myData <- data.frame(
   date = as.POSIXct(c(
      "2017-06-12 19:35:51", "2017-06-12 19:36:49",
      "2017-06-12 19:38:41", "2017-06-12 19:39:50",
      "2017-06-12 19:39:18", "2017-06-12 19:39:35",
      "2017-06-12 19:41:53", "2017-06-12 19:42:56",
      "2017-06-12 19:42:01", "2017-06-12 19:42:41",
      "2017-06-12 19:44:56", "2017-06-12 19:45:09"
   )),
   device1 = c( 1, NA,  0,  1, NA, NA,  0,  1, NA, NA,  0,  1),
   device2 = c(NA, -1, NA, NA,  0, -1, NA, NA,  0, -1, NA, NA)
)

> myData
                  date device1 device2
1  2017-06-12 19:35:51       1      NA
2  2017-06-12 19:36:49      NA      -1
3  2017-06-12 19:38:41       0      NA
4  2017-06-12 19:39:50       1      NA
5  2017-06-12 19:39:18      NA       0
6  2017-06-12 19:39:35      NA      -1
7  2017-06-12 19:41:53       0      NA
8  2017-06-12 19:42:56       1      NA
9  2017-06-12 19:42:01      NA       0
10 2017-06-12 19:42:41      NA      -1
11 2017-06-12 19:44:56       0      NA
12 2017-06-12 19:45:09       1      NA

对于每个设备的ON / OFF状态,我想计算状态交替的时间差:

  • -1 或 1 后跟 0(开启至关闭)
  • 0 后跟 -1 或 1(关闭至开启)

到目前为止,我只找到了一种方法来区分后续出现的相同状态,例如

# Gaps between consecutive device1 events of the SAME state.
# which() keeps only the rows where the comparison is TRUE, so the NA rows
# (events belonging to the other device) are dropped without a separate
# is.na() test.
# NOTE(review): diff.difftime() is called directly here instead of the diff()
# generic; on POSIXct input that appears to be why the printed result carries
# no unit label -- confirm before relying on the units.
device1_OFF_to_OFF_diff <- diff.difftime(myData$date[which(myData$device1 == 0)])
device1_ON_to_ON_diff <- diff.difftime(myData$date[which(myData$device1 == 1)])

> device1_OFF_to_OFF_diff
Time differences in 
[1] 3.20 3.05

> device1_ON_to_ON_diff
Time differences in 
[1] 3.983333 3.100000 2.216667

然而,我的目标是在出现特定模式时计算时间差,得到诸如 device1_ON_to_OFF_diff、device1_OFF_to_ON_diff 之类的表(希望您明白我的意思)。有没有方便的方法来实现这个目标?

1 个答案:

答案 0 :(得分:1)

这是一个可能适合你的for循环

# Sample data: one row per state report. Each row carries a timestamp and the
# state of exactly one device (the other device's column is NA).
# device1 reports ON as 1, device2 reports ON as -1; both report OFF as 0.
myData <- data.frame(
  date = as.POSIXct(c(
    '2017-06-12 19:35:51','2017-06-12 19:36:49','2017-06-12 19:38:41','2017-06-12 19:39:50','2017-06-12 19:39:18','2017-06-12 19:39:35',
    '2017-06-12 19:41:53','2017-06-12 19:42:56','2017-06-12 19:42:01','2017-06-12 19:42:41','2017-06-12 19:44:56','2017-06-12 19:45:09')),
  device1 = c(1,NA,0,1,NA,NA,0,1,NA,NA,0,1),
  device2 = c(NA,-1,NA,NA,0,-1,NA,NA,0,-1,NA,NA)
)

# Minutes elapsed between each "event" row and the most recent "reference" row
# above it (in row order).
#
# Arguments:
#   dates        POSIXct vector of row timestamps.
#   is_event     NA-free logical vector; TRUE on rows to compute a value for.
#   is_reference NA-free logical vector; TRUE on rows to measure back to.
#
# Returns a numeric vector as long as `dates`:
#   NA on non-event rows,
#   0  on event rows that have no earlier reference row,
#   otherwise the time difference in minutes, rounded to 2 decimals.
minutes_since_last <- function(dates, is_event, is_reference) {
  n <- length(dates)

  # Forward-fill the row index of the latest reference row (0 = none seen yet).
  last_ref <- cummax(ifelse(is_reference, seq_len(n), 0L))

  result <- rep(NA_real_, n)
  event_rows <- which(is_event)
  ref_rows <- last_ref[event_rows]
  has_ref <- ref_rows > 0L

  # No previous opposite state: report 0 rather than leaving the cell empty.
  result[event_rows[!has_ref]] <- 0
  result[event_rows[has_ref]] <- round(
    as.numeric(difftime(
      dates[event_rows[has_ref]],
      dates[ref_rows[has_ref]],
      units = "mins"
    )),
    2
  )
  result
}

# Every column whose name starts with "device" is treated as a device.
devices <- colnames(myData)[startsWith(colnames(myData), "device")]

for (d in devices) {
  state <- myData[[d]]
  # %in% never yields NA, so rows that belong to another device are simply FALSE.
  is_on <- state %in% c(1, -1)
  is_off <- state %in% 0

  # <device>_OFF_to_ON: on each ON row, minutes since the previous OFF.
  myData[[paste0(d, "_OFF_to_ON")]] <- minutes_since_last(myData$date, is_on, is_off)
  # <device>_ON_to_OFF: on each OFF row, minutes since the previous ON.
  myData[[paste0(d, "_ON_to_OFF")]] <- minutes_since_last(myData$date, is_off, is_on)
}


# Change column order to keep device information together
myData <- myData[, sort(colnames(myData))]