@Akrun
这是我运行代码时的输出……但当我输入 View(df) 时,得到的仍然是旧的数据集。
> df %>%
+ group_by(ID) %>%
+ mutate(difftime=c(15, format(.POSIXct(diff(TIMESTAMP)), '%M:%S')))
Source: local data frame [192 x 9]
Groups: ID
TIMESTAMP ID STATUS ACTIVE PSET
1 2014-08-05 00:00:00 28808 DOWN 67.410 70
2 2014-08-05 00:15:00 28808 DOWN 67.410 70
3 2014-08-05 00:30:00 28808 DOWN 67.410 70
4 2014-08-05 00:45:00 28808 DOWN 67.410 70
5 2014-08-05 01:00:00 28808 DOWN 67.410 70
.. ... ... ... ... ...
Variables not shown: STAT (chr), PIN (chr), ONOFF (chr), difftime (chr)

我需要找到每个ID的时间戳差异,使每个ID的第一个时间戳为NA。
我试过 diff,但无法按每个 ID 分别求出差值。
先用 order 排序可以让问题更简单,但还需要让 R 的 diff 函数知道各行的 ID 可能不同。
以下是供您参考的样本数据。
这是我现在按要求使用的代码。
# Asker's attempt: the loop iterates once per element of xyzID, but the body
# never references i, so every iteration repeats the same computation.
# NOTE(review): 1:length(xyzID) yields c(1, 0) when xyzID is empty.
# NOTE(review): assignments made inside %dopar% presumably do not reach the
# caller's copy of xyz -- confirm against the foreach documentation.
foreach(i=1:length(xyzID)) %dopar%{
# diff() runs over the whole TIMESTAMP column with no grouping by ID, and the
# first value is hard-coded to 15, whereas the stated goal is NA for the first
# row of each ID.
xyz$timediff<-c(15,diff(xyz$TIMESTAMP))
}
TIMESTAMP ID timediff
07/29/2014 10:15 AM 189252 NA
07/29/2014 10:45 AM 189252 0:30
07/29/2014 11:00 AM 189252 0:15
07/29/2014 11:15 AM 189252 0:15
07/29/2014 11:30 AM 302859 NA
07/29/2014 11:45 AM 302859 0:15
07/29/2014 04:00 PM 302859 04:15
07/29/2014 12:15 PM 189252 01:00
07/29/2014 12:30 PM 189252 0:15
07/29/2014 01:00 PM 189252 0:30
07/29/2014 01:15 PM 189252 0:15
07/29/2014 01:30 PM 178502 NA
07/29/2014 01:45 PM 178502 0:15
答案 0 :(得分:3)
尝试
library(dplyr)

# Parse the "mm/dd/YYYY hh:MM AM/PM" strings into POSIXct so that consecutive
# timestamps can be subtracted.
df$TIMESTAMP <- as.POSIXct(df$TIMESTAMP, format = "%m/%d/%Y %I:%M %p")

# Gap between consecutive timestamps, formatted as "HH:MM"; the first element
# is NA because it has no predecessor.
#
# diff() on POSIXct picks its unit (secs/mins/hours/days) from the data, so the
# unit is pinned to minutes here instead of reinterpreting the raw number as an
# epoch time, which is only right when the unit happens to be minutes and the
# local time zone has a whole-hour offset. Gaps are rounded to the nearest
# minute. Assumes rows within one ID are in chronological order.
gap_hhmm <- function(ts) {
  gap_mins <- as.integer(round(as.numeric(diff(ts), units = "mins")))
  gap_text <- sprintf("%02d:%02d", gap_mins %/% 60L, gap_mins %% 60L)
  gap_text[is.na(gap_mins)] <- NA_character_
  c(NA_character_, gap_text)
}

# Assign the result back: mutate() returns a new data frame and leaves the
# original df untouched, so View(df) would otherwise still show the old data.
df <- df %>%
  group_by(ID) %>%
  mutate(timediff = gap_hhmm(TIMESTAMP)) %>%
  ungroup()
df
# TIMESTAMP ID timediff
#1 2014-07-29 10:15:00 189252 NA
#2 2014-07-29 10:45:00 189252 00:30
#3 2014-07-29 11:00:00 189252 00:15
#4 2014-07-29 11:15:00 189252 00:15
#5 2014-07-29 11:30:00 302859 NA
#6 2014-07-29 11:45:00 302859 00:15
#7 2014-07-29 16:00:00 302859 04:15
#8 2014-07-29 12:15:00 189252 01:00
#9 2014-07-29 12:30:00 189252 00:15
#10 2014-07-29 13:00:00 189252 00:30
#11 2014-07-29 13:15:00 189252 00:15
#12 2014-07-29 13:30:00 178502 NA
#13 2014-07-29 13:45:00 178502 00:15
# Sample data from the question: raw 12-hour timestamp strings and the ID each
# reading belongs to (13 rows, default integer row names).
df <- data.frame(
  TIMESTAMP = c(
    "07/29/2014 10:15 AM", "07/29/2014 10:45 AM", "07/29/2014 11:00 AM",
    "07/29/2014 11:15 AM", "07/29/2014 11:30 AM", "07/29/2014 11:45 AM",
    "07/29/2014 04:00 PM", "07/29/2014 12:15 PM", "07/29/2014 12:30 PM",
    "07/29/2014 01:00 PM", "07/29/2014 01:15 PM", "07/29/2014 01:30 PM",
    "07/29/2014 01:45 PM"
  ),
  # IDs appear in runs: 4 rows, 3 rows, 4 rows, 2 rows.
  ID = rep(c(189252L, 302859L, 189252L, 178502L), times = c(4L, 3L, 4L, 2L)),
  stringsAsFactors = FALSE
)
答案 1 :(得分:1)
或者使用 data.table(其余与 dplyr 的做法相同):
# Sample data: the same 13 readings, with TIMESTAMP already stored as POSIXct
# (seconds since the epoch, tzone attribute set to "").
df <- data.frame(
  TIMESTAMP = structure(
    c(
      1406621700, 1406623500, 1406624400, 1406625300, 1406626200,
      1406627100, 1406642400, 1406628900, 1406629800, 1406631600,
      1406632500, 1406633400, 1406634300
    ),
    class = c("POSIXct", "POSIXt"),
    tzone = ""
  ),
  # IDs appear in runs: 4 rows, 3 rows, 4 rows, 2 rows.
  ID = rep(c(189252L, 302859L, 189252L, 178502L), times = c(4L, 3L, 4L, 2L))
)
# Convert df to a data.table in place. library() is used instead of require()
# so that a missing package fails immediately rather than returning FALSE.
library(data.table)
setDT(df)

# Per-ID gap between consecutive timestamps, formatted as "HH:MM"; the first
# row of each ID is NA because it has no predecessor.
#
# diff() on POSIXct picks its unit (secs/mins/hours/days) from the data in each
# group, so the unit is pinned to minutes here instead of reinterpreting the
# raw number as an epoch time. Gaps are rounded to the nearest minute. Assumes
# rows within one ID are in chronological order.
df[, timediff := {
  gap_mins <- as.integer(round(as.numeric(diff(TIMESTAMP), units = "mins")))
  gap_text <- sprintf("%02d:%02d", gap_mins %/% 60L, gap_mins %% 60L)
  gap_text[is.na(gap_mins)] <- NA_character_
  c(NA_character_, gap_text)
}, by = ID]
df
# TIMESTAMP ID timediff
# 1: 2014-07-29 10:15:00 189252 NA
# 2: 2014-07-29 10:45:00 189252 00:30
# 3: 2014-07-29 11:00:00 189252 00:15
# 4: 2014-07-29 11:15:00 189252 00:15
# 5: 2014-07-29 11:30:00 302859 NA
# 6: 2014-07-29 11:45:00 302859 00:15
# 7: 2014-07-29 16:00:00 302859 04:15
# 8: 2014-07-29 12:15:00 189252 01:00
# 9: 2014-07-29 12:30:00 189252 00:15
# 10: 2014-07-29 13:00:00 189252 00:30
# 11: 2014-07-29 13:15:00 189252 00:15
# 12: 2014-07-29 13:30:00 178502 NA
# 13: 2014-07-29 13:45:00 178502 00:15