日期时间差异输出不一致

时间:2015-02-18 18:38:32

标签: r

我只是想计算每组观察结果之间的差异。

数据集:

structure(list(IDYEAR = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "A0712002", class = "factor"), 
    MONTH = c(12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 
    12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L), DAY = c(5L, 
    6L, 6L, 7L, 8L, 8L, 9L, 9L, 10L, 12L, 12L, 13L, 13L, 13L, 
    14L, 14L, 14L, 15L, 15L), YEAR = c(2002L, 2002L, 2002L, 2002L, 
    2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 
    2002L, 2002L, 2002L, 2002L, 2002L, 2002L), HOUR = c(9L, 19L, 
    23L, 1L, 1L, 3L, 19L, 21L, 17L, 17L, 19L, 17L, 19L, 23L, 
    3L, 9L, 19L, 3L, 11L), MINUTE = c(43L, 43L, 43L, 42L, 42L, 
    42L, 42L, 43L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 
    42L, 42L), SECOND = c(24L, 13L, 13L, 41L, 54L, 54L, 54L, 
    12L, 54L, 54L, 48L, 43L, 59L, 55L, 43L, 44L, 54L, 43L, 55L
    ), DATETIME = structure(c(12L, 13L, 14L, 15L, 16L, 17L, 18L, 
    19L, 1L, 2L, 3L, 4L, 5L, 6L, 8L, 9L, 7L, 11L, 10L), .Label = c("12/10/2002 17:42", 
    "12/12/2002 17:42", "12/12/2002 19:42", "12/13/2002 17:42", 
    "12/13/2002 19:42", "12/13/2002 23:42", "12/14/2002 19:42", 
    "12/14/2002 3:42", "12/14/2002 9:42", "12/15/2002 11:42", 
    "12/15/2002 3:42", "12/5/2002 9:43", "12/6/2002 19:43", "12/6/2002 23:43", 
    "12/7/2002 1:42", "12/8/2002 1:42", "12/8/2002 3:42", "12/9/2002 19:42", 
    "12/9/2002 21:43"), class = "factor"), GRP1700 = c(873L, 
    873L, 874L, 875L, 875L, 876L, 876L, 876L, 876L, 876L, 877L, 
    877L, 877L, 877L, 877L, 878L, 878L, 878L, 879L), ID1700 = structure(c(1L, 
    1L, 2L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 6L, 
    6L, 6L, 7L), .Label = c("A0712002873", "A0712002874", "A0712002875", 
    "A0712002876", "A0712002877", "A0712002878", "A0712002879"
    ), class = "factor")), .Names = c("IDYEAR", "MONTH", "DAY", 
"YEAR", "HOUR", "MINUTE", "SECOND", "DATETIME", "GRP1700", "ID1700"
), class = "data.frame", row.names = c(NA, -19L))

代码

rm(list = ls())
dfa1<-read.csv("test.csv")
head(dfa1)
dput(dfa1)
dfa1[["TESTDATE"]]<-as.POSIXct(dfa1$DATETIME,format="%m/%d/%Y %H:%M",tz="GMT")
dfa1$ID1700<-as.factor(dfa1$ID1700)
dfa1<-dfa1 %>%
  arrange(IDYEAR, GRP1700, TESTDATE) %>%
  group_by(ID1700) %>%
  mutate(TIME1700 = TESTDATE - lag (TESTDATE))
write.csv(dfa1, "test2.csv")

输出:

              TESTDATE       TIME1700
1  2002-12-05 09:43:00        NA days
2  2002-12-06 19:43:00  1.416667 days
3  2002-12-06 23:43:00        NA days
4  2002-12-07 01:42:00        NA days
5  2002-12-08 01:42:00  1.000000 days
6  2002-12-08 03:42:00        NA days
7  2002-12-09 19:42:00 40.000000 days
8  2002-12-09 21:43:00  2.016667 days
9  2002-12-10 17:42:00 19.983333 days
10 2002-12-12 17:42:00 48.000000 days
11 2002-12-12 19:42:00        NA days
12 2002-12-13 17:42:00 22.000000 days
13 2002-12-13 19:42:00  2.000000 days
14 2002-12-13 23:42:00  4.000000 days
15 2002-12-14 03:42:00  4.000000 days
16 2002-12-14 09:42:00        NA days
17 2002-12-14 19:42:00 10.000000 days
18 2002-12-15 03:42:00  8.000000 days
19 2002-12-15 11:42:00        NA days

我注意到一些输出是以小时为单位(第8行 - 第7行),而有些输出是以天为单位(第5行 - 第4行)。如何获得一致的输出(小时是首选)?提前谢谢。

0 个答案:

没有答案