我只是想计算每组观察结果之间的差异。
数据集:
structure(list(IDYEAR = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "A0712002", class = "factor"),
MONTH = c(12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L), DAY = c(5L,
6L, 6L, 7L, 8L, 8L, 9L, 9L, 10L, 12L, 12L, 13L, 13L, 13L,
14L, 14L, 14L, 15L, 15L), YEAR = c(2002L, 2002L, 2002L, 2002L,
2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L,
2002L, 2002L, 2002L, 2002L, 2002L, 2002L), HOUR = c(9L, 19L,
23L, 1L, 1L, 3L, 19L, 21L, 17L, 17L, 19L, 17L, 19L, 23L,
3L, 9L, 19L, 3L, 11L), MINUTE = c(43L, 43L, 43L, 42L, 42L,
42L, 42L, 43L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L,
42L, 42L), SECOND = c(24L, 13L, 13L, 41L, 54L, 54L, 54L,
12L, 54L, 54L, 48L, 43L, 59L, 55L, 43L, 44L, 54L, 43L, 55L
), DATETIME = structure(c(12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L, 1L, 2L, 3L, 4L, 5L, 6L, 8L, 9L, 7L, 11L, 10L), .Label = c("12/10/2002 17:42",
"12/12/2002 17:42", "12/12/2002 19:42", "12/13/2002 17:42",
"12/13/2002 19:42", "12/13/2002 23:42", "12/14/2002 19:42",
"12/14/2002 3:42", "12/14/2002 9:42", "12/15/2002 11:42",
"12/15/2002 3:42", "12/5/2002 9:43", "12/6/2002 19:43", "12/6/2002 23:43",
"12/7/2002 1:42", "12/8/2002 1:42", "12/8/2002 3:42", "12/9/2002 19:42",
"12/9/2002 21:43"), class = "factor"), GRP1700 = c(873L,
873L, 874L, 875L, 875L, 876L, 876L, 876L, 876L, 876L, 877L,
877L, 877L, 877L, 877L, 878L, 878L, 878L, 879L), ID1700 = structure(c(1L,
1L, 2L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 6L,
6L, 6L, 7L), .Label = c("A0712002873", "A0712002874", "A0712002875",
"A0712002876", "A0712002877", "A0712002878", "A0712002879"
), class = "factor")), .Names = c("IDYEAR", "MONTH", "DAY",
"YEAR", "HOUR", "MINUTE", "SECOND", "DATETIME", "GRP1700", "ID1700"
), class = "data.frame", row.names = c(NA, -19L))
代码
rm(list = ls())
dfa1<-read.csv("test.csv")
head(dfa1)
dput(dfa1)
dfa1[["TESTDATE"]]<-as.POSIXct(dfa1$DATETIME,format="%m/%d/%Y %H:%M",tz="GMT")
dfa1$ID1700<-as.factor(dfa1$ID1700)
dfa1<-dfa1 %>%
arrange(IDYEAR, GRP1700, TESTDATE) %>%
group_by(ID1700) %>%
mutate(TIME1700 = TESTDATE - lag (TESTDATE))
write.csv(dfa1, "test2.csv")
输出:
TESTDATE TIME1700
1 2002-12-05 09:43:00 NA days
2 2002-12-06 19:43:00 1.416667 days
3 2002-12-06 23:43:00 NA days
4 2002-12-07 01:42:00 NA days
5 2002-12-08 01:42:00 1.000000 days
6 2002-12-08 03:42:00 NA days
7 2002-12-09 19:42:00 40.000000 days
8 2002-12-09 21:43:00 2.016667 days
9 2002-12-10 17:42:00 19.983333 days
10 2002-12-12 17:42:00 48.000000 days
11 2002-12-12 19:42:00 NA days
12 2002-12-13 17:42:00 22.000000 days
13 2002-12-13 19:42:00 2.000000 days
14 2002-12-13 23:42:00 4.000000 days
15 2002-12-14 03:42:00 4.000000 days
16 2002-12-14 09:42:00 NA days
17 2002-12-14 19:42:00 10.000000 days
18 2002-12-15 03:42:00 8.000000 days
19 2002-12-15 11:42:00 NA days
我注意到一些输出是以小时为单位(第8行 - 第7行),而有些输出是以天为单位(第5行 - 第4行)。如何获得一致的输出(小时是首选)?提前谢谢。