我正在尝试按照8天的时间段对数据进行分组,我发现第一年的代码运行良好,但第二年的代码运行不正常。我遇到了这个问题。
df$date<-as.Date(df$date)
z <-aggregate(df$variable,
list(Dts = cut(as.Date(df$date, format="%d/%m/%Y"),
breaks="8 day")), FUN=sum)
我有不同的年份,我的数据如下:
structure(list(date = structure(c(5583, 5584, 5585, 5586, 5587,
5588, 5589, 5590, 5591, 5592, 5593, 5594, 5595, 5596, 5597, 5598,
5599, 5600, 5601, 5602, 5603, 5604, 5605, 5606, 5607, 5608, 5609,
5610, 5611, 5612, 5613, 5614, 5615, 5616, 5617, 5618, 5619, 5620,
5621, 5622, 5623, 5624, 5625, 5626, 5627, 5628, 5629, 5630, 5631,
5632, 5633, 5634, 5635, 5636, 5637, 5638, 5639, 5640, 5641, 5642,
5643, 5644, 5645, 5646, 5948, 5949, 5950, 5951, 5952, 5953, 5954,
5955, 5956, 5957, 5958, 5959, 5960, 5961, 5962, 5963, 5964, 5965,
5966, 5967, 5968, 5969, 5970, 5971, 5972, 5973, 5974, 5975, 5976,
5977, 5978, 5979, 5980, 5981, 5982, 5983, 5984, 5985, 5986, 5987,
5988, 5989, 5990, 5991, 5992, 5993, 5994, 5995, 5996, 5997, 5998,
5999, 6000, 6001, 6002, 6003, 6004, 6005, 6006, 6007, 6008, 6009,
6010, 6011, 6313, 6314, 6315, 6316, 6317, 6318, 6319, 6320, 6321,
6322, 6323, 6324, 6325, 6326, 6327, 6328, 6329, 6330, 6331, 6332,
6333, 6334, 6335, 6336, 6337, 6338, 6339, 6340, 6341, 6342, 6343,
6344, 6345, 6346, 6347, 6348, 6349, 6350, 6351, 6352, 6353, 6354,
6355, 6356, 6357, 6358, 6359, 6360, 6361, 6362, 6363, 6364, 6365,
6366, 6367, 6368, 6369, 6370, 6371, 6372, 6373, 6374, 6375, 6376
), class = "Date"), variable = c(2, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0)), .Names = c("date", "variable"), row.names = c(105L,
106L, 107L, 108L, 109L, 110L, 111L, 112L, 113L, 114L, 115L, 116L,
117L, 118L, 119L, 120L, 121L, 122L, 123L, 124L, 125L, 126L, 127L,
128L, 129L, 130L, 131L, 132L, 133L, 134L, 135L, 136L, 137L, 138L,
139L, 140L, 141L, 142L, 143L, 144L, 145L, 146L, 147L, 148L, 149L,
150L, 151L, 152L, 153L, 154L, 155L, 156L, 157L, 158L, 159L, 160L,
161L, 162L, 163L, 164L, 165L, 166L, 167L, 168L, 470L, 471L, 472L,
473L, 474L, 475L, 476L, 477L, 478L, 479L, 480L, 481L, 482L, 483L,
484L, 485L, 486L, 487L, 488L, 489L, 490L, 491L, 492L, 493L, 494L,
495L, 496L, 497L, 498L, 499L, 500L, 501L, 502L, 503L, 504L, 505L,
506L, 507L, 508L, 509L, 510L, 511L, 512L, 513L, 514L, 515L, 516L,
517L, 518L, 519L, 520L, 521L, 522L, 523L, 524L, 525L, 526L, 527L,
528L, 529L, 530L, 531L, 532L, 533L, 835L, 836L, 837L, 838L, 839L,
840L, 841L, 842L, 843L, 844L, 845L, 846L, 847L, 848L, 849L, 850L,
851L, 852L, 853L, 854L, 855L, 856L, 857L, 858L, 859L, 860L, 861L,
862L, 863L, 864L, 865L, 866L, 867L, 868L, 869L, 870L, 871L, 872L,
873L, 874L, 875L, 876L, 877L, 878L, 879L, 880L, 881L, 882L, 883L,
884L, 885L, 886L, 887L, 888L, 889L, 890L, 891L, 892L, 893L, 894L,
895L, 896L, 897L, 898L), class = "data.frame")
这是我得到的输出,第9行应该再次从1986-04-15开始:
Dts x
1 1985-04-15 2
2 1985-04-23 0
3 1985-05-01 0
4 1985-05-09 0
5 1985-05-17 0
6 1985-05-25 0
7 1985-06-02 0
8 1985-06-10 0
9 1986-04-10 0
10 1986-04-18 0
11 1986-04-26 0
12 1986-05-04 NA
13 1986-05-12 NA
14 1986-05-20 NA
15 1986-05-28 NA
16 1986-06-05 NA
17 1986-06-13 NA
18 1987-04-13 0
19 1987-04-21 0
20 1987-04-29 0
21 1987-05-07 0
22 1987-05-15 0
23 1987-05-23 0
24 1987-05-31 0
25 1987-06-08 0
26 1987-06-16 0
数据在那里,但我不知道出了什么问题。非常感谢你的帮助。
答案 0 :(得分:1)
如果从1985年4月15日开始以8天为增量,那么1986年4月10日将是8天期间之一的开始。这是一种使用1985年使用的其他年份的休息时间的方法:
要设置cut
函数的日期中断,请在将1985-04-15表示的年份的日期除以8时获取余数。然后使用此选项选择{{范围内的所有日期1}}具有相同的余数。这将给所有年份的同一天(除了我们需要调整闰年,因为额外的一天)。这些是我们将用于df$date
函数的日期中断。
请注意,此方法导致每年的最后一个时间间隔为五天而不是八天。此外,在闰年,2月29日左右的间隔为9天。
cut
library(lubridate) library(dplyr) index = yday(df$date[1]) %% 8 dates = seq(min(df$date), max(df$date), "1 day") breaks = dates[yday(dates) %% 8 == index] # Ensure that leap years have same breaks as other years breaks = if_else(year(breaks) %in% seq(1980,2020,4) & yday(breaks) > yday(as.Date("1980-02-28")), breaks + 1, breaks) aggregate(df$variable, list(Dts = cut(df$date, breaks=breaks)), FUN=sum)