考虑以下数据集:
# Sample data used throughout the question: a data.frame with 100 row names and
# two columns, `date` (class "Date", given as numeric day counts) and `tdiff`
# (numeric). Both columns contain NA values.
# NOTE(review): the row names (e.g. "NA.169") look like leftovers from
# subsetting a larger data frame -- presumably not meaningful on their own.
data_example = structure(list(date = structure(c(16764, 16700, 17080, 16969,
16714, 17002, 16713, 17010, NA, 16729, 16756, 16892, 16891, 17135,
17094, 17108, 16861, 16765, NA, 16974, 16960, NA, 17105, 17084,
17086, 17155, 17158, 17157, 17157, 16742, 16741, 17051, 17128,
NA, 17133, 16790, 16743, 17109, 16924, 17158, 17049, 17158, 16806,
16773, 17143, 16799, 16770, 17142, 17021, 16902, 16995, 16862,
16848, NA, 16833, 17065, NA, 16933, 17148, 16721, 16798, 16792,
16724, 17099, 16687, 17049, 16920, 16905, 16745, 17039, 16699,
16840, 16778, 17016, 16709, 16741, 17137, 16821, 16833, 16792,
16948, 16757, 16983, 16632, 16902, 17164, 16653, 16994, 16986,
16986, 17049, 17072, 16874, 16757, 16941, 16988, 16706, 17000,
16745, 16710), class = "Date"), tdiff = c(0, 0, 0, 0, 0, 1, 0,
0, NA, 0, 0, 0, 0, 0, 0, 0, 1, 0, NA, 0, 0, NA, 0, 0, 0, 0, 0,
0, 0, 0, 3, 0, 0, NA, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0, 1, NA, 0, 0, NA, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)), .Names = c("date", "tdiff"
), row.names = c("1506", "705", "5585", "4182", "894", "4583",
"851", "4628", "NA.169", "1079", "1423", "3263", "3244", "6181",
"5730", "5902", "2786", "1529", "NA.268", "4229", "4061", "NA.166",
"5870", "5606", "5643", "6438", "6494", "6475", "6478", "1276",
"1235", "5180", "6074", "NA.286", "6121", "1800", "1310", "5923",
"3703", "6485", "5137", "6483", "2030", "1617", "6311", "1928",
"1577", "6282", "4763", "3379", "4505", "2828", "2614", "NA.283",
"2416", "5364", "NA.279", "3777", "6355", "994", "1903", "1856",
"1019", "5804", "545", "5140", "3674", "3431", "1342", "5003",
"675", "2509", "1658", "4699", "805", "1260", "6217", "2244",
"2409", "1852", "3894", "1445", "4375", "243", "3386", "6530",
"319", "4502", "4385", "4399", "5147", "5452", "3003", "1444",
"3848", "4443", "761", "4552", "1336", "835"), class = "data.frame")
因此,要获得当前周的tdiff值的总和,我会这样做:
# Tag every row with the week its date falls in (cut() buckets the dates by
# week), then attach that week's tdiff total to each of its rows. Missing tdiff
# values are ignored in the sum. The result is left grouped by week.
data_example_0 <- data_example %>%
  mutate(week = as.POSIXct(cut(date, "week"))) %>%
  group_by(week) %>%
  mutate(week_sum = sum(tdiff, na.rm = TRUE))
但是我还需要创建一个变量(next_week_sum),用来表示下一周 tdiff 值的总和。该怎么做?
答案 0 :(得分:1)
如果只是直接添加 lead(week_sum, 1),它会逐行移位,既不考虑顺序,也不考虑周的分组。因此,我们必须先按周汇总出每周的总和,在汇总后的数据上用 lead 取得下一周的值,然后再连接回原始数据。
# Build a one-row-per-week summary first: lead() shifts by row, so it has to
# run on weekly totals rather than on the raw observations.
# NOTE: lead() pairs each week with the next week that actually has rows; if a
# calendar week has no observations, the following week with data is used.
data_example_0 %>%
  # Rows without a date have week == NA; left in, they would form an extra
  # group that sorts last and becomes the "next week" of the final real week.
  filter(!is.na(week)) %>%
  group_by(week) %>%
  # na.rm = TRUE matches how week_sum was computed on the original data, so a
  # single missing tdiff does not turn the whole week's total into NA.
  summarise(week_sum = sum(tdiff, na.rm = TRUE)) %>%
  mutate(next_week_sum = lead(week_sum, 1, order_by = week)) %>%
  # Keep only the key and the new column: data_example_0 already carries
  # week_sum, and joining both copies would yield week_sum.x / week_sum.y.
  select(week, next_week_sum) %>%
  # Join the original rows back on; rows with an NA week get NA.
  right_join(data_example_0, by = "week")