我正在把大部分数据整理(data wrangling)工作迁移到dplyr,但我无法弄清楚如何按因子分组、在每个分组内计算相邻行之间的差值。
我可以使用plyr中的ddply,如下所示:
ddply(.data = dat_frame, .variables = .(the_factor), .fun = summarise, diff = diff(the_number))
the_factor diff
1 169 0.000
2 169 0.000
3 372 22.557
4 372 0.000
5 372 -19.491
6 372 2.940
7 372 -2.767
8 372 -5.310
9 508 0.000
源数据(dput()的输出,可直接赋值给dat_frame):
structure(list(the_factor = structure(c(3L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 8L, 7L, 10L, 5L, 9L, 2L, 2L, 2L, 1L, 11L, 6L, 6L), .Label = c("166",
"169", "276", "372", "409", "508", "523", "714", "846", "876",
"969"), class = "factor"), the_date = structure(c(4L, 12L, 13L,
14L, 15L, 16L, 17L, 18L, 1L, 8L, 7L, 2L, 11L, 2L, 3L, 5L, 10L,
8L, 6L, 9L), .Label = c("2012-05-19 21:27:00", "2012-08-02 03:49:00",
"2012-08-02 03:50:00", "2012-08-02 03:52:00", "2012-08-02 08:36:00",
"2013-03-15 03:38:00", "2013-03-15 03:40:00", "2013-03-15 03:41:00",
"2013-03-15 09:14:00", "2013-04-24 13:45:00", "2013-09-04 09:17:00",
"2014-03-12 14:21:00", "2014-03-12 19:45:00", "2014-03-13 04:51:00",
"2014-03-13 21:04:00", "2014-03-14 01:18:00", "2014-03-14 04:49:00",
"2014-03-14 12:09:00"), class = "factor"), the_number = c(0.02,
17.443, 40, 40, 20.509, 23.449, 20.682, 15.372, 0.02, 0.02, 0.02,
0.02, 1.74, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02)), .Names = c("the_factor",
"the_date", "the_number"), row.names = c(NA, -20L), class = "data.frame")