f1 f2
1 11/1/16 0
2 12/1/16 0
3 11/2/16 56.25
4 12/2/16 0
5 11/3/16 56.25
6 12/3/16 0
7 11/4/16 111
8 12/4/16 0
9 11/5/16 0
10 12/5/16 13
11 11/6/16 0
12 12/6/16 0
13 11/7/16 0
14 12/7/16 0
15 10/8/16 0
16 11/8/16 35
17 12/8/16 0
18 10/9/16 0
19 11/9/16 415.21
20 12/9/16 0
21 10/10/16 0
22 11/10/16 280
23 12/10/16 817
24 10/11/16 830
25 11/11/16 644
26 12/11/16 0
27 10/12/16 0
28 11/12/16 90
29 12/12/16 0
30 1/1/17 0
31 2/1/17 250
32 2/1/17 0
33 3/1/17 45
34 3/1/17 184
35 4/1/17 578.16
36 4/1/17 160
37 5/1/17 21
38 5/1/17 0
39 6/1/17 352
40 6/1/17 0
41 6/1/17 2089
42 7/1/17 0
43 7/1/17 0
44 7/1/17 855
45 8/1/17 488
46 8/1/17 573
47 8/1/17 654
48 9/1/17 0
49 9/1/17 0
50 9/1/17 1995
51 10/1/17 0
52 10/1/17 0
53 10/1/17 0
54 11/1/17 0
55 11/1/17 115
56 11/1/17 2147
57 12/1/17 74
58 12/1/17 0
59 12/1/17 1431
60 1/2/17 50
如何将上面的数据集转换为如下所示的时间序列数据:按月份汇总,并把同一个月内的值相加?
Jan Feb March April May June July August Sep Oct Nov Dec
2016 0 56.25 56.25 111 13 0 0 35 415.21 1097 1474 90
2017 12011.16 50 0 0 0 0 0 0 0 0 0 0
我尝试过 ts 函数,但不知道如何按月求和并整理成合适的格式。
# Asker's attempt: wrap the raw `data` object in a monthly series
# (frequency 12) running from January 2016 to January 2017.
dat <- ts(
  data,
  start = c(2016, 1),
  end = c(2017, 1),
  frequency = 12
)
答案 0 :(得分:0)
以下是使用 dplyr、tidyr 和 lubridate 的解决方案。
# Monthly totals of f2: one row per year, one column per month (Jan..Dec).
# `dat` is the data frame built by the structure() call further down, with
# f1 holding day/month/two-digit-year strings and f2 holding numeric values.
library(dplyr)
library(tidyr)
library(lubridate)

dat %>%
  # %y (not %Y) reads the two-digit year, so "16" becomes 2016 rather than
  # the literal year 16. Refer to the piped column `f1`, not `dat$f1`.
  mutate(f1 = as.Date(f1, format = "%d/%m/%y")) %>%
  mutate(month = month.abb[month(f1)], year = year(f1)) %>%
  group_by(month, year) %>%
  summarise(sum = sum(f2)) %>%
  # Drop the grouping left behind by summarise() before reshaping.
  ungroup() %>%
  # One column per month; year/month pairs without any rows are filled with 0.
  spread(month, sum, fill = 0) %>%
  # Put the month columns back into calendar order. This assumes every month
  # occurs at least once in the data; otherwise the column would be missing.
  select(year, month.abb)
# Expected result (exact print layout depends on the tibble version):
# # A tibble: 2 x 13
# year Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
# <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 2016. 0. 56.2 56.2 111. 13. 0. 0. 35. 415. 1097. 1474. 90.
# 2 2017. 12011. 50.0 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
# Example data: 60 observations.
#   f1 - date strings written as day/month/two-digit-year
#   f2 - numeric value recorded for that date
dat <- data.frame(
  f1 = c(
    "11/1/16", "12/1/16", "11/2/16", "12/2/16", "11/3/16", "12/3/16",
    "11/4/16", "12/4/16", "11/5/16", "12/5/16", "11/6/16", "12/6/16",
    "11/7/16", "12/7/16", "10/8/16", "11/8/16", "12/8/16", "10/9/16",
    "11/9/16", "12/9/16", "10/10/16", "11/10/16", "12/10/16", "10/11/16",
    "11/11/16", "12/11/16", "10/12/16", "11/12/16", "12/12/16", "1/1/17",
    "2/1/17", "2/1/17", "3/1/17", "3/1/17", "4/1/17", "4/1/17",
    "5/1/17", "5/1/17", "6/1/17", "6/1/17", "6/1/17", "7/1/17",
    "7/1/17", "7/1/17", "8/1/17", "8/1/17", "8/1/17", "9/1/17",
    "9/1/17", "9/1/17", "10/1/17", "10/1/17", "10/1/17", "11/1/17",
    "11/1/17", "11/1/17", "12/1/17", "12/1/17", "12/1/17", "1/2/17"
  ),
  f2 = c(
    0, 0, 56.25, 0, 56.25, 0, 111, 0, 0, 13,
    0, 0, 0, 0, 0, 35, 0, 0, 415.21, 0,
    0, 280, 817, 830, 644, 0, 0, 90, 0, 0,
    250, 0, 45, 184, 578.16, 160, 21, 0, 352, 0,
    2089, 0, 0, 855, 488, 573, 654, 0, 0, 1995,
    0, 0, 0, 0, 115, 2147, 74, 0, 1431, 50
  ),
  # Keep f1 as plain character strings regardless of the R version's default.
  stringsAsFactors = FALSE
)
要转换为时间序列,您可以执行以下操作:
# Build a monthly time series from the per-month totals of f2.
# `dat` has f1 (day/month/two-digit-year strings) and f2 (numeric values).
dat2 <- dat %>%
  # %y (not %Y) reads the two-digit year, so "16" becomes 2016 rather than
  # the literal year 16. Refer to the piped column `f1`, not `dat$f1`.
  mutate(f1 = as.Date(f1, format = "%d/%m/%y")) %>%
  mutate(month = month.abb[month(f1)], year = year(f1)) %>%
  group_by(month, year) %>%
  summarise(sum = sum(f2)) %>%
  # Drop the grouping left behind by summarise() before reshaping.
  ungroup() %>%
  # One row per year, one column per month; missing combinations become 0.
  spread(month, sum, fill = 0) %>%
  # Rows must be in year order and columns in calendar order before the
  # table is flattened into a single vector.
  arrange(year) %>%
  select(month.abb) %>%
  as.matrix() %>%
  # as.vector() walks a matrix column by column. Without the transpose the
  # years x months matrix would come out as Jan-2016, Jan-2017, Feb-2016, ...
  # Transposing makes each year a column, so the result is chronological.
  t() %>%
  as.vector()
# The series starts in January 2016, the first year present in the data.
# This assumes the years in the data are consecutive with none missing.
dat.ts <- ts(dat2, start = c(2016, 1), frequency = 12)
dat.ts