如何使用 R 将数据框转换为时间序列

时间:2018-04-07 07:04:02

标签: r time-series

                         f1                      f2                 
1                        11/1/16                0
2                         12/1/16               0
3                         11/2/16           56.25
4                         12/2/16               0
5                         11/3/16           56.25
6                         12/3/16               0
7                         11/4/16             111
8                         12/4/16               0
9                         11/5/16               0
10                        12/5/16              13
11                        11/6/16               0
12                        12/6/16               0
13                        11/7/16               0
14                        12/7/16               0
15                        10/8/16               0
16                        11/8/16              35
17                        12/8/16               0
18                        10/9/16               0
19                        11/9/16          415.21
20                        12/9/16               0
21                       10/10/16               0
22                       11/10/16             280
23                       12/10/16             817
24                       10/11/16             830
25                       11/11/16             644
26                       12/11/16               0
27                       10/12/16               0
28                       11/12/16              90
29                       12/12/16               0
30                         1/1/17               0
31                         2/1/17             250
32                         2/1/17               0
33                         3/1/17              45
34                         3/1/17             184
35                         4/1/17          578.16
36                         4/1/17             160
37                         5/1/17              21
38                         5/1/17               0
39                         6/1/17             352
40                         6/1/17               0
41                         6/1/17            2089
42                         7/1/17               0
43                         7/1/17               0
44                         7/1/17             855
45                         8/1/17             488
46                         8/1/17             573
47                         8/1/17             654
48                         9/1/17               0
49                         9/1/17               0
50                         9/1/17            1995
51                        10/1/17               0
52                        10/1/17               0
53                        10/1/17               0
54                        11/1/17               0
55                        11/1/17             115
56                        11/1/17            2147
57                        12/1/17              74
58                        12/1/17               0
59                        12/1/17            1431
60                         1/2/17              50

如何将上面给出的数据集转换为如下所示的时间序列数据,即按月份取值,并把同一个月内的值相加?

       Jan            Feb   March    April   May  June  July  August  Sep      Oct   Nov    Dec
2016   0             56.25   56.25     111     13    0     0     35       415.21   1097  1474  90
2017  12011.16    50       0           0       0     0     0      0           0      0       0    0

我用过 ts 函数,但不知道如何求和并按合适的格式排列。

# Asker's attempt: wrap the raw data in a monthly series (12 observations
# per year) running from period 1 of 2016 to period 1 of 2017.
dat <- ts(
  data = data,
  frequency = 12,
  start = c(2016, 1),
  end = c(2017, 1)
)

1 个答案:

答案 0 :(得分:0)

以下是使用 dplyr、tidyr 和 lubridate 的解决方案。

library(dplyr)
library(tidyr)
library(lubridate)

# Sum f2 per calendar month and lay the totals out as one row per year
# with one column per month.
dat %>%
  # f1 holds day/month/two-digit-year text such as "11/2/16". The year must
  # be parsed with %y: %Y would read "16" as the literal year 16, not 2016.
  mutate(f1 = as.Date(f1, format = "%d/%m/%y")) %>%
  mutate(month = month.abb[month(f1)], year = year(f1)) %>%
  group_by(year, month) %>%
  summarise(sum = sum(f2)) %>%
  # Drop the grouping left over from summarise() before reshaping.
  ungroup() %>%
  # One column per month; year/month pairs with no rows become 0.
  spread(month, sum, fill = 0) %>%
  # Put the month columns in calendar order (Jan..Dec).
  select(year, all_of(month.abb))

# # A tibble: 2 x 13
#    year    Jan   Feb   Mar   Apr   May   Jun   Jul   Aug   Sep   Oct   Nov   Dec
#   <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 2016.     0.  56.2  56.2  111.   13.    0.    0.   35.  415. 1097. 1474.   90.
# 2 2017. 12011.  50.0   0.     0.    0.    0.    0.    0.    0.    0.    0.    0.

数据

# Example data: 60 observations.
#   f1 - date text written as day/month/two-digit-year (kept as character)
#   f2 - numeric value recorded for that date
dat <- data.frame(
  f1 = c(
    "11/1/16", "12/1/16", "11/2/16", "12/2/16", "11/3/16",
    "12/3/16", "11/4/16", "12/4/16", "11/5/16", "12/5/16",
    "11/6/16", "12/6/16", "11/7/16", "12/7/16", "10/8/16",
    "11/8/16", "12/8/16", "10/9/16", "11/9/16", "12/9/16",
    "10/10/16", "11/10/16", "12/10/16", "10/11/16", "11/11/16",
    "12/11/16", "10/12/16", "11/12/16", "12/12/16", "1/1/17",
    "2/1/17", "2/1/17", "3/1/17", "3/1/17", "4/1/17",
    "4/1/17", "5/1/17", "5/1/17", "6/1/17", "6/1/17",
    "6/1/17", "7/1/17", "7/1/17", "7/1/17", "8/1/17",
    "8/1/17", "8/1/17", "9/1/17", "9/1/17", "9/1/17",
    "10/1/17", "10/1/17", "10/1/17", "11/1/17", "11/1/17",
    "11/1/17", "12/1/17", "12/1/17", "12/1/17", "1/2/17"
  ),
  f2 = c(
    0, 0, 56.25, 0, 56.25, 0, 111, 0, 0, 13,
    0, 0, 0, 0, 0, 35, 0, 0, 415.21, 0,
    0, 280, 817, 830, 644, 0, 0, 90, 0, 0,
    250, 0, 45, 184, 578.16, 160, 21, 0, 352, 0,
    2089, 0, 0, 855, 488, 573, 654, 0, 0, 1995,
    0, 0, 0, 0, 115, 2147, 74, 0, 1431, 50
  ),
  # Keep f1 as character on every R version (pre-4.0 defaulted to factor).
  stringsAsFactors = FALSE
)

修改

要转换为时间序列,您可以执行以下操作:

library(dplyr)
library(tidyr)
library(lubridate)

# Build a monthly time series of f2 totals: aggregate per year/month, reshape
# to a year-by-month grid, then flatten the grid in chronological order.
dat2 <- dat %>%
  # f1 holds day/month/two-digit-year text such as "11/2/16". The year must
  # be parsed with %y: %Y would read "16" as the literal year 16, not 2016.
  mutate(f1 = as.Date(f1, format = "%d/%m/%y")) %>%
  mutate(month = month.abb[month(f1)], year = year(f1)) %>%
  group_by(year, month) %>%
  summarise(sum = sum(f2)) %>%
  # Drop the grouping left over from summarise() before reshaping.
  ungroup() %>%
  # One column per month; year/month pairs with no rows become 0.
  spread(month, sum, fill = 0) %>%
  # Rows must run from the earliest year to the latest before flattening.
  arrange(year) %>%
  # Keep only the month columns, in calendar order (Jan..Dec).
  select(all_of(month.abb)) %>%
  as.matrix() %>%
  # as.vector() reads a matrix column by column. Without the transpose the
  # years would be interleaved (Jan 2016, Jan 2017, Feb 2016, ...); after
  # t() each column is one year, so the result is Jan..Dec of the first
  # year followed by Jan..Dec of the next.
  t() %>%
  as.vector()
# NOTE: the start is hard-coded to January 2016, the first year in `dat`.
dat.ts <- ts(dat2, start = c(2016, 1), frequency = 12)
dat.ts