在 R 中使用 tidyverse 准备数据时代码不起作用

时间:2018-12-19 08:14:24

标签: r tidyr

我想准备我的时间序列, 这里是初始dput()

# Example data: two groups, six years each (1973-1978), one integer column
# per month. Column names come from the list tags, so no separate names
# attribute is needed.
df <- structure(
  list(
    group = c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L),
    year = c(
      1973L, 1974L, 1975L, 1976L, 1977L, 1978L,
      1973L, 1974L, 1975L, 1976L, 1977L, 1978L
    ),
    Jan = c(
      9007L, 7750L, 8162L, 7717L, 7792L, 7836L,
      9007L, 7750L, 8162L, 7717L, 7792L, 7836L
    ),
    Feb = c(
      8106L, 6981L, 7306L, 7461L, 6957L, 6892L,
      8106L, 6981L, 7306L, 7461L, 6957L, 6892L
    ),
    Mar = c(
      8928L, 8038L, 8124L, 7767L, 7726L, 7791L,
      8928L, 8038L, 8124L, 7767L, 7726L, 7791L
    ),
    Apr = c(
      9137L, 8422L, 7870L, 7925L, 8106L, 8192L,
      9137L, 8422L, 7870L, 7925L, 8106L, 8192L
    ),
    May = c(
      10017L, 8714L, 9387L, 8623L, 8890L, 9115L,
      10017L, 8714L, 9387L, 8623L, 8890L, 9115L
    ),
    Jun = c(
      10826L, 9512L, 9556L, 8945L, 9299L, 9434L,
      10826L, 9512L, 9556L, 8945L, 9299L, 9434L
    ),
    Jul = c(
      11317L, 10120L, 10093L, 10078L, 10625L, 10484L,
      11317L, 10120L, 10093L, 10078L, 10625L, 10484L
    ),
    Aug = c(
      10744L, 9823L, 9620L, 9179L, 9302L, 9827L,
      10744L, 9823L, 9620L, 9179L, 9302L, 9827L
    ),
    Sep = c(
      9713L, 8743L, 8285L, 8037L, 8314L, 9110L,
      9713L, 8743L, 8285L, 8037L, 8314L, 9110L
    ),
    Oct = c(
      9938L, 9129L, 8466L, 8488L, 8850L, 9070L,
      9938L, 9129L, 8466L, 8488L, 8850L, 9070L
    ),
    Nov = c(
      9161L, 8710L, 8160L, 7874L, 8265L, 8633L,
      9161L, 8710L, 8160L, 7874L, 8265L, 8633L
    ),
    Dec = c(
      8927L, 8680L, 8034L, 8647L, 8796L, 9240L,
      8927L, 8680L, 8034L, 8647L, 8796L, 9240L
    )
  ),
  class = "data.frame",
  row.names = c(NA, -12L)
)

于是我按照之前一个帖子中的做法,编写了如下代码:

# Attach the packages used below, silencing their startup messages.
# `TRUE` is spelled out (the alias `T` can be reassigned), and the result of
# the loop is wrapped in invisible() so that the list of attached packages is
# not printed to the console.
load_pkgs <- c("forecast", "zoo", "timetk", "tidyverse")
invisible(lapply(load_pkgs, function(pkg) {
  suppressPackageStartupMessages(library(pkg, character.only = TRUE))
}))

# Split into one data frame per group; the first column (`group`) is dropped
# because it is constant within each piece.
ld <- split(df[, -1], df$group)

# Tidy-up the splits

# For every group: reshape the month columns into long form (key = month
# abbreviation, value = observation), glue year and month into "YYYY-Mon",
# append "-01", and parse the result as a Date.
# NOTE(review): as.Date() matches "%b" against the abbreviated month names of
# the current locale (see ?strptime); under a non-English LC_TIME setting
# "Jan", "Feb", ... would presumably fail to parse and yield NA -- confirm
# with Sys.getlocale("LC_TIME").
ld <- lapply(ld, function(grp) {
  long <- gather(grp, key, value, -year)
  long <- unite(long, date, year, key, sep = "-")
  long <- mutate(long, date = paste0(date, "-01"))
  mutate(long, date = as.Date(date, format = "%Y-%b-%d"))
})

结果我得到:

$`1`
   date value
1  <NA>  9007
2  <NA>  7750
3  <NA>  8162
4  <NA>  7717

但实际上我必须得到如下结果:

         date value
1  1973-01-01  9007
2  1974-01-01  7750
3  1975-01-01  8162
4  1976-01-01  7717
5  1977-01-01  7792
6  1978-01-01  7836

为什么我得不到期望的结果?
此代码在我的控制台中不起作用。我安装了所有库。

2 个答案:

答案 0 :(得分:3)

同时,您可以尝试使用lubridate软件包的解决方法:

library(lubridate)

# Same reshaping as in the question (long form, then "YYYY-Mon-01" strings),
# but the string is parsed with lubridate's ymd() instead of as.Date().
ld <- lapply(ld, function(grp) {
  long <- gather(grp, key, value, -year)
  long <- unite(long, date, year, key, sep = "-")
  mutate(long, date = ymd(paste0(date, "-01")))
})

结果:

$`1`
         date value
1  1973-01-01  9007
2  1974-01-01  7750
3  1975-01-01  8162
4  1976-01-01  7717
5  1977-01-01  7792
...

   $`2`
         date value
1  1973-01-01  9007
2  1974-01-01  7750
3  1975-01-01  8162
4  1976-01-01  7717
5  1977-01-01  7792
...

答案 1 :(得分:1)

使用 tidyverse 和 lubridate 的另一种(可能更快的)解决方案:

# For every group: reshape the month columns into long form, translate the
# month abbreviation into its number via its position in month.abb, build a
# "year-month-01" string, parse it with ymd(), and keep only value and date.
lapply(ld, function(grp) {
  long <- gather(grp, var, value, -year)
  long <- mutate(
    long,
    date = ymd(paste(year, match(var, month.abb), "01", sep = "-"))
  )
  select(long, -year, -var)
})

$`1`
   value       date
1   9007 1973-01-01
2   7750 1974-01-01
3   8162 1975-01-01
4   7717 1976-01-01
5   7792 1977-01-01
6   7836 1978-01-01
7   8106 1973-02-01
8   6981 1974-02-01
9   7306 1975-02-01
10  7461 1976-02-01

$`2`
   value       date
1   9007 1973-01-01
2   7750 1974-01-01
3   8162 1975-01-01
4   7717 1976-01-01
5   7792 1977-01-01
6   7836 1978-01-01
7   8106 1973-02-01
8   6981 1974-02-01
9   7306 1975-02-01
10  7461 1976-02-01

首先,它将数据重塑为长格式。然后,把缩写的月份转换为对应的数字,将年份、月份数字和“01”(表示每月的第一天)用“-”拼接起来,最后通过 ymd() 将其转换为日期。