Question

这是我的data_set_final

       state   city   base     year_1995  year_1996
1:      AZ     Bell 170104.516  0.448619 0.4019198
2:      AZ   Benson   1343.781  0.448619 0.4019198
3:      AZ  Buckeye  34155.261  0.448619 0.4019198

我想像下面这样按月计算 CAGR（复合年增长率）：

# Asker's original attempt: one hand-written column per month. Each month takes
# the previous month's value (starting from `base`) and grows it by the
# monthly-equivalent of the annual rate, (1 + year_1995)^(1/12) - 1.
# NOTE(review): the snippet is cut off in the question, so the chain ends on a
# dangling pipe and does not run as shown.
monthly <- data_set_final %>%
                  group_by (state,city) %>%
mutate (Jan_1995 = base * (((1 + year_1995)^(1/12))-1) + base)   %>%
mutate (Feb_1995 = Jan_1995 * (((1 + year_1995)^(1/12))-1) + Jan_1995)%>%
mutate (Mar_1995 = Feb_1995 * (((1 + year_1995)^(1/12))-1) + Feb_1995) %>%
mutate (Apr_1995 = Mar_1995 * (((1 + year_1995)^(1/12))-1) + Mar_1995) %>%
mutate (May_1995 = Apr_1995 * (((1 + year_1995)^(1/12))-1) + Apr_1995) %>%
mutate (Jun_1995 = May_1995 * (((1 + year_1995)^(1/12))-1) + May_1995) %>%

……就这样一直写到2000年。我希望列名是日期（例如 '1995-01-31'）而不是 Jan_1995；另外，由于我的数据跨越很多年，有没有一种动态的计算方法，而不必逐月手写？mutate 之后的输出大致如下：

state   city     Jan_1995  Feb_1995 Mar_1995 Apr_1995 
 AZ      Bell      175440.  180943.  186618.  192472.    
 AZ      Benson      1386.    1429.    1474.    1520.       
 AZ      Buckeye    35227.   36331.   37471.   38646

接下来，我希望把上面的输出“融化”（melt）成长格式：把所有月份列（用日期而不是 Jan_1995 这样的名字）转换成行。

state city   date        value
AZ    Bell  1995-01-31  175440
AZ    Bell  1995-02-28  180943
AZ    Bell  1995-03-31  186618

有人可以帮我吗？

Answer 1

首先，您可以大大简化您要进行的计算：

library(tidyverse)

# Closed form of monthly compounding: after k months at annual rate r the
# value is start * (1 + r)^(k / 12), so no month depends on the previous one.
compound_months <- function(start, annual_rate, k) {
  start * (1 + annual_rate)^(k / 12)
}

data_set_final %>%
  group_by(state, city) %>%
  mutate(
    Jan_1995 = compound_months(base, year_1995, 1),
    Feb_1995 = compound_months(base, year_1995, 2),
    Mar_1995 = compound_months(base, year_1995, 3),
    Apr_1995 = compound_months(base, year_1995, 4),
    May_1995 = compound_months(base, year_1995, 5),
    Jun_1995 = compound_months(base, year_1995, 6)
  )

# A tibble: 3 x 11
# Groups:   state, city [3]
#   state city       base year_1995 year_1996 Jan_1995 Feb_1995 Mar_1995 Apr_1995 May_1995 Jun_1995
#   <chr> <chr>     <dbl>     <dbl>     <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
# 1 AZ    Bell    170105.     0.449     0.402  175440.  180943.  186618.  192472.  198509.  204735.
# 2 AZ    Benson    1344.     0.449     0.402    1386.    1429.    1474.    1520.    1568.    1617.
# 3 AZ    Buckeye  34155.     0.449     0.402   35227.   36331.   37471.   38646.   39859.   41109.

如您所见，其结果与原来逐月递推的写法完全相同：

# One compounding step, written exactly as in the question: grow `previous`
# by the monthly-equivalent of the annual rate.
next_month <- function(previous, annual_rate) {
  previous * (((1 + annual_rate)^(1/12)) - 1) + previous
}

# A single mutate() suffices: later columns may refer to columns created
# earlier in the same call.
data_set_final %>%
  group_by(state, city) %>%
  mutate(
    Jan_1995 = next_month(base, year_1995),
    Feb_1995 = next_month(Jan_1995, year_1995),
    Mar_1995 = next_month(Feb_1995, year_1995),
    Apr_1995 = next_month(Mar_1995, year_1995),
    May_1995 = next_month(Apr_1995, year_1995),
    Jun_1995 = next_month(May_1995, year_1995)
  )

# A tibble: 3 x 11
# Groups:   state, city [3]
#   state city       base year_1995 year_1996 Jan_1995 Feb_1995 Mar_1995 Apr_1995 May_1995 Jun_1995
#   <chr> <chr>     <dbl>     <dbl>     <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
# 1 AZ    Bell    170105.     0.449     0.402  175440.  180943.  186618.  192472.  198509.  204735.
# 2 AZ    Benson    1344.     0.449     0.402    1386.    1429.    1474.    1520.    1568.    1617.
# 3 AZ    Buckeye  34155.     0.449     0.402   35227.   36331.   37471.   38646.   39859.   41109.

接下来把数据转换为长格式，这样就可以计算每一年的新基准值：

# Reshape the yearly growth rates to long format and derive, for every
# state/city, the value at the start of each year (`year_base`).
data_cagr <- data_set_final %>%
  # One row per (state, city, year); `perc` holds that year's annual rate.
  pivot_longer(
    cols = -c(state, city, base),
    names_to = "year",
    values_to = "perc"
  ) %>%
  mutate(year = parse_number(year)) %>%
  group_by(state, city) %>%
  # lag() and cumprod() below rely on chronological order within each group.
  arrange(year) %>%
  mutate(
    previous_year = lag(perc),
    # Opening value of a year = base compounded by ALL earlier years' rates.
    # Using only the immediately preceding rate is correct for the second
    # year but drops earlier growth from the third year onwards.
    year_base = base * lag(cumprod(1 + perc), default = 1)
  )

data_cagr
# A tibble: 6 x 7
# Groups:   state, city [3]
#   state city       base  year  perc previous_year year_base
#   <chr> <chr>     <dbl> <dbl> <dbl>         <dbl>     <dbl>
# 1 AZ    Bell    170105.  1995 0.449        NA       170105.
# 2 AZ    Benson    1344.  1995 0.449        NA         1344.
# 3 AZ    Buckeye  34155.  1995 0.449        NA        34155.
# 4 AZ    Bell    170105.  1996 0.402         0.449   246417.
# 5 AZ    Benson    1344.  1996 0.402         0.449     1947.
# 6 AZ    Buckeye  34155.  1996 0.402         0.449    49478.

然后为每个月计算对应的值：

# Expand every (state, city, year) row into its 12 month-end values.
data_cagr %>%
  ungroup() %>%
  # Each input row is repeated for months 1..12, month varying fastest.
  expand_grid(month = 1:12) %>%
  mutate(
    # Value after `month` months of compounding from the year's opening value.
    value = year_base * (1 + perc)^(month / 12),
    # Last day of the month = first day of the next month minus one day.
    # lubridate is namespace-qualified so this does not depend on the package
    # being attached.
    date = lubridate::make_date(year, month, 1L) +
      lubridate::period(1, "month") - lubridate::days(1)
  ) %>%
  select(state, city, date, value)

# A tibble: 72 x 4
#    state city  date         value
#    <chr> <chr> <date>       <dbl>
#  1 AZ    Bell  1995-01-31 175440.
#  2 AZ    Bell  1995-02-28 180943.
#  3 AZ    Bell  1995-03-31 186618.
#  4 AZ    Bell  1995-04-30 192472.
#  5 AZ    Bell  1995-05-31 198509.
#  6 AZ    Bell  1995-06-30 204735.
#  7 AZ    Bell  1995-07-31 211157.
#  8 AZ    Bell  1995-08-31 217780.
#  9 AZ    Bell  1995-09-30 224611.
# 10 AZ    Bell  1995-10-31 231656.
# ... with 62 more rows

数据

# Reproducible sample input: one row per city with its `base` value and the
# `year_1995` / `year_1996` rates used in the snippets above.
data_set_final <- data.frame(
  state = rep("AZ", 3L),
  city = c("Bell", "Benson", "Buckeye"),
  base = c(170104.516, 1343.781, 34155.261),
  year_1995 = rep(0.448619, 3L),
  year_1996 = rep(0.4019198, 3L),
  stringsAsFactors = FALSE
)
# Tag the data frame with the tibble classes so it prints and behaves as a
# tibble once the tidyverse is loaded.
class(data_set_final) <- c("tbl_df", "tbl", "data.frame")

在R中的dplyr中使用mutate函数创建一个具有日期值（2000-05-06）的新列

1 个答案: