这是我的data_set_final
state city base year_1995 year_1996
1: AZ Bell 170104.516 0.448619 0.4019198
2: AZ Benson 1343.781 0.448619 0.4019198
3: AZ Buckeye 34155.261 0.448619 0.4019198
我像下面这样按月计算CAGR:
monthly <- data_set_final %>%
group_by (state,city) %>%
mutate (Jan_1995 = base * (((1 + year_1995)^(1/12))-1) + base) %>%
mutate (Feb_1995 = Jan_1995 * (((1 + year_1995)^(1/12))-1) + Jan_1995)%>%
mutate (Mar_1995 = Feb_1995 * (((1 + year_1995)^(1/12))-1) + Feb_1995) %>%
mutate (Apr_1995 = Mar_1995 * (((1 + year_1995)^(1/12))-1) + Mar_1995) %>%
mutate (May_1995 = Apr_1995 * (((1 + year_1995)^(1/12))-1) + Apr_1995) %>%
mutate (Jun_1995 = May_1995 * (((1 + year_1995)^(1/12))-1) + May_1995) %>%
如此一直写到2000年。我希望得到的是日期(例如 '1995-01-31')而不是 Jan_1995 这样的列名;另外,因为我有很多年的数据,想知道有没有动态的计算方法,而不必逐月手写。目前的输出大致如下:
state city Jan_1995 Feb_1995 Mar_1995 Apr_1995
AZ Bell 175440. 180943. 186618. 192472.
AZ Benson 1386. 1429. 1474. 1520.
AZ Buckeye 35227. 36331. 37471. 38646
在此基础上,我希望把上面的输出转换(melt)为长格式:让每个月的日期(真正的日期值,而不是 Jan_1995 这样的列名)各占一行,如下所示:
state city date value
AZ Bell 1995-01-31 175440
AZ Bell 1995-02-28 180943
AZ Bell 1995-03-31 186618
有人可以帮我吗
答案 0 :(得分:0)
首先,您可以大大简化您要进行的计算
library(tidyverse)
# Closed-form monthly values: after k months the value is
# base * (1 + annual_rate)^(k / 12), so each month can be computed directly
# from `base` without referring to the previous month's column.
data_set_final %>%
  group_by(state, city) %>%
  mutate(
    Jan_1995 = base * (1 + year_1995)^(1 / 12),
    Feb_1995 = base * (1 + year_1995)^(2 / 12),
    Mar_1995 = base * (1 + year_1995)^(3 / 12),
    Apr_1995 = base * (1 + year_1995)^(4 / 12),
    May_1995 = base * (1 + year_1995)^(5 / 12),
    Jun_1995 = base * (1 + year_1995)^(6 / 12)
  )
# A tibble: 3 x 11
# Groups: state, city [3]
# state city base year_1995 year_1996 Jan_1995 Feb_1995 Mar_1995 Apr_1995 May_1995 Jun_1995
# <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 AZ Bell 170105. 0.449 0.402 175440. 180943. 186618. 192472. 198509. 204735.
# 2 AZ Benson 1344. 0.449 0.402 1386. 1429. 1474. 1520. 1568. 1617.
# 3 AZ Buckeye 34155. 0.449 0.402 35227. 36331. 37471. 38646. 39859. 41109.
如您所见,其结果与原来逐月链式计算的写法完全相同:
# Step-by-step compounding: every month adds the monthly-equivalent growth
# ((1 + year_1995)^(1/12) - 1) of the previous month's value to that value.
# A single mutate() is enough because later expressions may refer to columns
# created earlier in the same call.
data_set_final %>%
  group_by(state, city) %>%
  mutate(
    Jan_1995 = base * ((1 + year_1995)^(1/12) - 1) + base,
    Feb_1995 = Jan_1995 * ((1 + year_1995)^(1/12) - 1) + Jan_1995,
    Mar_1995 = Feb_1995 * ((1 + year_1995)^(1/12) - 1) + Feb_1995,
    Apr_1995 = Mar_1995 * ((1 + year_1995)^(1/12) - 1) + Mar_1995,
    May_1995 = Apr_1995 * ((1 + year_1995)^(1/12) - 1) + Apr_1995,
    Jun_1995 = May_1995 * ((1 + year_1995)^(1/12) - 1) + May_1995
  )
# A tibble: 3 x 11
# Groups: state, city [3]
# state city base year_1995 year_1996 Jan_1995 Feb_1995 Mar_1995 Apr_1995 May_1995 Jun_1995
# <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 AZ Bell 170105. 0.449 0.402 175440. 180943. 186618. 192472. 198509. 204735.
# 2 AZ Benson 1344. 0.449 0.402 1386. 1429. 1474. 1520. 1568. 1617.
# 3 AZ Buckeye 34155. 0.449 0.402 35227. 36331. 37471. 38646. 39859. 41109.
现在使用 `gather()` 将各年份列转换为长格式,我们可以通过下面的代码
# Compute the base value that each year starts from.
#
# The year_* growth columns are reshaped to long form (one row per
# state / city / year) and two columns are derived:
#   previous_year - growth rate of the preceding year (NA for the first year)
#   year_base     - `base` compounded by the growth of ALL preceding years
data_cagr <- data_set_final %>%
  group_by(state, city) %>%
  gather(year, perc, -state, -city, -base) %>%
  mutate(year = parse_number(year)) %>%
  # lag() and cumprod() below rely on chronological order within each group.
  arrange(year) %>%
  mutate(
    previous_year = lag(perc),
    # Cumulative growth factor up to the end of the previous year. The first
    # year has no history, so its factor is 1 and year_base equals base.
    # Multiplying base by (1 + previous_year) alone would ignore every year
    # before the previous one and is only correct for the first two years.
    year_base = base * lag(cumprod(1 + perc), default = 1)
  )
data_cagr
# A tibble: 6 x 7
# Groups: state, city [3]
# state city base year perc previous_year year_base
# <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 AZ Bell 170105. 1995 0.449 NA 170105.
# 2 AZ Benson 1344. 1995 0.449 NA 1344.
# 3 AZ Buckeye 34155. 1995 0.449 NA 34155.
# 4 AZ Bell 170105. 1996 0.402 0.449 246417.
# 5 AZ Benson 1344. 1996 0.402 0.449 1947.
# 6 AZ Buckeye 34155. 1996 0.402 0.449 49478.
然后每个月计算正确的值
# Expand every (state, city, year) row of data_cagr into twelve month-end rows.
#   value - year_base compounded for `month` months at the annual rate `perc`
#   date  - last calendar day of the month
data_cagr %>%
  # Drop the state/city grouping so the result is a plain tibble.
  ungroup() %>%
  # One list element holding 1:12 per row; unnest() repeats the row 12 times.
  mutate(month = list(1:12)) %>%
  unnest(month) %>%
  mutate(
    value = year_base * (1 + perc)^(month / 12),
    # First day of the month, plus one month, minus one day = month end.
    # lubridate is namespace-qualified because library(tidyverse) attaches it
    # only from tidyverse 2.0.0 onwards.
    date = lubridate::make_date(year, month, 1L) +
      lubridate::period(1, "month") - lubridate::days(1)
  ) %>%
  select(state, city, date, value)
# A tibble: 72 x 4
# state city date value
# <chr> <chr> <date> <dbl>
# 1 AZ Bell 1995-01-31 175440.
# 2 AZ Bell 1995-02-28 180943.
# 3 AZ Bell 1995-03-31 186618.
# 4 AZ Bell 1995-04-30 192472.
# 5 AZ Bell 1995-05-31 198509.
# 6 AZ Bell 1995-06-30 204735.
# 7 AZ Bell 1995-07-31 211157.
# 8 AZ Bell 1995-08-31 217780.
# 9 AZ Bell 1995-09-30 224611.
# 10 AZ Bell 1995-10-31 231656.
# ... with 62 more rows
数据
# Reproducible sample input: one row per (state, city) with the starting
# `base` value and the annual growth rates for 1995 and 1996.
data_set_final <- data.frame(
  state = rep("AZ", 3),
  city = c("Bell", "Benson", "Buckeye"),
  base = c(170104.516, 1343.781, 34155.261),
  year_1995 = rep(0.448619, 3),
  year_1996 = rep(0.4019198, 3),
  stringsAsFactors = FALSE
)
# Mark the data frame as a tibble, matching the dput() representation.
class(data_set_final) <- c("tbl_df", "tbl", "data.frame")