我试图计算分组数据的年增长率;按1)组,2)产品类别和3)年分组。
我尝试按这三个变量进行分组,然后按 ((x / dplyr::lag(x, 1)) - 1) * 100 计算增长率。但是,这样做得到的新列全部为 NA。
# Aggregate exports and n per (group, sna, t), then compute the
# year-over-year percentage change of exports.
# NOTE: `t` is still a grouping variable when mutate() runs, so after the
# summarise() step every group holds exactly one row; lag() therefore has
# no previous row to return and gr.exports is NA for every row.
group_exports_g.rate <- baci_exports %>%
  ungroup() %>%
  group_by(group, sna, t) %>%
  summarise(
    exports = sum(exports),
    n = sum(n)
  ) %>%
  ungroup() %>%
  group_by(group, sna, t) %>%
  arrange(group, sna, t) %>%
  mutate(gr.exports = 100 * ((exports / lag(exports, n = 1)) - 1))
# Reproducible sample of `baci_exports` as printed by dput():
# 20 rows, all with t = 1995, stored as a grouped_df grouped by t and i.
dput(baci_exports)
structure(list(t = c(1995, 1995, 1995, 1995, 1995, 1995, 1995,
1995, 1995, 1995, 1995, 1995, 1995, 1995, 1995, 1995, 1995, 1995,
1995, 1995), i = c(4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
8, 12, 12, 12, 12, 12), sna = c("Capital goods", "Consumer goods",
"Fuels and lubricants", "Intermediate goods", "Parts and accessories of capital goods",
"Passenger motor cars", "Primary goods", "Capital goods", "Consumer goods",
"Fuels and lubricants", "Goods not elsewhere specified", "Intermediate goods",
"Parts and accessories of capital goods", "Passenger motor cars",
"Primary goods", "Capital goods", "Consumer goods", "Fuels and lubricants",
"Intermediate goods", "Parts and accessories of capital goods"
), exports = c(1195.2624224154, 22997.0533036558, 5.3693833059,
9720.564817782, 183.9809856813, 111.2556313572, 47976.2051093033,
5711.1685900189, 120659.569187797, 2264.5410811663, 75.806, 60184.2530566294,
2249.821, 280.439, 36458.4499193458, 30316.017597217, 150361.471507771,
8686043.01945958, 448718.59940394, 6633.9115459232), n = c(36L,
207L, 1L, 137L, 30L, 5L, 58L, 143L, 654L, 5L, 3L, 322L, 57L,
12L, 143L, 270L, 364L, 106L, 563L, 150L), name_baci = c("Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Albania", "Albania", "Albania", "Albania", "Albania",
"Albania", "Albania", "Albania", "Algeria", "Algeria", "Algeria",
"Algeria", "Algeria"), cntry = c("Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Albania", "Albania", "Albania", "Albania", "Albania", "Albania",
"Albania", "Albania", "Algeria", "Algeria", "Algeria", "Algeria",
"Algeria"), group = c("Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "SE",
"SE", "SE", "SE", "SE", "SE", "SE", "SE", "Algeria", "Algeria",
"Algeria", "Algeria", "Algeria")), row.names = c(NA, -20L), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), groups = structure(list(t = c(1995,
1995, 1995), i = c(4, 8, 12), .rows = list(1:7, 8:15, 16:20)), row.names = c(NA,
-3L), class = c("tbl_df", "tbl", "data.frame"), .drop = TRUE))
补充数据(阿富汗“Capital goods”,1995–2017年):
structure(list(t = c(1995, 1996, 1997, 1998, 1999, 2000, 2001,
2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012,
2013, 2014, 2015, 2016, 2017), i = c(4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4), sna = c("Capital goods",
"Capital goods", "Capital goods", "Capital goods", "Capital goods",
"Capital goods", "Capital goods", "Capital goods", "Capital goods",
"Capital goods", "Capital goods", "Capital goods", "Capital goods",
"Capital goods", "Capital goods", "Capital goods", "Capital goods",
"Capital goods", "Capital goods", "Capital goods", "Capital goods",
"Capital goods", "Capital goods"), exports = c(1195.2624224154,
1487.4614064276, 1525.1489543903, 1598.3509917338, 2687.9284436967,
1754.2923408387, 5913.8963941332, 2619.5146133123, 2915.904116471,
6412.63136988, 8158.0444400432, 4312.0519543819, 31170.4665315818,
39146.7973036179, 184970.163402516, 20611.8847778549, 28046.1267778067,
12125.1990587805, 15435.0095479273, 15942.6566817083, 10549.8178035657,
9092.5422325593, 7967.3875079918), n = c(36L, 49L, 71L, 50L,
64L, 88L, 107L, 76L, 105L, 138L, 191L, 178L, 175L, 264L, 448L,
306L, 210L, 232L, 306L, 280L, 207L, 180L, 198L), name_baci = c("Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan"), cntry = c("Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan"), group = c("Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan"
)), row.names = c(NA, -23L), class = c("grouped_df", "tbl_df",
"tbl", "data.frame"), groups = structure(list(t = c(1995, 1996,
1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017),
i = c(4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4), .rows = list(1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L, 20L, 21L, 22L, 23L)), row.names = c(NA, -23L), class = c("tbl_df",
"tbl", "data.frame")))
我希望每个组,产品类别和年份的年增长率(百分比变化)。
答案 0 :(得分:2)
正如@Croote所指出的,您的样本数据只有一年,因此计算滞后值(lag)会(正确地)得到NA。我想这正是您想要的结果,因为1995年的增长率无法定义。
为了用两年的数据进行测试,下面构造了一些伪造数据,其中每个值在下一年约为原来的1.5倍(即增长约50%):
/**
 * Looks up a {@code MyInterface} implementation by name through an injected
 * provider.
 *
 * NOTE(review): this Java class appears unrelated to the surrounding R
 * discussion and may have been pulled in by mistake — confirm.
 */
public class ImplementationGetter {
// Injected provider of MyInterface implementations; the injection framework
// is not visible here — TODO confirm which one supplies IterableProvider.
@Inject
private IterableProvider<MyInterface> interfaceProvider;
/**
 * Returns the implementation selected by the given name.
 *
 * @param type_ the name passed to {@code interfaceProvider.named(...)}
 * @return the result of calling {@code get()} on the named provider
 */
public MyInterface getImplementation(final String type_) {
return interfaceProvider.named(type_).get();
}
}
下面的代码用于生成这些伪造数据(第二年的每个值约为第一年的 ~1.5x):
baci_exports2 <- bind_rows(
baci_exports %>% ungroup(),
baci_exports %>%
ungroup() %>%
mutate(t = 1996,
exports = exports * rnorm(n(), mean = 1.5, sd = 0.01))
)
随后的分组汇总代码似乎产生了正确的输出:(请注意,在伪造数据的第二年,每个 gr.exports 都接近50,即增长率约为50%。)
使用OP中的新阿富汗数据进行编辑:
# Sum exports and n for each (group, sna, t) combination. summarise()
# drops the last grouping variable (t), so the result stays grouped by
# group and sna, and lag() looks back one row within each of those series.
# The first year of every series has no predecessor and yields NA.
cgroup_exports_g.rate <- baci_exports2 %>%
  group_by(group, sna, t) %>%
  summarise(
    exports = sum(exports),
    n = sum(n)
  ) %>%
  mutate(gr.exports = 100 * ((exports / lag(exports, n = 1)) - 1)) %>%
  ungroup()
> cgroup_exports_g.rate
# A tibble: 40 x 6
group sna t exports n gr.exports
<chr> <chr> <dbl> <dbl> <int> <dbl>
1 Afghanistan Capital goods 1995 1195. 36 NA
2 Afghanistan Capital goods 1996 1784. 36 49.3
3 Afghanistan Consumer goods 1995 22997. 207 NA
4 Afghanistan Consumer goods 1996 34932. 207 51.9
5 Afghanistan Fuels and lubricants 1995 5.37 1 NA
6 Afghanistan Fuels and lubricants 1996 8.00 1 49.0
7 Afghanistan Intermediate goods 1995 9721. 137 NA
8 Afghanistan Intermediate goods 1996 14647. 137 50.7
9 Afghanistan Parts and accessories of capital goods 1995 184. 30 NA
10 Afghanistan Parts and accessories of capital goods 1996 272. 30 47.9
# ... with 30 more rows
答案 1 :(得分:1)
在这里,您需要为 lag 表达式设置默认值,因为第一个值的滞后值默认为 NA,而这个 NA 会传播到后续的计算中。
因此,请使用 lag(exports, 1, 1)(默认值应根据您自己的目的来决定;这里我仅以 1 为例)。
# Same pipeline as in the question, but lag() is given a default of 1 so
# that a missing previous value no longer produces NA.
# NOTE: `t` is still a grouping variable when mutate() runs, so each group
# holds a single row and every row falls back to the default; gr.exports
# is therefore 100 * (exports / 1 - 1), as the printed output shows.
group_exports_g.rate <- baci_exports %>%
  ungroup() %>%
  group_by(group, sna, t) %>%
  summarise(
    exports = sum(exports),
    n = sum(n)
  ) %>%
  ungroup() %>%
  group_by(group, sna, t) %>%
  arrange(group, sna, t) %>%
  mutate(gr.exports = 100 * ((exports / lag(exports, n = 1, default = 1)) - 1))
group_exports_g.rate
# A tibble: 20 x 6
# Groups: group, sna, t [20]
group sna t exports n gr.exports
<chr> <chr> <dbl> <dbl> <int> <dbl>
1 Afghanistan Capital goods 1995 1195. 36 119426.
2 Afghanistan Consumer goods 1995 22997. 207 2299605.
3 Afghanistan Fuels and lubricants 1995 5.37 1 437.
4 Afghanistan Intermediate goods 1995 9721. 137 971956.
5 Afghanistan Parts and accessories of capital goods 1995 184. 30 18298.
6 Afghanistan Passenger motor cars 1995 111. 5 11026.
7 Afghanistan Primary goods 1995 47976. 58 4797521.
8 Algeria Capital goods 1995 30316. 270 3031502.
9 Algeria Consumer goods 1995 150361. 364 15036047.
10 Algeria Fuels and lubricants 1995 8686043. 106 868604202.
11 Algeria Intermediate goods 1995 448719. 563 44871760.
12 Algeria Parts and accessories of capital goods 1995 6634. 150 663291.
13 SE Capital goods 1995 5711. 143 571017.
14 SE Consumer goods 1995 120660. 654 12065857.
15 SE Fuels and lubricants 1995 2265. 5 226354.
16 SE Goods not elsewhere specified 1995 75.8 3 7481.
17 SE Intermediate goods 1995 60184. 322 6018325.
18 SE Parts and accessories of capital goods 1995 2250. 57 224882.
19 SE Passenger motor cars 1995 280. 12 27944.
20 SE Primary goods 1995 36458. 143 3645745.