我正在尝试在长格式数据上使用tidyverse的数据操作工具来完成一些估算,即计算下面这个等式:
[(SA_2016/TOTPOP_2016 - SA_2014/TOTPOP_2014 ) / (2016 - 2014)] * (2020 - 2016) + SA_2016/TOTPOP_2016 * TARGET_2020
因此我将需要计算4个变量(我认为)
code
是该地区的名称;
year
是2014年或2016年;
TN_2020
是2020年的目标人口;
SA_2016
-> 2016年第i区的总人口,group_by(country, year, code)
TOTPOP_2016
-> 2016年全国总人口,group_by(country, year)
SA_2014
- > 2014年第i区的总人口;
TOTPOP_2014
- > 2014年全国总人口;
计算四个变量的代码:
# Attach group totals to every row of `dt` (row count is unchanged because
# mutate() is used rather than summarise()).
dt %>%
  # SA_t: sum of `value` within each country / year / code group.
  group_by(country, year, code) %>%
  mutate(SA_t = sum(value)) %>%
  ungroup() %>%
  # TOTPOP_t and TN_2020: sums within each country / year group.
  group_by(country, year) %>%
  mutate(TOTPOP_t = sum(value),
         # The column in `dt` is named TARGET_2020; R names are
         # case-sensitive, so `Target_2020` would not be found.
         TN_2020 = sum(TARGET_2020),
...
接下来我该如何进行下一步,即计算:
factor1= [(SA_2016/TOTPOP_2016-SA_2014/TOTPOP_2014)/2]*4 + SA_2016/TOTPOP_2016 * TN_2020 ?
DPUT:
# Example data set in dput() form: a data.frame with 72 rows and 8 columns.
#   country     - factor with levels "ZAMBIA" and "ZIMBABWE"
#   adm_level   - integer (2 or 3 in this sample)
#   code        - factor identifying the region (6 levels)
#   year        - integer, 2014 or 2016
#   value       - integer count for the row
#   sex         - factor with levels "F" and "M"
#   age         - integer (0, 5 or 10 in this sample)
#   TARGET_2020 - numeric; note the all-uppercase column name
dt =
structure(list(country = structure(c(2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L), .Label = c("ZAMBIA", "ZIMBABWE"), class = "factor"),
adm_level = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L), code = structure(c(1L, 4L, 5L, 1L, 4L, 5L, 1L, 4L, 5L,
1L, 4L, 5L, 1L, 4L, 5L, 1L, 4L, 5L, 1L, 4L, 5L, 1L, 4L, 5L,
1L, 4L, 5L, 1L, 4L, 5L, 1L, 4L, 5L, 1L, 4L, 5L, 2L, 3L, 6L,
2L, 3L, 6L, 2L, 3L, 6L, 2L, 3L, 6L, 2L, 3L, 6L, 2L, 3L, 6L,
2L, 3L, 6L, 2L, 3L, 6L, 2L, 3L, 6L, 2L, 3L, 6L, 2L, 3L, 6L,
2L, 3L, 6L), .Label = c("BULAWAYOBULAWAYONA", "CENTRALCHIBOMBOCHISAMBA",
"LUSAKALUSAKAKANYAMA", "MASVINGOZAKANA", "MIDLANDSZVISHAVANE URBANNA",
"WESTERNSHANG'OMBOSINJEMBELA"), class = "factor"), year = c(2014L,
2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L,
2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2016L,
2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L,
2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2014L,
2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L,
2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2016L,
2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L,
2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L),
value = c(47708L, 16217L, 2684L, 39013L, 15588L, 1896L, 36164L,
17131L, 1568L, 46676L, 15333L, 2644L, 39502L, 15391L, 2020L,
39857L, 16328L, 1826L, 49727L, 17058L, 2789L, 38842L, 15658L,
1882L, 36332L, 17361L, 1570L, 48645L, 16127L, 2747L, 39321L,
15458L, 2004L, 40036L, 16546L, 1828L, 13437L, 22551L, 10394L,
11123L, 18626L, 8552L, 9148L, 14996L, 7195L, 13162L, 21530L,
10676L, 10689L, 18522L, 8404L, 8681L, 15839L, 7082L, 13726L,
23035L, 10616L, 11520L, 19290L, 8856L, 9464L, 15514L, 7443L,
13444L, 21989L, 10902L, 11067L, 19176L, 8699L, 8978L, 16381L,
7323L), sex = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = c("F", "M"), class = "factor"),
age = c(0L, 0L, 0L, 5L, 5L, 5L, 10L, 10L, 10L, 0L, 0L, 0L,
5L, 5L, 5L, 10L, 10L, 10L, 0L, 0L, 0L, 5L, 5L, 5L, 10L, 10L,
10L, 0L, 0L, 0L, 5L, 5L, 5L, 10L, 10L, 10L, 0L, 0L, 0L, 5L,
5L, 5L, 10L, 10L, 10L, 0L, 0L, 0L, 5L, 5L, 5L, 10L, 10L,
10L, 0L, 0L, 0L, 5L, 5L, 5L, 10L, 10L, 10L, 0L, 0L, 0L, 5L,
5L, 5L, 10L, 10L, 10L), TARGET_2020 = c(1563.028, 1563.028,
1563.028, 1358.582, 1358.582, 1358.582, 1202.966, 1202.966,
1202.966, 1267.404, 1267.404, 1267.404, 1235.774, 1235.774,
1235.774, 1079.602, 1079.602, 1079.602, 1563.028, 1563.028,
1563.028, 1358.582, 1358.582, 1358.582, 1202.966, 1202.966,
1202.966, 1267.404, 1267.404, 1267.404, 1235.774, 1235.774,
1235.774, 1079.602, 1079.602, 1079.602, 1534.3, 1534.3, 1534.3,
1339.217, 1339.217, 1339.217, 1188.892, 1188.892, 1188.892,
1257.031, 1257.031, 1257.031, 1227.773, 1227.773, 1227.773,
1074.339, 1074.339, 1074.339, 1534.3, 1534.3, 1534.3, 1339.217,
1339.217, 1339.217, 1188.892, 1188.892, 1188.892, 1257.031,
1257.031, 1257.031, 1227.773, 1227.773, 1227.773, 1074.339,
1074.339, 1074.339)), .Names = c("country", "adm_level",
"code", "year", "value", "sex", "age", "TARGET_2020"), class = "data.frame",
row.names = c(NA,
-72L))