dplyr获得比例

时间:2017-05-19 13:29:57

标签: r dplyr

我一直在尝试切换到dplyr和tidyr进行数据操作(而不是data.table和excel)。我有一个长格式的数据框,如下所示:

TIME  GEO geo_num  sex_num AGE    Value
2014 EU28       1  1        0     13486357
2014 EU28       1  1        5     13683976
2014 EU28       1  1       10     13430899
2014 EU28       1  1       15     13945295
2014 EU28       1  1       20     15417002
2014 EU28       1  1       25     16233349

我想得到的是每个年龄组(AGE)在其所属性别(sex_num)总数中所占的比例:

TIME  GEO geo_num  sex_num AGE    Value     percent
2014 EU28       1  1        0     13486357  0.537
2014 EU28       1  1        5     13683976  0.548
2014 EU28       1  1       10     13430899  0.537
2014 EU28       1  1       15     13945295  0.555
2014 EU28       1  1       20     15417002  0.613
2014 EU28       1  1       25     16233349  0.646

下面的代码可以得到按性别汇总的总数(也就是我的分母):

# Total Value for each geo_num / sex_num / TIME combination (the denominator).
# The result is not assigned, so it is only printed.
mydata %>%
  group_by(geo_num, sex_num, TIME) %>%
  summarise(total_sex = sum(Value))

但我不太清楚如何用这个总数来计算百分比:

# Attempt to compute each row's share of its sex total.
# NOTE(review): total_sex is not among the columns of the sample data, and the
# summarize() result above is never assigned or joined back, so this mutate()
# presumably fails with "object 'total_sex' not found" -- confirm.
mydata %>%
group_by(sex_num, TIME, geo_num, AGE) %>%
mutate(freq = Value / total_sex)

有什么想法吗?

这是数据的子集

# Sample data: a 40-row data.frame literal (looks like dput() output) with
# columns X, TIME, GEO, geo_num, GEO.1, SEX, sex_num, AGE, Value.
# Rows 1-20 carry SEX level "Males" (sex_num 1); rows 21-40 carry "Females"
# (sex_num 2). The expression is not assigned here; presumably it is meant to
# be stored as `mydata` -- confirm.
# NOTE(review): within each sex, Value at AGE 85.99 equals the sum of the
# values at AGE 85 and AGE 90.99 (2718258 + 1038657 = 3756915 and
# 5480758 + 2967920 = 8448678), so the 85.99 rows look like aggregates;
# summing every row of a sex may double count them -- confirm.
structure(list(X = 1:40, TIME = c(2014L, 2014L, 2014L, 2014L, 
2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 
2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 
2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 
2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L
), GEO = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "EU28", class = "factor"), 
geo_num = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), GEO.1 = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "European Union (28 countries)", class = "factor"), 
SEX = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = c("Females", "Males"), class = "factor"), sex_num = c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), AGE = c(0, 5, 10, 15, 
20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 85.99, 
90.99, 0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 
65, 70, 75, 80, 85, 85.99, 90.99), Value = c(13486357L, 13683976L, 
13430899L, 13945295L, 15417002L, 16233349L, 17074499L, 17647415L, 
18386977L, 18914596L, 17914397L, 16416147L, 14854062L, 12613840L, 
10126857L, 8102599L, 5377238L, 2718258L, 3756915L, 1038657L, 
12805779L, 12992860L, 12754636L, 13227105L, 14824565L, 15915997L, 
16894408L, 17437631L, 18269939L, 18931544L, 18244203L, 17188595L, 
16064384L, 14111303L, 12145307L, 10862721L, 8471793L, 5480758L, 
8448678L, 2967920L)), .Names = c("X", "TIME", "GEO", "geo_num", 
"GEO.1", "SEX", "sex_num", "AGE", "Value"), class = "data.frame", row.names = c(NA, -40L))

2 个答案:

答案 0 :(得分:3)

下面这样的写法应该可以得到你想要的结果:

# Add each group's total and every row's share of that total, in percent.
# Grouping uses the time, geography and sex identifier columns, so total_sex is
# the sum of Value over all AGE rows that share those identifiers.
mydata <- mydata %>%
  group_by(TIME, GEO, geo_num, GEO.1, SEX, sex_num) %>%
  mutate(
    total_sex = sum(Value),
    percent = Value / total_sex * 100
  ) %>%
  # Drop the grouping so later verbs on mydata are not silently run per group.
  ungroup()

> head(mydata)

# A tibble: 6 x 11
X  TIME    GEO    SEX   AGE    Value total_sex  percent
1  2014   EU28  Males     0 13486357 251139335 5.370070
2  2014   EU28  Males     5 13683976 251139335 5.448759
3  2014   EU28  Males    10 13430899 251139335 5.347987
4  2014   EU28  Males    15 13945295 251139335 5.552812
5  2014   EU28  Males    20 15417002 251139335 6.138824
6  2014   EU28  Males    25 16233349 251139335 6.463881
# ... with 3 more variables

答案 1 :(得分:0)

我们可以使用data.table

library(data.table)

# Convert mydata to a data.table in place.
setDT(mydata)

# Add `percent` by reference: 10 * each row's share of its group's total Value.
# Groups are the columns at positions 2:7 (TIME, GEO, geo_num, GEO.1, SEX,
# sex_num given the column order of the question's sample data).
# NOTE(review): the c() wrapper around the column names is kept on purpose;
# some data.table versions appear to special-case by = c(...) -- confirm
# before simplifying.
mydata[, percent := 10 * Value / sum(Value), by = c(names(mydata)[2:7])]

head(mydata)
#   X TIME  GEO geo_num                         GEO.1   SEX sex_num AGE    Value   percent
#1: 1 2014 EU28       1 European Union (28 countries) Males       1   0 13486357 0.5370070
#2: 2 2014 EU28       1 European Union (28 countries) Males       1   5 13683976 0.5448759
#3: 3 2014 EU28       1 European Union (28 countries) Males       1  10 13430899 0.5347987
#4: 4 2014 EU28       1 European Union (28 countries) Males       1  15 13945295 0.5552812
#5: 5 2014 EU28       1 European Union (28 countries) Males       1  20 15417002 0.6138824
#6: 6 2014 EU28       1 European Union (28 countries) Males       1  25 16233349 0.6463881