structure(list(Effective_Date = structure(c(17257, 17563, 17652,
17257, 17257, 17532, 17713, 17348, 17683, 17563, 17775, 17501,
17713, 17683, 17622, 17226, 17744, 17318, 17410, 17471, 17622,
17652, 17652, 17775, 17348, 17287, 17563, 17167, 17198, 17744,
17440, 17257, 17379, 17591, 17501, 17622, 17532, 17652, 17501,
17167, 17287, 17683, 17318, 17622, 17744, 17287, 17622, 17652,
17713, 17167, 17622, 17532, 17652, 17652, 17775, 17198, 17622,
17198, 17652, 17318, 17563, 17287, 17226, 17563, 17318, 17257,
17379, 17379, 17563, 17591, 17563, 17440, 17501, 17471, 17287,
17287, 17652, 17622, 17713, 17318, 17226, 17226, 17410, 17563,
17501, 17348, 17257, 17471, 17379, 17652, 17775, 17226, 17563,
17410, 17410, 17683, 17287, 17348, 17652, 17257), class = "Date"),
Gender = c("Male", "Male", "Male", "Male", "Male", "Male",
"Male", "Male", "Female", "Male", "Male", "Male", "Female",
"Male", "Male", "Male", "Female", "Female", "Male", "Male",
"Male", "Male", "Male", "Female", "Male", "Female", "Male",
"Male", "Male", "Male", "Female", "Male", "Male", "Female",
"Female", "Female", "Female", "Male", "Male", "Female", "Male",
"Male", "Male", "Male", "Male", "Female", "Male", "Female",
"Female", "Male", "Female", "Male", "Male", "Female", "Female",
"Female", "Female", "Male", "Female", "Female", "Female",
"Female", "Male", "Female", "Female", "Male", "Male", "Female",
"Male", "Female", "Male", "Female", "Female", "Female", "Male",
"Female", "Female", "Male", "Male", "Female", "Male", "Male",
"Male", "Female", "Male", "Male", "Female", "Female", "Male",
"Female", "Male", "Female", "Male", "Male", "Male", "Male",
"Male", "Male", "Female", "Male"), Location = c("US", "US",
"US", "US", "US", "US", "US", "US", "US", "US", "India",
"India", "US", "US", "US", "US", "US", "US", "US", "India",
"India", "US", "US", "US", "India", "US", "US", "US", "US",
"India", "US", "India", "India", "US", "US", "US", "India",
"US", "India", "US", "US", "India", "US", "India", "US",
"India", "US", "US", "US", "US", "US", "US", "US", "India",
"US", "US", "US", "US", "US", "US", "US", "India", "US",
"US", "US", "US", "US", "US", "India", "India", "US", "US",
"India", "US", "US", "US", "US", "US", "US", "US", "US",
"US", "US", "US", "US", "US", "US", "US", "India", "US",
"US", "US", "US", "India", "US", "US", "India", "US", "US",
"India")), row.names = c(NA, -100L), class = c("tbl_df",
"tbl", "data.frame"))
# A tibble: 100 x 3
Effective_Date Gender Location
<date> <chr> <chr>
1 2018-09-01 Male India
2 2018-05-01 Male India
3 2018-03-01 Male US
4 2018-03-01 Male US
5 2017-06-01 Female US
6 2018-09-01 Male US
7 2017-03-01 Female India
8 2017-11-01 Male US
9 2017-10-01 Female US
10 2018-09-01 Female US
我正在尝试按日期和国家/地区计算女性人口的百分比。因此输出应显示为“在2018年9月1日,美国女性占45%,印度女性占47%”
当我运行以下代码时:
female = tbl %>% filter(Gender %in% c("Male",'Female')) %>%
group_by(Effective_Date,Gender,Location) %>%
summarise(n = n(),freq=n/sum(n)) %>% filter(Gender == "Female")
我的频率全部恢复为1。我觉得我缺少一些非常基本的东西。如何再次按日期和位置分组以计算频率?
注意:filter()位于其中,以避免任何“未声明”的