这是之前一个问题的后续提问。我无法得到所需的输出。
# Toy data: 24 rows, i.e. 12 cities with an Urban and a Rural row each,
# spread unevenly over the five regions A-E.
Reg <- rep(LETTERS[1:5], times = c(4, 6, 4, 4, 6))
City <- rep(letters[seq_len(12)], each = 2)
Res <- rep_len(c("Urban", "Rural"), length(Reg))

# Seed the RNG so both Poisson draws below are reproducible; Pop is drawn
# before Pop1, and that order determines the values.
set.seed(12345)
Pop <- rpois(n = length(Reg), lambda = 500000)
Pop1 <- rpois(n = length(Reg), lambda = 500)

df <- data.frame(Reg = Reg, City = City, Res = Res, Pop = Pop, Pop1 = Pop1)
# Stack one "Total" row per Reg on top of the raw rows, sort by Reg, keep the
# four columns of interest, and drop rows that are exact duplicates.
df3 <- bind_rows(
  df %>%
    group_by(Reg) %>%
    summarise(Pop = sum(Pop), Pop1 = sum(Pop1), Res = "Total"),
  df
) %>%
  arrange(Reg) %>%
  select(Reg, Res, Pop, Pop1) %>%
  distinct()
df3
# A tibble: 29 x 4
Reg Res Pop Pop1
<fctr> <chr> <int> <int>
1 A Total 2000853 2135
2 A Urban 500414 549
3 A Rural 500501 545
4 A Urban 499922 536
5 A Rural 500016 505
6 B Total 3000844 2938
7 B Urban 501638 510
8 B Rural 499274 492
9 B Urban 499804 506
10 B Rural 499825 508
# ... with 19 more rows
然后,还需要用下面的代码来计算每个 Reg 中农村和城市各自的总和:
df3 %>% group_by(Reg, Res) %>% summarise(sum(Pop), sum(Pop1))
# A tibble: 15 x 4
# Groups: Reg [?]
Reg Res `sum(Pop)` `sum(Pop1)`
<fctr> <chr> <int> <int>
1 A Rural 1000517 1050
2 A Total 2000853 2135
3 A Urban 1000336 1085
4 B Rural 1499485 1446
5 B Total 3000844 2938
6 B Urban 1501359 1492
7 C Rural 999234 987
8 C Total 1997259 2007
9 C Urban 998025 1020
10 D Rural 998760 1058
11 D Total 2000712 2052
12 D Urban 1001952 994
13 E Rural 1501848 1547
14 E Total 2999304 3050
15 E Urban 1497456 1503
此外,Res 列的排列顺序也与所需的不同。
我希望能以更高效、更紧凑的方式完成所有这些计算。
所需的输出:
Reg Res Pop Pop1
1 A Total 2000853 2135
2 A Urban 1000336 1085
3 A Rural 1000517 1050
4 B Total 3000844 2938
5 B Urban 1501359 1492
6 B Rural 1499485 1446
答案 0 :(得分:3)
不确定这样是否更紧凑,但通过调换两次 group_by 操作的顺序,可以省去好几行代码:
# Step 1: sum Pop and Pop1 for every Reg/Res combination.
# Step 2: collapse that result once more by Reg to get the "Total" rows and
#         bind them ABOVE the per-Res rows; the bare `.` passed as the second
#         argument of bind_rows() is the step-1 result itself.
df %>%
  group_by(Reg, Res) %>%
  summarise(Pop = sum(Pop), Pop1 = sum(Pop1)) %>%
  bind_rows(
    summarise(
      group_by(., Reg),
      Pop = sum(Pop), Pop1 = sum(Pop1), Res = "Total"
    ),
    .
  ) %>%
  arrange(Reg)
# A tibble: 15 x 4
# Reg Pop Pop1 Res
# <chr> <int> <int> <chr>
# 1 A 2000853 2135 Total
# 2 A 1000517 1050 Rural
# 3 A 1000336 1085 Urban
# 4 B 3000844 2938 Total
# 5 B 1499485 1446 Rural
# 6 B 1501359 1492 Urban
# 7 C 1997259 2007 Total
# 8 C 999234 987 Rural
# 9 C 998025 1020 Urban
# 10 D 2000712 2052 Total
# 11 D 998760 1058 Rural
# 12 D 1001952 994 Urban
# 13 E 2999304 3050 Total
# 14 E 1501848 1547 Rural
# 15 E 1497456 1503 Urban
答案 1 :(得分:0)
以下是使用 tidyverse 的一种做法:
library(tidyverse)
# Summarise once per grouping set (Reg + Res, then Reg alone) and stack the
# pieces. The Reg-only piece has no Res column, so its Res is NA after
# binding; relabel those rows "Total" and sort them first within each Reg.
list(c("Reg", "Res"), "Reg") %>%
  map(function(grp) {
    df %>%
      group_by(!!!syms(grp)) %>%
      summarise_at(vars("Pop", "Pop1"), sum)
  }) %>%
  bind_rows() %>%
  mutate(Res = replace(as.character(Res), is.na(Res), "Total")) %>%
  arrange(Reg, Res != "Total")
# A tibble: 15 x 4
# Groups: Reg [5]
# Reg Res Pop Pop1
# <fctr> <chr> <int> <int>
# 1 A Total 2000853 2135
# 2 A Rural 1000517 1050
# 3 A Urban 1000336 1085
# 4 B Total 3000844 2938
# 5 B Rural 1499485 1446
# 6 B Urban 1501359 1492
# 7 C Total 1997259 2007
# 8 C Rural 999234 987
# 9 C Urban 998025 1020
#10 D Total 2000712 2052
#11 D Rural 998760 1058
#12 D Urban 1001952 994
#13 E Total 2999304 3050
#14 E Rural 1501848 1547
#15 E Urban 1497456 1503
或者使用 data.table:
library(data.table)
# Aggregate once per grouping set (Reg + Res, then Reg alone) and stack the
# results. Rows from the Reg-only pass have no Res column, so fill = TRUE
# leaves Res as NA there; those rows are then labelled "Total".
rbindlist(
  Map(
    function(grp) {
      # `df` was built with data.frame(), and `[.data.frame` rejects the
      # `by` / `.SDcols` arguments. Convert first; as.data.table() returns a
      # copy, so `df` itself stays a plain data.frame for the other snippets.
      as.data.table(df)[, lapply(.SD, sum), by = c(grp), .SDcols = Pop:Pop1]
    },
    list(c("Reg", "Res"), "Reg")
  ),
  fill = TRUE
)[
  is.na(Res), Res := "Total"
][
  # FALSE sorts before TRUE, so the "Total" row comes first within each Reg.
  order(Reg, Res != "Total")
]
# Reg Res Pop Pop1
# 1: A Total 2000853 2135
# 2: A Urban 1000336 1085
# 3: A Rural 1000517 1050
# 4: B Total 3000844 2938
# 5: B Urban 1501359 1492
# 6: B Rural 1499485 1446
# 7: C Total 1997259 2007
# 8: C Urban 998025 1020
# 9: C Rural 999234 987
#10: D Total 2000712 2052
#11: D Urban 1001952 994
#12: D Rural 998760 1058
#13: E Total 2999304 3050
#14: E Urban 1497456 1503
#15: E Rural 1501848 1547