必需的输出
这是必需的输出(数字可能不同)。
City Res Pop Pop1
Total 4503739 4455
State Urban 3003948 2966
State Rural 1499791 1489
Total 1000915 986
A Urban 500414 493
A Rural 500501 494
Total 999938 1009
B Urban 499922 497
B Rural 500016 512
Total 1000912 976
C Urban 501638 493
C Rural 499274 483
R
代码是
City <- rep(LETTERS[1:3], each = 2)
Res <- factor(rep(c("Urban", "Rural"), times = length(City)/2))
set.seed(12345)
Pop <- rpois(n = length(City), lambda = 500000)
Pop1 <- rpois(n = length(City), lambda = 500)
df <- data.frame(City, Res, Pop, Pop1)
df
library(tidyverse)
df %>%
group_by(Res) %>%
summarise_if(is.numeric, sum, na.rm = TRUE) %>%
mutate(City = "State") %>%
bind_rows(df) %>%
select(City, everything()) %>%
ungroup(Res) %>%
group_by(., City) %>%
bind_rows(
group_by(., Res) %>%
summarise(Pop = sum(Pop), Pop1 = sum(Pop1)),
. ) %>%
select(City, everything())
# A tibble: 10 x 4
City Res Pop Pop1
<chr> <fct> <int> <int>
1 NA Rural 2999582 2978
2 NA Urban 3003948 2966
3 State Rural 1499791 1489
4 State Urban 1501974 1483
5 A Urban 500414 493
6 A Rural 500501 494
7 B Urban 499922 497
8 B Rural 500016 512
9 C Urban 501638 493
10 C Rural 499274 483
我想知道如何更有效地获得所需的输出。谢谢
sessionInfo
sessionInfo()
R version 3.6.0 (2019-04-26)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 18.04.2 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/atlas/libblas.so.3.10.3
LAPACK: /usr/lib/x86_64-linux-gnu/atlas/liblapack.so.3.10.3
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
[3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
[5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
[7] LC_PAPER=en_US.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] forcats_0.4.0 stringr_1.4.0 dplyr_0.8.1
[4] purrr_0.3.2 readr_1.3.1 tidyr_0.8.3.9000
[7] tibble_2.1.1 ggplot2_3.1.1 tidyverse_1.2.1.9000
loaded via a namespace (and not attached):
[1] Rcpp_1.0.1 cellranger_1.1.0 pillar_1.4.0 compiler_3.6.0
[5] dbplyr_1.4.0 plyr_1.8.4 tools_3.6.0 zeallot_0.1.0
[9] lubridate_1.7.4 jsonlite_1.6 nlme_3.1-140 gtable_0.3.0
[13] lattice_0.20-38 pkgconfig_2.0.2 rlang_0.3.4.9003 reprex_0.3.0
[17] cli_1.1.0 DBI_1.0.0 rstudioapi_0.10 haven_2.1.0
[21] withr_2.1.2 xml2_1.2.0.9000 httr_1.4.0 fs_1.3.1
[25] generics_0.0.2 vctrs_0.1.0.9003 hms_0.4.2 grid_3.6.0
[29] tidyselect_0.2.5 glue_1.3.1 R6_2.4.0 fansi_0.4.0
[33] readxl_1.3.1 modelr_0.1.4 magrittr_1.5 backports_1.1.4
[37] scales_1.0.0 rvest_0.3.4 assertthat_0.2.1 colorspace_1.4-1
[41] utf8_1.1.4 stringi_1.4.3 lazyeval_0.2.2 munsell_0.5.0
[45] broom_0.5.2 crayon_1.3.4
答案 0 :(得分:1)
library(tidyverse)
df %>%
select(Res, Pop, Pop1) %>%
group_by(Res) %>%
summarise_all(sum) %>%
bind_rows(df) %>%
mutate(City = fct_explicit_na(City, "State")) %>% # from forcats: renames NA as factor
select(City, Res, Pop, Pop1)
# A tibble: 8 x 4
City Res Pop Pop1
<fct> <fct> <int> <int>
1 State Rural 1499791 1489
2 State Urban 1501974 1483
3 A Urban 500414 493
4 A Rural 500501 494
5 B Urban 499922 497
6 B Rural 500016 512
7 C Urban 501638 493
8 C Rural 499274 483
答案 1 :(得分:1)
这里是add_row
library(tidyverse)
df %>%
group_split(Res) %>%
map_df(~ add_row(., City = "State", Res = first(.$Res),
Pop = sum(.$Pop), Pop1 = sum(.$Pop1)) %>%
add_row(., City = 'Total', Res = first(.$Res),
Pop = sum(.$Pop), Pop1 = sum(.$Pop1)))
# A tibble: 10 x 4
# City Res Pop Pop1
# <fct> <fct> <int> <int>
# 1 A Rural 500501 494
# 2 B Rural 500016 512
# 3 C Rural 499274 483
# 4 State Rural 1499791 1489
# 5 Total Rural 2999582 2978
# 6 A Urban 500414 493
# 7 B Urban 499922 497
# 8 C Urban 501638 493
# 9 State Urban 1501974 1483
#10 Total Urban 3003948 2966
或者另一个选择是rollup
中的data.table
library(data.table)
f1 <- function(dat), rollup(dat, lapply(.SD, sum), by = "Res",
.SDcols = Pop:Pop1)
setDT(df)
out1 <- rbind(f1(df)[-.N][, City := "State"], df)
rbind(f1(out1)[-.N][, City := "Total"], out1)
答案 2 :(得分:1)
这是基于purrr::map_df
和split
的选项。我们可以使用 City split
df,然后遍历每个 City :在每个City的顶部绑定 Total 行,总和{ {1}}和Pop
Pop1