如何将group_by与summarise和summarise_all结合使用?

时间:2019-06-20 21:31:31

标签: r group-by dplyr tidyverse

   x  y
1  1  1
2  3  2
3  2  3
4  3  4
5  2  5
6  4  6
7  5  7
8  2  8
9  1  9
10 1 10
11 3 11
12 4 12

以上是输入内容的一部分。

让我们假设它还有很多其他列

我要:

  1. group_by x
  2. 对 y 按求和(sum)进行汇总
  3. 对于所有其他列,我想通过仅取第一个值来进行总结

3 个答案:

答案 0 :(得分:3)

下面的方法把问题拆成两部分分别汇总,再把两个结果合并起来:

library(dplyr)
# Summarise y and the remaining columns separately, then join the two
# per-group results back together on the grouping key x.
left_join(
  # y is treated specially: total of y within each group of x
  df %>%
    group_by(x) %>%
    summarise(sum_y = sum(y)),
  # every other column: drop y, then keep the first value seen in each group
  df %>%
    group_by(x) %>%
    select(-y) %>%
    summarise_all(first),
  # Name the key explicitly so the join does not depend on whichever column
  # names the two summaries happen to share (and does not print a
  # "Joining, by = ..." message).
  by = "x"
)

# A tibble: 5 x 3
      x sum_y     z
  <int> <int> <int>
1     1    20     1
2     2    16     3
3     3    17     2
4     4    18     2
5     5     7     3

样本数据:

# Sample input for the example above: x is the grouping key, y is the column
# that gets summed, and z stands in for the "other" columns that are
# summarised by taking their first value.
df <- read.table(
  header = TRUE,            # spelled out: T/F are ordinary, reassignable variables
  stringsAsFactors = FALSE,
  text = "x  y z
        1  1 1
        3  2 2
        2  3 3
        3  4 4
        2  5 1
        4  6 2
        5  7 3
        2  8 4
        1  9 1
        1 10 2
        3 11 3
        4 12 4"
)

答案 1 :(得分:1)

library(dplyr)

# Summarise every non-grouping column except y with one function, summarise y
# with another, then put the two per-group results side by side.
# mean is the example summary used here; pass first instead of mean to keep
# the first value of each group.
df1 %>%
  group_by(x) %>%
  # summarise_at() replaces the deprecated summarise_each(); the named list
  # gives the output columns an "_avg" suffix
  summarise_at(vars(-y), list(avg = mean)) %>%
  bind_cols(
    # bind_cols() matches rows by position only, so this relies on both
    # summaries returning one row per x in the same order
    df1 %>%
      group_by(x) %>%
      summarise_at(vars(y), sum) %>% # bare function instead of deprecated funs()
      select(-x)                     # x is already present on the left-hand side
  )

#> # A tibble: 5 x 4
#>       x r_avg r.1_avg     y
#>   <int> <dbl>   <dbl> <int>
#> 1     1  6.67    6.67    20
#> 2     2  5.33    5.33    16
#> 3     3  5.67    5.67    17
#> 4     4  9       9       18
#> 5     5  7       7        7

由 reprex 包(v0.3.0)于 2019-06-20 创建

数据:

# Sample input for the example above: r is a row id, x the grouping key and
# y the column to be summed.
df1 <- read.table(
  header = TRUE, # spelled out: T is an ordinary, reassignable variable
  text = "
r   x  y
1  1  1
2  3  2
3  2  3
4  3  4
5  2  5
6  4  6
7  5  7
8  2  8
9  1  9
10 1 10
11 3 11
12 4 12"
)

# Reorder to x, y, r and select r a second time so there is more than one
# "other" column to summarise; the repeated column is renamed r.1.
df1 <- df1[, c(2, 3, 1, 1)]

答案 2 :(得分:1)

library(tidyverse)
# Sample data frame: x is the grouping key, y the column to be summed.
df <- tribble(
  ~x, ~y,
  1,  1,
  3,  2,
  2,  3,
  3,  4,
  2,  5,
  4,  6,
  5,  7,
  2,  8,
  1,  9,
  1, 10,
  3, 11,
  4, 12
)

# Add another column for the example: a random permutation of the row numbers.
# seq_len() is used rather than 1:nrow(df), which would yield c(1, 0) for an
# empty data frame. No seed is set, so z differs from run to run.
df <- df %>%
  add_column(z = sample(seq_len(nrow(df))))

df


# With a single extra column, summarise it by name in the same call:
# total y within each group of x, and keep the first z of that group.
df %>%
  group_by(x) %>%
  summarise(
    sum_y = sum(y),
    z_1st = first(z)
  )


# With several extra columns, apply one helper to all of them at once.
# f returns the first element of the vector it is given.
f <- function(x) {
  x[1]
}

# Summarise every non-grouping column other than y with f, one row per x.
df %>%
  group_by(x) %>%
  summarise_at(vars(-y), f)