R group_by with range(或aggregate)

时间:2017-03-21 02:54:51

标签: r

我有一张这样的表:

City; Class; Number;  Sum
 BE ;    01;    734; 4711
 BE ;    02;    896; 4711
 BE ;    03;   1258; 4711
 BE ;    04;    980; 4711
 BE ;    05;    543; 4711
 BE ;    06;    192; 4711
 BE ;    07;     69; 4711
 BE ;    08;     20; 4711
 BE ;    09;     14; 4711
 BE ;    10;      4; 4711
 BE ;    12;      1; 4711
 FR ;    01;   1213;14258
 FR ;    02;   2217;14258
 FR ;    03;   3369;14258
 FR ;    04;   4037;14258
 FR ;    05;   2117;14258
 FR ;    06;    774;14258
 FR ;    07;    301;14258
 FR ;    08;    124;14258
 FR ;    09;     62;14258
 FR ;    10;     21;14258
 FR ;    11;     11;14258
 FR ;    12;      4;14258
 FR ;    13;      2;14258
 FR ;    14;      3;14258
 FR ;    16;      3;14258

我想在“Class”变量的8个范围内“分组”:

范围1 =“01”,范围2 =“02”,范围3 =“03”,范围4 =“04”,范围5 =“05”,以及3个包含多个值的范围:范围6 =“06”,“07”,“08”,“09”,“10”;范围7 =“11”,“12”,“13”,“14”,“15”;范围8 =“16”,“17”,“18”,“19”,“20”。

是否可以使用group_by执行此操作?

变量“City”和“Class”是字符格式。

非常感谢您的帮助。

输出表应如下所示:

City; Range; Number;  Sum;
 BE ;    R1;    734; 4711;
 BE ;    R2;    896; 4711;
 BE ;    R3;   1258; 4711;
 BE ;    R4;    980; 4711;
 BE ;    R5;    543; 4711;
 BE ;    R6;    299; 4711;
 BE ;    R7;      1; 4711;
 FR ;    R1;   1213;14258;
 FR ;    R2;   2217;14258;
 FR ;    R3;   3369;14258;
 FR ;    R4;   4037;14258;
 FR ;    R5;   2117;14258;
 FR ;    R6;   1282;14258;
 FR ;    R7;     20;14258;
 FR ;    R8;      3;14258;

非常感谢。

3 个答案:

答案 0 :(得分:3)

使用cut,您可以根据breaks值把Class划分为若干范围,并据此创建Range列。然后先按City分组计算Number的总和(即Sum列),再按City和Range分组计算Number的总和,最后去掉重复的行。

# Map every Class value onto one of eight ranges. cut() builds right-closed
# intervals by default, so these breaks yield (0,1], (1,2], (2,3], (3,4],
# (4,5], (5,10], (10,15] and (15,20], labelled R1..R8. as.numeric() makes the
# call work for integer Class values as well as character codes such as "01".
df$Range <- cut(
  as.numeric(df$Class),
  breaks = c(0, 1, 2, 3, 4, 5, 10, 15, 20),
  labels = paste0("R", 1:8)
)
df$Class <- NULL  # Class is superseded by Range

# Total of Number per City, repeated on every row of that city.
df$Sum <- ave(df$Number, df$City, FUN = sum)

# Total of Number per (City, Range) pair. The grouping vectors are passed
# positionally: ave() has no `by` argument, its groups are taken from `...`.
df$Number <- ave(df$Number, df$City, df$Range, FUN = sum)

# All rows of one (City, Range) group are now identical; keep the first of each.
df[!duplicated(df), ]
#    City Number   Sum Range
# 1    BE    734  4711    R1
# 2    BE    896  4711    R2
# 3    BE   1258  4711    R3
# 4    BE    980  4711    R4
# 5    BE    543  4711    R5
# 6    BE    299  4711    R6
# 11   BE      1  4711    R7
# 12   FR   1213 14258    R1
# 13   FR   2217 14258    R2
# 14   FR   3369 14258    R3
# 15   FR   4037 14258    R4
# 16   FR   2117 14258    R5
# 17   FR   1282 14258    R6
# 22   FR     20 14258    R7
# 26   FR      3 14258    R8

数据:

# Example data for the cut()/ave() answer: 11 rows for city "BE" followed by
# 15 rows for city "FR". Class is stored as integer here, and Sum holds the
# per-city total of Number repeated on every row.
rows_per_city <- c(11L, 15L)
df <- data.frame(
  City = rep(c("BE", "FR"), times = rows_per_city),
  Class = c(1:10, 12L, 1:14, 16L),
  Number = c(
    # BE
    734L, 896L, 1258L, 980L, 543L, 192L, 69L, 20L, 14L, 4L, 1L,
    # FR
    1213L, 2217L, 3369L, 4037L, 2117L, 774L, 301L, 124L, 62L, 21L, 11L,
    4L, 2L, 3L, 3L
  ),
  Sum = rep(c(4711L, 14258L), times = rows_per_city),
  stringsAsFactors = FALSE
)
rm(rows_per_city)  # helper only; leave nothing but `df` behind

答案 1 :(得分:2)

我们先创建一个key/value数据集(把每个Class映射到对应的Range),再与原始数据集进行join,然后按“City”和“Range”分组并获取“Number”的sum。

library(dplyr)

# Lookup table from each zero-padded Class code "01".."20" to its range label:
# classes 01-05 map one-to-one onto R1-R5, then 06-10 -> R6, 11-15 -> R7 and
# 16-20 -> R8. The labels carry the "R" prefix so the result matches the
# expected output (R1..R8).
keyDat <- data.frame(
  Class = sprintf("%02d", 1:20),
  range = rep(paste0("R", 1:8), rep(c(1, 5), c(5, 3))),
  stringsAsFactors = FALSE
)

# Attach the range label to every row, then total Number per (City, Range).
# Sum is constant within a city, so its first value is kept as-is.
df1 %>%
  left_join(keyDat, by = "Class") %>%  # explicit key instead of natural join
  group_by(City, Range = range) %>%
  summarise(Number = sum(Number), Sum = Sum[1L]) %>%
  ungroup()  # do not leave the result grouped by City
#   City Range Number   Sum
#   <chr> <chr>  <int> <int>
#1    BE     R1    734  4711
#2    BE     R2    896  4711
#3    BE     R3   1258  4711
#4    BE     R4    980  4711
#5    BE     R5    543  4711
#6    BE     R6    299  4711
#7    BE     R7      1  4711
#8    FR     R1   1213 14258
#9    FR     R2   2217 14258
#10   FR     R3   3369 14258
#11   FR     R4   4037 14258
#12   FR     R5   2117 14258
#13   FR     R6   1282 14258
#14   FR     R7     20 14258
#15   FR     R8      3 14258

数据

# Example data for the dplyr answer: 11 rows for city "BE " followed by 15 rows
# for city "FR " (the City values keep their trailing space). Class is a
# zero-padded two-digit character code, and Sum holds the per-city total of
# Number repeated on every row.
rows_per_city <- c(11L, 15L)
df1 <- data.frame(
  City = rep(c("BE ", "FR "), times = rows_per_city),
  Class = sprintf("%02d", c(1:10, 12L, 1:14, 16L)),
  Number = c(
    # BE
    734L, 896L, 1258L, 980L, 543L, 192L, 69L, 20L, 14L, 4L, 1L,
    # FR
    1213L, 2217L, 3369L, 4037L, 2117L, 774L, 301L, 124L, 62L, 21L, 11L,
    4L, 2L, 3L, 3L
  ),
  Sum = rep(c(4711L, 14258L), times = rows_per_city),
  stringsAsFactors = FALSE
)
rm(rows_per_city)  # helper only; leave nothing but `df1` behind

答案 2 :(得分:1)

Before updating someNum is 2.                                                                                                                                                   
After updating someNum is 3.                                                                                                                                                    
In main, someNum is now 3.                                                                                                                                                      
Before updating someNum is 3.                                                                                                                                                   
After updating someNum is 4.                                                                                                                                                    
In main, someNum is now 4.