合并R中的输出

时间:2014-08-18 15:12:23

标签: r aggregate

# Question's attempt: for each month and each 2-wide bin, take the maximum of
# VALUE and DATE with aggregate(); the result is stored in a variable named
# `max` (the same name as the function passed as the aggregation function).
# NOTE(review): this line bins a$CLASS, while the sample data and the printed
# output below use GRADE -- confirm which column name is intended.
# NOTE(review): cbind() on the left-hand side presumably produces a single
# matrix-valued result column, which would explain why the result prints as
# several separate tables -- confirm.
max=aggregate(cbind(a$VALUE,Date=a$DATE) ~ format(a$DATE, "%m") + cut(a$CLASS, breaks=c(0,2,4,6,8,10,12,14)) , data = a, max)[-1]
# Convert the numeric day counts back to Date (origin 1970-01-01).
# NOTE(review): the cbind() column above is named `Date`, but `DATE` is
# accessed here -- verify the intended name/case.
max$DATE=as.Date(max$DATE, origin = "1970-01-01")

示例数据:

DATE         GRADE    VALUE
2008-09-01     1        20
2008-09-02     2        30
2008-09-03     3        50
    .
    .
2008-09-30     2        75
    .
    .
2008-10-01     1        95
    .
    .
2008-11-01     4        90
    .
    . 
2008-12-01     1        70
2008-12-02     2        40
2008-12-28     4        30
2008-12-29     1        40
2008-12-31     3        50

根据上表,仅第一个月的预期输出为:

 DATE         GRADE    VALUE
2008-09-30    (0,2]     75
2008-09-02    (2,4]     50

我的实际数据输出:

                format(DATE, "%m")
1                        09
2                        10
3                        11
4                        12
5                        09
6                        10
7                        11



  cut(a$GRADE, breaks = c(0, 2, 4, 6, 8, 10, 12, 14))        value
1                                                        (0,2] 0.30844444
2                                                        (0,2] 1.00000000
3                                                        (0,2] 1.00000000
4                                                        (0,2] 0.73333333
5                                                        (2,4] 0.16983488
6                                                        (2,4] 0.09368000
7                                                        (2,4] 0.10589335

          Date
1  2008-09-30
2  2008-10-31
3  2008-11-28
4  2008-12-31
5  2008-09-30
6  2008-10-31
7  2008-11-28

上面的输出并非来自示例数据,因为实际数据太大。逻辑很简单:等级从 1 到 10,我想在每个月内、按相应的等级分组找出最高值。例如:我需要 (0,2]、(2,4] 等每个分组的最高值。

我使用了带 max 函数的 aggregate,并按 Date 和 Grade 两列进行分组。现在运行代码并显示 max 的值时,得到的输出是依次排列的 3 个表。我想把这个输出绘制成图,但正因如此无法做到。那么我该如何把这些输出合并起来呢?

2 个答案:

答案 0 :(得分:1)

尝试:

 library(dplyr)
 # Group by calendar month and by 2-wide GRADE bins, then take the maximum of
 # every remaining column (DATE and VALUE) inside each group.
 # across(everything(), max) replaces the deprecated summarise_each(funs(max));
 # .groups = "drop" returns an ungrouped result that is ready for plotting.
 a %>%
 group_by(MONTH = format(DATE, "%m"),
          GRADE = cut(GRADE, breaks = seq(0, 14, by = 2))) %>%
 summarise(across(everything(), max), .groups = "drop")

 #  MONTH GRADE       DATE VALUE
 #1    09 (0,2] 2008-09-30    75
 #2    09 (2,4] 2008-09-03    50
 #3    10 (0,2] 2008-10-01    95
 #4    11 (2,4] 2008-11-01    90
 #5    12 (0,2] 2008-12-29    70
 #6    12 (2,4] 2008-12-31    50

或使用data.table

 library(data.table)
  # Convert `a` to a data.table by reference, then compute the latest DATE and
  # the largest VALUE for every month / GRADE-bin combination.
  setDT(a)[
    ,
    .(DATE = max(DATE), VALUE = max(VALUE)),
    by = .(
      MONTH = format(DATE, "%m"),
      GRADE = cut(GRADE, breaks = seq(0, 14, by = 2))
    )
  ]
  #       MONTH GRADE       DATE VALUE
  #1:    09 (0,2] 2008-09-30    75
  #2:    09 (2,4] 2008-09-03    50
  #3:    10 (0,2] 2008-10-01    95
  #4:    11 (2,4] 2008-11-01    90
  #5:    12 (0,2] 2008-12-29    70
  #6:    12 (2,4] 2008-12-31    50

或使用aggregate

  # Take the maximum VALUE and DATE for every month / GRADE-bin combination.
  # cbind() turns DATE into a plain day count, so it is converted back to Date
  # after aggregating.
  res <- with(
    a,
    aggregate(
      cbind(VALUE, DATE),
      by = list(
        MONTH = format(DATE, "%m"),
        GRADE = cut(GRADE, breaks = seq(0, 14, by = 2))
      ),
      FUN = max
    )
  )
  res <- transform(res, DATE = as.Date(DATE, origin = "1970-01-01"))
  # Show the rows ordered by month.
  res[order(res$MONTH), ]
  # MONTH GRADE VALUE       DATE
  #1    09 (0,2]    75 2008-09-30
  #4    09 (2,4]    50 2008-09-03
  #2    10 (0,2]    95 2008-10-01
  #5    11 (2,4]    90 2008-11-01
  #3    12 (0,2]    70 2008-12-29
  #6    12 (2,4]    50 2008-12-31

数据

 # Sample data in dput() form: a data.frame with 11 rows and the columns
 # DATE (numeric day counts carrying class "Date"), GRADE and VALUE (integers).
 a <-  structure(list(DATE = structure(c(14123, 14124, 14125, 14152, 
   14153, 14184, 14214, 14215, 14241, 14242, 14244), class = "Date"), 
   GRADE = c(1L, 2L, 3L, 2L, 1L, 4L, 1L, 2L, 4L, 1L, 3L), VALUE = c(20L, 
   30L, 50L, 75L, 95L, 90L, 70L, 40L, 30L, 40L, 50L)), .Names = c("DATE", 
  "GRADE", "VALUE"), row.names = c(NA, -11L), class = "data.frame")

更新

如果您想在分组中加入YEAR

   library(dplyr)
   # Same aggregation as the month-only version, with YEAR added as a grouping
   # key so that identical months from different years stay separate.
   # across(everything(), max) replaces the deprecated
   # summarise_each(funs(max)); .groups = "drop" returns an ungrouped result.
   a %>%
   group_by(MONTH = format(DATE, "%m"),
            YEAR = format(DATE, "%Y"),
            GRADE = cut(GRADE, breaks = seq(0, 14, by = 2))) %>%
   summarise(across(everything(), max), .groups = "drop")
  #   MONTH YEAR GRADE       DATE VALUE
  #1     09 2008 (0,2] 2008-09-30    75
  #2     09 2008 (2,4] 2008-09-03    50
  #3     09 2009 (0,2] 2009-09-30    75
  #4     09 2009 (2,4] 2009-09-03    50
  #5     10 2008 (0,2] 2008-10-01    95
  #6     10 2009 (0,2] 2009-10-01    95
  #7     11 2008 (2,4] 2008-11-01    90
  #8     11 2009 (2,4] 2009-11-01    90
  #9     12 2008 (0,2] 2008-12-29    70
  #10    12 2008 (2,4] 2008-12-31    50
  #11    12 2009 (0,2] 2009-12-29    70
  #12    12 2009 (2,4] 2009-12-31    50

数据

 # Two-year sample data in dput() form: 22 rows. Rows 12-22 repeat the GRADE
 # and VALUE of rows 1-11 with every DATE shifted forward by 365 days.
 a <- structure(list(DATE = structure(c(14123, 14124, 14125, 14152, 
   14153, 14184, 14214, 14215, 14241, 14242, 14244, 14488, 14489, 
  14490, 14517, 14518, 14549, 14579, 14580, 14606, 14607, 14609
  ), class = "Date"), GRADE = c(1L, 2L, 3L, 2L, 1L, 4L, 1L, 2L, 
  4L, 1L, 3L, 1L, 2L, 3L, 2L, 1L, 4L, 1L, 2L, 4L, 1L, 3L), VALUE = c(20L, 
  30L, 50L, 75L, 95L, 90L, 70L, 40L, 30L, 40L, 50L, 20L, 30L, 50L, 
  75L, 95L, 90L, 70L, 40L, 30L, 40L, 50L)), .Names = c("DATE", 
  "GRADE", "VALUE"), row.names = c("1", "2", "3", "4", "5", "6", 
  "7", "8", "9", "10", "11", "12", "21", "31", "41", "51", "61", 
   "71", "81", "91", "101", "111"), class = "data.frame")

答案 1 :(得分:0)

使用基础 R(base R)的代码可能会有所帮助(使用 akrun 答案中的 'a' 数据框):

# Extract the month (second "-"-separated field of the date string) for every
# row. vapply() guarantees a character vector even for zero-row input; the
# unused duplicate strsplit() result of the original was dropped.
date_parts <- strsplit(as.character(a$DATE), "-", fixed = TRUE)
a$month <- vapply(date_parts, `[`, character(1), 2)
# Bin GRADE into the half-open intervals (0,2], (2,4], ..., (12,14].
gradeCats <- cut(a$GRADE, breaks = c(0, 2, 4, 6, 8, 10, 12, 14))

# Largest VALUE for every month / grade-bin combination.
aggregate(VALUE ~ month + gradeCats, data = a, max)
  month gradeCats VALUE
1    09     (0,2]    75
2    10     (0,2]    95
3    12     (0,2]    70
4    09     (2,4]    50
5    11     (2,4]    90
6    12     (2,4]    50