在 R 中聚合所有列

时间:2014-10-09 07:48:43

标签: r aggregate

This is what I currently do: I total up their grades using the code below (the real data has more columns than shown here).

# Cross-tabulate Student by Subject and convert the result to a data frame.
# The table is passed through cbind() before as.data.frame(), as in the
# original three-step version (presumably to keep the wide, one-column-per-
# subject layout rather than the long Var/Freq form -- confirm).
test <- as.data.frame(cbind(with(data, table(Student, Subject))))

 row.names Maths Science English Geography History Art ...
1  George    64     70      40       50       60     70
2  Anna      40     20      65       54       30     50
3  Scott     30     64      30       40       50     20
 ...

# Asker's attempt. In each aggregate() call the column on the right-hand side
# of the formula (Maths, English, Science) is the grouping variable, and the
# given function (min, max, mean) is applied to every other column within
# those groups. The three results are then combined side by side.
# NOTE(review): this groups *by* score values instead of summarising each
# column, so it does not produce the per-subject Min/Mean/Max table that the
# question asks for.
Summarize <- data.frame(
aggregate(.~Maths, data=test, min),
aggregate(.~English, data=test, max),
aggregate(.~Science, data=test, mean))

有没有办法一次选中所有列,并把每一列的聚合结果(最小值、平均值和最大值)汇总到一个新的数据框中?

       Min Mean Max
Maths   30  60  90
Science    
English
Geography
...

提前致谢!

1 个答案:

答案 0 :(得分:1)

尝试:

library(dplyr)
library(tidyr)

# Summarise every score column (everything except Student) with min, mean and
# max, then reshape so that each subject becomes one row.
#
# across() names its outputs "<column>_<function>" (e.g. Maths_min). The
# ".value" sentinel in names_to makes pivot_longer() turn the function part of
# each name into an output column, so no separate()/spread() step is needed.
# names_pattern splits at the LAST underscore, so subject names that contain
# "_" themselves are still handled.
#
# This replaces the deprecated summarise_each()/funs() and the superseded
# gather()/spread(); it needs dplyr >= 1.0.0 and tidyr >= 1.0.0.
df %>%
  summarise(across(
    -Student,
    list(
      min = ~ min(.x, na.rm = TRUE),
      mean = ~ mean(.x, na.rm = TRUE),
      max = ~ max(.x, na.rm = TRUE)
    )
  )) %>%
  pivot_longer(
    everything(),
    names_to = c("Subject", ".value"),
    names_pattern = "^(.*)_([^_]+)$"
  ) %>%
  arrange(Subject) %>%   # alphabetical by subject, as spread() produced before
  as.data.frame()        # plain data.frame so it prints like the other answers
#    Subject min     mean max
#1       Art  20 46.66667  70
#2   English  30 45.00000  65
#3 Geography  40 48.00000  54
#4   History  30 46.66667  60
#5     Maths  30 44.66667  64
#6   Science  20 51.33333  70

更新

或者您可以将 aggregate 与 melt 一起使用:
 library(reshape2)

 # melt() reshapes df to long form (one row per Student/subject pair, subject
 # name in `variable`, score in `value`); aggregate() then computes
 # Min/Mean/Max per subject. `id.vars` is spelled out in full instead of
 # relying on partial matching of `id`.
 res <- aggregate(
   value ~ variable,
   data = melt(df, id.vars = "Student"),
   FUN = function(x) {
     c(Min = min(x, na.rm = TRUE),
       Mean = mean(x, na.rm = TRUE),
       Max = max(x, na.rm = TRUE))
   }
 )
 # FUN returns a length-3 vector, so `value` is a matrix column;
 # do.call(data.frame, ...) flattens it into value.Min, value.Mean, value.Max.
 res1 <- do.call(`data.frame`, res)
 # Drop everything up to the last "." so the columns read Min, Mean, Max.
 colnames(res1) <- gsub(".*\\.", "", colnames(res1))
 res1
 #   variable Min     Mean Max
 #1     Maths  30 44.66667  64
 #2   Science  20 51.33333  70
 #3   English  30 45.00000  65
 #4 Geography  40 48.00000  54
 #5   History  30 46.66667  60
 #6       Art  20 46.66667  70

或仅使用base R

   # Base-R only: stack() puts all columns except the first (Student) into
   # long form with the score in `values` and the subject name in `ind`;
   # aggregate() then computes Min/Mean/Max for each subject.
   res2 <- aggregate(
     values ~ ind,
     data = stack(df, select = -1),
     FUN = function(x) {
       c(Min = min(x, na.rm = TRUE),
         Mean = mean(x, na.rm = TRUE),
         Max = max(x, na.rm = TRUE))
     }
   )
   # Flatten the matrix column returned by aggregate() into separate columns,
   # then strip the "values." prefix from their names.
   res2 <- do.call(`data.frame`, res2)
   names(res2) <- gsub(".*\\.", "", names(res2))

   # NOTE(review): row order follows the factor levels of `ind` created by
   # stack(); newer R versions appear to use column order rather than
   # alphabetical order -- confirm against the printed output.
   res2
   #        ind Min     Mean Max
   #1       Art  20 46.66667  70
   #2   English  30 45.00000  65
   #3 Geography  40 48.00000  54
   #4   History  30 46.66667  60
   #5     Maths  30 44.66667  64
   #6   Science  20 51.33333  70

数据

# Example data: one row per student, one integer score column per subject.
# stringsAsFactors = FALSE keeps Student a character vector on R < 4.0 too.
df <- data.frame(
  Student = c("George", "Anna", "Scott"),
  Maths = c(64L, 40L, 30L),
  Science = c(70L, 20L, 64L),
  English = c(40L, 65L, 30L),
  Geography = c(50L, 54L, 40L),
  History = c(60L, 30L, 50L),
  Art = c(70L, 50L, 20L),
  stringsAsFactors = FALSE
)