This is what I currently do: I total up their grades with the code below (the real data has more columns than shown here).
# Cross-tabulate Student against Subject (table() counts how often each
# pair occurs in `data`), then pass the result through cbind() before
# as.data.frame() so the outcome is a wide data frame: one row per student
# and one column per subject.
test <- as.data.frame(cbind(with(data, table(Student, Subject))))
row.names Maths Science English Geography History Art ...
1 George 64 70 40 50 60 70
2 Anna 40 20 65 54 30 50
3 Scott 30 64 30 40 50 20
...
# Attempted summary: each aggregate() call groups the rows of `test` by the
# column named on the right-hand side of `~` and applies a single function
# to every other column; data.frame() then places the three results side by
# side.
Summarize <- data.frame(
  aggregate(. ~ Maths, data = test, FUN = min),
  aggregate(. ~ English, data = test, FUN = max),
  aggregate(. ~ Science, data = test, FUN = mean)
)
有没有办法一次性对所有列进行聚合(最小值、平均值和最大值),并把结果放入一个新的数据框?
Min Mean Max
Maths 30 60 90
Science
English
Geography
...
提前致谢!
答案 0 :(得分:1)
尝试:
library(dplyr)
library(tidyr)

# Summarise every subject column (everything except Student) with min, mean
# and max, then reshape so that each subject is a row and each statistic is
# a column. across() and pivot_longer() replace the deprecated
# summarise_each()/funs() and the superseded gather()/separate()/spread()
# chain (requires dplyr >= 1.0.0 and tidyr >= 1.0.0).
df %>%
  summarise(across(
    -Student,
    list(
      min = function(x) min(x, na.rm = TRUE),
      mean = function(x) mean(x, na.rm = TRUE),
      max = function(x) max(x, na.rm = TRUE)
    )
  )) %>%
  # The summary columns are named "<Subject>_<stat>". Anchoring the pattern
  # on the known suffixes keeps subject names that contain "_" or "." intact.
  pivot_longer(
    everything(),
    names_to = c("Subject", ".value"),
    names_pattern = "^(.*)_(min|mean|max)$"
  ) %>%
  as.data.frame()
# Rows follow the column order of df; statistics follow the order given above.
# Subject min mean max
#1 Maths 30 44.66667 64
#2 Science 20 51.33333 70
#3 English 30 45.00000 65
#4 Geography 40 48.00000 54
#5 History 30 46.66667 60
#6 Art 20 46.66667 70
或者您可以将 aggregate 与 reshape2 包中的 melt 结合使用:
library(reshape2)

# Melt df to long format (one row per Student/subject pair), then compute
# Min, Mean and Max of `value` for each subject. `id.vars` is spelled out in
# full instead of relying on partial matching of `id`.
res <- aggregate(
  value ~ variable,
  data = melt(df, id.vars = "Student"),
  FUN = function(x) {
    c(
      Min = min(x, na.rm = TRUE),
      Mean = mean(x, na.rm = TRUE),
      Max = max(x, na.rm = TRUE)
    )
  }
)

# Because FUN returns three values, aggregate() stores them as one matrix
# column; do.call(data.frame, ...) expands it into separate columns named
# value.Min, value.Mean and value.Max.
res1 <- do.call(data.frame, res)

# Drop everything up to and including the last "." so the statistic columns
# are simply Min, Mean and Max.
colnames(res1) <- gsub(".*\\.", "", colnames(res1))
res1
# variable Min Mean Max
#1 Maths 30 44.66667 64
#2 Science 20 51.33333 70
#3 English 30 45.00000 65
#4 Geography 40 48.00000 54
#5 History 30 46.66667 60
#6 Art 20 46.66667 70
或仅使用base R
# Base R only: stack() puts every column except the first (Student) into a
# long data frame with columns `values` and `ind`; aggregate() then computes
# Min, Mean and Max of `values` for each `ind`.
res2 <- aggregate(
  values ~ ind,
  data = stack(df, select = -1),
  FUN = function(v) {
    c(
      Min = min(v, na.rm = TRUE),
      Mean = mean(v, na.rm = TRUE),
      Max = max(v, na.rm = TRUE)
    )
  }
)

# Expand the matrix column produced by the multi-value FUN into ordinary
# columns (values.Min, values.Mean, values.Max).
res2 <- do.call(data.frame, res2)

# Remove the "values." prefix from the statistic column names.
colnames(res2) <- gsub(".*\\.", "", colnames(res2))
res2
# ind Min Mean Max
#1 Art 20 46.66667 70
#2 English 30 45.00000 65
#3 Geography 40 48.00000 54
#4 History 30 46.66667 60
#5 Maths 30 44.66667 64
#6 Science 20 51.33333 70
# Example data used by the answers above: one row per student, one integer
# grade column per subject.
df <- data.frame(
  Student = c("George", "Anna", "Scott"),
  Maths = c(64L, 40L, 30L),
  Science = c(70L, 20L, 64L),
  English = c(40L, 65L, 30L),
  Geography = c(50L, 54L, 40L),
  History = c(60L, 30L, 50L),
  Art = c(70L, 50L, 20L),
  stringsAsFactors = FALSE
)