在R中,如何按组添加最大值?

时间:2012-07-26 18:18:16

标签: r

我想使用R在我的数据集中创建一个新列,其中包含每个唯一组的最大值。我的数据如下所示:

# Example data: one group label and one replicate number per row.
group<-c("A","A","A","A","A","B","B","C","C","C")
replicate<-c(1,2,3,4,5,1,2,1,2,3)
# NOTE(review): cbind() on a character and a numeric vector builds a character
# matrix, so `replicate` is not numeric inside x (it becomes a factor in
# R < 4.0 and a character column in R >= 4.0).
x<-data.frame(cbind(group,replicate))

我想创建第三列,如下所示 - 每组的最大值。

group   replicate max.per.group
A       1         5       
A       2         5
A       3         5
A       4         5
A       5         5
B       1         2
B       2         2
C       1         3
C       2         3
C       3         3

7 个答案:

答案 0 :(得分:4)

如果您先重新定义x(cbind会生成两个因子列),

# Build the data frame straight from the two vectors so `replicate` stays numeric.
x <- data.frame(group = group, replicate = replicate)

你可以用这个:

# Per-group maxima of `replicate`, then a left join back onto x by group.
group_max <- aggregate(replicate ~ group, data = x, FUN = max)
merge(x, group_max, by = "group", all.x = TRUE)
   group replicate.x replicate.y
1      A           1           5
2      A           2           5
3      A           3           5
4      A           4           5
5      A           5           5
6      B           1           2
7      B           2           2
8      C           1           3
9      C           2           3
10     C           3           3

答案 1 :(得分:3)

尝试

# This is how you create your data.frame
group <- c("A", "A", "A", "A", "A", "B", "B", "C", "C", "C")
replicate <- c(1, 2, 3, 4, 5, 1, 2, 1, 2, 3)
x <- data.frame(group, replicate) # here you don't need cbind()

# Here's my solution
# tapply() returns one maximum per group, named by the group label.
Max <- tapply(x$replicate, x$group, max)
# Look each row's group up by name so the result is correct even when the
# rows are not sorted by group (rep(Max, table(x$group)) would require that).
data.frame(x, max.per.group = as.vector(Max[as.character(x$group)]))
 group replicate max.per.group
1      A         1             5
2      A         2             5
3      A         3             5
4      A         4             5
5      A         5             5
6      B         1             2
7      B         2             2
8      C         1             3
9      C         2             3
10     C         3             3

答案 2 :(得分:3)

这是另一个基础R解决方案:

# For every group, repeat that group's maximum once per member row.
# NOTE: unlist() concatenates the pieces in group order, so this lines up
# with x only when the rows of x are already ordered by group.
per_group <- tapply(x$replicate, x$group, function(v) rep(max(v), length(v)))
cbind(x, cummax = unlist(per_group))
   group replicate cummax
A1     A         1      5
A2     A         2      5
A3     A         3      5
A4     A         4      5
A5     A         5      5
B1     B         1      2
B2     B         2      2
C1     C         1      3
C2     C         2      3
C3     C         3      3

答案 3 :(得分:2)

您可以使用plyr包:

# ddply() splits x by group, applies transform() to each piece to add the
# per-group maximum of replicate, and combines the pieces into one data frame.
library(plyr)
> ddply(x, .(group), transform, max.per.group=max(replicate))
   group replicate max.per.group
1      A         1             5
2      A         2             5
3      A         3             5
4      A         4             5
5      A         5             5
6      B         1             2
7      B         2             2
8      C         1             3
9      C         2             3
10     C         3             3
> 

答案 4 :(得分:2)

共享的可重现示例表明您将列作为因子。我们需要先将它们转换为数字。

我们可以尝试使用基础R的ave函数来找出每组的最大值。

# Convert through as.character() first: as.numeric() applied directly to a
# factor returns the internal level codes, not the numbers the labels spell.
# Converting before ave() also keeps the new column numeric.
x$max.per.group <- ave(
  as.numeric(as.character(x$replicate)),
  x$group,
  FUN = max
)

#   group replicate max.per.group
#1      A         1             5
#2      A         2             5
#3      A         3             5
#4      A         4             5
#5      A         5             5
#6      B         1             2
#7      B         2             2
#8      C         1             3
#9      C         2             3
#10     C         3             3

另一个选项是使用dplyr:

library(dplyr)
# as.character() before as.numeric() so a factor column yields its values
# rather than its level codes; ungroup() so later verbs are not silently
# applied per group.
x %>%
  group_by(group) %>%
  mutate(max.per.group = max(as.numeric(as.character(replicate)))) %>%
  ungroup()

答案 5 :(得分:1)

您可以使用rle - Run Length Encoding

# Create the data.frame
group <- c("A", "A", "A", "A", "A", "B", "B", "C", "C", "C")
replicate <- c(1, 2, 3, 4, 5, 1, 2, 1, 2, 3)
x <- data.frame(group, replicate)

# using 'rle'
# rle() on the group labels gives the length of each run of identical groups.
# as.character() works for both factor and character columns.
runs <- rle(as.character(x$group))
# Label every row with the index of the run it belongs to.
run_id <- rep(seq_along(runs$lengths), runs$lengths)
# Maximum of `replicate` within each run. The run lengths themselves are the
# group sizes, not the maxima, so they cannot be used as the result.
run_max <- tapply(x$replicate, run_id, max)
x$max.per.group <- rep(as.vector(run_max), runs$lengths)
x

答案 6 :(得分:0)

我们可以使用data.table的赋值运算符(:=)就地创建新列,而无需复制数据

library(data.table)
# Convert x to a data.table by reference, then add the per-group maximum
# in place with `:=`.
setDT(x)
x[, max.per.group := max(replicate), by = "group"]
x
#     group replicate max.per.group
#1:     A         1             5
#2:     A         2             5
#3:     A         3             5
#4:     A         4             5
#5:     A         5             5
#6:     B         1             2
#7:     B         2             2
#8:     C         1             3
#9:     C         2             3
#10:    C         3             3

数据

# Input data: built directly from the two vectors so `replicate` stays numeric.
x <- data.frame(group = group, replicate = replicate)