在data.frame中找到最高比例

时间:2013-05-14 15:35:13

标签: r dataframe

我有一个如下所示的数据框:

# Example data: five sectors, each with one "Type A" and one "Type B" row.
# Within a sector the Type B proportion is written as 1 minus the Type A one.
x <- data.frame(
  sector = rep(1:5, each = 2),
  subspecies = rep(c("Type A", "Type B"), times = 5),
  proportion = c(.2, 1 - .2, .3, 1 - .3, .4, 1 - .4, .5, 1 - .5, .6, 1 - .6)
)

# Placeholder column that will hold the per-sector dominance label.
x$dominance <- NA

# Sort the values of the sector column in place (only that column's values
# are reordered; the other columns are left as they are).
x$sector <- sort(x$sector)

x
   sector subspecies proportion dominance
1       1     Type A        0.2        NA
2       1     Type B        0.8        NA
3       2     Type A        0.3        NA
4       2     Type B        0.7        NA
5       3     Type A        0.4        NA
6       3     Type B        0.6        NA
7       4     Type A        0.5        NA
8       4     Type B        0.5        NA
9       5     Type A        0.6        NA
10      5     Type B        0.4        NA

在每个扇区(sector 1-5)中,如果 Type A 的比例最高,我需要在 dominance 列中填入 "A dominant";如果 Type B 的比例最高,则需要在 dominance 列中填入 "B dominant";如果两者持平,则需要在 dominance 列中填入 "tie"。

这应该是输出数据帧:

# Expected labels, row by row: the first six rows (sectors 1-3) are
# "B dominant", the next two (sector 4) are "tie", and the last two
# (sector 5) are "A dominant".
x$dominance <- rep(
  c("B dominant", "tie", "A dominant"),
  times = c(6, 2, 2)
)
x
   sector subspecies proportion  dominance
1       1     Type A        0.2 B dominant
2       1     Type B        0.8 B dominant
3       2     Type A        0.3 B dominant
4       2     Type B        0.7 B dominant
5       3     Type A        0.4 B dominant
6       3     Type B        0.6 B dominant
7       4     Type A        0.5        tie
8       4     Type B        0.5        tie
9       5     Type A        0.6 A dominant
10      5     Type B        0.4 A dominant

3 个答案:

答案 0 :(得分:4)

 library(data.table)
 # Work on a data.table version of x so the label can be added per group.
 DT <- data.table(x)

 # For each sector, compare the Type A and Type B proportions and store the
 # resulting label in the dominance column for every row of that sector.
 DT[, dominance := {
   share_a <- proportion[subspecies == "Type A"]
   share_b <- proportion[subspecies == "Type B"]
   if (share_a > share_b) {
     "A dominant"
   } else if (share_b > share_a) {
     "B dominant"
   } else {
     "tie"
   }
 }, by = sector]


    sector subspecies proportion  dominance
 1:      1     Type A        0.2 B dominant
 2:      1     Type B        0.8 B dominant
 3:      2     Type A        0.3 B dominant
 4:      2     Type B        0.7 B dominant
 5:      3     Type A        0.4 B dominant
 6:      3     Type B        0.6 B dominant
 7:      4     Type A        0.5        tie
 8:      4     Type B        0.5        tie
 9:      5     Type A        0.6 A dominant
10:      5     Type B        0.4 A dominant

答案 1 :(得分:3)

这是基础R解决方案

## Label one sector's rows by the subspecies holding the largest proportion.
##
## Arguments:
##   x    data frame with a numeric `proportion` column and a `subspecies`
##        column whose values look like "Type A", "Type B", ...
##   tol  absolute tolerance used when deciding whether a proportion equals
##        the maximum; defaults to sqrt(.Machine$double.eps).
##
## Returns a single string: "Tie" when more than one row reaches the maximum
## (within `tol`), otherwise "<X> Dominant" where <X> is the subspecies with
## the "Type " prefix removed. Returns NA_character_ when `x` has no rows or
## any proportion is missing, because no winner can be determined then.
compare <- function(x, tol = sqrt(.Machine$double.eps)) {
  props <- x$proportion
  if (length(props) == 0L || anyNA(props)) {
    return(NA_character_)
  }

  ## Compare with a tolerance rather than `==`: proportions produced by
  ## arithmetic (e.g. 0.1 + 0.2 vs 0.3) can differ in the last bits and an
  ## exact comparison would then miss a genuine tie.
  at_max <- abs(props - max(props)) <= tol
  res <- x$subspecies[at_max]

  if (length(res) > 1L) {
    ## more than one subspecies shares the top proportion
    out <- "Tie"
  } else {
    ## simple string replacement: "Type A" -> "A Dominant"
    out <- paste(sub("Type ", "", res), "Dominant")
    ## or, for exactly two subspecies, you could use
    # out <- if (res == "Type A") "A Dominant" else "B Dominant"
  }
  out
}

## Split the rows by sector, label each group with compare(), then map the
## per-sector labels back onto the rows in their original order.
x$dominance <- unsplit(
  lapply(split(x, f = x$sector), FUN = compare),
  f = x$sector
)

> x
   sector subspecies proportion  dominance
1       1     Type A        0.2 B Dominant
2       1     Type B        0.8 B Dominant
3       2     Type A        0.3 B Dominant
4       2     Type B        0.7 B Dominant
5       3     Type A        0.4 B Dominant
6       3     Type B        0.6 B Dominant
7       4     Type A        0.5        Tie
8       4     Type B        0.5        Tie
9       5     Type A        0.6 A Dominant
10      5     Type B        0.4 A Dominant

答案 2 :(得分:2)

使用基础 R(base R):

# Process x one sector at a time and stack the pieces back together.
# Each sector gets a dominance column holding "tie" when more than one row
# reaches the sector's maximum proportion, otherwise the subspecies of the
# row with the largest proportion.
# The tie check counts rows equal to the maximum instead of looking for any
# duplicated value, so a repeated non-maximal proportion in a sector with
# more than two rows is not mistaken for a tie.
do.call(
  rbind,
  by(
    x, x$sector,
    FUN = function(sec) {
      transform(
        sec,
        dominance = if (sum(proportion == max(proportion)) > 1L) {
          "tie"
        } else {
          subspecies[which.max(proportion)]
        }
      )
    }
  )
)
#      sector subspecies proportion dominance
# 1.1       1     Type A        0.2    Type B
# 1.2       1     Type B        0.8    Type B
# 2.3       2     Type A        0.3    Type B
# 2.4       2     Type B        0.7    Type B
# 3.5       3     Type A        0.4    Type B
# 3.6       3     Type B        0.6    Type B
# 4.7       4     Type A        0.5       tie
# 4.8       4     Type B        0.5       tie
# 5.9       5     Type A        0.6    Type A
# 5.10      5     Type B        0.4    Type A

如果这样做可以提高可读性,可以将其分成两部分。

# Add a dominance column to the rows of a single sector.
#
# sec: data frame with `proportion` and `subspecies` columns.
# Returns `sec` with a `dominance` column that is "tie" when more than one
# row reaches the maximum proportion, otherwise the subspecies of the row
# with the largest proportion (the single value is repeated for every row).
#
# The tie check counts rows equal to the maximum rather than using
# anyDuplicated(), so a repeated non-maximal value is not reported as a tie.
f <- function(sec) {
  transform(
    sec,
    dominance = if (sum(proportion == max(proportion)) > 1L) {
      "tie"
    } else {
      subspecies[which.max(proportion)]
    }
  )
}
# Apply f to each sector's rows and row-bind the per-sector results.
do.call(
  rbind,
  by(x, INDICES = x$sector, FUN = f)
)