我如何计算许多表中的比值比

时间:2014-05-06 01:28:10

标签: r

我有一张很大的数据表,像这样:

n1   n2    freq1   freq2   
A    C      33      44
A    C      23      19
R    E      163     56
R    E      32      12
W    Q      111     54
W    Q      12      33

如何对每两行(即每个 2x2 表)计算一次比值比(odds ratio)?

n1   n2    freq1   freq2   odd_ratio
A    C      33      44       0.61
A    C      23      19       0.61
R    E      163     56       1.09
R    E      32      12       1.09
W    Q      111     54       5.65
W    Q      12      33       5.65

#0.61=(33*19)/(23*44)
#1.09=(163*12)/(32*56)

4 个答案:

答案 0 :(得分:2)

您可以使用split-apply-combine执行此操作:

# Split-apply-combine: cut `tab` into one piece per "n1 n2" label, attach the
# odds ratio of that piece's first two rows to every row, then stack the
# pieces back into a single data frame.
do.call(
  rbind,
  lapply(
    split(tab, paste(tab$n1, tab$n2)),
    function(grp) {
      # Products of the two diagonals of the 2x2 table formed by rows 1 and 2.
      main_diag <- grp$freq1[1] * grp$freq2[2]
      off_diag <- grp$freq1[2] * grp$freq2[1]
      grp$odd_ratio <- main_diag / off_diag
      grp
    }
  )
)
#       n1 n2 freq1 freq2 odd_ratio
# A C.1  A  C    33    44 0.6195652
# A C.2  A  C    23    19 0.6195652
# R E.3  R  E   163    56 1.0915179
# R E.4  R  E    32    12 1.0915179
# W Q.5  W  Q   111    54 5.6527778
# W Q.6  W  Q    12    33 5.6527778

答案 1 :(得分:2)

或尝试data.table方法

library(data.table)

# Read the example data from an inline string.
# The string has to be supplied through `text =`: the first positional
# parameter of read.table() is `file`, so an unnamed string is interpreted as
# a file name to open and the call fails instead of parsing the data.
dt <- read.table(text = "n1   n2    freq1   freq2   
A    C      33      44
A    C      23      19
R    E      163     56
R    E      32      12
W    Q      111     54
W    Q      12      33", header = TRUE)

# Convert the data frame to a data.table by reference (no copy is returned).
setDT(dt)

# For each (n1, n2) group, compute the odds ratio from the group's first two
# rows and store it in a new `odds_ratio` column on every row of the group.
dt[, odds_ratio := {
  main_diag <- freq1[1] * freq2[2]
  off_diag <- freq1[2] * freq2[1]
  main_diag / off_diag
}, by = c("n1", "n2")]

#    n1 n2 freq1 freq2 odds_ratio
# 1:  A  C    33    44  0.6195652
# 2:  A  C    23    19  0.6195652
# 3:  R  E   163    56  1.0915179
# 4:  R  E    32    12  1.0915179
# 5:  W  Q   111    54  5.6527778
# 6:  W  Q    12    33  5.6527778

它也很快:

library(microbenchmark)

# Time the grouped odds-ratio assignment over 1000 repetitions.
microbenchmark(
  dt[, odds_ratio := freq1[1] * freq2[2] / (freq1[2] * freq2[1]),
     by = c("n1", "n2")],
  times = 1000L
)

#    Unit: milliseconds
#    expr      min       lq          median       uq        max       neval
#     ##       2.367839  2.612129    2.691221     2.838895  16.24584  1000

答案 2 :(得分:0)

如果您不需要在同一组的每一行上都重复显示比值比,并且假设每个 2x2 表的两行在数据中是相邻的,那么下面的方法可行:

# One odds ratio per 2x2 table, assuming each table occupies two consecutive
# rows of `dd`. The result keeps the first two columns of the first row of
# every pair and adds the ratio as column `OR`.

# Index of the first row of each pair: 1, 3, 5, ...
# The sequence is sized with `length.out` because seq(1, nrow(dd), by = 2)
# raises an error when `dd` has zero rows; for non-empty input the indices
# are the same as before.
step <- seq(1, by = 2, length.out = ceiling(nrow(dd) / 2))
cbind(
  dd[step, 1:2],
  OR = with(
    dd,
    freq1[step] * freq2[step + 1] / freq2[step] / freq1[step + 1]
  )
)

答案 3 :(得分:0)

# Compute one odds ratio per level of `n1` (grouping uses `n1` only) and join
# it back onto `dat`. sapply() returns a vector named by group; merge() turns
# it into a data frame and matches its row names against `dat$n1`.
merge(
  x = dat,
  y = sapply(
    split(dat[, c("freq1", "freq2")], dat$n1),
    function(tbl) {
      # Rows 1 and 2 of the group form the 2x2 table.
      tbl$freq1[1] * tbl$freq2[2] / (tbl$freq2[1] * tbl$freq1[2])
    }
  ),
  by.x = "n1",
  by.y = "row.names"
)
#-----------
  n1 n2 freq1 freq2         y
1  A  C    33    44 0.6195652
2  A  C    23    19 0.6195652
3  R  E   163    56 1.0915179
4  R  E    32    12 1.0915179
5  W  Q   111    54 5.6527778
6  W  Q    12    33 5.6527778