在 R 中对数据框的所有行应用函数

时间:2015-06-21 20:09:00

标签: r

我有一个问题,

我有一个如下所示的数据框:

            MInc      t0.01       t0.1         t0.2       t0.5       t0.8
A4GALT 0.1605681 0.16056814 0.27212265 0.3490585760 0.07486080         NA
AAAS   0.2992754 0.04794018 0.09057540 0.0001127665         NA         NA
AADAC  0.3027883 0.30278830 0.30278830 0.3027883033 0.30278830 0.28501358
AASS   0.1307001 0.12665125 0.12665125 0.1266512501 0.12665125 0.21474030
AATF   0.1453662 0.09392991         NA           NA         NA         NA
AATK   0.2567986 0.11338287 0.11338287 0.1133828663 0.11338287 0.11093412
ABCA8  0.1577148 0.07236169 0.07236169 0.1420553677 0.06185948 0.04529619
ABCB10 0.1437084 0.04794018         NA           NA         NA         NA
ABCB8  0.1022297 0.05264867         NA           NA         NA         NA
ABCC8  0.1577148 0.13209778 0.13209778 0.1320977809 0.07740797         NA
ABCD2  0.1453662 0.26370072 0.23850217 0.0572556220         NA         NA
ABCG2  0.1453662 0.08066152 0.04904863 0.0452961926 0.04529619         NA
ABHD12 0.1062786 0.13495108         NA           NA         NA         NA

我正在寻找一个命令:对每一行,取该行(省略第一列)的最小值,再除以该行第一列中的对应值。

输出必须是一个数据框,其中一列包含每行的此值。例如,对于我的数据的第一行,计算将是:

A4GALT 0.07486080/0.1605681 = 0.4662246.

谢谢!

3 个答案:

答案 0 :(得分:3)

我们可以使用 pmin 来获取每行的最小值。如果需要用每行的最小值分别除以数据集所有列中的对应值,可以把 pmin 得到的结果按 df1 的形状复制:一种做法是用 row(df1) 作为索引,然后除以 df1。

# Row-wise minimum of all columns but the first, recycled through row(df1) to
# the shape of df1, then divided by each cell of df1.
do.call(pmin, c(df1[-1], list(na.rm = TRUE)))[row(df1)] / df1

或者,如果只需要除以第 1 列,则除以 df1 的相应子集(使用选项 drop=FALSE 以避免结果被转换为向量)。

 # Row-wise minimum of all columns but the first, divided by the first column;
 # drop = FALSE keeps the divisor (and so the result) a one-column data.frame.
 do.call(pmin, c(df1[-1], list(na.rm = TRUE))) / df1[, 1, drop = FALSE]
 #                 MInc
 #  A4GALT 0.4662246112
 #  AAAS   0.0003767984
 #  AADAC  0.9412965428
 #  AASS   0.9690218294
 #  AATF   0.6461605930
 #  AATK   0.4319888037
 #  ABCA8  0.2872031667
 #  ABCB10 0.3335934434
 #  ABCB8  0.5150036633
 #  ABCC8  0.4908098035
 #  ABCD2  0.3938716290
 #  ABCG2  0.3116005646
 #  ABHD12 1.2697860152

注意:由于提问者提到要省略第一列,我使用了 df1[-1]。但如果提问者指的是省略行名(rownames),那么代码应为:

   # Variant that takes the row-wise minimum over every column, the first one
   # included, before dividing by the first column.
   do.call(pmin, c(df1, list(na.rm = TRUE))) / df1[, 1, drop = FALSE]

基准

# Benchmark of the three approaches on a 5000 x 5000 matrix of random normals.
# rowMins() comes from matrixStats and microbenchmark() from microbenchmark;
# both are loaded here so the snippet runs on its own.
library("matrixStats")
library("microbenchmark")

set.seed(238)
m1 <- matrix(rnorm(5000 * 5000), ncol = 5000)
df2 <- as.data.frame(m1)

# Each candidate computes the row-wise minimum of all columns but the first
# and divides it by the first column; they read m1 / df2 from the workspace.
jalapic <- function() apply(m1[, -1], 1, min, na.rm = TRUE) / m1[, 1]
thomas <- function() rowMins(m1[, -1], na.rm = TRUE) / m1[, 1]
akrun <- function() do.call(pmin, c(df2[, -1], na.rm = TRUE)) / df2[, 1]

# Timings are reported relative to the fastest expression, 20 runs each.
microbenchmark(jalapic(), thomas(), akrun(), unit = "relative", times = 20L)
#Unit: relative
#    expr      min       lq     mean   median       uq       max neval cld
#jalapic() 2.255453 2.224805 2.088557 2.145412 2.133398 1.9793887    20   b
#thomas() 1.000000 1.000000 1.000000 1.000000 1.000000 1.0000000    20  a 
# akrun() 1.248002 1.227203 1.133792 1.212745 1.174489 0.8228857    20  a 

数据

# Example data from the question, in dput() form: a data.frame with 13 rows
# (row names A4GALT ... ABHD12) and 6 numeric columns. The first column is
# MInc; the t0.* columns contain NA values.
df1 <- structure(list(MInc = c(0.1605681, 0.2992754, 0.3027883, 0.1307001, 
0.1453662, 0.2567986, 0.1577148, 0.1437084, 0.1022297, 0.1577148, 
0.1453662, 0.1453662, 0.1062786), t0.01 = c(0.16056814, 0.04794018, 
0.3027883, 0.12665125, 0.09392991, 0.11338287, 0.07236169, 0.04794018, 
0.05264867, 0.13209778, 0.26370072, 0.08066152, 0.13495108), 
    t0.1 = c(0.27212265, 0.0905754, 0.3027883, 0.12665125, NA, 
    0.11338287, 0.07236169, NA, NA, 0.13209778, 0.23850217, 0.04904863, 
    NA), t0.2 = c(0.349058576, 0.0001127665, 0.3027883033, 0.1266512501, 
    NA, 0.1133828663, 0.1420553677, NA, NA, 0.1320977809, 0.057255622, 
    0.0452961926, NA), t0.5 = c(0.0748608, NA, 0.3027883, 0.12665125, 
    NA, 0.11338287, 0.06185948, NA, NA, 0.07740797, NA, 0.04529619, 
    NA), t0.8 = c(NA, NA, 0.28501358, 0.2147403, NA, 0.11093412, 
    0.04529619, NA, NA, NA, NA, NA, NA)), .Names = c("MInc", 
"t0.01", "t0.1", "t0.2", "t0.5", "t0.8"), class = "data.frame",
 row.names = c("A4GALT", 
"AAAS", "AADAC", "AASS", "AATF", "AATK", "ABCA8", "ABCB10", "ABCB8", 
"ABCC8", "ABCD2", "ABCG2", "ABHD12"))

答案 1 :(得分:2)

你可以这样做:

# Per-row minimum over every column except the first (NAs skipped), divided
# by the first column. Uses TRUE rather than the reassignable shorthand T, and
# df[-1] rather than a fixed 2:6 range so the column count is not hard-coded.
apply(df[-1], 1, min, na.rm = TRUE) / df[, 1]

     A4GALT         AAAS        AADAC         AASS         AATF         AATK        ABCA8       ABCB10        ABCB8        ABCC8        ABCD2        ABCG2 
0.4662246112 0.0003767984 0.9412965428 0.9690218294 0.6461605930 0.4319888037 0.2872031667 0.3335934434 0.5150036633 0.4908098035 0.3938716290 0.3116005646 
      ABHD12 
1.2697860152 

答案 2 :(得分:1)

这里 matrixStats 包可能会有所帮助,因为它针对这类操作进行了优化。

library("matrixStats")
rowMins(as.matrix(df1[,-1]), na.rm = TRUE)/df1[,1]
#  [1] 0.4662246112 0.0003767984 0.9412965428 0.9690218294 0.6461605930
#  [6] 0.4319888037 0.2872031667 0.3335934434 0.5150036633 0.4908098035
# [11] 0.3938716290 0.3116005646 1.2697860152

library("microbenchmark")
# apply()-based variant: row-wise minimum of every column except the first,
# divided by the first column.
#
# d: a data.frame or matrix with at least two columns; column 1 is the divisor.
# Returns one value per row; NAs in the non-first columns are skipped.
#
# All columns after the first are used. The previous hard-coded 2:6 range
# silently ignored the remaining columns of wider inputs (such as a
# 100-column benchmark matrix) and failed on inputs with fewer than 6 columns.
# NOTE: timings quoted alongside this function were measured with the 2:6 form.
jalapic <- function(d) {
  # drop = FALSE keeps a two-dimensional object so apply() also accepts
  # two-column input.
  apply(d[, -1, drop = FALSE], 1, min, na.rm = TRUE) / d[, 1]
}
# matrixStats-based variant: per-row minimum over all columns but the first
# (NAs skipped), divided element-wise by the first column.
thomas <- function(d) {
  others <- as.matrix(d[, -1])
  rowMins(others, na.rm = TRUE) / d[, 1]
}
# pmin()-based variant: row-wise minimum of every column except the first,
# divided by the first column.
#
# d: a data.frame or matrix with at least two columns; column 1 is the divisor.
# Returns a numeric vector with one value per row; NAs are skipped.
#
# pmin() needs each column as its own argument. The previous
# list(d[, -1], na.rm = TRUE) handed the whole table to pmin() as a single
# argument, which returns it unchanged, so the function divided every cell by
# column 1 instead of computing the row minimum.
# NOTE: timings quoted alongside this function were measured with that form.
akrun <- function(d) {
  # as.data.frame() lets matrix input be split into columns as well; unname()
  # keeps column names from being matched against pmin()'s own arguments.
  cols <- unname(as.list(as.data.frame(d[, -1, drop = FALSE])))
  do.call(pmin, c(cols, list(na.rm = TRUE))) / d[, 1]
}
# Time the three implementations on the small example data frame.
microbenchmark(
  jalapic(df1),
  akrun(df1),
  thomas(df1)
)
# Unit: microseconds
#       expr     min       lq     mean   median       uq      max neval
#  jalapic() 232.471 242.6705 260.6561 255.5640 273.4615  336.775   100
#    akrun() 521.904 555.5815 606.8519 580.0215 602.7295 2430.161   100
#   thomas() 159.727 167.0405 188.5057 175.8935 203.4120  341.393   100

这里有一些基准测试:

# Second benchmark input: 1e5 random normals arranged in a 1000-row matrix,
# seeded for reproducibility.
set.seed(123)
df2 <- matrix(rnorm(1e5), nrow = 1000)
microbenchmark(
  jalapic(df2),
  akrun(df2),
  thomas(df2)
)
# Unit: milliseconds
#          expr      min       lq     mean   median       uq      max neval
#  jalapic(df2) 1.871308 1.951365 2.049041 1.997358 2.052397 3.811125   100
#    akrun(df2) 2.400140 2.691882 3.250569 2.725560 4.373634 4.632084   100
#   thomas(df2) 1.256649 1.367110 1.623601 1.588996 1.610165 3.491672   100

这里是更大数据集的可比基准:

https://www.example.com/index?q=aoiehgoaiwghe&p=21490719