我有一个问题。
我有如下数据框:
MInc t0.01 t0.1 t0.2 t0.5 t0.8
A4GALT 0.1605681 0.16056814 0.27212265 0.3490585760 0.07486080 NA
AAAS 0.2992754 0.04794018 0.09057540 0.0001127665 NA NA
AADAC 0.3027883 0.30278830 0.30278830 0.3027883033 0.30278830 0.28501358
AASS 0.1307001 0.12665125 0.12665125 0.1266512501 0.12665125 0.21474030
AATF 0.1453662 0.09392991 NA NA NA NA
AATK 0.2567986 0.11338287 0.11338287 0.1133828663 0.11338287 0.11093412
ABCA8 0.1577148 0.07236169 0.07236169 0.1420553677 0.06185948 0.04529619
ABCB10 0.1437084 0.04794018 NA NA NA NA
ABCB8 0.1022297 0.05264867 NA NA NA NA
ABCC8 0.1577148 0.13209778 0.13209778 0.1320977809 0.07740797 NA
ABCD2 0.1453662 0.26370072 0.23850217 0.0572556220 NA NA
ABCG2 0.1453662 0.08066152 0.04904863 0.0452961926 0.04529619 NA
ABHD12 0.1062786 0.13495108 NA NA NA NA
我想找一个命令,将每行(不含第一列)的最小值除以该行第一列中的对应值。
输出必须是一个数据框,其中一列包含每行的此值。例如,对于我的数据的第一行,计算将是:
A4GALT 0.07486080/0.1605681 = 0.4662246.
谢谢!
答案 0 :(得分:3)
我们可以使用 pmin 来获取每行的最小值。如果需要将每行的最小值除以数据集的所有列,可以把 pmin 得到的结果按行复制:一种做法是用 row(df1) 作为索引,再除以 df1。
do.call(pmin, c(df1[-1], na.rm=TRUE))[row(df1)]/df1
或者,如果只需要除以第 1 列,我们就除以 df1 的子集(使用选项 drop=FALSE 以避免结果被转换为 vector)。
do.call(pmin, c(df1[-1], na.rm=TRUE))/df1[,1,drop=FALSE]
# MInc
# A4GALT 0.4662246112
# AAAS 0.0003767984
# AADAC 0.9412965428
# AASS 0.9690218294
# AATF 0.6461605930
# AATK 0.4319888037
# ABCA8 0.2872031667
# ABCB10 0.3335934434
# ABCB8 0.5150036633
# ABCC8 0.4908098035
# ABCD2 0.3938716290
# ABCG2 0.3116005646
# ABHD12 1.2697860152
注意:我使用 df1[-1],是因为提问者(OP)提到要省略第一列。但如果 OP 指的是省略行名(rownames),那么代码就是
do.call(pmin, c(df1, na.rm=TRUE))/df1[,1,drop=FALSE]
set.seed(238)
m1 <- matrix(rnorm(5000*5000), ncol=5000)
df2 <- as.data.frame(m1)
# Benchmark candidate: apply() takes the minimum (NAs removed) of each row of
# the global matrix `m1` without its first column, then divides by that first
# column.
jalapic <- function() {
  row_min <- apply(m1[, -1], 1, min, na.rm = TRUE)
  row_min / m1[, 1]
}
# Benchmark candidate: rowMins() takes the minimum (NAs removed) of each row
# of the global matrix `m1` without its first column, then divides by that
# first column.
thomas <- function() {
  row_min <- rowMins(m1[, -1], na.rm = TRUE)
  row_min / m1[, 1]
}
# Benchmark candidate: every column of the global data frame `df2` except the
# first is passed to pmin() as a separate argument (NAs removed); the
# resulting per-row minimum is divided by the first column.
akrun <- function() {
  pmin_args <- c(df2[, -1], na.rm = TRUE)
  do.call(pmin, pmin_args) / df2[, 1]
}
microbenchmark(jalapic(), thomas(), akrun(), unit='relative', times=20L)
#Unit: relative
# expr min lq mean median uq max neval cld
#jalapic() 2.255453 2.224805 2.088557 2.145412 2.133398 1.9793887 20 b
#thomas() 1.000000 1.000000 1.000000 1.000000 1.000000 1.0000000 20 a
# akrun() 1.248002 1.227203 1.133792 1.212745 1.174489 0.8228857 20 a
# Example data from the question: 13 named rows by 6 numeric columns.
# `MInc` is the first column; `t0.01` ... `t0.8` follow, with NA marking
# missing values.
df1 <- data.frame(
  MInc = c(
    0.1605681, 0.2992754, 0.3027883, 0.1307001, 0.1453662, 0.2567986,
    0.1577148, 0.1437084, 0.1022297, 0.1577148, 0.1453662, 0.1453662,
    0.1062786
  ),
  t0.01 = c(
    0.16056814, 0.04794018, 0.3027883, 0.12665125, 0.09392991, 0.11338287,
    0.07236169, 0.04794018, 0.05264867, 0.13209778, 0.26370072, 0.08066152,
    0.13495108
  ),
  t0.1 = c(
    0.27212265, 0.0905754, 0.3027883, 0.12665125, NA, 0.11338287,
    0.07236169, NA, NA, 0.13209778, 0.23850217, 0.04904863, NA
  ),
  t0.2 = c(
    0.349058576, 0.0001127665, 0.3027883033, 0.1266512501, NA,
    0.1133828663, 0.1420553677, NA, NA, 0.1320977809, 0.057255622,
    0.0452961926, NA
  ),
  t0.5 = c(
    0.0748608, NA, 0.3027883, 0.12665125, NA, 0.11338287, 0.06185948,
    NA, NA, 0.07740797, NA, 0.04529619, NA
  ),
  t0.8 = c(
    NA, NA, 0.28501358, 0.2147403, NA, 0.11093412, 0.04529619, NA, NA,
    NA, NA, NA, NA
  ),
  row.names = c(
    "A4GALT", "AAAS", "AADAC", "AASS", "AATF", "AATK", "ABCA8", "ABCB10",
    "ABCB8", "ABCC8", "ABCD2", "ABCG2", "ABHD12"
  )
)
答案 1 :(得分:2)
你可以这样做:
# Row-wise minimum (NAs removed) of every column except the first, divided by
# the first column. `df1` is the example data frame defined in this document;
# the original snippet referred to it as `df`, which is not defined here.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
apply(df1[-1], 1, min, na.rm = TRUE) / df1[, 1]
A4GALT AAAS AADAC AASS AATF AATK ABCA8 ABCB10 ABCB8 ABCC8 ABCD2 ABCG2
0.4662246112 0.0003767984 0.9412965428 0.9690218294 0.6461605930 0.4319888037 0.2872031667 0.3335934434 0.5150036633 0.4908098035 0.3938716290 0.3116005646
ABHD12
1.2697860152
答案 2 :(得分:1)
这里 matrixStats 包可能会有所帮助,因为它针对这些操作进行了优化:
library("matrixStats")
rowMins(as.matrix(df1[,-1]), na.rm = TRUE)/df1[,1]
# [1] 0.4662246112 0.0003767984 0.9412965428 0.9690218294 0.6461605930
# [6] 0.4319888037 0.2872031667 0.3335934434 0.5150036633 0.4908098035
# [11] 0.3938716290 0.3116005646 1.2697860152
library("microbenchmark")
# Benchmark candidate: row-wise minimum via apply(), divided by the first
# column.
#
# d: a data frame or matrix with at least two columns.
# Returns one value per row: the minimum (NAs removed) over all columns of
# `d` except the first, divided by the first column.
#
# All columns after the first are used (not a fixed 2:6 range) so the function
# computes the same quantity as the other candidates on inputs of any width.
# drop = FALSE keeps a two-column input two-dimensional, which apply() needs.
jalapic <- function(d) {
  other_cols <- d[, -1, drop = FALSE]
  apply(other_cols, 1, min, na.rm = TRUE) / d[, 1]
}
# Benchmark candidate: all columns of `d` except the first are converted to a
# matrix, rowMins() takes the per-row minimum (NAs removed), and the result is
# divided by the first column of `d`.
thomas <- function(d) {
  other_cols <- as.matrix(d[, -1])
  rowMins(other_cols, na.rm = TRUE) / d[, 1]
}
# Benchmark candidate: parallel minimum (pmin) across all columns of `d`
# except the first, divided by the first column of `d`.
#
# d: a data frame or matrix with at least two columns.
# Returns a numeric vector with one value per row.
#
# The columns must reach pmin() as separate arguments, so they are spliced
# into the argument list with c(). Wrapping the whole object in list() would
# hand pmin() a single argument, and no row-wise minimum would be taken.
# as.data.frame() makes the same splicing work for matrices, and drop = FALSE
# keeps a two-column input from collapsing to a bare vector.
akrun <- function(d) {
  other_cols <- as.data.frame(d[, -1, drop = FALSE])
  do.call(pmin, c(other_cols, na.rm = TRUE)) / d[, 1]
}
microbenchmark(jalapic(df1), akrun(df1), thomas(df1))
# Unit: microseconds
# expr min lq mean median uq max neval
# jalapic() 232.471 242.6705 260.6561 255.5640 273.4615 336.775 100
# akrun() 521.904 555.5815 606.8519 580.0215 602.7295 2430.161 100
# thomas() 159.727 167.0405 188.5057 175.8935 203.4120 341.393 100
这里有一些基准测试:
set.seed(123)
df2 <- matrix(rnorm(1e5), nrow = 1000)
microbenchmark(jalapic(df2), akrun(df2), thomas(df2))
# Unit: milliseconds
# expr min lq mean median uq max neval
# jalapic(df2) 1.871308 1.951365 2.049041 1.997358 2.052397 3.811125 100
# akrun(df2) 2.400140 2.691882 3.250569 2.725560 4.373634 4.632084 100
# thomas(df2) 1.256649 1.367110 1.623601 1.588996 1.610165 3.491672 100
这里是更大数据集的可比基准:
https://www.example.com/index?q=aoiehgoaiwghe&p=21490719