这是我的数据结构:
# Example data frame literal: two rows ("DDK", "DDL") and, for each of the
# prefixes UDD, SSX, SPP and EEE, three columns named <prefix>_beta,
# <prefix>_pval and <prefix>_R.sq (12 columns in total).
# NOTE(review): the value is not assigned to a name here; the snippets that
# follow refer to it as df1 / df / data / INPUT, so assign it accordingly
# before running them.
structure(list(UDD_beta = c(1.17136554204268, 0.939587997289016
), UDD_pval = c(0, 0), UDD_R.sq = c(0.749044972637797, 0.516943886705951
), SSX_beta = c(1.05356804780772, 0.927948300464624), SSX_pval = c(0,
0), SSX_R.sq = c(0.60226298037862, 0.629111666509209), SPP_beta = c(0.675765151939885,
0.516425218613404), SPP_pval = c(0, 0), SPP_R.sq = c(0.479849538274406,
0.378266618442121), EEE_beta = c(0.690521022226874, 0.639380962824289
), EEE_pval = c(0, 0), EEE_R.sq = c(0.585610742768951, 0.676073352909597
)), .Names = c("UDD_beta", "UDD_pval", "UDD_R.sq", "SSX_beta",
"SSX_pval", "SSX_R.sq", "SPP_beta", "SPP_pval", "SPP_R.sq",
"EEE_beta", "EEE_pval", "EEE_R.sq"), row.names = c("DDK", "DDL"
), class = "data.frame")
我想取所有 R.sq 列,对每一行找出其中的最大值及其所在的列名,然后取出与该列对应的 beta 值。预期输出:
Name Value
DDK UDD 1.17136554204268
DDL EEE 0.690521022226874
抱歉,第二个预期值应为 0.639380962824289。
答案 0 :(得分:2)
我们可以使用 max.col。先用 grep 选出感兴趣的列(即名称中带有“R.sq”的列),再用 max.col 得到每一行最大值所在的列索引。然后用这个索引取得列名,并通过行/列(row/column)索引取出对应的 beta 值。
# For each row of df1, find the prefix whose R-squared is largest and report
# that prefix together with the matching beta value.

# Positions of the R-squared columns. The dot is escaped and the pattern is
# anchored so that only names ending in "_R.sq" are selected.
i1 <- grep("_R\\.sq$", names(df1))
# Per row: index (within the R-squared columns) of the largest value; ties
# resolve to the first such column.
i2 <- max.col(df1[i1], ties.method = "first")
# Prefix of every R-squared column, in the same order as i1.
prefixes <- sub("_R\\.sq$", "", names(df1)[i1])
# Positions of the beta columns, paired with the R-squared columns by name
# instead of relying on both sets appearing in the same order in df1.
i3 <- match(paste0(prefixes, "_beta"), names(df1))
# Matrix indexing with (row, column) pairs picks one beta per row.
res <- data.frame(Names = prefixes[i2],
                  Value = df1[i3][cbind(seq_len(nrow(df1)), i2)])
row.names(res) <- row.names(df1)
答案 1 :(得分:2)
你也可以使用 tidyverse:先用 gather(或较新的 pivot_longer)把 df 转换为长格式,然后筛选出 R.sq 变量并找到其中的最大值,即
library(tidyverse)
# For every row of df (identified by its row name), keep the beta of the
# prefix whose R-squared is largest. Result columns: ID, var, val.
df %>%
  rownames_to_column("ID") %>%
  # Reshape to long format: one row per (ID, variable) pair.
  pivot_longer(-ID, names_to = "var", values_to = "val") %>%
  # Only the R-squared and beta variables are relevant.
  filter(grepl("_(R\\.sq|beta)$", var)) %>%
  group_by(ID) %>%
  # Flag the R-squared row(s) holding the per-ID maximum. The flag is limited
  # to R-squared rows so that a beta which happens to equal the maximum
  # R-squared cannot mark the wrong prefix.
  mutate(
    max1 = grepl("_R\\.sq$", var) & val == max(val[grepl("_R\\.sq$", var)])
  ) %>%
  # Within each (ID, prefix) group keep the beta row when that prefix owns
  # the flagged R-squared.
  group_by(ID, grp = sub("_.*", "", var)) %>%
  filter(any(max1) & grepl("_beta$", var)) %>%
  ungroup() %>%
  select(-c(max1, grp))
给出,
# A tibble: 2 x 3
  ID    var           val
  <chr> <chr>       <dbl>
1 DDK   UDD_beta 1.171366
2 DDL   EEE_beta 0.639381
答案 2 :(得分:2)
# For each row of data, record the prefix with the largest R-squared (Name)
# and the beta of that prefix taken from the same row (Value).

# Keep only the R-squared columns and rename them to their bare prefix.
sub_data <- data[grep("_R\\.sq$", colnames(data))]
colnames(sub_data) <- sub("_R\\.sq$", "", colnames(sub_data))
# Remember the prefix columns before the result columns are appended, so the
# row-wise maximum is never computed over Name/Value themselves.
prefixes <- colnames(sub_data)
sub_data$Name <- NA_character_
sub_data$Value <- NA_real_
for (i in seq_len(nrow(sub_data))) {
  best <- prefixes[which.max(unlist(sub_data[i, prefixes]))]
  # which.max() yields nothing when every R-squared in the row is NA; leave
  # Name/Value as NA for such a row.
  if (length(best) == 0L) {
    next
  }
  sub_data$Name[i] <- best
  # Take the beta from row i only; a max() over the whole beta column would
  # return another row's value.
  sub_data$Value[i] <- data[i, paste0(best, "_beta")]
}
sub_data[c("Name", "Value")]
# Name Value
#DDK UDD 1.171366
#DDL EEE 0.639381
答案 3 :(得分:1)
# Prefixes (e.g. "UDD") of every column whose name ends in "_R.sq"; each one
# identifies a beta / R-squared pair. The dot is escaped and both patterns are
# anchored so that only the literal "_R.sq" suffix is matched and removed.
ID <- sub("_R\\.sq$", "", grep("_R\\.sq$", names(INPUT), value = TRUE))
dummy <- function(x) {
  # x: a named vector holding "<prefix>_R.sq" and "<prefix>_beta" entries for
  # every prefix in the global ID vector.
  # Returns a one-row data frame: the prefix with the largest R-squared
  # (Name) and the beta stored under that prefix (Value).
  rsq_values <- x[paste0(ID, "_R.sq")]
  winner <- ID[which.max(rsq_values)]
  beta_name <- paste0(winner, "_beta")
  data.frame(Name = winner, Value = x[beta_name])
}
# Run dummy() on every row of INPUT and stack the per-row results.
do.call(rbind, apply(INPUT, MARGIN = 1, FUN = dummy))