按行查找特定列的最大值,并提取该列的名称以及另一列中对应的行值

时间:2017-08-17 12:08:28

标签: r dataframe

这是我的数据结构:

# Example data (appears to be dput() output): a data.frame with two rows,
# "DDK" and "DDL", and twelve numeric columns. For each of the prefixes
# UDD, SSX, SPP and EEE there are three columns: <prefix>_beta,
# <prefix>_pval and <prefix>_R.sq.
# Note: the expression is not assigned to a name here.
structure(list(UDD_beta = c(1.17136554204268, 0.939587997289016
), UDD_pval = c(0, 0), UDD_R.sq = c(0.749044972637797, 0.516943886705951
), SSX_beta = c(1.05356804780772, 0.927948300464624), SSX_pval = c(0, 
0), SSX_R.sq = c(0.60226298037862, 0.629111666509209), SPP_beta = c(0.675765151939885, 
0.516425218613404), SPP_pval = c(0, 0), SPP_R.sq = c(0.479849538274406, 
0.378266618442121), EEE_beta = c(0.690521022226874, 0.639380962824289
), EEE_pval = c(0, 0), EEE_R.sq = c(0.585610742768951, 0.676073352909597
)), .Names = c("UDD_beta", "UDD_pval", "UDD_R.sq", "SSX_beta", 
"SSX_pval", "SSX_R.sq", "SPP_beta", "SPP_pval", "SPP_R.sq", 
"EEE_beta", "EEE_pval", "EEE_R.sq"), row.names = c("DDK", "DDL"
), class = "data.frame")

我想取所有R.sq列,对每一行找出其中的最大值及其所在列的名称,然后取出该列对应的beta值。预期输出:

    Name Value
DDK UDD 1.17136554204268
DDL EEE 0.690521022226874

抱歉,第二个预期值应为0.639380962824289

4 个答案:

答案 0 :(得分:2)

我们可以使用max.col。先用grep选出感兴趣的列(即名称中含有“R.sq”的列),然后用max.col得到每一行最大值所在的列索引。再利用该索引获取列名称,并通过行/列(row/column)索引矩阵取出每一行对应的beta值

# For every row of `df1`, find the "<prefix>_R.sq" column holding the largest
# value and return that prefix together with the row's "<prefix>_beta" value.

# Indices of the R.sq columns. The dot is escaped and the pattern anchored so
# that only names ending in the literal suffix "_R.sq" are selected.
i1 <- grep("_R\\.sq$", names(df1))
prefix <- sub("_R\\.sq$", "", names(df1)[i1])

# Per-row position (within the R.sq columns) of the maximum; on ties the
# first column wins.
i2 <- max.col(as.matrix(df1[i1]), ties.method = "first")

# Look the beta columns up by name so they are guaranteed to line up with the
# R.sq columns even if the column order in `df1` differs.
i3 <- match(paste0(prefix, "_beta"), names(df1))
stopifnot(!anyNA(i3))

# A two-column (row, column) index matrix picks one beta value per row.
res <- data.frame(
  Names = prefix[i2],
  Value = as.matrix(df1[i3])[cbind(seq_len(nrow(df1)), i2)]
)
row.names(res) <- row.names(df1)

答案 1 :(得分:2)

你可以使用tidyverse:先用gather将df转换为长格式,再筛选出R.sq和beta变量,并找出每行R.sq的最大值,即

library(tidyverse)

# For every row of `df` (identified by its row name), keep the beta value of
# the prefix whose R.sq is the largest in that row.
df %>%
  rownames_to_column("ID") %>%
  # Long format: one line per (ID, variable) pair.
  gather(var, val, -ID) %>%
  # Keep only the R.sq and beta variables (dot escaped, suffix anchored).
  filter(grepl("_(R\\.sq|beta)$", var)) %>%
  group_by(ID) %>%
  # Flag only the R.sq entry that equals the row's maximum R.sq. Restricting
  # the flag to R.sq entries prevents a beta value that happens to equal the
  # maximum R.sq from marking the wrong prefix.
  mutate(
    is_rsq = grepl("_R\\.sq$", var),
    max1 = as.integer(is_rsq & val == max(val[is_rsq]))
  ) %>%
  # Group by prefix so the flag on the R.sq entry carries over to its beta.
  group_by(ID, grp = sub("_.*", "", var)) %>%
  filter(!all(max1 == 0) & grepl("_beta$", var)) %>%
  ungroup() %>%
  select(-c(max1, grp, is_rsq))

给出,

# A tibble: 2 x 3
     ID      var      val
  <chr>    <chr>    <dbl>
1   DDK UDD_beta 1.171366
2   DDL EEE_beta 0.639381

答案 2 :(得分:2)

# For every row of `data`, find the prefix whose "<prefix>_R.sq" value is the
# largest and report that prefix (Name) with the same row's "<prefix>_beta"
# value (Value).

# R.sq columns only, relabelled by prefix (e.g. "UDD_R.sq" -> "UDD").
sub_data <- data[grep("_R\\.sq$", colnames(data))]
colnames(sub_data) <- sub("_R\\.sq$", "", colnames(sub_data))

# Remember the prefixes before the result columns are added, so the search
# for the maximum never looks at Name/Value.
prefixes <- colnames(sub_data)
sub_data$Name <- rep(NA_character_, nrow(sub_data))
sub_data$Value <- rep(NA_real_, nrow(sub_data))

for (i in seq_len(nrow(sub_data))) {
  best <- which.max(unlist(sub_data[i, prefixes]))
  # which.max() returns integer(0) when the row has no non-NA R.sq value;
  # such rows keep NA in both result columns.
  if (length(best) == 1L) {
    sub_data$Name[i] <- prefixes[best]
    # Take the beta of row i itself, not the maximum of the whole beta column.
    sub_data$Value[i] <- data[i, paste0(prefixes[best], "_beta")]
  }
}
sub_data[c("Name", "Value")]
#    Name    Value
#DDK  UDD 1.171366
#DDL  EEE 0.639381

答案 3 :(得分:1)

# Prefixes (e.g. "UDD") of all columns ending in the literal suffix "_R.sq".
# The dot is escaped so it cannot match an arbitrary character.
ID <- sub("_R\\.sq$", "", grep("_R\\.sq$", names(INPUT), value = TRUE))

# Pick, for one row, the prefix with the largest R.sq and its beta.
#
# x: named numeric vector holding one row of INPUT; it must contain the
#    "<prefix>_R.sq" and "<prefix>_beta" entries for every prefix in ID.
# Returns a one-row data.frame with columns Name (prefix) and Value (beta).
dummy <- function(x) {
  # Find out which Rsq is largest
  i <- ID[which.max(x[paste0(ID, "_R.sq")])]
  # Extract beta for largest Rsq; [[ drops the element name
  data.frame(Name = i, Value = x[[paste0(i, "_beta")]])
}

# apply() converts its input to a matrix, so pass only the columns that are
# actually used; an unrelated non-numeric column would otherwise turn every
# value into a character string.
used_cols <- c(paste0(ID, "_R.sq"), paste0(ID, "_beta"))
do.call("rbind", apply(INPUT[used_cols], 1, dummy))