如何根据多个列字符串操作数据框

时间:2016-08-16 23:16:10

标签: r

我希望把 col1、col2、col3 和 col4 中的所有字符串合并成一列且互不重复,然后把每个字符串对应的 res 值放在同一行中它的旁边。所以输出看起来像这样

我希望有这样的输出

# Expected result, written as a dput()-style literal: a 27-row data frame
# with one factor column of ids (col1, 27 distinct levels) and four numeric
# columns (res1..res4). The integer codes in col1 index into .Label, and
# row.names = c(NA, -27L) is the compact form for automatic row names 1..27.
output <- structure(list(col1 = structure(c(13L, 14L, 16L, 17L, 27L, 18L, 
26L, 25L, 24L, 4L, 7L, 9L, 11L, 21L, 22L, 23L, 5L, 8L, 10L, 12L, 
15L, 1L, 2L, 3L, 6L, 19L, 20L), .Label = c("A8WFJ8", "A8WFK2", 
"A8WHR6", "A8WHS3", "A8WIT0", "A8XQE0", "A9D0C6", "A9D4S6", "A9D649", 
"A9D8E6", "A9UJN4", "A9Z1L6", "ADliba1", "ADNIL2", "B0M0N9", 
"DFGH2", "GDH76", "ML2IS5", "Q9XXL6", "Q9XXN0", "Q9XXN2", "Q9XXQ4", 
"Q9XXQ6", "QSEA12", "RR2JDG", "T2HDY3", "TR5421"), class = "factor"), 
    res1 = c(3.59e-08, 2.15e-08, 1.52e-07, 1.24e-07, 4.53e-08, 
    3.11e-08, 7.08e-08, 1.98e-08, 1.46e-08, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), res2 = c(8.11e-07, 7.21e-08, 
    0, 4.02e-08, 0, 0, 2.32e-08, 0, 1.46e-08, 3.86e-08, 2.68e-08, 
    2.7e-08, 7.76e-08, 7.76e-08, 7.76e-08, 7.76e-08, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0), res3 = c(8.76e-08, 1.4e-07, 0, 2.8e-08, 
    0, 0, 0, 0, 0, 0, 7.85e-08, 0, 0, 0, 0, 0, 2.13e-08, 3.57e-08, 
    1.46e-07, 5.23e-08, 6.44e-08, 0, 0, 0, 0, 0, 0), res4 = c(1.42e-07, 
    8.66e-08, 0, 7.64e-08, 0, 0, 6.28e-07, 0, 0, 0, 7.25e-07, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1.26e-05, 8.58e-08, 2.83e-08, 
    3.7e-08, 1.26e-05, 8.58e-08)), .Names = c("col1", "res1", 
"res2", "res3", "res4"), class = "data.frame", row.names = c(NA, 
-27L))

1 个答案:

答案 0 :(得分:2)

首先清理数据

# Split the wide data frame into a list of two-column pieces, one per pair
# of adjacent columns (1-2, 3-4, ...).
# NOTE(review): assumes df has an even number of columns laid out as
# (id, value) pairs -- confirm against the input data.
splitDF <- lapply(seq(1, ncol(df), by = 2),
                  function(x) df[x:(x + 1)])
# Rename the first column of every piece to "col1" so that all pieces share
# a key column for the later merge. The function has to return the modified
# data frame explicitly: the value of an assignment expression is the
# assigned value (the string "col1"), not the object that was modified.
splitDF <- lapply(splitDF, function(x) {
  names(x)[1] <- "col1"
  x
})
# Drop rows that contain an NA in any column of the piece.
splitDF <- lapply(splitDF, function(x) x[complete.cases(x), ])

然后,您可以使用在这里(here)找到的优秀合并方案,把列表中的各个数据框合并成一个数据框。

# Fold the list of pieces into a single data frame with successive full
# outer merges (all = TRUE keeps rows that appear in only one side and fills
# the missing cells with NA). merge() joins on the column names the two
# frames have in common. TRUE is spelled out because T is an ordinary
# variable that can be reassigned.
output <- Reduce(function(x, y) merge(x, y, all = TRUE), splitDF)

最后,您可以将所有NA值设置为零并对行重新排序。

# Substitute zero for every NA cell of the merged data frame.
output <- replace(output, is.na(output), 0)
# Row order wanted in the final result, listed by id.
varOrder <- c(
  "ADliba1", "ADNIL2", "DFGH2", "GDH76", "TR5421", "ML2IS5", "T2HDY3",
  "RR2JDG", "QSEA12", "A8WHS3", "A9D0C6", "A9D649", "A9UJN4", "Q9XXN2",
  "Q9XXQ4", "Q9XXQ6", "A8WIT0", "A9D4S6", "A9D8E6", "A9Z1L6", "B0M0N9",
  "A8WFJ8", "A8WFK2", "A8WHR6", "A8XQE0", "Q9XXL6", "Q9XXN0"
)
# match() gives, for each id in varOrder, the position of its row in output;
# indexing by those positions reorders the rows accordingly.
output <- output[match(varOrder, output[["col1"]]), ]