我有4个数据集如下:
> dput(data1)
structure(c("Mark_1_Rep_Mean", "Chris_1_Rep_Mean", "Tom_3_Rep_Mean",
"Eric_1_Rep_Mean", "Marcel_2_Rep_Mean", "Grant_1_Rep_Mean",
"Mark_1_Rep1", "Chris_1_Rep1", "Tom_3_Rep1", "Eric_1_Rep1",
"Marcel_2_Rep1", "Grant_1_Rep1", "27772867.153991", "37880613.0075859",
"17708404.9082011", "8688739.67922574", "5728817.17146037", "3828066.3503363"
), .Dim = c(6L, 3L), .Dimnames = list(c("vec_gather", "vec_gather",
"vec_gather", "vec_gather", "vec_gather", "vec_gather"), c("Rep_Mean",
"Rep1", "AUC_Rep1")))
> dput(data2)
structure(c("Nina_1_Rep_Mean", "Chris_1_Rep_Mean", "Tim_3_Rep_Mean",
"Eric_1_Rep_Mean", "Marcel_2_Rep_Mean", "Grant_1_Rep_Mean",
"Nina_1_Rep1", "Chris_1_Rep1", "Tim_3_Rep1", "Eric_1_Rep1",
"Marcel_2_Rep1", "Grant_1_Rep1", "3291992.02338316", "2551728.32427674",
"2979707.83155071", "6171673.62102796", "6931956.13708179", "3778083.59661274"
), .Dim = c(6L, 3L), .Dimnames = list(c("vec_AGI_mono", "vec_gather",
"vec_gather", "vec_gather", "vec_gather", "vec_gather"), c("Rep_Mean",
"Rep2", "AUC_Rep2")))
> dput(data3)
structure(c("Mark_1_Rep_Mean", "Tim_1_Rep_Mean", "Tom_3_Rep_Mean",
"Eric_1_Rep_Mean", "Marcel_2_Rep_Mean", "John_1_Rep_Mean",
"Mark_1_Rep1", "Tim_1_Rep1", "Tom_3_Rep1", "Eric_1_Rep1",
"Marcel_2_Rep1", "John_1_Rep1", "3639230.51627098", "2879829.23738129",
"1529562.30387159", "1963745.90759888", "1975191.01828114", "2663188.06544836"
), .Dim = c(6L, 3L), .Dimnames = list(c("vec_AGI_mono", "vec_gather",
"vec_gather", "vec_gather", "vec_gather", "vec_gather"), c("Rep_Mean",
"Rep3", "AUC_Rep3")))
> dput(data4)
structure(c("Mark_1_Rep_Mean", "Chris_1_Rep_Mean", "Tom_3_Rep_Mean",
"Eric_2_Rep_Mean", "John_2_Rep_Mean", "Grant_1_Rep_Mean",
"Mark_1_Rep1", "Chris_1_Rep1", "Tom_3_Rep1", "Eric_1_Rep1",
"John_2_Rep1", "Grant_1_Rep1", "7320549.56504402", "2478231.1406273",
"2839730.47826704", "2410237.25323096", "31706460.4100607", "30098040.5281682"
), .Dim = c(6L, 3L), .Dimnames = list(c("vec_AGI_mono", "vec_gather",
"vec_gather", "vec_gather", "vec_gather", "vec_gather"), c("Rep_Mean",
"Rep4", "AUC_Rep4")))
我想将它们合并到一张表中,但需要满足特定的条件。首先,我想从这些数据集中获取所有可能的名称(即第一列):
# Pull the first column ("Rep_Mean") out of each replicate matrix.
vec_rep1 <- data1[, 1]
vec_rep2 <- data2[, 1]
vec_rep3 <- data3[, 1]
vec_rep4 <- data4[, 1]
# Every distinct name seen in any replicate, in order of first appearance.
# (The original line was missing the closing parenthesis of unique().)
vec_all <- unique(c(vec_rep1, vec_rep2, vec_rep3, vec_rep4))
这将作为我新表的第一列。现在我想从这4个数据集中填入其余各列。脚本应该在每个数据集中查找新表第一列中的每个名称(即第一步中提取出的唯一名称),并把该数据集中对应的后续列放入新表。当然,并不是每个重复(replicate,即每个数据集)中都能找到所有名称,因此如果某项缺失,只需在表中填入NA。
期望的输出:
Rep_Mean Rep1 AUC_Rep1 Rep2 AUC_Rep2 Rep3 AUC_Rep3 Rep4 AUC_Rep4
"AT1G01050_1_Rep_Mean" "NA or name" "NA or number" "NA or name" "NA or number" "NA or name" "NA or number" "NA or name" "NA or number"
"AT1G01080_1_Rep_Mean" "NA or name" "NA or number " "NA or name" "NA or number" "NA or name" "NA or number" "NA or name" "NA or number"
"AT1G01080_3_Rep_Mean" "NA or name" "NA or number" "NA or name" "NA or number" "NA or name" "NA or number" "NA or name" "NA or number"
答案 0 :(得分:1)
使用 dplyr 包中的 full_join 函数:
library(dplyr)

# Merge the four replicate matrices into one table keyed on "Rep_Mean".
# Each character matrix is first converted to a data frame; the frames are
# then folded together left to right with a full join, so a name that is
# absent from a given replicate gets NA in that replicate's columns.
# The join key is spelled out explicitly instead of relying on dplyr's
# natural-join guess (which also emits a "Joining with `by = ...`" message).
Reduce(
  function(acc, nxt) full_join(acc, nxt, by = "Rep_Mean"),
  lapply(list(data1, data2, data3, data4), as.data.frame)
)