根据行中的名称收集4个数据集

时间:2016-07-07 13:46:33

标签: r

我有4个数据集如下:

> dput(data1)
structure(c("Mark_1_Rep_Mean", "Chris_1_Rep_Mean", "Tom_3_Rep_Mean", 
            "Eric_1_Rep_Mean", "Marcel_2_Rep_Mean", "Grant_1_Rep_Mean", 
            "Mark_1_Rep1", "Chris_1_Rep1", "Tom_3_Rep1", "Eric_1_Rep1", 
            "Marcel_2_Rep1", "Grant_1_Rep1", "27772867.153991", "37880613.0075859", 
            "17708404.9082011", "8688739.67922574", "5728817.17146037", "3828066.3503363"
), .Dim = c(6L, 3L), .Dimnames = list(c("vec_gather", "vec_gather", 
                                        "vec_gather", "vec_gather", "vec_gather", "vec_gather"), c("Rep_Mean", 
                                                                                                   "Rep1", "AUC_Rep1")))

> dput(data2)
structure(c("Nina_1_Rep_Mean", "Chris_1_Rep_Mean", "Tim_3_Rep_Mean", 
            "Eric_1_Rep_Mean", "Marcel_2_Rep_Mean", "Grant_1_Rep_Mean", 
            "Nina_1_Rep1", "Chris_1_Rep1", "Tim_3_Rep1", "Eric_1_Rep1", 
            "Marcel_2_Rep1", "Grant_1_Rep1", "3291992.02338316", "2551728.32427674", 
            "2979707.83155071", "6171673.62102796", "6931956.13708179", "3778083.59661274"
), .Dim = c(6L, 3L), .Dimnames = list(c("vec_AGI_mono", "vec_gather", 
                                        "vec_gather", "vec_gather", "vec_gather", "vec_gather"), c("Rep_Mean", 
                                                                                                   "Rep2", "AUC_Rep2")))

> dput(data3)
structure(c("Mark_1_Rep_Mean", "Tim_1_Rep_Mean", "Tom_3_Rep_Mean", 
            "Eric_1_Rep_Mean", "Marcel_2_Rep_Mean", "John_1_Rep_Mean", 
            "Mark_1_Rep1", "Tim_1_Rep1", "Tom_3_Rep1", "Eric_1_Rep1", 
            "Marcel_2_Rep1", "John_1_Rep1", "3639230.51627098", "2879829.23738129", 
            "1529562.30387159", "1963745.90759888", "1975191.01828114", "2663188.06544836"
), .Dim = c(6L, 3L), .Dimnames = list(c("vec_AGI_mono", "vec_gather", 
                                        "vec_gather", "vec_gather", "vec_gather", "vec_gather"), c("Rep_Mean", 
                                                                                                   "Rep3", "AUC_Rep3")))

> dput(data4)
structure(c("Mark_1_Rep_Mean", "Chris_1_Rep_Mean", "Tom_3_Rep_Mean", 
            "Eric_2_Rep_Mean", "John_2_Rep_Mean", "Grant_1_Rep_Mean", 
            "Mark_1_Rep1", "Chris_1_Rep1", "Tom_3_Rep1", "Eric_1_Rep1", 
            "John_2_Rep1", "Grant_1_Rep1", "7320549.56504402", "2478231.1406273", 
            "2839730.47826704", "2410237.25323096", "31706460.4100607", "30098040.5281682"
), .Dim = c(6L, 3L), .Dimnames = list(c("vec_AGI_mono", "vec_gather", 
                                        "vec_gather", "vec_gather", "vec_gather", "vec_gather"), c("Rep_Mean", 
                                                                                                   "Rep4", "AUC_Rep4")))

我想将它们合并到一个表中,但需要满足特定的条件。首先,我想从这些数据集中获取所有可能的名称(第一列):

# Pull the first column (the "Rep_Mean" identifiers) out of each replicate
# matrix.
vec_rep1 <- data1[, 1]
vec_rep2 <- data2[, 1]
vec_rep3 <- data3[, 1]
vec_rep4 <- data4[, 1]

# Every distinct identifier seen in any replicate; unique() keeps the
# first-occurrence order. (The original line was missing the closing
# parenthesis of unique() and did not parse.)
vec_all <- unique(c(vec_rep1, vec_rep2, vec_rep3, vec_rep4))

这将是我新表中的第一列。现在我想把这4个数据集中的其他列也填入新表。脚本应该在每个数据集中查找新表第一列中的每个名称(即第一步中提取出的唯一名称),并把该数据集中对应的其余列放到它后面。当然,并非每个重复实验(数据集)中都能找到所有名称,因此如果某个名称缺失,只需在表中填入NA。

期望的输出:

Rep_Mean               Rep1               AUC_Rep1      Rep2        AUC_Rep2          Rep3            AUC_Rep3          Rep4            AUC_Rep4
"AT1G01050_1_Rep_Mean" "NA or name" "NA or number"    "NA or name"  "NA or number"   "NA or name"     "NA or number"    "NA or name"      "NA or number"
"AT1G01080_1_Rep_Mean" "NA or name" "NA or number "   "NA or name"  "NA or number"   "NA or name"     "NA or number"    "NA or name"     "NA or number"
"AT1G01080_3_Rep_Mean" "NA or name" "NA or number"    "NA or name"  "NA or number"   "NA or name"     "NA or number"    "NA or name"     "NA or number"

1 个答案:

答案 0 :(得分:1)

使用dplyr包中的full_join函数:
library(dplyr)

# Convert each replicate matrix to a data frame, then fold the list with
# successive full joins on the shared identifier column. A full join keeps
# every Rep_Mean value found in any replicate and fills the columns of the
# replicates that lack it with NA. The key is given explicitly so the join
# does not depend on whichever column names happen to coincide.
Reduce(
  function(x, y) full_join(x, y, by = "Rep_Mean"),
  lapply(list(data1, data2, data3, data4), as.data.frame)
)