我已从全局环境中的文件夹中加载了尽可能多的文本文件
现在我想将它们合并到一个数据框
作为一个例子,我提到了3个数据集和一个愿望输出
df1<- structure(list(V1 = structure(c(9L, 15L, 3L, 2L, 12L, 7L, 8L,
10L, 4L, 5L, 1L, 6L, 16L, 11L, 14L, 13L, 17L), .Label = c("ASS1",
"CLCN4", "CXorf56", "DNAL4", "ELAC2", "IPP", "MMP15", "MTMR14",
"NIPSNAP3B", "NPR1", "POLR2J", "PWP2", "RGS4", "SEC23IP", "TF",
"TMEM59", "UQCRC1"), class = "factor"), V2 = c(3.771321309, 4.121988898,
5.555893632, 4.586876086, 6.279490572, 6.004261107, 6.613729673,
5.185145989, 5.63567329, 5.785365957, 9.018526719, 5.734111507,
9.809870554, 9.09813781, 5.643864005, 4.540559556, 9.375200415
)), .Names = c("V1", "V2"), class = "data.frame", row.names = c(NA,
-17L))
第二个df,如下所示
df2<- structure(list(V1 = structure(c(2L, 3L, 1L), .Label = c("CXorf56",
"NIPSNAP3B", "TF"), class = "factor"), V2 = c(3.771321309, 4.121988898,
5.555893632)), .Names = c("V1", "V2"), class = "data.frame", row.names = c(NA,
-3L))
和第三个df如下
df3<- structure(list(V1 = structure(c(6L, 7L, 11L, 17L, 3L, 2L, 14L,
9L, 10L, 12L, 4L, 5L, 1L, 8L, 18L, 13L, 16L, 15L, 19L), .Label = c("ASS1",
"CLCN4", "CXorf56", "DNAL4", "ELAC2", "EMX2", "FUS", "IPP", "MMP15",
"MTMR14", "NIPSNAP3B", "NPR1", "POLR2J", "PWP2", "RGS4", "SEC23IP",
"TF", "TMEM59", "UQCRC1"), class = "factor"), V2 = c(4.037370833,
6.933306871, 3.771321309, 4.121988898, 5.555893632, 4.586876086,
6.279490572, 6.004261107, 6.613729673, 5.185145989, 5.63567329,
5.785365957, 9.018526719, 5.734111507, 9.809870554, 9.09813781,
5.643864005, 4.540559556, 9.375200415)), .Names = c("V1", "V2"
), class = "data.frame", row.names = c(NA, -19L))
其中三个有一些共同点,有些没有
我想创建一个通用名称,然后为那些没有这些名称的数据添加零值
欲望输出如下
output<- structure(list(V1 = structure(c(11L, 6L, 7L, 12L, 18L, 3L, 2L,
15L, 9L, 10L, 13L, 4L, 5L, 1L, 8L, 19L, 14L, 17L, 16L, 20L), .Label = c("ASS1",
"CLCN4", "CXorf56", "DNAL4", "ELAC2", "EMX2", "FUS", "IPP", "MMP15",
"MTMR14", "names", "NIPSNAP3B", "NPR1", "POLR2J", "PWP2", "RGS4",
"SEC23IP", "TF", "TMEM59", "UQCRC1"), class = "factor"), V2 = structure(c(19L,
1L, 1L, 2L, 3L, 7L, 5L, 13L, 12L, 14L, 6L, 8L, 11L, 15L, 10L,
18L, 16L, 9L, 4L, 17L), .Label = c("0", "3.771321309", "4.121988898",
"4.540559556", "4.586876086", "5.185145989", "5.555893632", "5.63567329",
"5.643864005", "5.734111507", "5.785365957", "6.004261107", "6.279490572",
"6.613729673", "9.018526719", "9.09813781", "9.375200415", "9.809870554",
"df1"), class = "factor"), V3 = structure(c(5L, 1L, 1L, 2L, 3L,
4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("0",
"3.771321309", "4.121988898", "5.555893632", "df2"), class = "factor"),
V4 = structure(c(20L, 2L, 15L, 1L, 3L, 7L, 5L, 13L, 12L,
14L, 6L, 8L, 11L, 16L, 10L, 19L, 17L, 9L, 4L, 18L), .Label = c("3.771321309",
"4.037370833", "4.121988898", "4.540559556", "4.586876086",
"5.185145989", "5.555893632", "5.63567329", "5.643864005",
"5.734111507", "5.785365957", "6.004261107", "6.279490572",
"6.613729673", "6.933306871", "9.018526719", "9.09813781",
"9.375200415", "9.809870554", "df3"), class = "factor")), .Names = c("V1",
"V2", "V3", "V4"), class = "data.frame", row.names = c(NA, -20L
))
答案 0 :(得分:0)
在这里,我将如何处理这个问题。希望这可以帮助。 rbindlist()
和它一样好!
我确定某人可以最大限度地提高rename_list()
,但您明白了这一点。
library(tidyr)
library(data.table)
rename_list <- function(list){
for(i in 1:length(list)){
list[[i]] <- list[[i]]%>%
magrittr::set_colnames(c("V1", paste("df", i, sep = "")))
}
return(list)
}
mget(ls(pattern = "df"))%>%
rename_list()%>%
rbindlist(idcol = TRUE)%>%
spread(.id, df1)
V1 df1 df2 df3
1: ASS1 9.018527 NA 9.018527
2: CLCN4 4.586876 NA 4.586876
3: CXorf56 5.555894 5.555894 5.555894
4: DNAL4 5.635673 NA 5.635673
5: ELAC2 5.785366 NA 5.785366
6: IPP 5.734112 NA 5.734112
7: MMP15 6.004261 NA 6.004261
8: MTMR14 6.613730 NA 6.613730
9: NIPSNAP3B 3.771321 3.771321 3.771321
10: NPR1 5.185146 NA 5.185146
11: POLR2J 9.098138 NA 9.098138
12: PWP2 6.279491 NA 6.279491
13: RGS4 4.540560 NA 4.540560
14: SEC23IP 5.643864 NA 5.643864
15: TF 4.121989 4.121989 4.121989
16: TMEM59 9.809871 NA 9.809871
17: UQCRC1 9.375200 NA 9.375200
18: EMX2 NA NA 4.037371
19: FUS NA NA 6.933307