我正在尝试根据文件名/登录号(accession)合并这两个 data.frame,以便得到一个包含 3 列的数据框:accession、d40 和 specie。由于文件名中带有 `_` 和 `.tab`,文件名与登录号的写法略有不同。我想找到一种方法把名称统一起来,然后合并两张表。有人可以帮我吗?谢谢!!
> head(me)
filename d40
1 Ames_13502_.tab 15714.88
2 Ames_16950_.tab 21633.59
3 Ames_1903_.tab 17167.95
4 Ames_19097_.tab 17041.48
5 Ames_19467_.tab 17198.14
6 Ames_19532_.tab 17307.25
> head(TM)
specie accesion
1 Teosinte Ames 13502
2 Teosinte Ames 21785
3 Teosinte Ames 21786
4 Teosinte Ames 21787
5 Teosinte Ames 21789
6 Teosinte Ames 21790
答案 0 :(得分:1)
尝试gsub
# Normalise file names such as "Ames_13502_.tab" to the "Ames 13502" form used
# by TM$accesion. The prefix is matched as any run of non-underscore characters
# rather than the literal "Ames", so file names with other prefixes are
# converted as well; names that do not match the pattern are left unchanged.
# The outer parentheses print the assigned value.
(me$filename <- gsub("([^_]+)_(\\d+)_\\.tab", "\\1 \\2", me$filename))
#[1] "Ames 13502" "Ames 16950" "Ames 1903" "Ames 19097" "Ames 19467" "Ames 19532"
现在通过 `accesion` 和 `filename` 这两列合并两个数据框。
# Full outer join: keep every row of both TM and me, matching TM$accesion
# against me$filename; rows without a partner get NA in the other table's
# columns.
merge(
  x = TM,
  y = me,
  by.x = "accesion",
  by.y = "filename",
  all.x = TRUE,
  all.y = TRUE
)
# accesion specie d40
#1 Ames 13502 Teosinte 15714.88
#2 Ames 16950 <NA> 21633.59
#3 Ames 1903 <NA> 17167.95
#4 Ames 19097 <NA> 17041.48
#5 Ames 19467 <NA> 17198.14
#6 Ames 19532 <NA> 17307.25
#7 Ames 21785 Teosinte NA
#8 Ames 21786 Teosinte NA
#9 Ames 21787 Teosinte NA
#10 Ames 21789 Teosinte NA
#11 Ames 21790 Teosinte NA
数据
# Example data for this answer.
# `me` holds the raw file names exactly as printed by head(me) in the question
# ("Ames_13502_.tab", ...), so the gsub() step has something to normalise
# before the merge.
me <- structure(
  list(
    filename = c(
      "Ames_13502_.tab", "Ames_16950_.tab", "Ames_1903_.tab",
      "Ames_19097_.tab", "Ames_19467_.tab", "Ames_19532_.tab"
    ),
    d40 = c(15714.88, 21633.59, 17167.95, 17041.48, 17198.14, 17307.25)
  ),
  class = "data.frame",
  row.names = as.character(1:6)
)
# `TM` pairs each accession (already in "Ames <number>" form) with a species.
TM <- structure(
  list(
    specie = rep("Teosinte", 6),
    accesion = c(
      "Ames 13502", "Ames 21785", "Ames 21786",
      "Ames 21787", "Ames 21789", "Ames 21790"
    )
  ),
  class = "data.frame",
  row.names = as.character(1:6)
)
答案 1 :(得分:1)
使用 `dplyr`:
library(dplyr)

# Replace "_.tab" and any remaining "_" with a space, trim the trailing space,
# then full-join the cleaned df1 with df2 on filename == accesion.
full_join(
  mutate(df1, filename = trimws(gsub("_\\.tab|_", " ", filename))),
  df2,
  by = c("filename" = "accesion")
)
输出:
filename d40 specie
1 Ames 13502 15714.88 Teosinte
2 Ames 16950 21633.59 <NA>
3 Ames 1903 17167.95 <NA>
4 Ames 19097 17041.48 <NA>
5 Ames 19467 17198.14 <NA>
6 Ames 19532 17307.25 <NA>
7 Ames 21785 NA Teosinte
8 Ames 21786 NA Teosinte
9 Ames 21787 NA Teosinte
10 Ames 21789 NA Teosinte
11 Ames 21790 NA Teosinte
数据:
# Example data for this answer.
# df1: raw file names stored as a factor (levels in the order given) plus d40.
raw_names <- c(
  "Ames_13502_.tab", "Ames_16950_.tab", "Ames_1903_.tab",
  "Ames_19097_.tab", "Ames_19467_.tab", "Ames_19532_.tab"
)
df1 <- structure(
  list(
    filename = factor(raw_names, levels = raw_names),
    d40 = c(15714.88, 21633.59, 17167.95, 17041.48, 17198.14, 17307.25)
  ),
  class = "data.frame",
  row.names = as.character(1:6)
)
rm(raw_names)
# df2: species label for each accession, as character columns.
df2 <- structure(
  list(
    specie = rep("Teosinte", 6),
    accesion = c(
      "Ames 13502", "Ames 21785", "Ames 21786",
      "Ames 21787", "Ames 21789", "Ames 21790"
    )
  ),
  class = "data.frame",
  row.names = as.character(1:6)
)