我有两个源自同一数据集的矩阵,但每个矩阵都有不同的数据量。
我想创建一个新的矩阵:它的行名和列名与 `x` 完全相同,但其中的数据值取自 `y`。如果 `y` 中没有对应的数据,则该位置的值为 `NA`。
`x` 中的行名并非全部出现在 `y` 中,反之亦然;列名也是如此。
对于我在下面给出的示例输入数据,`x` 的行名与 `y` 的行名的对应关系是:`x` 的行名中从开头到 `|` 之前的部分,就是 `y` 中对应的行名(在结果中,我希望保留 `|` 以及它之后的所有内容)。
最有效的方法是什么?
期望的输出
# Expected output: an 11 x 5 numeric matrix carrying the row names
# ("SYMBOL|id") and column names (sample labels) listed below. Values are
# given in column-major order; NA marks cells for which no value is available.
z <- matrix(
  c(NA, 1, NA, NA, NA, NA, NA, NA, 0, NA, NA, NA, NA,
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0, NA, NA, NA, NA, NA,
    NA, 0, NA, NA, NA, 0, NA, NA, NA, NA, NA, NA, 0, NA, NA, NA,
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
  nrow = 11L,
  ncol = 5L,
  dimnames = list(
    c("AACSL|729522", "AACS|65985", "AADACL2|344752", "AADACL3|126767",
      "AADACL4|343066", "AADAC|13", "AADAT|51166", "AAGAB|79719",
      "AAK1|22848", "AAK12|14", "AANAT|15"),
    c("S18", "S20", "S45", "S95", "S100")
  )
)
示例输入
# Sample input `x`: an 11 x 5 numeric matrix whose row names have the form
# "SYMBOL|id" and whose column names are sample labels. Values are given in
# column-major order.
x <- matrix(
  c(0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1,
    1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0,
    0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0),
  nrow = 11L,
  ncol = 5L,
  dimnames = list(
    c("AACSL|729522", "AACS|65985", "AADACL2|344752",
      "AADACL3|126767", "AADACL4|343066", "AADAC|13", "AADAT|51166",
      "AAGAB|79719", "AAK1|22848", "AAK12|14", "AANAT|15"),
    c("S18", "S20", "S45", "S95", "S100")
  )
)

# Sample input `y`: an 11 x 4 numeric matrix whose row names are bare symbols
# (no "|id" suffix). Its row and column names only partly overlap with `x`.
y <- matrix(
  c(0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
    0, 0, 0, 0, 0, 0),
  nrow = 11L,
  ncol = 4L,
  dimnames = list(
    c("A1BG", "A1CF", "A2ML1", "A4GALT", "AACS", "AAK1", "AARD", "AARS2",
      "AASDHPPT", "AASS", "BAACS"),
    c("S18", "S10", "S45", "S95")
  )
)
答案 0 :(得分:1)
我认为您提供的示例可能有点问题:我看不出 `z` 是如何由上面的 `x` 和 `y` 得到的。请看这段代码:
# Letter codes that occur both in x and in y. The code of an x row is its row
# name with everything from the first "|" onwards removed
# (e.g. "AACS|65985" -> "AACS"). sub() is vectorised over all row names, so no
# per-element sapply()/strsplit() call is needed.
intersect(sub("\\|.*$", "", rownames(x)), rownames(y))
# [1] "AACS" "AAK1"
很奇怪,对吗?我的意思是,`y` 中只有 2 个代码能与 `x` 中的行名对应。不过,我认为下面的代码符合您的意图(除了这处不一致之外):
library(data.table)
library(reshape2)
library(dplyr)
# Build `xy`: a matrix with exactly the row and column names of `x` that holds
# the values of `y` wherever `y` has a matching row and column, and NA
# everywhere else.
#
# A row of `x` (e.g. "AACS|65985") corresponds to the row of `y` whose name is
# the text in front of the first "|" ("AACS"); columns are matched by name.
# Only base R is used, and the result stays a matrix like `x` instead of
# becoming a data.frame. local() keeps the helper variables out of the
# workspace, so `xy` is the only object created.
xy <- local({
  # Lookup key for every row of x: the row name without its "|..." suffix.
  x_keys <- sub("\\|.*$", "", rownames(x))

  # Position of each x row / column inside y; NA where y has no counterpart.
  row_idx <- match(x_keys, rownames(y))
  col_idx <- match(colnames(x), colnames(y))

  # Subsetting a matrix with NA indices yields NA cells, which fills every
  # row/column missing from y with NA. drop = FALSE keeps the result a matrix
  # even when x has a single row or column.
  aligned <- y[row_idx, col_idx, drop = FALSE]

  # Restore the full names of x (including everything after "|").
  dimnames(aligned) <- dimnames(x)
  aligned
})
还是说,我对您的某些要求理解有误?