我拥有的矩阵:
> dput(head(data1))
structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 1, 0, 0), .Dim = c(6L, 18L), .Dimnames = list(c("AT1G01050",
"AT1G01080", "AT1G01090", "AT1G01320", "AT1G01470", "AT1G01800"
), c("10", "33.95", "58.66", "84.42", "110.21", "134.16", "164.69",
"199.1", "234.35", "257.19", "361.84", "432.74", "506.34", "581.46",
"651.71", "732.59", "817.56", "896.24")))
为了便于解释,我将在下表中显示:
10 33.95 58.66 84.42 110.21 134.16 164.69 199.1 234.35 257.19 361.84 432.74 506.34 581.46 651.71 732.59 817.56 896.24
AT1G01050 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0
AT1G01080 0 0 0 0 1 0 1 0 1 0 1 0 0 1 0 1 0 1
AT1G01090 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 1 0 0
AT1G01320 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 1
AT1G01470 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
AT1G01800 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0
如您所见,我的表格中全是 0 和 1。在许多情况下,一行中会有多个 1。所以我想做的是复制/重复这些行,使每一行中只保留一个 1。例如,以第一行为例:
10 33.95 58.66 84.42 110.21 134.16 164.69 199.1 234.35 257.19 361.84 432.74 506.34 581.46 651.71 732.59 817.56 896.24
AT1G01050 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0
此行中有两个 1。这意味着我必须为这个“基因”再创建一行。所以输出应该如下所示。首先,我保留第一个 1(从左侧数起),但删除第二个 1。为了便于之后区分,还必须更改行名:
10 33.95 58.66 84.42 110.21 134.16 164.69 199.1 234.35 257.19 361.84 432.74 506.34 581.46 651.71 732.59 817.56 896.24
AT1G01050_1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
现在我删除第一个 1,保留第二个 1。
10 33.95 58.66 84.42 110.21 134.16 164.69 199.1 234.35 257.19 361.84 432.74 506.34 581.46 651.71 732.59 817.56 896.24
AT1G01050_2 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
我认为这做起来并不容易,但我相信你们当中一定有人知道如何解决这个问题。
答案 0 :(得分:4)
试试这个:
#' Split one indicator row into rows that each hold a single 1
#'
#' @param u A vector of 0/1 flags (one matrix row); names, if any, are
#'   treated as column names.
#' @return A matrix with `length(u)` columns. When `u` contains two or more
#'   entries equal to 1, the result has one row per such entry: that entry is
#'   1 and every other cell is 0. Otherwise `u` is returned unchanged as a
#'   one-row matrix (`t(u)`).
divideLine <- function(u) {
  # `%in%` never yields NA, so missing values cannot break the test below.
  index <- which(u %in% 1)
  n_ones <- length(index)

  # Decide on the number of 1s rather than on sum(u): the two only agree for
  # clean 0/1 input, and sum(u) is NA as soon as the row has a missing value.
  if (n_ones <= 1L) {
    return(t(u))
  }

  # One output row per 1; keep the names so split and unsplit rows carry the
  # same column labels.
  out <- matrix(0, nrow = n_ones, ncol = length(u),
                dimnames = list(NULL, names(u)))
  # Matrix indexing: row j receives its 1 at column index[j].
  out[cbind(seq_len(n_ones), index)] <- 1
  out
}
# NOTE(review): `mat` is the 0/1 matrix to split -- presumably the question's
# `data1`; confirm before running.
row_ids <- rownames(mat)
if (is.null(row_ids)) {
  # Without row names there is nothing to suffix; fall back to row numbers.
  row_ids <- as.character(seq_len(nrow(mat)))
}

# lapply() instead of apply(): apply() collapses its result into a matrix
# whenever every row produces a piece of the same size, which would break the
# per-row nrow() and rbind() steps below.
lst <- lapply(seq_len(nrow(mat)), function(i) divideLine(mat[i, ]))
names(lst) <- row_ids

# Number of output rows contributed by each input row.
x <- lapply(lst, nrow)
res <- do.call(rbind, lst)

# Label every output row "<original name>_<k>", with k counting from 0 within
# each original row.
rownames(res) <- paste(
  rep(row_ids, unlist(x)),
  sequence(unlist(x)) - 1,
  sep = "_"
)
#> res
# 10 33.95 58.66 84.42 110.21 134.16 164.69 199.1 234.35 257.19 361.84 432.74 506.34 581.46 651.71 732.59 817.56 896.24
#AT1G01050_0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
#AT1G01050_1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
#AT1G01080_0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
#AT1G01080_1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
#AT1G01080_2 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
#AT1G01080_3 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
#AT1G01080_4 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
#AT1G01080_5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
#AT1G01080_6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
#AT1G01090_0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
#AT1G01090_1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
#AT1G01090_2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
#AT1G01320_0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
#AT1G01320_1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
#AT1G01320_2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
#AT1G01320_3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
#AT1G01470_0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
#AT1G01800_0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
#AT1G01800_1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
答案 1 :(得分:3)
可能有帮助:
# Repeat each row index once per non-zero cell in that row, so a row with k
# non-zero cells shows up k times. A row with no non-zero cell is repeated
# zero times and therefore does not appear in `res`.
indx <- rep(seq_len(nrow(data1)), rowSums(data1 != 0))

res <- do.call(rbind, lapply(split(indx, indx), function(i) {
  # k identical copies of one source row (kept as a matrix even when k == 1).
  x1 <- data1[i, , drop = FALSE]
  # Columns holding the non-zero cells; there are exactly nrow(x1) of them.
  hit_cols <- which(x1[1, ] != 0)
  # Copy j keeps only the j-th non-zero cell: remember the "diagonal" values,
  # blank out all hit columns, then put the diagonal back.
  diag_pos <- cbind(seq_along(hit_cols), hit_cols)
  keep <- x1[diag_pos]
  x1[, hit_cols] <- 0
  x1[diag_pos] <- keep
  x1
}))

# Only de-duplicate the repeated row names ("id", "id.1", ...). make.names()
# would additionally rewrite IDs that are not syntactic R names.
row.names(res) <- make.unique(row.names(res))
结果:
res
# 10 33.95 58.66 84.42 110.21 134.16 164.69 199.1 234.35 257.19
#AT1G01050 0 0 0 0 0 0 0 0 1 0
#AT1G01050.1 0 0 0 0 0 0 0 0 0 0
#AT1G01080 0 0 0 0 1 0 0 0 0 0
#AT1G01080.1 0 0 0 0 0 0 1 0 0 0
#AT1G01080.2 0 0 0 0 0 0 0 0 1 0
#AT1G01080.3 0 0 0 0 0 0 0 0 0 0
#AT1G01080.4 0 0 0 0 0 0 0 0 0 0
#AT1G01080.5 0 0 0 0 0 0 0 0 0 0
#AT1G01080.6 0 0 0 0 0 0 0 0 0 0
#AT1G01090 0 0 0 0 0 0 0 0 0 0
#AT1G01090.1 0 0 0 0 0 0 0 0 0 0
#AT1G01090.2 0 0 0 0 0 0 0 0 0 0
#AT1G01320 0 0 0 0 0 0 0 0 0 0
#AT1G01320.1 0 0 0 0 0 0 0 0 0 0
#AT1G01320.2 0 0 0 0 0 0 0 0 0 0
#AT1G01320.3 0 0 0 0 0 0 0 0 0 0
#AT1G01470 0 0 0 1 0 0 0 0 0 0
#AT1G01800 0 0 0 1 0 0 0 0 0 0
#AT1G01800.1 0 0 0 0 0 0 0 0 0 1
# 361.84 432.74 506.34 581.46 651.71 732.59 817.56 896.24
#AT1G01050 0 0 0 0 0 0 0 0
#AT1G01050.1 0 0 0 1 0 0 0 0
#AT1G01080 0 0 0 0 0 0 0 0
#AT1G01080.1 0 0 0 0 0 0 0 0
#AT1G01080.2 0 0 0 0 0 0 0 0
#AT1G01080.3 1 0 0 0 0 0 0 0
#AT1G01080.4 0 0 0 1 0 0 0 0
#AT1G01080.5 0 0 0 0 0 1 0 0
#AT1G01080.6 0 0 0 0 0 0 0 1
#AT1G01090 1 0 0 0 0 0 0 0
#AT1G01090.1 0 0 0 1 0 0 0 0
#AT1G01090.2 0 0 0 0 0 1 0 0
#AT1G01320 0 1 0 0 0 0 0 0
#AT1G01320.1 0 0 0 1 0 0 0 0
#AT1G01320.2 0 0 0 0 0 1 0 0
#AT1G01320.3 0 0 0 0 0 0 0 1
#AT1G01470 0 0 0 0 0 0 0 0
#AT1G01800 0 0 0 0 0 0 0 0
#AT1G01800.1 0 0 0 0 0 0 0 0