如何将矩阵转换为数值矩阵?

时间:2015-05-10 19:27:41

标签: r matrix

我在文本文件中创建了一个评分矩阵,用于通过 pairwiseAlignment 函数进行局部比对。然后我用下面的代码将它读入 R:

# Read the scoring file, splitting fields on tab characters and using the
# first column as row names, then coerce the data frame to a matrix.
# (The original statement was missing its final closing parenthesis.)
ex <- as.matrix(
  read.table("~/scoringMatrix", header = FALSE, sep = "\t",
             row.names = 1, as.is = TRUE)
)

格式是这样的:

> ex

   A  T  C  G
A  5 -2 -1 -2
T -2  7 -1 -2
C -1 -1  7  2
G -2 -2  2  8

现在每当我使用pairwiseAlignment函数时,我都会收到以下错误:

pairwiseAlignment(x[[1]], x[[2]], substitutionMatrix = ex, gapOpening = -2, gapExtension = -8, scoreOnly = FALSE)
    Error in XStringSet.pairwiseAlignment(pattern = pattern, subject = subject,  : 
      'substitutionMatrix' must be a numeric matrix

如果我使用已经存在的替换矩阵,如BLOSUM50,它可以很好地工作。那么如何使这个矩阵适合于pairwiseAlignment?

> dput(ex)
structure(logical(0), .Dim = c(5L, 0L), .Dimnames = list(c("   A  T  C  G", 
"A  5 -2 -1 -2", "T -2  7 -1 -2", "C -1 -1  7  2", "G -2 -2  2  8"
), NULL))

虽然dput(BLOSUM50)看起来完全不同:

> dput(BLOSUM50)
structure(c(5L, -2L, -1L, -2L, -1L, -1L, -1L, 0L, -2L, -1L, -2L, 
-1L, -1L, -3L, -1L, 1L, 0L, -3L, -2L, 0L, -2L, -1L, -1L, -5L, 
-2L, 7L, -1L, -2L, -4L, 1L, 0L, -3L, 0L, -4L, -3L, 3L, -2L, -3L, 
-3L, -1L, -1L, -3L, -1L, -3L, -1L, 0L, -1L, -5L, -1L, -1L, 7L, 
2L, -2L, 0L, 0L, 0L, 1L, -3L, -4L, 0L, -2L, -4L, -2L, 1L, 0L, 
-4L, -2L, -3L, 4L, 0L, -1L, -5L, -2L, -2L, 2L, 8L, -4L, 0L, 2L, 
-1L, -1L, -4L, -4L, -1L, -4L, -5L, -1L, 0L, -1L, -5L, -3L, -4L, 
5L, 1L, -1L, -5L, -1L, -4L, -2L, -4L, 13L, -3L, -3L, -3L, -3L, 
-2L, -2L, -3L, -2L, -2L, -4L, -1L, -1L, -5L, -3L, -1L, -3L, -3L, 
-2L, -5L, -1L, 1L, 0L, 0L, -3L, 7L, 2L, -2L, 1L, -3L, -2L, 2L, 
0L, -4L, -1L, 0L, -1L, -1L, -1L, -3L, 0L, 4L, -1L, -5L, -1L, 
0L, 0L, 2L, -3L, 2L, 6L, -3L, 0L, -4L, -3L, 1L, -2L, -3L, -1L, 
-1L, -1L, -3L, -2L, -3L, 1L, 5L, -1L, -5L, 0L, -3L, 0L, -1L, 
-3L, -2L, -3L, 8L, -2L, -4L, -4L, -2L, -3L, -4L, -2L, 0L, -2L, 
-3L, -3L, -4L, -1L, -2L, -2L, -5L, -2L, 0L, 1L, -1L, -3L, 1L, 
0L, -2L, 10L, -4L, -3L, 0L, -1L, -1L, -2L, -1L, -2L, -3L, 2L, 
-4L, 0L, 0L, -1L, -5L, -1L, -4L, -3L, -4L, -2L, -3L, -4L, -4L, 
-4L, 5L, 2L, -3L, 2L, 0L, -3L, -3L, -1L, -3L, -1L, 4L, -4L, -3L, 
-1L, -5L, -2L, -3L, -4L, -4L, -2L, -2L, -3L, -4L, -3L, 2L, 5L, 
-3L, 3L, 1L, -4L, -3L, -1L, -2L, -1L, 1L, -4L, -3L, -1L, -5L, 
-1L, 3L, 0L, -1L, -3L, 2L, 1L, -2L, 0L, -3L, -3L, 6L, -2L, -4L, 
-1L, 0L, -1L, -3L, -2L, -3L, 0L, 1L, -1L, -5L, -1L, -2L, -2L, 
-4L, -2L, 0L, -2L, -3L, -1L, 2L, 3L, -2L, 7L, 0L, -3L, -2L, -1L, 
-1L, 0L, 1L, -3L, -1L, -1L, -5L, -3L, -3L, -4L, -5L, -2L, -4L, 
-3L, -4L, -1L, 0L, 1L, -4L, 0L, 8L, -4L, -3L, -2L, 1L, 4L, -1L, 
-4L, -4L, -2L, -5L, -1L, -3L, -2L, -1L, -4L, -1L, -1L, -2L, -2L, 
-3L, -4L, -1L, -3L, -4L, 10L, -1L, -1L, -4L, -3L, -3L, -2L, -1L, 
-2L, -5L, 1L, -1L, 1L, 0L, -1L, 0L, -1L, 0L, -1L, -3L, -3L, 0L, 
-2L, -3L, -1L, 5L, 2L, -4L, -2L, -2L, 0L, 0L, -1L, -5L, 0L, -1L, 
0L, -1L, -1L, -1L, -1L, -2L, -2L, -1L, -1L, -1L, -1L, -2L, -1L, 
2L, 5L, -3L, -2L, 0L, 0L, -1L, 0L, -5L, -3L, -3L, -4L, -5L, -5L, 
-1L, -3L, -3L, -3L, -3L, -2L, -3L, -1L, 1L, -4L, -4L, -3L, 15L, 
2L, -3L, -5L, -2L, -3L, -5L, -2L, -1L, -2L, -3L, -3L, -1L, -2L, 
-3L, 2L, -1L, -1L, -2L, 0L, 4L, -3L, -2L, -2L, 2L, 8L, -1L, -3L, 
-2L, -1L, -5L, 0L, -3L, -3L, -4L, -1L, -3L, -3L, -4L, -4L, 4L, 
1L, -3L, 1L, -1L, -3L, -2L, 0L, -3L, -1L, 5L, -4L, -3L, -1L, 
-5L, -2L, -1L, 4L, 5L, -3L, 0L, 1L, -1L, 0L, -4L, -4L, 0L, -3L, 
-4L, -2L, 0L, 0L, -5L, -3L, -4L, 5L, 2L, -1L, -5L, -1L, 0L, 0L, 
1L, -3L, 4L, 5L, -2L, 0L, -3L, -3L, 1L, -1L, -4L, -1L, 0L, -1L, 
-2L, -2L, -3L, 2L, 5L, -1L, -5L, -1L, -1L, -1L, -1L, -2L, -1L, 
-1L, -2L, -1L, -1L, -1L, -1L, -1L, -2L, -2L, -1L, 0L, -3L, -1L, 
-1L, -1L, -1L, -1L, -5L, -5L, -5L, -5L, -5L, -5L, -5L, -5L, -5L, 
-5L, -5L, -5L, -5L, -5L, -5L, -5L, -5L, -5L, -5L, -5L, -5L, -5L, 
-5L, -5L, 1L), .Dim = c(24L, 24L), .Dimnames = list(c("A", "R", 
"N", "D", "C", "Q", "E", "G", "H", "I", "L", "K", "M", "F", "P", 
"S", "T", "W", "Y", "V", "B", "Z", "X", "*"), c("A", "R", "N", 
"D", "C", "Q", "E", "G", "H", "I", "L", "K", "M", "F", "P", "S", 
"T", "W", "Y", "V", "B", "Z", "X", "*")))

2 个答案:

答案 0 :(得分:2)

看起来你的 'scoringMatrix' 文件是用空格(而不是制表符)分隔各列的,只需这样读入即可:

# sep = "" makes the reader split on any run of white space. read.delim()
# treats the first line as a header; because that header has one field fewer
# than the data lines, the first column is used as the row names.
score_df <- read.delim("scoringMatrix", sep = "")
ex <- as.matrix(score_df)

读入后的对象具有如下结构:

> dput(ex)
structure(c(5L, -2L, -1L, -2L, -2L, 7L, -1L, -2L, -1L, -1L, 7L, 
2L, -2L, -2L, 2L, 8L), .Dim = c(4L, 4L), .Dimnames = list(c("A", 
"T", "C", "G"), c("A", "T", "C", "G")))

在您的输入中,没有制表符\t,因此每行都作为单个列读入。 row.names=1表示单列被指定为行名 - 因此您有5行零列

> read.table("scoringMatrix", sep="\t", header=FALSE, row.names=1)
data frame with 0 columns and 5 rows

将其强制转换为矩阵会产生5 x 0矩阵,您在原始显示中看到的是矩阵的行名(!)。

正如 @DavidArenburg 所建议的那样,这个矩阵也可以直接在 R 中“手工”创建:

# Hand-built scoring matrix, equivalent to the one read from the file.
# The values are entered row by row (byrow = TRUE) in A, T, C, G order, so the
# dimnames must use that same order; labelling them A, C, G, T would attach
# the T, C and G scores to the wrong letters (e.g. G-G would become 7, not 8).
ex <- matrix(
  c( 5, -2, -1, -2,
    -2,  7, -1, -2,
    -1, -1,  7,  2,
    -2, -2,  2,  8),
  nrow = 4, ncol = 4, byrow = TRUE,
  dimnames = list(
    c("A", "T", "C", "G"),
    c("A", "T", "C", "G")
  )
)
# Print the result, as the original bare expression did.
ex

答案 1 :(得分:2)

另一种选择是直接用 match 从 BLOSUM50 中选出所需的行/列,这样一开始就不必用文本编辑器手动创建这个文件:

# Find the positions of the four letters in BLOSUM50's row names and use those
# positions for both the rows and the columns of the sub-matrix.
# NOTE(review): BLOSUM50's dimnames look like amino-acid one-letter codes, so
# these are presumably protein scores for A/T/C/G, not nucleotide scores —
# confirm this is what you want before using it on DNA.
bases <- c("A", "T", "C", "G")
indx <- match(bases, rownames(BLOSUM50))
BLOSUM50[indx, indx]
#    A  T  C  G
# A  5  0 -1  0
# T  0  5 -1 -2
# C -1 -1 13 -3
# G  0 -2 -3  8