我的理解是,虽然我可以找到动态编程的基本上每个讨论都有一个存储指针,因为填充了矩阵,而不是在回溯步骤中重新计算先前的单元格。
据我所知,我有我的动态编程算法来正确构建矩阵,但我对如何进行回溯计算感到困惑。我也被告知有必要重新计算这些值(而不仅仅是查找它们),但我不知道它会如何产生不同的数字。
我正在实施的SW版本包括两个序列中的间隙打开选项,因此每个矩阵的递归关系有三个选项。下面是我的全局对齐类的当前版本。根据我的手计算,我相信score_align正确生成矩阵,但显然traceback_col_seq不起作用。
INF = 2147483647 #max size of int32
class global_aligner():
def __init__(self, subst, open=10, extend=2, double=3):
self.extend, self.open, self.double, self.subst = extend, open, double, subst
def __call__(self, row_seq, col_seq):
#add alphabet error checking?
score_align(row_seq, col_seq)
return traceback_col_seq()
def init_array(self):
self.M = zeros((self.maxI, self.maxJ), int)
self.Ic = zeros((self.maxI, self.maxJ), int)
self.Ir = zeros((self.maxI, self.maxJ), int)
for i in xrange(self.maxI):
self.M[i][0], self.Ir[i][0], self.Ic[i][0] = \
-INF, -INF, -(self.open+self.extend*i)
for j in xrange(self.maxJ):
self.M[0][j], self.Ic[0][j], self.Ir[0][j] = \
-INF, -INF, -(self.open+self.extend*j)
self.M[0][0] = 0
self.Ic[0][0] = -self.open
def score_cell(self, i, j, chars):
thisM = [self.Ic[i-1][j-1]+self.subst[chars], self.M[i-1][j-1]+\
self.subst[chars], self.Ir[i-1][j-1]+self.subst[chars]]
thisC = [self.Ic[i][j-1]-self.extend, self.M[i][j-1]-self.open, \
self.Ir[i][j-1]-self.double]
thisR = [self.M[i-1][j]-self.open, self.Ir[i-1][j]-self.extend, \
self.Ic[i-1][j]-self.double]
return max(thisM), max(thisC), max(thisR)
def score_align(self, row_seq, col_seq):
self.row_seq, self.col_seq = list(row_seq), list(col_seq)
self.maxI, self.maxJ = len(self.row_seq)+1, len(self.col_seq)+1
self.init_array()
for i in xrange(1, self.maxI):
row_char = self.row_seq[i-1]
for j in xrange(1, self.maxJ):
chars = row_char+self.col_seq[j-1]
self.M[i][j], self.Ic[i][j], self.Ir[i][j] = \
self.score_cell(i, j, chars)
def traceback_col_seq(self):
self.traceback = list()
i, j = self.maxI-1, self.maxJ-1
while i > 1 and j > 1:
cell = [self.M[i][j], self.Ic[i][j], self.Ir[i][j]]
cellMax = max(cell)
chars = self.row_seq[i-1]+self.col_seq[j-1]
if cell.index(cellMax) == 0: #M
diag = [diagM, diagC, diagR] = self.score_cell(i-1, j-1, chars)
diagMax = max(diag)
if diag.index(diagMax) == 0: #match
self.traceback.append(self.col_seq[j-1])
elif diag.index(diagMax) == 1: #insert column (open)
self.traceback.append('-')
elif diag.index(diagMax) == 2: #insert row (open other)
self.traceback.append(self.col_seq[j-1].lower())
i, j = i-1, j-1
elif cell.index(cellMax) == 1: #Ic
up = [upM, upC, upR] = self.score_cell(i-1, j, chars)
upMax = max(up)
if up.index(upMax) == 0: #match (close)
self.traceback.append(self.col_seq[j-1])
elif up.index(upMax) == 1: #insert column (extend)
self.traceback.append('-')
elif up.index(upMax) == 2: #insert row (double)
self.traceback.append('-')
i -= 1
elif cell.index(cellMax) == 2: #Ir
left = [leftM, leftC, leftR] = self.score_cell(i, j-1, chars)
leftMax = max(left)
if left.index(leftMax) == 0: #match (close)
self.traceback.append(self.col_seq[j-1])
elif left.index(leftMax) == 1: #insert column (double)
self.traceback.append('-')
elif left.index(leftMax) == 2: #insert row (extend other)
self.traceback.append(self.col_seq[j-1].lower())
j -= 1
for j in xrange(0,j,-1):
self.traceback.append(self.col_seq[j-1])
for i in xrange(0,i, -1):
self.traceback.append('-')
return ''.join(self.traceback[::-1])
test = global_aligner(blosumMatrix)
test.score_align('AA','AAA')
test.traceback_col_seq()
答案 0 :(得分:1)
我认为主要的问题是,在生成可能来自的单元格时,您并未考虑当前所使用的矩阵。 cell = [self.M[i][j], self.Ic[i][j], self.Ir[i][j]]
第一次通过while循环是正确的,但之后你不能只选择得分最高的矩阵。您的选择受到您来自哪里的限制。我在跟踪你的代码时遇到了一些麻烦,但我认为你在while循环的if语句中考虑到了这一点。如果是这种情况,那么我认为按照这些方面的改变就足够了:
cell = [self.M[i][j], self.Ic[i][j], self.Ir[i][j]]
cellIndex = cell.index(max(cell))
while i > 1 and j > 1:
chars = self.row_seq[i-1]+self.col_seq[j-1]
if cellIndex == 0: #M
diag = [diagM, diagC, diagR] = self.score_cell(i-1, j-1, chars)
diagMax = max(diag)
...
cellIndex = diagMax
i, j = i-1, j-1
elif cell.index(cellMax) == 1: #Ic
up = [upM, upC, upR] = self.score_cell(i-1, j, chars)
upMax = max(up)
...
cellIndex = upMax
i -= 1
elif cell.index(cellMax) == 2: #Ir
left = [leftM, leftC, leftR] = self.score_cell(i, j-1, chars)
leftMax = max(left)
...
cellIndex = leftMax
j -= 1
就像我说的那样,我并不认为我正确地遵循你的代码,但看看是否有帮助。