仿射间隙序列对齐

时间:2017-02-12 12:20:59

标签: java python

我正在尝试使用仿射间隙成本来实现全局对齐算法。我首先用Java实现它,然后用Python实现它。但我在Java中的输出与我在python中得到的不同。我在Python中实现的代码与在Java中完全相同。

我在Python中的代码如下:

import numpy as np

def deletionMatrix(D,S,i,j):
    """Compute the deletion score for cell (i, j), store it in D, return it.

    D and S are indexed as ``X[row][col]``; D is modified in place. The gap
    penalties are read from the module-level names ``gapOpen`` and ``gapExt``.
    When neither branch below applies (in particular for i == 1) the function
    returns 0 and leaves D untouched.
    """
    res = 0
    # NOTE(review): for i == 0, S[i-1] is S[-1], which a list or NumPy array
    # resolves to its last row instead of raising. The Java version of this
    # function tests i == 1 here -- presumably that was intended; confirm.
    if i==0 and j>=0:
        res = S[i-1][j] - (gapOpen + gapExt)
        D[i][j] = res

    # Rows past the first: take the better of the S value one row up minus
    # (gapOpen + gapExt) and the D value one row up minus gapOpen.
    if i > 1 and j >= 0:
        res_S = S[i-1][j] - (gapOpen + gapExt)
        res_D = D[i-1][j] - gapOpen

        res = max(res_S, res_D)
        D[i][j] = res

    return(res)

def insertionMatrix(I, S, i, j):
    """Compute the insertion score for cell (i, j), store it in I, return it.

    I and S are indexed as ``X[row][col]``; I is modified in place. The gap
    penalties are read from the module-level names ``gapOpen`` and ``gapExt``.
    Returns 0 and leaves I untouched when j < 1 or i < 0.
    """
    res = 0

    # First column after the origin: only the S value to the left is used.
    if i >= 0 and j == 1:
        res = S[i][j-1] - (gapOpen + gapExt)
        I[i][j] = res

    # Later columns: take the better of the S value to the left minus
    # (gapOpen + gapExt) and the I value to the left minus gapOpen.
    if i >= 0 and j > 1:
        res_S = S[i][j-1] - (gapOpen + gapExt)
        res_I = I[i][j-1] - gapOpen

        res = max(res_S, res_I)
        I[i][j] = res

    return(res)

def matrix(S,D,I,m,n,match,mismatch):
    """Fill the score matrix S cell by cell, row-major, and return it.

    S, D and I are indexed as ``X[row][col]`` and modified in place (D and I
    through deletionMatrix / insertionMatrix). ``m`` and ``n`` are the two
    sequences; ``match`` / ``mismatch`` are added to the diagonal neighbour
    depending on whether m[i-1] equals n[j-1].
    """
    # NOTE(review): range(0, len(m)) ends at len(m) - 1, so row len(m) and
    # column len(n) are never filled; the Java loops use <= and do reach them.
    for i in range(0,len(m)):
        for j in range(0,len(n)):
             # Origin cell.
             if i == 0 and j == 0:
                 S[i][j] = 0

             # Top row: only the insertion score applies.
             if i == 0 and j > 0:
                 S[i][j] = insertionMatrix(I,S,i,j)

             # Left column: only the deletion score applies.
             if i > 0 and j == 0:
                 S[i][j] = deletionMatrix(D,S,i,j)

             # Interior: best of diagonal (match/mismatch), deletion, insertion.
             if i > 0 and j > 0:
                 if m[i-1] == n[j-1]:
                    res_S = S[i-1][j-1] + match
                    res_D = deletionMatrix(D,S,i,j)
                    res_I = insertionMatrix(I,S,i,j)

                    S[i][j] = max(res_S,res_D,res_I)

                 elif m[i-1] != n[j-1]:
                    res_S = S[i-1][j-1] + mismatch
                    res_D = deletionMatrix(D,S,i,j)
                    res_I = insertionMatrix(I,S,i,j)

                    S[i][j] = max(res_S,res_D,res_I)

     # NOTE(review): this return is indented five spaces, which matches no
     # enclosing indentation level (0, 4, 8, ...); as written, Python rejects it.
     return(S)


# Gap penalties, read as globals by deletionMatrix and insertionMatrix.
gapOpen = 5
gapExt = 2

# The two sequences to align.
m = "GAATTCAGTTA"
n = "GGATCGA"

# One extra row/column so that index 0 can hold the boundary cells.
mLen = len(m) + 1
nLen = len(n) + 1

# np.zeros without a dtype creates float arrays, so scores print as e.g. -7.0.
S = np.zeros([mLen,nLen])
D = np.zeros([mLen,nLen])
I = np.zeros([mLen,nLen])

match = 1
mismatch = -3

S = matrix(S,D,I,m,n,match,mismatch)

# NOTE(review): these ranges stop at len(m) - 1 / len(n) - 1, so the last row
# and column of S (size mLen x nLen) are not printed.
for i in range(0,len(m)):
   for j in range(0,len(n)):
       # NOTE(review): the trailing comma inside the call has no effect in
       # Python 3, so every value ends with a newline -- presumably a
       # leftover from the Python 2 "print x," idiom; confirm.
       print(S[i][j],)

   print()

我附上了我想要实现的算法的图像。有人能告诉我哪里出错了吗? This is the algorithm that I'm trying to implement. Alpha is the gapOpen variable in my code and beta is the gapExt variable. The A[i],B[j] term is simply the match or mismatch score: the two characters being read at a given time (from strings m and n) are compared; if they are the same it is a match, otherwise it is a mismatch.

以下是我的java代码。

/**
 * Fills the score matrices of a global alignment with affine gap costs and
 * prints the resulting score matrix S.
 */
public class AffineGapCost {

    static int gapOpen = 5;
    static int gapExt = 2;

    /**
     * Computes the deletion score for cell (i, j), stores it in D and returns it.
     * Returns 0 and leaves D untouched when i < 1 or j < 0.
     */
    public static int deletionMatrix(int[][] D, int[][] S, int i, int j) {
        int res = 0;

        // First row below the origin: only the S value one row up is used.
        if(i == 1 && j>=0) {
            res = S[i-1][j] - (gapOpen + gapExt);
            D[i][j] = res;
        }

        // Later rows: better of S one row up minus (gapOpen + gapExt) and
        // D one row up minus gapOpen.
        if(i > 1 && j >= 0) {
            int res_S = S[i-1][j] - (gapOpen + gapExt);
            int res_D = D[i-1][j] - gapOpen;
            res = Math.max(res_S, res_D);
            D[i][j] = res;
        }

        return res;
    }

    /**
     * Computes the insertion score for cell (i, j), stores it in I and returns it.
     * Returns 0 and leaves I untouched when j < 1 or i < 0.
     */
    public static int insertionMatrix(int[][] I, int[][] S, int i, int j) {
        int res = 0;

        // First column after the origin: only the S value to the left is used.
        if(i >= 0 && j==1) {
            res = S[i][j-1] - (gapOpen + gapExt);
            I[i][j] = res;
        }

        // Later columns: better of S to the left minus (gapOpen + gapExt) and
        // I to the left minus gapOpen.
        if(i >= 0 && j > 1) {
            int res_S = S[i][j-1] - (gapOpen + gapExt);
            int res_I = I[i][j-1] - gapOpen;
            res = Math.max(res_S,res_I);
            I[i][j] = res;
        }

        return res;
    }

    /**
     * Fills S row by row (and D and I through the helpers) and returns S.
     * All three arrays must have (m.length() + 1) x (n.length() + 1) cells.
     */
    public static int[][] matrix(int[][] S, int[][] D, int[][] I, String m, String n, int match, int mismatch) {
        for(int i=0;i<=m.length();i++) {
            for(int j=0;j<=n.length();j++) {
                if(i == 0 && j == 0) {
                    S[i][j] = 0;
                }
                if(i == 0 && j > 0) {
                    S[i][j] = insertionMatrix(I, S, i, j);
                }
                if(i > 0 && j == 0) {
                    S[i][j] = deletionMatrix(D,S,i,j);
                }
                if(i >0 && j>0) {
                    int res_S = S[i-1][j-1] + (m.charAt(i-1) == n.charAt(j-1) ? match:mismatch);
                    int res_D = deletionMatrix(D,S,i,j);
                    int res_I = insertionMatrix(I,S,i,j);
                    S[i][j] = Math.max(Math.max(res_S,res_D),res_I);
                }
            }
        }
        return S;
    }

    public static void main(String[] args) {
        // Initializing two strings
        String m = "GAATTCAGTTA";
        String n = "GGATCGA";
        //String m = "ctaca";
        //String n = "cttca";
        //String m = "ACGGCT";
        //String n = "ACGT";

        int[][] S = new int[m.length() + 1][n.length() + 1];
        int[][] D = new int[m.length() + 1][n.length() + 1];
        int[][] I = new int[m.length() + 1][n.length() + 1];

        int match = 1;
        int mismatch = -3;

        S = matrix(S,D,I,m,n,match,mismatch);

        for(int i=0;i<=m.length();i++) {
            for(int j=0;j<=n.length();j++) {
                System.out.print(S[i][j] + "\t");
            }
            System.out.println();
        }

        // traceBack is not defined anywhere in this snippet, so the call is
        // left disabled to keep the class compilable.
        // traceBack(S, D, I, m, n,match,mismatch);
    }
}

如果有人能告诉我哪里出错了,我真的很感激。过去两天我一直试图找到问题,但似乎无法弄清楚我做错了什么。

1 个答案:

答案 0 :(得分:0)

这里有一些潜在的问题:

deletionMatrix中,Java说:

if(i == 1 && j>=0)

但是Python说:

if i==0 and j>=0:

matrix中,Java说:

for(int i=0;i<=m.length();i++) {
    for(int j=0;j<=n.length();j++) {

但Python说:

for i in range(0,len(m)):
    for j in range(0,len(n)):

每个循环都少迭代了一次。range(m, n) 从 m 迭代到 n-1,因此您可能需要:

for i in range(len(m) + 1):
    for j in range(len(n) + 1):

在主代码的嵌套打印循环中,同样的问题再次出现。

下面是重写后的Python代码,它产生与Java代码相同的输出(前提是在Java代码中注释掉对未定义方法traceBack()的调用):

import numpy as np

# Gap penalties, read as module-level globals by deletionMatrix and
# insertionMatrix: a score taken from S is charged gapOpen + gapExt, a score
# carried over from D or I is charged gapOpen.
gapOpen = 5
gapExt = 2

def deletionMatrix(D, S, i, j):
    """Return the deletion score for cell (i, j) and record it in D.

    D and S are indexed as ``X[row][col]``; D is updated in place. The gap
    penalties come from the module-level ``gapOpen`` and ``gapExt``.

    For i < 1 or j < 0 nothing is written and 0 is returned. For i == 1 the
    score is the S value one row up minus (gapOpen + gapExt); for i > 1 it is
    the larger of that and the D value one row up minus gapOpen.
    """
    # Cells outside the range this matrix covers: no write, score 0.
    if i < 1 or j < 0:
        return 0

    from_s = S[i - 1][j] - (gapOpen + gapExt)
    if i == 1:
        # Only the S candidate is considered on the first row below the origin.
        score = from_s
    else:
        from_d = D[i - 1][j] - gapOpen
        score = max(from_s, from_d)

    D[i][j] = score
    return score

def insertionMatrix(I, S, i, j):
    """Return the insertion score for cell (i, j) and record it in I.

    I and S are indexed as ``X[row][col]``; I is updated in place. The gap
    penalties come from the module-level ``gapOpen`` and ``gapExt``.

    For j < 1 or i < 0 nothing is written and 0 is returned. For j == 1 the
    score is the S value to the left minus (gapOpen + gapExt); for j > 1 it is
    the larger of that and the I value to the left minus gapOpen.
    """
    # Cells outside the range this matrix covers: no write, score 0.
    if i < 0 or j < 1:
        return 0

    from_s = S[i][j - 1] - (gapOpen + gapExt)
    if j == 1:
        # Only the S candidate is considered in the first column after the origin.
        score = from_s
    else:
        from_i = I[i][j - 1] - gapOpen
        score = max(from_s, from_i)

    I[i][j] = score
    return score

def matrix(S, D, I, m, n, match, mismatch):
    """Fill the score matrix S in place, row by row, and return it.

    S, D and I are indexed as ``X[row][col]`` and need len(m) + 1 rows and
    len(n) + 1 columns. D and I are updated as a side effect of calling
    deletionMatrix and insertionMatrix.

    The origin is 0, the top row comes from insertionMatrix and the left
    column from deletionMatrix. Every other cell is the maximum of the
    diagonal neighbour plus ``match`` or ``mismatch`` (depending on whether
    m[i-1] equals n[j-1]), the deletion score and the insertion score.
    """
    for i in range(len(m) + 1):
        for j in range(len(n) + 1):
            if i == 0:
                # Top row: origin is 0, the rest is insertion-only.
                S[i][j] = insertionMatrix(I, S, i, j) if j > 0 else 0
            elif j == 0:
                # Left column: deletion-only.
                S[i][j] = deletionMatrix(D, S, i, j)
            else:
                pair_score = match if m[i - 1] == n[j - 1] else mismatch
                candidates = (
                    S[i - 1][j - 1] + pair_score,
                    deletionMatrix(D, S, i, j),
                    insertionMatrix(I, S, i, j),
                )
                S[i][j] = max(candidates)

    return S

if __name__ == '__main__':

    # The two sequences to align.
    m = "GAATTCAGTTA"
    n = "GGATCGA"

    # One extra row/column so that index 0 can hold the boundary cells.
    mLen = len(m) + 1
    nLen = len(n) + 1

    # Integer dtype: every score and penalty used here is an integer and the
    # Java reference works on int[][]. With the default float dtype the
    # values would print as "-7.0" where Java prints "-7".
    S = np.zeros([mLen, nLen], dtype=int)
    D = np.zeros([mLen, nLen], dtype=int)
    I = np.zeros([mLen, nLen], dtype=int)

    match = 1
    mismatch = -3

    S = matrix(S, D, I, m, n, match, mismatch)

    # Print S one row per line, tab-separated (trailing tab included, as in
    # the Java program).
    for i in range(mLen):
        for j in range(nLen):
            print(S[i][j], end='\t')
        print()