我正在尝试使用仿射间隙成本来实现全局对齐算法。我首先用Java实现它,然后用Python实现它。但我在Java中的输出与我在python中得到的不同。我在Python中实现的代码与在Java中完全相同。
我在Python中的代码如下:
import numpy as np
def deletionMatrix(D,S,i,j):
res = 0
if i==0 and j>=0:
res = S[i-1][j] - (gapOpen + gapExt)
D[i][j] = res
if i > 1 and j >= 0:
res_S = S[i-1][j] - (gapOpen + gapExt)
res_D = D[i-1][j] - gapOpen
res = max(res_S, res_D)
D[i][j] = res
return(res)
def insertionMatrix(I, S, i, j):
res = 0
if i >= 0 and j == 1:
res = S[i][j-1] - (gapOpen + gapExt)
I[i][j] = res
if i >= 0 and j > 1:
res_S = S[i][j-1] - (gapOpen + gapExt)
res_I = I[i][j-1] - gapOpen
res = max(res_S, res_I)
I[i][j] = res
return(res)
def matrix(S,D,I,m,n,match,mismatch):
for i in range(0,len(m)):
for j in range(0,len(n)):
if i == 0 and j == 0:
S[i][j] = 0
if i == 0 and j > 0:
S[i][j] = insertionMatrix(I,S,i,j)
if i > 0 and j == 0:
S[i][j] = deletionMatrix(D,S,i,j)
if i > 0 and j > 0:
if m[i-1] == n[j-1]:
res_S = S[i-1][j-1] + match
res_D = deletionMatrix(D,S,i,j)
res_I = insertionMatrix(I,S,i,j)
S[i][j] = max(res_S,res_D,res_I)
elif m[i-1] != n[j-1]:
res_S = S[i-1][j-1] + mismatch
res_D = deletionMatrix(D,S,i,j)
res_I = insertionMatrix(I,S,i,j)
S[i][j] = max(res_S,res_D,res_I)
return(S)
gapOpen = 5
gapExt = 2
m = "GAATTCAGTTA"
n = "GGATCGA"
mLen = len(m) + 1
nLen = len(n) + 1
S = np.zeros([mLen,nLen])
D = np.zeros([mLen,nLen])
I = np.zeros([mLen,nLen])
match = 1
mismatch = -3
S = matrix(S,D,I,m,n,match,mismatch)
for i in range(0,len(m)):
for j in range(0,len(n)):
print(S[i][j],)
print()
以下是我的java代码。
public class AffineGapCost {
static int gapOpen = 5;
static int gapExt = 2;
public static int deletionMatrix(int[][] D, int[][] S ,int i, int j)
{
int res = 0;
if(i == 1 && j>=0)
{
res = S[i-1][j] - (gapOpen + gapExt);
D[i][j] = res;
}
if(i > 1 && j >= 0)
{
int res_S = S[i-1][j] - (gapOpen + gapExt);
int res_D = D[i-1][j] - gapOpen;
res = Math.max(res_S, res_D);
D[i][j] = res;
}
return res;
}
public static int insertionMatrix(int[][] I, int[][] S, int i, int j)
{
int res = 0;
if(i >= 0 && j==1)
{
res = S[i][j-1] - (gapOpen + gapExt);
I[i][j] = res;
}
if(i >= 0 && j > 1)
{
int res_S = S[i][j-1] - (gapOpen + gapExt);
int res_I = I[i][j-1] - gapOpen;
res = Math.max(res_S,res_I);
I[i][j] = res;
}
return res;
}
public static int[][] matrix(int[][] S, int[][] D, int[][] I,String m, String n,int match, int mismatch)
{
for(int i=0;i<=m.length();i++)
{
for(int j=0;j<=n.length();j++)
{
if(i == 0 && j == 0)
{
S[i][j] = 0;
}
if(i == 0 && j > 0)
{
S[i][j] = insertionMatrix(I, S, i, j);
}
if(i > 0 && j == 0)
{
S[i][j] = deletionMatrix(D,S,i,j);
}
if(i >0 && j>0)
{
int res_S = S[i-1][j-1] + (m.charAt(i-1) == n.charAt(j-1) ? match:mismatch);
int res_D = deletionMatrix(D,S,i,j);
int res_I = insertionMatrix(I,S,i,j);
S[i][j] = Math.max(Math.max(res_S,res_D),res_I);
}
}
}
return S;
}
<pre><code>
public static void main(String[] args)
{
// Initializing two strings
String m = "GAATTCAGTTA";
String n = "GGATCGA";
//String m = "ctaca";
//String n = "cttca";
//String m = "ACGGCT";
//String n = "ACGT";
int[][] S = new int[m.length() + 1][n.length() + 1];
int[][] D = new int[m.length() + 1][n.length() + 1];
int[][] I = new int[m.length() + 1][n.length() + 1];
int match = 1;
int mismatch = -3;
S = matrix(S,D,I,m,n,match,mismatch);
for(int i=0;i<=m.length();i++)
{
for(int j=0;j<=n.length();j++)
{
System.out.print(S[i][j] + "\t");
}
System.out.println();
}
traceBack(S, D, I, m, n,match,mismatch);
}
}
如果有人能告诉我哪里出错了,我真的很感激。过去两天我一直试图找到问题,但似乎无法弄清楚我做错了什么。
答案 0 :(得分:0)
这里有一些潜在的问题:
在deletionMatrix
中,Java说:
if(i == 1 && j>=0)
但是Python说:
if i==0 and j>=0:
在matrix
中,Java说:
for(int i=0;i<=m.length();i++) {
for(int j=0;j<=n.length();j++) {
但Python说:
for i in range(0,len(m)):
for j in range(0,len(n)):
每个循环的缩短一个。 range(m,n)
从m
转到n-1
,因此您可能需要:
for i in range(len(m) + 1):
for j in range(len(n) + 1):
在main
代码的嵌套打印循环中再次发生这种情况。
下面是对Python代码的返工,它产生与Java代码相同的输出(如果在Java代码中注释掉对未定义方法traceBack()
的调用):
import numpy as np
gapOpen = 5
gapExt = 2
def deletionMatrix(D, S, i, j):
res = 0
if i == 1 and j >= 0:
res = S[i - 1][j] - (gapOpen + gapExt)
D[i][j] = res
if i > 1 and j >= 0:
res_S = S[i - 1][j] - (gapOpen + gapExt)
res_D = D[i - 1][j] - gapOpen
res = max(res_S, res_D)
D[i][j] = res
return res
def insertionMatrix(I, S, i, j):
res = 0
if i >= 0 and j == 1:
res = S[i][j-1] - (gapOpen + gapExt)
I[i][j] = res
if i >= 0 and j > 1:
res_S = S[i][j-1] - (gapOpen + gapExt)
res_I = I[i][j-1] - gapOpen
res = max(res_S, res_I)
I[i][j] = res
return res
def matrix(S, D, I, m, n, match, mismatch):
for i in range(len(m) + 1):
for j in range(len(n) + 1):
if i == 0 and j == 0:
S[i][j] = 0
if i == 0 and j > 0:
S[i][j] = insertionMatrix(I, S, i, j)
if i > 0 and j == 0:
S[i][j] = deletionMatrix(D, S, i, j)
if i > 0 and j > 0:
res_S = S[i-1][j-1] + (match if m[i-1] == n[j-1] else mismatch)
res_D = deletionMatrix(D, S, i, j)
res_I = insertionMatrix(I, S, i, j)
S[i][j] = max(res_S, res_D, res_I)
return S
if __name__ == '__main__':
# Initializing two strings
m = "GAATTCAGTTA"
n = "GGATCGA"
mLen = len(m) + 1
nLen = len(n) + 1
S = np.zeros([mLen, nLen])
D = np.zeros([mLen, nLen])
I = np.zeros([mLen, nLen])
match = 1
mismatch = -3
S = matrix(S, D, I, m, n, match, mismatch)
for i in range(mLen):
for j in range(nLen):
print(S[i][j], end='\t')
print()