这段代码用于扫描包含模糊(简并)DNA 序列的 FASTA 文件,并计算每条序列可能对应多少种具体序列。不知为何,它似乎只处理了 1 个核苷酸就停止了,但我不清楚原因。
def _count_possibilities(sequence):
    """Return how many concrete sequences an IUPAC-coded string can stand for.

    Each two-way code (r, y, s, w, k, m) doubles the count, each three-way
    code (b, d, h, v) triples it and each ``n`` quadruples it.  Unambiguous
    bases (a, c, g, t, u) and any other character contribute a factor of 1.
    Matching is case-insensitive.
    """
    codes = sequence.lower()
    two_way = sum(codes.count(code) for code in "ryswkm")
    three_way = sum(codes.count(code) for code in "bdhv")
    four_way = codes.count("n")
    return (2 ** two_way) * (3 ** three_way) * (4 ** four_way)


def count_ambiguous_code(file):
    """Print the number of possible sequences for every record in a FASTA file.

    For each record the record id is printed, followed by the number of
    concrete sequences its ambiguous (IUPAC) DNA sequence can represent.

    Args:
        file: Path or open handle of the FASTA file, passed straight to
            ``Bio.SeqIO.parse``.
    """
    # Kept as a function-scope import, as in the original.  Bio.Alphabet is
    # deliberately not imported: it was removed in Biopython 1.78 and
    # SeqIO.parse works without an alphabet on older releases too.
    from Bio import SeqIO

    for record in SeqIO.parse(file, 'fasta'):
        # SeqRecord.lower() returns a new record rather than modifying in
        # place, so the case folding is done on the plain string instead.
        possibilities = _count_possibilities(str(record.seq))
        print(record.id)
        print(possibilities)