我需要找到一个起始密码子,以及与该主起始密码子处于同一阅读框内的所有终止密码子,即 startCodon_AGGAAG_stopCodon(1)_GAAGGTAACAGCTCTG_stopCodon(2)_ATCAAGA。下面是我的代码,它给出的是所有 ORF 的位置。应如何修改,才能得到起始密码子的末端位置,以及与该主起始密码子同框的所有终止密码子的位置?
from Bio.SeqRecord import SeqRecord
from Bio import SeqIO
def getORF(sequence, treshold, start_codons, stop_codons):
    """Return ORF coordinates found in the three forward reading frames.

    Each frame (offsets 0, 1, 2) is scanned codon by codon. The first start
    codon seen opens an ORF; further start codons are ignored until the next
    in-frame stop codon closes it. A closed ORF is reported only when its
    length in bases (start and stop codons included) is at least
    ``treshold * 3``.

    Args:
        sequence: Sliceable sequence whose 3-character slices can be tested
            for membership in the codon collections.
        treshold: Minimum ORF length, expressed in codons.
        start_codons: Collection of codons that open an ORF.
        stop_codons: Collection of codons that close an ORF.

    Returns:
        A flat list of alternating values ``[start, end, start, end, ...]``,
        where ``start`` is the 1-based position of the first base of the
        start codon and ``end`` is the 1-based position of the last base of
        the stop codon. Frames are reported in order 0, 1, 2.
    """
    min_bases = treshold * 3
    coordinates = []
    for frame in range(3):
        # 0-based index of the start codon of the ORF currently open in this
        # frame, or None while no ORF is open.
        open_at = None
        for pos in range(frame, len(sequence), 3):
            codon = sequence[pos:pos + 3]
            if open_at is None and codon in start_codons:
                open_at = pos
            if open_at is not None and codon in stop_codons:
                if pos - open_at + 3 >= min_bases:
                    coordinates.extend((open_at + 1, pos + 3))
                # Any stop codon closes the open ORF, reported or not.
                open_at = None
    return coordinates
# Driver: scan every FASTA record in myfile.fa with getORF and write one line
# of forward-strand ORF coordinates per record to out.txt.
start = ["ATG"]
stop = ["TAA", "TAG", "TGA"]
# Context managers close both files even if parsing or ORF detection raises.
with open("myfile.fa", "r") as f, open("out.txt", "w") as o:
    for record in SeqIO.parse(f, 'fasta'):
        seq = record.seq
        name = record.id
        # 30 is the minimum ORF length in codons passed to getORF.
        orfs = getORF(seq, 30, start, stop)
        # Only the forward strand is written: the reverse-strand search was
        # commented out, so the original output line referenced an undefined
        # `complement_orfs` and raised NameError on the first record.
        # TODO: restore the " - <reverse-strand ORFs>" part once a
        # reverse-strand search is implemented.
        # o.write (instead of `print >> o`) emits the same space-separated
        # text on both Python 2 and Python 3.
        o.write("%s + %s\n" % (name, orfs))