我正试图从这里解决问题http://rosalind.info/problems/cons/
我的脚本填写计数器列表并输出相等长度的共识字符串。我不认为有数学或索引错误正在发生并且已经碰壁。我的代码:
with open('C:/users/steph/downloads/rosalind_cons (3).txt') as f:
seqs = f.read().splitlines()
#remove all objects that are not sequences of interest
for s in seqs:
if s[0] == '>':
seqs.remove(s)
n = range(len(seqs[0])+1)
#lists to store counts for each nucleotide
A, C, G, T = [0 for i in n], [0 for i in n], [0 for i in n], [0 for i in n]
#see what nucleotide is at each index and augment the
#same index of the respective list
def counter(Q):
for q in Q:
for k in range(len(q)):
if q[k] == 'A':
A[k] += 1
elif q[k] == 'C':
C[k] += 1
elif q[k] == 'G':
G[k] += 1
elif q[k] == 'T':
T[k] += 1
counter(seqs)
#find the max of all the counter lists at every index
#and add the respective nucleotide to the consensus sequence
def consensus(a,t,c,g):
consensus = ''
for k in range(len(a)):
if (a[k] > t[k]) and (a[k]>c[k]) and (a[k]>g[k]):
consensus = consensus+"A"
elif (t[k] > a[k]) and (t[k]>c[k]) and (t[k]>g[k]):
consensus = consensus+ 'T'
elif (c[k] > t[k]) and (c[k]>a[k]) and (c[k]>g[k]):
consensus = consensus+ 'C'
elif (g[k] > t[k]) and (g[k]>c[k]) and (g[k]>a[k]):
consensus = consensus+ 'G'
#ensure a nucleotide is added to consensus sequence
#when more than one index has the max value
else:
if max(a[k],c[k],t[k],g[k]) in a:
consensus = consensus + 'A'
elif max(a[k],c[k],t[k],g[k]) in c:
consensus = consensus + 'C'
elif max(a[k],c[k],t[k],g[k]) in t:
consensus = consensus + 'T'
elif max(a[k],c[k],t[k],g[k]) in g:
consensus = consensus + 'G'
print(consensus)
#debugging, ignore this --> print('len(consensus)',len(consensus))
consensus(A,T,C,G)
#debugging, ignore this --> print('len(A)',len(A))
print('A: ',*A, sep=' ')
print('C: ',*C, sep=' ')
print('G: ',*G, sep=' ')
print('T: ',*T, sep=' ')
感谢您的时间
答案 0 :(得分:0)
以下行中有错误:
n =范围(len(seqs [0])+ 1)
导致序列太长(填充额外的A
和4次0
)。删除+1
,它应该有效。
:
之后的空格。尝试将这些行与下面的剪辑合并:
new_seqs = list()
for s in seqs:
if s.startswith('>'):
new_seqs.append('')
else:
new_seqs[-1]+=s
seqs = new_seqs
再试一次。