Question

def __init__(self,emps=str(""),l=[">"]):
    self.str=emps
    self.bl=l


def fromFile(self,seqfile):
    opf=open(seqfile,'r')                                       
    s=opf.read()                                              
    opf.close()                                                    
    lisst=s.split(">")                                             
    if s[0]==">":
        lisst.pop(0)                                                    
    nlist=[]
    for x in lisst:
        splitenter=x.split('\n')                                        
        splitenter.pop(0)                                               
        splitenter.pop()                                                
        splitstring="".join(splitenter)                                 
        nlist.append(splitstring)                                       
    nstr=">".join(nlist)                                                
    nstr=nstr.split()
    nstr="".join(nstr)
    for i in nstr:
        self.bl.append(i)
    self.str=nstr
    return nstr

def getSequence(self):
    print self.str
    print self.bl
    return self.str

def GpCratio(self):
    pgenes=[]
    nGC=[]
    for x in range(len(self.lb)):                                   
        if x==">":
            pgenes.append(x)                                           
    for i in range(len(pgenes)):                                        
        if i!=len(pgenes)-1:                                            
            c=krebscyclus[pgenes[i]:pgenes[i+1]].count('c')+0.000       
            g=krebscyclus[pgenes[i]:pgenes[i+1]].count('g')+0.000                                          
            ratio=(c+g)/(len(range(pgenes[i]+1,pgenes[i+1])))
            nGC.append(ratio)                                           
    return nGC

s = Sequence()
s.fromFile('D:\Documents\Bioinformatics\sequenceB.txt')
print 'Sequence:\n', s.getSequence(), '\n'
print "G+C ratio:\n", s.GpCratio(), '\n'

我不明白为什么会出错：

in GpCratio     for x in range(len(self.lb)): AttributeError: Sequence instance has no attribute 'lb'.

当我在def getSequence中打印列表时，它打印出正确的DNA序列列表，但我不能使用该列表来搜索核苷酸。我的大学只允许我输入1个文件，而不是在定义中使用其他参数，而是“自我” 顺便说一句，它是一个类，但它拒绝我发布它然后..类称为序列

Answer 1

看起来像一个错字。您在self.bl例程中定义__init__()，然后尝试访问self.lb。

（另外，emps=str("")是多余的 - emps=""同样适用。）

但即使你纠正了这个错字，循环也行不通：

for x in range(len(self.bl)):   # This iterates over a list like [0, 1, 2, 3, ...]
    if x==">":                  # This condition will never be True
        pgenes.append(x)

您可能需要执行类似

的操作

pgenes=[]
for x in self.bl:
    if x==">":                  # Shouldn't this be != ?
        pgenes.append(x)

也可以写成列表理解：

pgenes = [x for x in self.bl if x==">"]

在Python中，您几乎不需要len(x)或for n in range(...);你宁愿直接迭代序列/ iterable。

由于您的程序不完整且缺少样本数据，我无法在此处运行以查找其他所有缺陷。也许以下内容可以指出您正确的方向。假设一个字符串包含字符ATCG和>：

>>> gene = ">ATGAATCCGGTAATTGGCATACTGTAG>ATGATAGGAGGCTAG"
>>> pgene = ''.join(x for x in gene if x!=">")
>>> pgene
'ATGAATCCGGTAATTGGCATACTGTAGATGATAGGAGGCTAG'
>>> ratio = float(pgene.count("G") + pgene.count("C")) / (pgene.count("A") + pgene.count("T"))
>>> ratio
0.75

但是，如果您不想查看整个字符串，而是查看单独的基因（>是分隔符），请使用以下内容：

>>> gene = ">ATGAATCCGGTAATTGGCATACTGTAG>ATGATAGGAGGCTAG"
>>> genes = [g for g in gene.split(">") if g !=""]
>>> genes
['ATGAATCCGGTAATTGGCATACTGTAG', 'ATGATAGGAGGCTAG']
>>> nGC = [float(g.count("G")+g.count("C"))/(g.count("A")+g.count("T")) for g in genes]
>>> nGC
[0.6875, 0.875]

但是，如果你想计算GC含量，那么你当然不想要（G + C）/（A + T）但是（G + C）/（A + T + G + C） - ＆GT; nGC = [float(g.count("G")+g.count("C"))/len(g)]。

Init方法;自我对象的len（）

1 个答案: