如何根据存在/缺失矩阵在系统发育树上标注性状状态的变化

时间:2018-10-08 04:41:52

标签: python bioinformatics phylogeny

我正在尝试根据一个存在/缺失(presence/absence)矩阵,把性状状态的变化映射到系统发育树的各个分支上。

我的做法是先把每个性状分配给它所在的叶节点;如果某个叶节点的姐妹节点具有相同的性状,就把该性状改为分配给它们的父节点(如此逐层向上处理,直到所有节点都分配完毕)。我正在用一个虚拟数据集来尝试实现这一点:

Matrix
>Dme_001
1110000000000111
>Dme_002
1110000000000011
>Cfa_001
0110000000000011
>Mms_001
0110000000000011
>Hsa_001
0110000000000010
>Ptr_002
0110000000000011
>Mmu_002
0110000000000011
>Hsa_002
0110000000000011
>Ptr_001
0110000000000011
>Mmu_001
0110000000000011

Phylogeny
((Dme_001,Dme_002),(((Cfa_001,Mms_001),((Hsa_001,Ptr_001),Mmu_001)),(Ptr_002,(Hsa_002,Mmu_002))));

我使用 ete3 为内部节点命名,因此期望的输出应该是:

BranchID    CharacterState    Change
Node_1:    0    0->1
Hsa_001:    15    1->0 

由于我的代码是根据姐妹节点来分配性状状态的,它会出错并使输出变得混乱,结果变成:

BranchID    CharacterState    Change
Node_1:   0     0->1
Node_3    15    0->1
Node_5    15    0->1
Node_8    15    0->1

有人可以帮我吗?我用 Python 编写代码,现在已经陷入了思维定式(隧道视野)。先谢谢了。

我的代码:

from ete3 import PhyloTree
from collections import Counter
import itertools

# NOTE: a file handle used to be opened here as ``PAM``; it was never read
# and never closed, and the matrix file is opened again where it is parsed
# further down, so the redundant handle has been dropped.

# Newick description of the gene tree.
gene_tree = '((Dme_001,Dme_002),(((Cfa_001,Mms_001),((Hsa_001,Ptr_001),Mmu_001)),(Ptr_002,(Hsa_002,Mmu_002))));'

# Names of every node in traversal order (internal nodes and leaves alike).
NodeIDs = []

tree = PhyloTree(gene_tree)

# Internal nodes have no name in the Newick string, so label them
# "Node_<n>" in traversal order.  ete3's traverse() defaults to level order,
# which visits the root first; the root therefore becomes "Node_0", the name
# PlotCharacterStates() checks for explicitly.
edge = 0
for node in tree.traverse():
    if not node.is_leaf():
        node.name = "Node_%d" % edge
        edge += 1
    # Leaves keep the label they were given in the Newick string.
    NodeIDs.append(node.name)

taxa = []
pap = []

# Read the matrix file: one row per taxon, "<taxon name>\t<state string>".
# NOTE(review): a FASTA-style layout (a '>name' line followed by a separate
# states line) has no tab and would raise IndexError on term[1] -- confirm
# the real file format.
# The 'with' block guarantees the handle is closed even if a row is malformed.
with open('PAM', 'r') as f:
    for line in f:
        line = line.strip()
        if not line:
            # Blank lines (e.g. a trailing newline at end of file) carry no
            # data; previously they crashed the script with IndexError.
            continue
        term = line.split('\t')
        taxa.append(term[0])
        # One list element per character column.
        pap.append(list(term[1]))

# taxon name -> list of single-character state symbols
statesD = dict(zip(taxa, pap))

def PlotCharacterStates():

    Plots = []

    events = []

    for key, value in statesD.iteritems():
        count = -1
        for s in value: 
            count+=1
            if s == CharacterState:
                a = key, count
                events.append(a)

    Round3_events = []
    while len(events) > 0:
        for rel in Relationships:
            node_store = []
            sis_store = []
            for event in events:
                if rel[0] == event[0]:
                    node_store.append(event[1])
                if rel[1] == event[0]:
                    sis_store.append(event[1])
            if (len(node_store) > 0) and (len(sis_store) > 0):
                place = rel, node_store, sis_store
                Round3_events.append(place)

        moved = []
        for placement in Round3_events:
            intercept = (set(placement[1]) & set(placement[2]))
            node_plot = (set(placement[1]) - set(placement[2]))
            sis_plot = (set(placement[2]) - set(placement[1]))
            if len(node_plot) > 0:
                for x in node_plot:
                    y = placement[0][0], x
                    Plots.append(y)
                    moved.append(y)
            if len(sis_plot) > 0:
                for x in sis_plot:
                    y = placement[0][1], x
                    Plots.append(y)
                    moved.append(y)
            if len(intercept) > 0:
                for x in intercept:
                    y = placement[0][2], x
                    y1 = placement[0][0], x
                    y2 = placement[0][1], x
                    moved.append(y1)
                    moved.append(y2)
                    events.append(y)

        for event in events:
            if event[0] == "Node_0":
                Plots.append(event)
                moved.append(event)

        events2 = (set(events) - set(moved))
        events = []
        for event in events2:
            events.append(event)


    pl = set(Plots)
    Plots = []
    for p in pl:
        Plots.append(p)

    print CharacterState, Plots


'''
assign sisters to leaves, internals
'''

# Build the node/sister/parent table used by PlotCharacterStates().
#
# Each entry of ``Relationships`` is (node name, sister name, parent name,
# label).  The label is "leaf-leaf" when both are leaves, "node-leaf" when
# the node is a leaf and its sister is internal, and "node-node" when both
# are internal (non-root) nodes.  An internal node with a leaf sister gets
# no entry of its own; that pair is recorded from the leaf's side only.
#
# The other lists are filled (or left empty) alongside it; in the visible
# script only ``Relationships`` is read afterwards.
e = []
round1b_e = []
round2a_e = []
placements = []
Relationships = []
Rounds = []
for node in tree.traverse():
    sisters = node.get_sisters()
    parent = node.up
    cycle1 = []  # never read in the visible script

    if node.is_leaf():
        for sib in sisters:
            pair = node.name, sib.name
            if sib.is_leaf():
                Rounds.append(["Round1a", node.name, sib.name, parent.name])
                e.append(pair)
                Relationships.append(
                    (node.name, sib.name, parent.name, "leaf-leaf"))
            else:
                Rounds.append(["Round1b", node.name, sib.name, parent.name])
                round1b_e.append(pair)
                Relationships.append(
                    (node.name, sib.name, parent.name, "node-leaf"))
        continue

    # Internal node: the root has no parent or sisters to relate it to.
    if node.is_root():
        continue

    for sib in sisters:
        if sib.is_leaf():
            # Already covered by the leaf's own "node-leaf" entry.
            continue
        round2a_e.append((node.name, sib.name))
        Relationships.append((node.name, sib.name, parent.name, "node-node"))

# Gather every symbol that occurs anywhere in the matrix, then run the
# mapping once per distinct symbol, in sorted order.
observed = []
for states in statesD.values():
    observed.extend(states)

CharacterStates = list(sorted(set(observed)))

# PlotCharacterStates() takes no arguments; it reads the symbol to map from
# the module-level name ``CharacterState`` bound by this loop.
for CharacterState in CharacterStates:
    PlotCharacterStates()

0 个答案:

没有答案