我正在尝试根据一个存在/缺失(presence/absence)矩阵,将字符状态的变化映射到系统发育树的各个分支上。
我的做法是先将每个字符分配给它所在的叶子节点,然后,如果该叶子节点的姐妹节点具有相同的字符,就把该字符重新分配给它们的父节点(如此逐层向上处理,直到所有节点都分配完毕)。我正在用下面这个虚拟数据集来尝试实现这一目标:
Matrix
>Dme_001
1110000000000111
>Dme_002
1110000000000011
>Cfa_001
0110000000000011
>Mms_001
0110000000000011
>Hsa_001
0110000000000010
>Ptr_002
0110000000000011
>Mmu_002
0110000000000011
>Hsa_002
0110000000000011
>Ptr_001
0110000000000011
>Mmu_001
0110000000000011
Phylogeny
((Dme_001,Dme_002),(((Cfa_001,Mms_001),((Hsa_001,Ptr_001),Mmu_001)),(Ptr_002,(Hsa_002,Mmu_002))));
我使用ete3分配内部节点,所以我的输出应该是:
BranchID CharacterState Change
Node_1: 0 0->1
Hsa_001: 15 1->0
但由于我的代码是根据姐妹节点来分配字符状态的,它在处理过程中出了错,把输出弄乱了,结果变成:
BranchID CharacterState Change
Node_1: 0 0->1
Node_3 15 0->1
Node_5 15 0->1
Node_8 15 0->1
有人可以帮我吗?我正在用 Python 编写代码,已经陷入思维定势(钻进了牛角尖)。预先感谢。
我的代码:
from ete3 import PhyloTree
from collections import Counter
import itertools
# NOTE(review): the original opened 'PAM' here into a handle named PAM that was
# never read or closed; the matrix file is opened again where it is parsed, so
# the dead, leaking handle has been dropped.
gene_tree = '((Dme_001,Dme_002),(((Cfa_001,Mms_001),((Hsa_001,Ptr_001),Mmu_001)),(Ptr_002,(Hsa_002,Mmu_002))));'
tree = PhyloTree(gene_tree)

# Give every internal node a sequential "Node_<n>" label and record all node
# names (internal and leaf) in traversal order.  PlotCharacterStates treats the
# label "Node_0" as the root, so this relies on traverse() visiting the root
# first.
NodeIDs = []
edge = 0
for node in tree.traverse():
    if not node.is_leaf():
        node.name = "Node_%d" % edge
        edge += 1
    NodeIDs.append(node.name)
# Parse the matrix file: each non-blank line is "<taxon>\t<state string>".
# statesD maps a taxon name to the list of its single-character states.
taxa = []
pap = []
with open('PAM', 'r') as f:  # context manager closes the handle the original leaked
    for line in f:
        line = line.strip()
        if not line:
            # A blank (e.g. trailing) line has no second field and used to
            # crash with IndexError on term[1].
            continue
        term = line.split('\t')
        taxa.append(term[0])
        pap.append(list(term[1]))
statesD = dict(zip(taxa, pap))
def PlotCharacterStates(states=None, relationships=None, character_state=None):
    """Place every occurrence of one character state on the tree and print it.

    Each (taxon, character index) pair whose state equals ``character_state``
    starts as an "event" on its own node.  In every round, each sister pair
    that both carry events is compared: indices found on only one of the two
    are plotted on that node, while indices shared by both are promoted to the
    common parent and wait for the next round.  Events that reach the node
    named "Node_0" are plotted there.

    Args:
        states: mapping of node name -> sequence of states, one per character.
            Defaults to the module-level ``statesD``.
        relationships: iterable of ``(node, sister, parent, kind)`` tuples;
            only the first three fields are read.  Defaults to the
            module-level ``Relationships``.
        character_state: the state value to place.  Defaults to the
            module-level ``CharacterState``.

    Returns:
        The sorted list of ``(node name, character index)`` plots.  The same
        list is printed after the state, as ``"<state> <plots>"``.

    Changes from the original:
        * runs on both Python 2 and 3 (no ``iteritems`` / ``print`` statement);
        * placements are rebuilt each round instead of re-processing every
          earlier placement again;
        * if a round makes no progress (an event whose sister never carries
          any event can never be paired), the leftover events are plotted on
          the node they sit on and the loop stops, where the original spun
          forever;
        * the plots are sorted, so the output is deterministic.
    """
    if states is None:
        states = statesD
    if relationships is None:
        relationships = Relationships
    if character_state is None:
        character_state = CharacterState

    # Seed one event per (taxon, character index) that shows the state.
    pending = set()
    for taxon, taxon_states in states.items():
        for index, state in enumerate(taxon_states):
            if state == character_state:
                pending.add((taxon, index))

    plots = set()
    while pending:
        # Index this round's events by node so each relationship is O(1).
        by_node = {}
        for name, index in pending:
            by_node.setdefault(name, set()).add(index)

        moved = set()
        promoted = set()
        for rel in relationships:
            node_name, sister_name, parent_name = rel[0], rel[1], rel[2]
            node_chars = by_node.get(node_name)
            sister_chars = by_node.get(sister_name)
            # A pair is only resolved once BOTH sides carry events; a node
            # whose sister has not received its events yet keeps waiting.
            if not node_chars or not sister_chars:
                continue
            for index in node_chars - sister_chars:
                plots.add((node_name, index))
                moved.add((node_name, index))
            for index in sister_chars - node_chars:
                plots.add((sister_name, index))
                moved.add((sister_name, index))
            for index in node_chars & sister_chars:
                moved.add((node_name, index))
                moved.add((sister_name, index))
                promoted.add((parent_name, index))

        candidates = pending | promoted
        # Events that have climbed to "Node_0" have nowhere further to go.
        at_root = {event for event in candidates if event[0] == "Node_0"}
        plots |= at_root
        moved |= at_root

        remaining = candidates - moved
        if remaining == pending:
            # Fixed point: the next round would be identical.  Plot what is
            # left where it is instead of looping forever.
            plots |= remaining
            break
        pending = remaining

    result = sorted(plots)
    print("%s %s" % (character_state, result))
    return result
# Assign sisters to leaves and internal nodes.
#
# Relationships collects (node, sister, parent, kind) tuples, where kind is
#   "leaf-leaf"  - a leaf whose sister is a leaf
#   "node-leaf"  - a leaf whose sister is an internal node
#   "node-node"  - a non-root internal node whose sister is an internal node
# An internal node with a leaf sister gets no entry of its own here.
e = []
round1b_e = []
round2a_e = []
placements = []
Relationships = []
Rounds = []
for node in tree.traverse():
    sisters = node.get_sisters()
    parent = node.up
    cycle1 = []  # never read in this section; kept from the original
    if node.is_leaf():
        for sister in sisters:
            pair = (node.name, sister.name)
            if sister.is_leaf():
                round_tag, pair_log, kind = "Round1a", e, "leaf-leaf"
            else:
                round_tag, pair_log, kind = "Round1b", round1b_e, "node-leaf"
            Rounds.append([round_tag, node.name, sister.name, parent.name])
            pair_log.append(pair)
            Relationships.append(pair + (parent.name, kind))
    elif not node.is_root():
        for sister in sisters:
            if sister.is_leaf():
                continue
            pair = (node.name, sister.name)
            round2a_e.append(pair)
            Relationships.append(pair + (parent.name, "node-node"))
# Collect every distinct state symbol that occurs anywhere in the matrix, in
# sorted order.  (.values() replaces the Python 2-only iteritems(), and the
# comprehension avoids the original's shadowing of its own loop iterable.)
CharacterStates = sorted(
    {state for taxon_states in statesD.values() for state in taxon_states}
)

# PlotCharacterStates() is called without arguments and picks up the current
# state from the module-level name CharacterState, so the loop variable must
# keep exactly that name.
for CharacterState in CharacterStates:
    PlotCharacterStates()