这个功能让我疯了!
def CCAD1 (tree)
leaves = []
for otu in tree:
if tree[otu][2]== None and tree[otu][1]== None:
leaves += [otu]
ccad = {}
for leaf in leaves:
otuX = leaf
otu1 = leaf
for leaf2 in leaves:
otuY = leaf2
otu2 = leaf2
while tree[otu1][0] is not None and tree[otu2][0] is not None and tree[otu1][0][0] != tree[otu2][0][0]:
otu1,otu2,tree = tree[otu1][0][0],tree[otu2][0][0],tree
if tree[otu1][0] is not None:
ccad[otuX] = {otuY:tree[otu1][0]}
return ccad
这是函数的输入
{'A': [('AD', 4.0), None, None], 'C': [('ADBFGC', 14.5), None, None], 'B': [('BF', 0.5), None, None], 'E': [('ADBFGCE', 17.0), None, None], 'D': [('AD', 4.0), None, None], 'G': [('BFG', 6.25), None, None], 'F': [('BF', 0.5), None, None], 'ADBFG': [('ADBFGC', 6.25), ('AD', 4.25), ('BFG', 2.0)], 'BF': [('BFG', 5.75), ('B', 0.5), ('F', 0.5)], 'ADBFGC': [('ADBFGCE', 2.5), ('ADBFG', 6.25), ('C', 14.5)], 'ADBFGCE': [None, ('ADBFGC', 2.5), ('E', 17.0)], 'BFG': [('ADBFG', 2.0), ('BF', 5.75), ('G', 6.25)], 'AD': [('ADBFG', 4.25), ('A', 4.0), ('D', 4.0)]}
输出应该是{"A":{"B":("AB",4)}}
之类的结构,在上面的代码中,这是字典'CCAD'。我真的一直在努力做到这一点,但它不起作用,我不知道为什么。
基本上我要做的是构建一个函数,输出一个字典字典,对于列表leaves
中每个不同的元素对,它将计算一个祖先(我得到了一些很好的帮助计算以前here)以及距离,在该链接中,每次函数迭代时都会保持运行总计。
它正在输出我需要的词典字典,但它不是为每一对,只有某些字典。如果您需要查看它,那么'tree'数据结构也在该链接中。
感谢任何帮助,我在这个阶段非常绝望:/
答案 0 :(得分:3)
好的,我想你想计算每个叶子节点之间的距离。所以我正在解决你的问题,而不是回答你的问题。
您的commonAncestor算法存在缺陷,因为它假设叶节点都处于相同的深度。他们不是。
首先想到的解决方案是确定所有叶节点并计算每个叶节点的根节点的路径。通过反向比较两条路径来确定最接近的共同祖先。
这是一个节点对的字典和它们之间的跳数。
from itertools import combinations
data = {'A': [('AD', 4.0), None, None], 'C': [('ADBFGC', 14.5), None, None], 'B': [('BF', 0.5), None, None], 'E': [('ADBFGCE', 17.0), None, None], 'D': [('AD', 4.0), None, None], 'G': [('BFG', 6.25), None, None], 'F': [('BF', 0.5), None, None], 'ADBFG': [('ADBFGC', 6.25), ('AD', 4.25), ('BFG', 2.0)], 'BF': [('BFG', 5.75), ('B', 0.5), ('F', 0.5)], 'ADBFGC': [('ADBFGCE', 2.5), ('ADBFG', 6.25), ('C', 14.5)], 'ADBFGCE': [None, ('ADBFGC', 2.5), ('E', 17.0)], 'BFG': [('ADBFG', 2.0), ('BF', 5.75), ('G', 6.25)], 'AD': [('ADBFG', 4.25), ('A', 4.0), ('D', 4.0)]}
def get_path(tree,leaf):
path = []
location = leaf
while True:
path.append(location)
parent = tree.get(location)[0]
if parent:
location = parent[0]
else:
break
return path
def get_leaves(tree):
return [ x for (x,y) in tree.items() if y[1] is None and y[2] is None ]
def leafDistances(tree):
paths = {}
leaves = get_leaves(tree)
for leaf in leaves:
paths[leaf] = get_path(tree,leaf)
results = {}
for l1,l2 in combinations(leaves,2):
commonAncestor = [ x for (x,y) in zip(paths[l1][::-1],paths[l2][::-1]) if x == y ][-1]
distance = paths[l1].index(commonAncestor) + paths[l2].index(commonAncestor)
results[(l1,l2)] = distance
print "%s <-> %s Ancestor == %s, distance == %s\nPath of %s == %s\nPath of %s == %s" % (l1,l2,commonAncestor,distance,l1,paths[l1],l2,paths[l2])
return results
leafDistances(data)
为了清晰起见,打印出来:
A <-> C Ancestor == ADBFGC, distance == 4
Path of A == ['A', 'AD', 'ADBFG', 'ADBFGC', 'ADBFGCE']
Path of C == ['C', 'ADBFGC', 'ADBFGCE']
A <-> B Ancestor == ADBFG, distance == 5
Path of A == ['A', 'AD', 'ADBFG', 'ADBFGC', 'ADBFGCE']
Path of B == ['B', 'BF', 'BFG', 'ADBFG', 'ADBFGC', 'ADBFGCE']
A <-> E Ancestor == ADBFGCE, distance == 5
Path of A == ['A', 'AD', 'ADBFG', 'ADBFGC', 'ADBFGCE']
Path of E == ['E', 'ADBFGCE']
A <-> D Ancestor == AD, distance == 2
Path of A == ['A', 'AD', 'ADBFG', 'ADBFGC', 'ADBFGCE']
Path of D == ['D', 'AD', 'ADBFG', 'ADBFGC', 'ADBFGCE']
A <-> G Ancestor == ADBFG, distance == 4
Path of A == ['A', 'AD', 'ADBFG', 'ADBFGC', 'ADBFGCE']
Path of G == ['G', 'BFG', 'ADBFG', 'ADBFGC', 'ADBFGCE']
A <-> F Ancestor == ADBFG, distance == 5
Path of A == ['A', 'AD', 'ADBFG', 'ADBFGC', 'ADBFGCE']
Path of F == ['F', 'BF', 'BFG', 'ADBFG', 'ADBFGC', 'ADBFGCE']
C <-> B Ancestor == ADBFGC, distance == 5
Path of C == ['C', 'ADBFGC', 'ADBFGCE']
Path of B == ['B', 'BF', 'BFG', 'ADBFG', 'ADBFGC', 'ADBFGCE']
C <-> E Ancestor == ADBFGCE, distance == 3
Path of C == ['C', 'ADBFGC', 'ADBFGCE']
Path of E == ['E', 'ADBFGCE']
C <-> D Ancestor == ADBFGC, distance == 4
Path of C == ['C', 'ADBFGC', 'ADBFGCE']
Path of D == ['D', 'AD', 'ADBFG', 'ADBFGC', 'ADBFGCE']
C <-> G Ancestor == ADBFGC, distance == 4
Path of C == ['C', 'ADBFGC', 'ADBFGCE']
Path of G == ['G', 'BFG', 'ADBFG', 'ADBFGC', 'ADBFGCE']
C <-> F Ancestor == ADBFGC, distance == 5
Path of C == ['C', 'ADBFGC', 'ADBFGCE']
Path of F == ['F', 'BF', 'BFG', 'ADBFG', 'ADBFGC', 'ADBFGCE']
B <-> E Ancestor == ADBFGCE, distance == 6
Path of B == ['B', 'BF', 'BFG', 'ADBFG', 'ADBFGC', 'ADBFGCE']
Path of E == ['E', 'ADBFGCE']
B <-> D Ancestor == ADBFG, distance == 5
Path of B == ['B', 'BF', 'BFG', 'ADBFG', 'ADBFGC', 'ADBFGCE']
Path of D == ['D', 'AD', 'ADBFG', 'ADBFGC', 'ADBFGCE']
B <-> G Ancestor == BFG, distance == 3
Path of B == ['B', 'BF', 'BFG', 'ADBFG', 'ADBFGC', 'ADBFGCE']
Path of G == ['G', 'BFG', 'ADBFG', 'ADBFGC', 'ADBFGCE']
B <-> F Ancestor == BF, distance == 2
Path of B == ['B', 'BF', 'BFG', 'ADBFG', 'ADBFGC', 'ADBFGCE']
Path of F == ['F', 'BF', 'BFG', 'ADBFG', 'ADBFGC', 'ADBFGCE']
E <-> D Ancestor == ADBFGCE, distance == 5
Path of E == ['E', 'ADBFGCE']
Path of D == ['D', 'AD', 'ADBFG', 'ADBFGC', 'ADBFGCE']
E <-> G Ancestor == ADBFGCE, distance == 5
Path of E == ['E', 'ADBFGCE']
Path of G == ['G', 'BFG', 'ADBFG', 'ADBFGC', 'ADBFGCE']
E <-> F Ancestor == ADBFGCE, distance == 6
Path of E == ['E', 'ADBFGCE']
Path of F == ['F', 'BF', 'BFG', 'ADBFG', 'ADBFGC', 'ADBFGCE']
D <-> G Ancestor == ADBFG, distance == 4
Path of D == ['D', 'AD', 'ADBFG', 'ADBFGC', 'ADBFGCE']
Path of G == ['G', 'BFG', 'ADBFG', 'ADBFGC', 'ADBFGCE']
D <-> F Ancestor == ADBFG, distance == 5
Path of D == ['D', 'AD', 'ADBFG', 'ADBFGC', 'ADBFGCE']
Path of F == ['F', 'BF', 'BFG', 'ADBFG', 'ADBFGC', 'ADBFGCE']
G <-> F Ancestor == BFG, distance == 3
Path of G == ['G', 'BFG', 'ADBFG', 'ADBFGC', 'ADBFGCE']
Path of F == ['F', 'BF', 'BFG', 'ADBFG', 'ADBFGC', 'ADBFGCE']
答案 1 :(得分:2)
由于您没有提供所需的输出,因此调试代码非常困难,但我觉得最后if
条件应该在while
之下。