我在Python中根据wikipedia实现了Tarjan的强连接组件算法,但它不起作用。算法很短,我找不到任何区别,所以我不知道它为什么不起作用。我试图查看原始论文,但找不到它。
这是代码。
def strongConnect(v):
global E, idx, CCs, c, S
idx[v] = (c, c) #idx[v][0] for v.index # idx[v][1] for v.lowlink
c += 1
S.append(v)
for w in [u for (v2, u) in E if v == v2]:
if idx[w][0] < 0:
strongConnect(w)
# idx[w] = (idx[w][0], min(idx[v][1], idx[w][1])) #fixed, thx
idx[v] = (idx[v][0], min(idx[v][1], idx[w][1]))
elif w in S:
idx[v] = (idx[v][0], min(idx[v][1], idx[w][0]))
if (idx[v][0] == idx[v][1]):
i = S.index(v)
CCs.append(S[i:])
S = S[:i]
E = [('A', 'B'), ('B', 'C'), ('C', 'D'), ('D', 'E'), ('E', 'A'), ('A', 'E'), ('C', 'A'), ('C', 'E'), ('D', 'F'), ('F', 'B'), ('E', 'F')]
idx = {}
CCs = []
c = 0
S = []
for (u, v) in E:
idx[u] = (-1, -1)
idx[v] = (-1, -1)
for v in idx.keys():
if idx[v][0] < 0:
strongConnect(v)
print(CCs)
如果您愿意,可以check the graph visually。正如您所看到的,这是维基百科中伪代码的一个非常向前的翻译。但是,这是输出:
[['D', 'E', 'F'], ['B', 'C'], ['A']]
应该只有一个强连接组件,而不是三个。我希望这个问题在所有方面都是正确的,如果不是我很抱歉。无论如何,非常感谢你。
答案 0 :(得分:15)
好的,我有更多的时间来考虑这一点。正如我之前所说,我不再确定过滤边缘是问题所在。事实上,我认为pseudocode存在歧义; for each (v, w) in E
对于每条边(对于for each
的字面含义是什么意思),或者只是以v
开头的每条边(对于你合理地假设),v
是什么意思?然后,在for循环之后,问题v
是for
循环中的最终v
,就像在Python中一样?或者这又回到原来的v
?在这种情况下,伪代码没有明确定义的范围行为! (如果最后的v
是循环中v
的最后一个任意值,那将是非常奇怪的。这表明过滤是正确的,因为在这种情况下,{{1一直意味着同样的事情。)
但是,在任何情况下,代码中的清除错误都在此处:
idx[w] = (idx[w][0], min(idx[v][1], idx[w][1]))
根据伪代码,那肯定是
idx[v] = (idx[v][0], min(idx[v][1], idx[w][1]))
进行更改后,您将获得预期结果。坦率地说,你犯了那个错误并不会让我感到惊讶,因为你使用了一个非常奇怪和违反直觉的数据结构。这是我认为的改进 - 它只增加了几行,我发现它更具可读性。
import itertools
def strong_connect(vertex):
global edges, indices, lowlinks, connected_components, index, stack
indices[vertex] = index
lowlinks[vertex] = index
index += 1
stack.append(vertex)
for v, w in (e for e in edges if e[0] == vertex):
if indices[w] < 0:
strong_connect(w)
lowlinks[v] = min(lowlinks[v], lowlinks[w])
elif w in stack:
lowlinks[v] = min(lowlinks[v], indices[w])
if indices[vertex] == lowlinks[vertex]:
connected_components.append([])
while stack[-1] != vertex:
connected_components[-1].append(stack.pop())
connected_components[-1].append(stack.pop())
edges = [('A', 'B'), ('B', 'C'), ('C', 'D'), ('D', 'E'),
('E', 'A'), ('A', 'E'), ('C', 'A'), ('C', 'E'),
('D', 'F'), ('F', 'B'), ('E', 'F')]
vertices = set(v for v in itertools.chain(*edges))
indices = dict((v, -1) for v in vertices)
lowlinks = indices.copy()
connected_components = []
index = 0
stack = []
for v in vertices:
if indices[v] < 0:
strong_connect(v)
print(connected_components)
但是,我发现这里使用的全局变量令人反感。你可以在自己的模块中隐藏它,但我更喜欢创建一个可调用类的想法。在仔细观察Tarjan的original pseudocode后,(确认“过滤后的”版本是正确的,顺便说一句),我写了这篇文章。它包含一个简单的Graph
类,并进行了几项基本测试:
from itertools import chain
from collections import defaultdict
class Graph(object):
def __init__(self, edges, vertices=()):
edges = list(list(x) for x in edges)
self.edges = edges
self.vertices = set(chain(*edges)).union(vertices)
self.tails = defaultdict(list)
for head, tail in self.edges:
self.tails[head].append(tail)
@classmethod
def from_dict(cls, edge_dict):
return cls((k, v) for k, vs in edge_dict.iteritems() for v in vs)
class _StrongCC(object):
def strong_connect(self, head):
lowlink, count, stack = self.lowlink, self.count, self.stack
lowlink[head] = count[head] = self.counter = self.counter + 1
stack.append(head)
for tail in self.graph.tails[head]:
if tail not in count:
self.strong_connect(tail)
lowlink[head] = min(lowlink[head], lowlink[tail])
elif count[tail] < count[head]:
if tail in self.stack:
lowlink[head] = min(lowlink[head], count[tail])
if lowlink[head] == count[head]:
component = []
while stack and count[stack[-1]] >= count[head]:
component.append(stack.pop())
self.connected_components.append(component)
def __call__(self, graph):
self.graph = graph
self.counter = 0
self.count = dict()
self.lowlink = dict()
self.stack = []
self.connected_components = []
for v in self.graph.vertices:
if v not in self.count:
self.strong_connect(v)
return self.connected_components
strongly_connected_components = _StrongCC()
if __name__ == '__main__':
edges = [('A', 'B'), ('B', 'C'), ('C', 'D'), ('D', 'E'),
('E', 'A'), ('A', 'E'), ('C', 'A'), ('C', 'E'),
('D', 'F'), ('F', 'B'), ('E', 'F')]
print strongly_connected_components(Graph(edges))
edge_dict = {'a':['b', 'c', 'd'],
'b':['c', 'a'],
'c':['d', 'e'],
'd':['e'],
'e':['c']}
print strongly_connected_components(Graph.from_dict(edge_dict))
答案 1 :(得分:0)
我修改了 senderle 对 Python 3.6+ 的回答,并添加了类型提示和注释,使其对我更有意义。
from itertools import chain
from collections import defaultdict
from typing import Iterable, DefaultDict, List, Dict, Generic, TypeVar, Tuple, Set
T = TypeVar('T') # label for a vertex
class Graph(Generic[T]):
def __init__(self, edges: Iterable[Tuple[T, T]], vertices: Iterable[T] = ()):
edges = [list(x) for x in edges]
self.edges = edges
self.vertices: Set[T] = set(chain(*edges)).union(vertices)
self.adj_list: DefaultDict[T, List[T]] = defaultdict(list) # i.e., neighbors of a given node
for head, tail in self.edges:
self.adj_list[head].append(tail)
@classmethod
def from_dict(cls, edge_dict: Dict[T, Iterable[T]]):
return cls((k, v) for k, vs in edge_dict.items() for v in vs)
def strongly_connected_components(graph: Graph[T]) -> List[List[T]]:
idx = 0 # index to be assigned to the next node
node_idxs: Dict[T, int] = {} # index of a visited node
lowlink: Dict[T, int] = {} # low-link number is the lowest node number (index) reachable by the node that is in the same connected component – its own number, or the low-link number of a previous unvisited neighbor, or the node number of a visited neighbor in the stack
stack: List[T] = []
connected_components: List[List[T]] = []
def visit(head: T) -> None:
nonlocal idx
lowlink[head] = node_idxs[head] = idx
idx += 1
stack.append(head)
for neighbor in graph.adj_list[head]:
if neighbor not in node_idxs: # i.e., not visited
visit(neighbor)
lowlink[head] = min(lowlink[head], lowlink[neighbor])
elif node_idxs[neighbor] < node_idxs[head]:
if neighbor in stack:
lowlink[head] = min(lowlink[head], node_idxs[neighbor])
if lowlink[head] == node_idxs[head]:
component: List[T] = []
while stack and node_idxs[stack[-1]] >= node_idxs[head]:
component.append(stack.pop())
connected_components.append(component)
for v in graph.vertices:
if v not in node_idxs:
visit(v)
return connected_components
if __name__ == '__main__':
edges = [('A', 'B'), ('B', 'C'), ('C', 'D'), ('D', 'E'),
('E', 'A'), ('A', 'E'), ('C', 'A'), ('C', 'E'),
('D', 'F'), ('F', 'B'), ('E', 'F')]
print(strongly_connected_components(Graph(edges))) # [['F', 'D', 'C', 'B', 'A', 'E']]
edge_dict = {'a':['b', 'c', 'd'],
'b':['c', 'a'],
'c':['d', 'e'],
'd':['e'],
'e':['c']}
print(strongly_connected_components(Graph.from_dict(edge_dict))) # [['e', 'd', 'c'], ['b', 'a']]