1
# Copyright (C) 2005, 2006, 2008, 2009 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
# This code originated in bzrlib. In order to make it suitable for use in
18
# Germinate, Colin Watson removed its bzrlib.errors dependency and stripped
19
# out the implementation of specialised merge-graph sorting.
22
"""Topological sorting routines."""
25
__all__ = ["topo_sort", "TopoSorter"]
28
class GraphCycleError(StandardError):
30
_fmt = "Cycle in graph %(graph)r"
32
def __init__(self, graph):
33
StandardError.__init__(self)
37
d = dict(self.__dict__)
38
# special case: python2.5 puts the 'message' attribute in a
39
# slot, so it isn't seen in __dict__
40
d['message'] = getattr(self, 'message', 'no message')
42
# __str__() should always return a 'str' object
43
# never a 'unicode' object.
44
if isinstance(s, unicode):
45
return s.encode('utf8')
50
"""Topological sort a graph.
52
graph -- sequence of pairs of node->parents_list.
54
The result is a list of node names, such that all parents come before
57
node identifiers can be any hashable object, and are typically strings.
59
return TopoSorter(graph).sorted()
62
class TopoSorter(object):
64
def __init__(self, graph):
65
"""Topological sorting of a graph.
67
:param graph: sequence of pairs of node_name->parent_names_list.
68
i.e. [('C', ['B']), ('B', ['A']), ('A', [])]
69
For this input the output from the sort or
70
iter_topo_order routines will be:
73
node identifiers can be any hashable object, and are typically strings.
75
If you have a graph like [('a', ['b']), ('a', ['c'])] this will only use
76
one of the two values for 'a'.
78
The graph is sorted lazily: until you iterate or sort the input is
79
not processed other than to create an internal representation.
81
iteration or sorting may raise GraphCycleError if a cycle is present
84
# a dict of the graph.
85
self._graph = dict(graph)
86
self._visitable = set(self._graph)
88
# self._original_graph = dict(graph)
90
# this is a stack storing the depth first search into the graph.
91
self._node_name_stack = []
92
# at each level of 'recursion' we have to check each parent. This
93
# stack stores the parents we have not yet checked for the node at the
94
# matching depth in _node_name_stack
95
self._pending_parents_stack = []
96
# this is a set of the completed nodes for fast checking whether a
97
# parent in a node we are processing on the stack has already been
98
# emitted and thus can be skipped.
99
self._completed_node_names = set()
102
"""Sort the graph and return as a list.
104
After calling this the sorter is empty and you must create a new one.
106
return list(self.iter_topo_order())
108
### Useful if fiddling with this code.
110
### sorted_names = list(self.iter_topo_order())
111
### for index in range(len(sorted_names)):
112
### rev = sorted_names[index]
113
### for left_index in range(index):
114
### if rev in self.original_graph[sorted_names[left_index]]:
115
### print "revision in parent list of earlier revision"
116
### import pdb;pdb.set_trace()
118
def iter_topo_order(self):
119
"""Yield the nodes of the graph in a topological order.
121
After finishing iteration the sorter is empty and you cannot continue
125
# now pick a random node in the source graph, and transfer it to the
126
# top of the depth first search stack.
127
node_name, parents = self._graph.popitem()
128
self._push_node(node_name, parents)
129
while self._node_name_stack:
130
# loop until this call completes.
131
parents_to_visit = self._pending_parents_stack[-1]
132
# if all parents are done, the revision is done
133
if not parents_to_visit:
134
# append the revision to the topo sorted list
135
# all the nodes parents have been added to the output, now
136
# we can add it to the output.
137
yield self._pop_node()
139
while self._pending_parents_stack[-1]:
140
# recurse depth first into a single parent
141
next_node_name = self._pending_parents_stack[-1].pop()
142
if next_node_name in self._completed_node_names:
143
# this parent was completed by a child on the
144
# call stack. skip it.
146
if next_node_name not in self._visitable:
148
# otherwise transfer it from the source graph into the
149
# top of the current depth first search stack.
151
parents = self._graph.pop(next_node_name)
153
# if the next node is not in the source graph it has
154
# already been popped from it and placed into the
155
# current search stack (but not completed or we would
156
# have hit the continue 4 lines up.
157
# this indicates a cycle.
158
raise GraphCycleError(self._node_name_stack)
159
self._push_node(next_node_name, parents)
160
# and do not continue processing parents until this 'call'
164
def _push_node(self, node_name, parents):
165
"""Add node_name to the pending node stack.
167
Names in this stack will get emitted into the output as they are popped
170
self._node_name_stack.append(node_name)
171
self._pending_parents_stack.append(list(parents))
174
"""Pop the top node off the stack
176
The node is appended to the sorted output.
178
# we are returning from the flattened call frame:
179
# pop off the local variables
180
node_name = self._node_name_stack.pop()
181
self._pending_parents_stack.pop()
183
self._completed_node_names.add(node_name)