ioftools / networkxMiCe / networkxmaster / networkx / algorithms / lowest_common_ancestors.py @ 5cef0f13
History  View  Annotate  Download (14 KB)
1 
# Copyright (C) 2013 by


2 
# Alex Roper <aroper@umich.edu>

3 
# Copyright (C) 2017 by

4 
# Aric Hagberg <hagberg@lanl.gov>

5 
# Dan Schult <dschult@colgate.edu>

6 
# Pieter Swart <swart@lanl.gov>

7 
#

8 
# All rights reserved.

9 
# BSD license.

10 
#

11 
# Author: Alex Roper <aroper@umich.edu>

12 
"""Algorithms for finding the lowest common ancestor of trees and DAGs."""

13 
from collections import defaultdict 
14 
from collections.abc import Mapping, Set 
15 
from itertools import chain, count 
16  
17 
import networkx as nx 
18 
from networkx.utils import arbitrary_element, not_implemented_for, \ 
19 
UnionFind, generate_unique_node 
20  
21 
__all__ = ["all_pairs_lowest_common_ancestor",

22 
"tree_all_pairs_lowest_common_ancestor",

23 
"lowest_common_ancestor"]

24  
25  
26 
@not_implemented_for("undirected") 
27 
@not_implemented_for("multigraph") 
28 
def tree_all_pairs_lowest_common_ancestor(G, root=None, pairs=None): 
29 
r"""Yield the lowest common ancestor for sets of pairs in a tree.

30 

31 
Parameters

32 


33 
G : NetworkX directed graph (must be a tree)

34 

35 
root : node, optional (default: None)

36 
The root of the subtree to operate on.

37 
If None, assume the entire graph has exactly one source and use that.

38 

39 
pairs : iterable or iterator of pairs of nodes, optional (default: None)

40 
The pairs of interest. If None, Defaults to all pairs of nodes

41 
under `root` that have a lowest common ancestor.

42 

43 
Returns

44 


45 
lcas : generator of tuples `((u, v), lca)` where `u` and `v` are nodes

46 
in `pairs` and `lca` is their lowest common ancestor.

47 

48 
Notes

49 


50 
Only defined on nonnull trees represented with directed edges from

51 
parents to children. Uses Tarjan's offline lowestcommonancestors

52 
algorithm. Runs in time $O(4 \times (V + E + P))$ time, where 4 is the largest

53 
value of the inverse Ackermann function likely to ever come up in actual

54 
use, and $P$ is the number of pairs requested (or $V^2$ if all are needed).

55 

56 
Tarjan, R. E. (1979), "Applications of path compression on balanced trees",

57 
Journal of the ACM 26 (4): 690715, doi:10.1145/322154.322161.

58 

59 
See Also

60 


61 
all_pairs_lowest_common_ancestor (similar routine for general DAGs)

62 
lowest_common_ancestor (just a single pair for general DAGs)

63 
"""

64 
if len(G) == 0: 
65 
raise nx.NetworkXPointlessConcept("LCA meaningless on null graphs.") 
66 
elif None in G: 
67 
raise nx.NetworkXError("None is not a valid node.") 
68  
69 
# Index pairs of interest for efficient lookup from either side.

70 
if pairs is not None: 
71 
pair_dict = defaultdict(set)

72 
# See note on all_pairs_lowest_common_ancestor.

73 
if not isinstance(pairs, (Mapping, Set)): 
74 
pairs = set(pairs)

75 
for u, v in pairs: 
76 
for n in (u, v): 
77 
if n not in G: 
78 
msg = "The node %s is not in the digraph." % str(n) 
79 
raise nx.NodeNotFound(msg)

80 
pair_dict[u].add(v) 
81 
pair_dict[v].add(u) 
82  
83 
# If root is not specified, find the exactly one node with in degree 0 and

84 
# use it. Raise an error if none are found, or more than one is. Also check

85 
# for any nodes with in degree larger than 1, which would imply G is not a

86 
# tree.

87 
if root is None: 
88 
for n, deg in G.in_degree: 
89 
if deg == 0: 
90 
if root is not None: 
91 
msg = "No root specified and tree has multiple sources."

92 
raise nx.NetworkXError(msg)

93 
root = n 
94 
elif deg > 1: 
95 
msg = "Tree LCA only defined on trees; use DAG routine."

96 
raise nx.NetworkXError(msg)

97 
if root is None: 
98 
raise nx.NetworkXError("Graph contains a cycle.") 
99  
100 
# Iterative implementation of Tarjan's offline lca algorithm

101 
# as described in CLRS on page 521 (2nd edition)/page 584 (3rd edition)

102 
uf = UnionFind() 
103 
ancestors = {} 
104 
for node in G: 
105 
ancestors[node] = uf[node] 
106  
107 
colors = defaultdict(bool)

108 
for node in nx.dfs_postorder_nodes(G, root): 
109 
colors[node] = True

110 
for v in (pair_dict[node] if pairs is not None else G): 
111 
if colors[v]:

112 
# If the user requested both directions of a pair, give it.

113 
# Otherwise, just give one.

114 
if pairs is not None and (node, v) in pairs: 
115 
yield (node, v), ancestors[uf[v]]

116 
if pairs is None or (v, node) in pairs: 
117 
yield (v, node), ancestors[uf[v]]

118 
if node != root:

119 
parent = arbitrary_element(G.pred[node]) 
120 
uf.union(parent, node) 
121 
ancestors[uf[parent]] = parent 
122  
123  
124 
@not_implemented_for("undirected") 
125 
@not_implemented_for("multigraph") 
126 
def lowest_common_ancestor(G, node1, node2, default=None): 
127 
"""Compute the lowest common ancestor of the given pair of nodes.

128 

129 
Parameters

130 


131 
G : NetworkX directed graph

132 

133 
node1, node2 : nodes in the graph.

134 

135 
default : object

136 
Returned if no common ancestor between `node1` and `node2`

137 

138 
Returns

139 


140 
The lowest common ancestor of node1 and node2,

141 
or default if they have no common ancestors.

142 

143 
Notes

144 


145 
Only defined on nonnull directed acyclic graphs.

146 
Takes n log(n) time in the size of the graph.

147 
See `all_pairs_lowest_common_ancestor` when you have

148 
more than one pair of nodes of interest.

149 

150 
See Also

151 


152 
tree_all_pairs_lowest_common_ancestor

153 
all_pairs_lowest_common_ancestor

154 
"""

155 
ans = list(all_pairs_lowest_common_ancestor(G, pairs=[(node1, node2)]))

156 
if ans:

157 
assert len(ans) == 1 
158 
return ans[0][1] 
159 
else:

160 
return default

161  
162  
163 
@not_implemented_for("undirected") 
164 
@not_implemented_for("multigraph") 
165 
def all_pairs_lowest_common_ancestor(G, pairs=None): 
166 
"""Compute the lowest common ancestor for pairs of nodes.

167 

168 
Parameters

169 


170 
G : NetworkX directed graph

171 

172 
pairs : iterable of pairs of nodes, optional (default: all pairs)

173 
The pairs of nodes of interest.

174 
If None, will find the LCA of all pairs of nodes.

175 

176 
Returns

177 


178 
An iterator over ((node1, node2), lca) where (node1, node2) are

179 
the pairs specified and lca is a lowest common ancestor of the pair.

180 
Note that for the default of all pairs in G, we consider

181 
unordered pairs, e.g. you will not get both (b, a) and (a, b).

182 

183 
Notes

184 


185 
Only defined on nonnull directed acyclic graphs.

186 

187 
Uses the $O(n^3)$ ancestorlist algorithm from:

188 
M. A. Bender, M. FarachColton, G. Pemmasani, S. Skiena, P. Sumazin.

189 
"Lowest common ancestors in trees and directed acyclic graphs."

190 
Journal of Algorithms, 57(2): 7594, 2005.

191 

192 
See Also

193 


194 
tree_all_pairs_lowest_common_ancestor

195 
lowest_common_ancestor

196 
"""

197 
if not nx.is_directed_acyclic_graph(G): 
198 
raise nx.NetworkXError("LCA only defined on directed acyclic graphs.") 
199 
elif len(G) == 0: 
200 
raise nx.NetworkXPointlessConcept("LCA meaningless on null graphs.") 
201 
elif None in G: 
202 
raise nx.NetworkXError("None is not a valid node.") 
203  
204 
# The copy isn't ideal, neither is the switchontype, but without it users

205 
# passing an iterable will encounter confusing errors, and itertools.tee

206 
# does not appear to handle builtin types efficiently (IE, it materializes

207 
# another buffer rather than just creating listoperators at the same

208 
# offset). The Python documentation notes use of tee is unadvised when one

209 
# is consumed before the other.

210 
#

211 
# This will always produce correct results and avoid unnecessary

212 
# copies in many common cases.

213 
#

214 
if (not isinstance(pairs, (Mapping, Set)) and pairs is not None): 
215 
pairs = set(pairs)

216  
217 
# Convert G into a dag with a single root by adding a node with edges to

218 
# all sources iff necessary.

219 
sources = [n for n, deg in G.in_degree if deg == 0] 
220 
if len(sources) == 1: 
221 
root = sources[0]

222 
super_root = None

223 
else:

224 
G = G.copy() 
225 
super_root = root = generate_unique_node() 
226 
for source in sources: 
227 
G.add_edge(root, source) 
228  
229 
# Start by computing a spanning tree, and the DAG of all edges not in it.

230 
# We will then use the tree lca algorithm on the spanning tree, and use

231 
# the DAG to figure out the set of tree queries necessary.

232 
spanning_tree = nx.dfs_tree(G, root) 
233 
dag = nx.DiGraph((u, v) for u, v in G.edges 
234 
if u not in spanning_tree or v not in spanning_tree[u]) 
235  
236 
# Ensure that both the dag and the spanning tree contains all nodes in G,

237 
# even nodes that are disconnected in the dag.

238 
spanning_tree.add_nodes_from(G) 
239 
dag.add_nodes_from(G) 
240  
241 
counter = count() 
242  
243 
# Necessary to handle graphs consisting of a single node and no edges.

244 
root_distance = {root: next(counter)}

245  
246 
for edge in nx.bfs_edges(spanning_tree, root): 
247 
for node in edge: 
248 
if node not in root_distance: 
249 
root_distance[node] = next(counter)

250  
251 
# Index the position of all nodes in the Euler tour so we can efficiently

252 
# sort lists and merge in tour order.

253 
euler_tour_pos = {} 
254 
for node in nx.depth_first_search.dfs_preorder_nodes(G, root): 
255 
if node not in euler_tour_pos: 
256 
euler_tour_pos[node] = next(counter)

257  
258 
# Generate the set of all nodes of interest in the pairs.

259 
pairset = set()

260 
if pairs is not None: 
261 
pairset = set(chain.from_iterable(pairs))

262  
263 
for n in pairset: 
264 
if n not in G: 
265 
msg = "The node %s is not in the digraph." % str(n) 
266 
raise nx.NodeNotFound(msg)

267  
268 
# Generate the transitive closure over the dag (not G) of all nodes, and

269 
# sort each node's closure set by order of first appearance in the Euler

270 
# tour.

271 
ancestors = {} 
272 
for v in dag: 
273 
if pairs is None or v in pairset: 
274 
my_ancestors = nx.dag.ancestors(dag, v) 
275 
my_ancestors.add(v) 
276 
ancestors[v] = sorted(my_ancestors, key=euler_tour_pos.get)

277  
278 
def _compute_dag_lca_from_tree_values(tree_lca, dry_run): 
279 
"""Iterate through the inorder merge for each pair of interest.

280 

281 
We do this to answer the user's query, but it is also used to

282 
avoid generating unnecessary tree entries when the user only

283 
needs some pairs.

284 
"""

285 
for (node1, node2) in pairs if pairs is not None else tree_lca: 
286 
best_root_distance = None

287 
best = None

288  
289 
indices = [0, 0] 
290 
ancestors_by_index = [ancestors[node1], ancestors[node2]] 
291  
292 
def get_next_in_merged_lists(indices): 
293 
"""Returns index of the list containing the next item

294 

295 
Next order refers to the merged order.

296 
Index can be 0 or 1 (or None if exhausted).

297 
"""

298 
index1, index2 = indices 
299 
if (index1 >= len(ancestors[node1]) and 
300 
index2 >= len(ancestors[node2])):

301 
return None 
302 
elif index1 >= len(ancestors[node1]): 
303 
return 1 
304 
elif index2 >= len(ancestors[node2]): 
305 
return 0 
306 
elif (euler_tour_pos[ancestors[node1][index1]] <

307 
euler_tour_pos[ancestors[node2][index2]]): 
308 
return 0 
309 
else:

310 
return 1 
311  
312 
# Find the LCA by iterating through the inorder merge of the two

313 
# nodes of interests' ancestor sets. In principle, we need to

314 
# consider all pairs in the Cartesian product of the ancestor sets,

315 
# but by the restricted min range query reduction we are guaranteed

316 
# that one of the pairs of interest is adjacent in the merged list

317 
# iff one came from each list.

318 
i = get_next_in_merged_lists(indices) 
319 
cur = ancestors_by_index[i][indices[i]], i 
320 
while i is not None: 
321 
prev = cur 
322 
indices[i] += 1

323 
i = get_next_in_merged_lists(indices) 
324 
if i is not None: 
325 
cur = ancestors_by_index[i][indices[i]], i 
326  
327 
# Two adjacent entries must not be from the same list

328 
# in order for their tree LCA to be considered.

329 
if cur[1] != prev[1]: 
330 
tree_node1, tree_node2 = prev[0], cur[0] 
331 
if (tree_node1, tree_node2) in tree_lca: 
332 
ans = tree_lca[tree_node1, tree_node2] 
333 
else:

334 
ans = tree_lca[tree_node2, tree_node1] 
335 
if not dry_run and (best is None or 
336 
root_distance[ans] > best_root_distance): 
337 
best_root_distance = root_distance[ans] 
338 
best = ans 
339  
340 
# If the LCA is super_root, there is no LCA in the user's graph.

341 
if not dry_run and (super_root is None or best != super_root): 
342 
yield (node1, node2), best

343  
344 
# Generate the spanning tree lca for all pairs. This doesn't make sense to

345 
# do incrementally since we are using a linear time offline algorithm for

346 
# tree lca.

347 
if pairs is None: 
348 
# We want all pairs so we'll need the entire tree.

349 
tree_lca = dict(tree_all_pairs_lowest_common_ancestor(spanning_tree,

350 
root)) 
351 
else:

352 
# We only need the merged adjacent pairs by seeing which queries the

353 
# algorithm needs then generating them in a single pass.

354 
tree_lca = defaultdict(int)

355 
for _ in _compute_dag_lca_from_tree_values(tree_lca, True): 
356 
pass

357  
358 
# Replace the bogus default tree values with the real ones.

359 
for (pair, lca) in tree_all_pairs_lowest_common_ancestor(spanning_tree, 
360 
root, 
361 
tree_lca): 
362 
tree_lca[pair] = lca 
363  
364 
# All precomputations complete. Now we just need to give the user the pairs

365 
# they asked for, or all pairs if they want them all.

366 
return _compute_dag_lca_from_tree_values(tree_lca, False) 