## iof-tools / networkxMiCe / networkx-master / networkx / algorithms / lowest_common_ancestors.py @ 5cef0f13

History | View | Annotate | Download (14 KB)

1 | 5cef0f13 | tiamilani | ```
# Copyright (C) 2013 by
``` |
---|---|---|---|

2 | ```
# Alex Roper <aroper@umich.edu>
``` |
||

3 | ```
# Copyright (C) 2017 by
``` |
||

4 | ```
# Aric Hagberg <hagberg@lanl.gov>
``` |
||

5 | ```
# Dan Schult <dschult@colgate.edu>
``` |
||

6 | ```
# Pieter Swart <swart@lanl.gov>
``` |
||

7 | ```
#
``` |
||

8 | ```
# All rights reserved.
``` |
||

9 | ```
# BSD license.
``` |
||

10 | ```
#
``` |
||

11 | ```
# Author: Alex Roper <aroper@umich.edu>
``` |
||

12 | ```
"""Algorithms for finding the lowest common ancestor of trees and DAGs."""
``` |
||

13 | from collections import defaultdict |
||

14 | from collections.abc import Mapping, Set |
||

15 | from itertools import chain, count |
||

16 | |||

17 | import networkx as nx |
||

18 | from networkx.utils import arbitrary_element, not_implemented_for, \ |
||

19 | UnionFind, generate_unique_node |
||

20 | |||

21 | ```
__all__ = ["all_pairs_lowest_common_ancestor",
``` |
||

22 | ```
"tree_all_pairs_lowest_common_ancestor",
``` |
||

23 | ```
"lowest_common_ancestor"]
``` |
||

24 | |||

25 | |||

26 | @not_implemented_for("undirected") |
||

27 | @not_implemented_for("multigraph") |
||

28 | def tree_all_pairs_lowest_common_ancestor(G, root=None, pairs=None): |
||

29 | ```
r"""Yield the lowest common ancestor for sets of pairs in a tree.
``` |
||

30 | ```
``` |
||

31 | ```
Parameters
``` |
||

32 | ```
----------
``` |
||

33 | ```
G : NetworkX directed graph (must be a tree)
``` |
||

34 | ```
``` |
||

35 | ```
root : node, optional (default: None)
``` |
||

36 | ```
The root of the subtree to operate on.
``` |
||

37 | ```
If None, assume the entire graph has exactly one source and use that.
``` |
||

38 | ```
``` |
||

39 | ```
pairs : iterable or iterator of pairs of nodes, optional (default: None)
``` |
||

40 | ```
The pairs of interest. If None, Defaults to all pairs of nodes
``` |
||

41 | ```
under `root` that have a lowest common ancestor.
``` |
||

42 | ```
``` |
||

43 | ```
Returns
``` |
||

44 | ```
-------
``` |
||

45 | ```
lcas : generator of tuples `((u, v), lca)` where `u` and `v` are nodes
``` |
||

46 | ```
in `pairs` and `lca` is their lowest common ancestor.
``` |
||

47 | ```
``` |
||

48 | ```
Notes
``` |
||

49 | ```
-----
``` |
||

50 | ```
Only defined on non-null trees represented with directed edges from
``` |
||

51 | ```
parents to children. Uses Tarjan's off-line lowest-common-ancestors
``` |
||

52 | ```
algorithm. Runs in time $O(4 \times (V + E + P))$ time, where 4 is the largest
``` |
||

53 | ```
value of the inverse Ackermann function likely to ever come up in actual
``` |
||

54 | ```
use, and $P$ is the number of pairs requested (or $V^2$ if all are needed).
``` |
||

55 | ```
``` |
||

56 | ```
Tarjan, R. E. (1979), "Applications of path compression on balanced trees",
``` |
||

57 | ```
Journal of the ACM 26 (4): 690-715, doi:10.1145/322154.322161.
``` |
||

58 | ```
``` |
||

59 | ```
See Also
``` |
||

60 | ```
--------
``` |
||

61 | ```
all_pairs_lowest_common_ancestor (similar routine for general DAGs)
``` |
||

62 | ```
lowest_common_ancestor (just a single pair for general DAGs)
``` |
||

63 | ```
"""
``` |
||

64 | if len(G) == 0: |
||

65 | raise nx.NetworkXPointlessConcept("LCA meaningless on null graphs.") |
||

66 | elif None in G: |
||

67 | raise nx.NetworkXError("None is not a valid node.") |
||

68 | |||

69 | ```
# Index pairs of interest for efficient lookup from either side.
``` |
||

70 | if pairs is not None: |
||

71 | ```
pair_dict = defaultdict(set)
``` |
||

72 | ```
# See note on all_pairs_lowest_common_ancestor.
``` |
||

73 | if not isinstance(pairs, (Mapping, Set)): |
||

74 | ```
pairs = set(pairs)
``` |
||

75 | for u, v in pairs: |
||

76 | for n in (u, v): |
||

77 | if n not in G: |
||

78 | msg = "The node %s is not in the digraph." % str(n) |
||

79 | ```
raise nx.NodeNotFound(msg)
``` |
||

80 | pair_dict[u].add(v) |
||

81 | pair_dict[v].add(u) |
||

82 | |||

83 | ```
# If root is not specified, find the exactly one node with in degree 0 and
``` |
||

84 | ```
# use it. Raise an error if none are found, or more than one is. Also check
``` |
||

85 | ```
# for any nodes with in degree larger than 1, which would imply G is not a
``` |
||

86 | ```
# tree.
``` |
||

87 | if root is None: |
||

88 | for n, deg in G.in_degree: |
||

89 | if deg == 0: |
||

90 | if root is not None: |
||

91 | ```
msg = "No root specified and tree has multiple sources."
``` |
||

92 | ```
raise nx.NetworkXError(msg)
``` |
||

93 | root = n |
||

94 | elif deg > 1: |
||

95 | ```
msg = "Tree LCA only defined on trees; use DAG routine."
``` |
||

96 | ```
raise nx.NetworkXError(msg)
``` |
||

97 | if root is None: |
||

98 | raise nx.NetworkXError("Graph contains a cycle.") |
||

99 | |||

100 | ```
# Iterative implementation of Tarjan's offline lca algorithm
``` |
||

101 | ```
# as described in CLRS on page 521 (2nd edition)/page 584 (3rd edition)
``` |
||

102 | uf = UnionFind() |
||

103 | ancestors = {} |
||

104 | for node in G: |
||

105 | ancestors[node] = uf[node] |
||

106 | |||

107 | ```
colors = defaultdict(bool)
``` |
||

108 | for node in nx.dfs_postorder_nodes(G, root): |
||

109 | ```
colors[node] = True
``` |
||

110 | for v in (pair_dict[node] if pairs is not None else G): |
||

111 | ```
if colors[v]:
``` |
||

112 | ```
# If the user requested both directions of a pair, give it.
``` |
||

113 | ```
# Otherwise, just give one.
``` |
||

114 | if pairs is not None and (node, v) in pairs: |
||

115 | ```
yield (node, v), ancestors[uf[v]]
``` |
||

116 | if pairs is None or (v, node) in pairs: |
||

117 | ```
yield (v, node), ancestors[uf[v]]
``` |
||

118 | ```
if node != root:
``` |
||

119 | parent = arbitrary_element(G.pred[node]) |
||

120 | uf.union(parent, node) |
||

121 | ancestors[uf[parent]] = parent |
||

122 | |||

123 | |||

124 | @not_implemented_for("undirected") |
||

125 | @not_implemented_for("multigraph") |
||

126 | def lowest_common_ancestor(G, node1, node2, default=None): |
||

127 | ```
"""Compute the lowest common ancestor of the given pair of nodes.
``` |
||

128 | ```
``` |
||

129 | ```
Parameters
``` |
||

130 | ```
----------
``` |
||

131 | ```
G : NetworkX directed graph
``` |
||

132 | ```
``` |
||

133 | ```
node1, node2 : nodes in the graph.
``` |
||

134 | ```
``` |
||

135 | ```
default : object
``` |
||

136 | ```
Returned if no common ancestor between `node1` and `node2`
``` |
||

137 | ```
``` |
||

138 | ```
Returns
``` |
||

139 | ```
-------
``` |
||

140 | ```
The lowest common ancestor of node1 and node2,
``` |
||

141 | ```
or default if they have no common ancestors.
``` |
||

142 | ```
``` |
||

143 | ```
Notes
``` |
||

144 | ```
-----
``` |
||

145 | ```
Only defined on non-null directed acyclic graphs.
``` |
||

146 | ```
Takes n log(n) time in the size of the graph.
``` |
||

147 | ```
See `all_pairs_lowest_common_ancestor` when you have
``` |
||

148 | ```
more than one pair of nodes of interest.
``` |
||

149 | ```
``` |
||

150 | ```
See Also
``` |
||

151 | ```
--------
``` |
||

152 | ```
tree_all_pairs_lowest_common_ancestor
``` |
||

153 | ```
all_pairs_lowest_common_ancestor
``` |
||

154 | ```
"""
``` |
||

155 | ```
ans = list(all_pairs_lowest_common_ancestor(G, pairs=[(node1, node2)]))
``` |
||

156 | ```
if ans:
``` |
||

157 | assert len(ans) == 1 |
||

158 | return ans[0][1] |
||

159 | ```
else:
``` |
||

160 | ```
return default
``` |
||

161 | |||

162 | |||

163 | @not_implemented_for("undirected") |
||

164 | @not_implemented_for("multigraph") |
||

165 | def all_pairs_lowest_common_ancestor(G, pairs=None): |
||

166 | ```
"""Compute the lowest common ancestor for pairs of nodes.
``` |
||

167 | ```
``` |
||

168 | ```
Parameters
``` |
||

169 | ```
----------
``` |
||

170 | ```
G : NetworkX directed graph
``` |
||

171 | ```
``` |
||

172 | ```
pairs : iterable of pairs of nodes, optional (default: all pairs)
``` |
||

173 | ```
The pairs of nodes of interest.
``` |
||

174 | ```
If None, will find the LCA of all pairs of nodes.
``` |
||

175 | ```
``` |
||

176 | ```
Returns
``` |
||

177 | ```
-------
``` |
||

178 | ```
An iterator over ((node1, node2), lca) where (node1, node2) are
``` |
||

179 | ```
the pairs specified and lca is a lowest common ancestor of the pair.
``` |
||

180 | ```
Note that for the default of all pairs in G, we consider
``` |
||

181 | ```
unordered pairs, e.g. you will not get both (b, a) and (a, b).
``` |
||

182 | ```
``` |
||

183 | ```
Notes
``` |
||

184 | ```
-----
``` |
||

185 | ```
Only defined on non-null directed acyclic graphs.
``` |
||

186 | ```
``` |
||

187 | ```
Uses the $O(n^3)$ ancestor-list algorithm from:
``` |
||

188 | ```
M. A. Bender, M. Farach-Colton, G. Pemmasani, S. Skiena, P. Sumazin.
``` |
||

189 | ```
"Lowest common ancestors in trees and directed acyclic graphs."
``` |
||

190 | ```
Journal of Algorithms, 57(2): 75-94, 2005.
``` |
||

191 | ```
``` |
||

192 | ```
See Also
``` |
||

193 | ```
--------
``` |
||

194 | ```
tree_all_pairs_lowest_common_ancestor
``` |
||

195 | ```
lowest_common_ancestor
``` |
||

196 | ```
"""
``` |
||

197 | if not nx.is_directed_acyclic_graph(G): |
||

198 | raise nx.NetworkXError("LCA only defined on directed acyclic graphs.") |
||

199 | elif len(G) == 0: |
||

200 | raise nx.NetworkXPointlessConcept("LCA meaningless on null graphs.") |
||

201 | elif None in G: |
||

202 | raise nx.NetworkXError("None is not a valid node.") |
||

203 | |||

204 | ```
# The copy isn't ideal, neither is the switch-on-type, but without it users
``` |
||

205 | ```
# passing an iterable will encounter confusing errors, and itertools.tee
``` |
||

206 | ```
# does not appear to handle builtin types efficiently (IE, it materializes
``` |
||

207 | ```
# another buffer rather than just creating listoperators at the same
``` |
||

208 | ```
# offset). The Python documentation notes use of tee is unadvised when one
``` |
||

209 | ```
# is consumed before the other.
``` |
||

210 | ```
#
``` |
||

211 | ```
# This will always produce correct results and avoid unnecessary
``` |
||

212 | ```
# copies in many common cases.
``` |
||

213 | ```
#
``` |
||

214 | if (not isinstance(pairs, (Mapping, Set)) and pairs is not None): |
||

215 | ```
pairs = set(pairs)
``` |
||

216 | |||

217 | ```
# Convert G into a dag with a single root by adding a node with edges to
``` |
||

218 | ```
# all sources iff necessary.
``` |
||

219 | sources = [n for n, deg in G.in_degree if deg == 0] |
||

220 | if len(sources) == 1: |
||

221 | ```
root = sources[0]
``` |
||

222 | ```
super_root = None
``` |
||

223 | ```
else:
``` |
||

224 | G = G.copy() |
||

225 | super_root = root = generate_unique_node() |
||

226 | for source in sources: |
||

227 | G.add_edge(root, source) |
||

228 | |||

229 | ```
# Start by computing a spanning tree, and the DAG of all edges not in it.
``` |
||

230 | ```
# We will then use the tree lca algorithm on the spanning tree, and use
``` |
||

231 | ```
# the DAG to figure out the set of tree queries necessary.
``` |
||

232 | spanning_tree = nx.dfs_tree(G, root) |
||

233 | dag = nx.DiGraph((u, v) for u, v in G.edges |
||

234 | if u not in spanning_tree or v not in spanning_tree[u]) |
||

235 | |||

236 | ```
# Ensure that both the dag and the spanning tree contains all nodes in G,
``` |
||

237 | ```
# even nodes that are disconnected in the dag.
``` |
||

238 | spanning_tree.add_nodes_from(G) |
||

239 | dag.add_nodes_from(G) |
||

240 | |||

241 | counter = count() |
||

242 | |||

243 | ```
# Necessary to handle graphs consisting of a single node and no edges.
``` |
||

244 | ```
root_distance = {root: next(counter)}
``` |
||

245 | |||

246 | for edge in nx.bfs_edges(spanning_tree, root): |
||

247 | for node in edge: |
||

248 | if node not in root_distance: |
||

249 | ```
root_distance[node] = next(counter)
``` |
||

250 | |||

251 | ```
# Index the position of all nodes in the Euler tour so we can efficiently
``` |
||

252 | ```
# sort lists and merge in tour order.
``` |
||

253 | euler_tour_pos = {} |
||

254 | for node in nx.depth_first_search.dfs_preorder_nodes(G, root): |
||

255 | if node not in euler_tour_pos: |
||

256 | ```
euler_tour_pos[node] = next(counter)
``` |
||

257 | |||

258 | ```
# Generate the set of all nodes of interest in the pairs.
``` |
||

259 | ```
pairset = set()
``` |
||

260 | if pairs is not None: |
||

261 | ```
pairset = set(chain.from_iterable(pairs))
``` |
||

262 | |||

263 | for n in pairset: |
||

264 | if n not in G: |
||

265 | msg = "The node %s is not in the digraph." % str(n) |
||

266 | ```
raise nx.NodeNotFound(msg)
``` |
||

267 | |||

268 | ```
# Generate the transitive closure over the dag (not G) of all nodes, and
``` |
||

269 | ```
# sort each node's closure set by order of first appearance in the Euler
``` |
||

270 | ```
# tour.
``` |
||

271 | ancestors = {} |
||

272 | for v in dag: |
||

273 | if pairs is None or v in pairset: |
||

274 | my_ancestors = nx.dag.ancestors(dag, v) |
||

275 | my_ancestors.add(v) |
||

276 | ```
ancestors[v] = sorted(my_ancestors, key=euler_tour_pos.get)
``` |
||

277 | |||

278 | def _compute_dag_lca_from_tree_values(tree_lca, dry_run): |
||

279 | ```
"""Iterate through the in-order merge for each pair of interest.
``` |
||

280 | ```
``` |
||

281 | ```
We do this to answer the user's query, but it is also used to
``` |
||

282 | ```
avoid generating unnecessary tree entries when the user only
``` |
||

283 | ```
needs some pairs.
``` |
||

284 | ```
"""
``` |
||

285 | for (node1, node2) in pairs if pairs is not None else tree_lca: |
||

286 | ```
best_root_distance = None
``` |
||

287 | ```
best = None
``` |
||

288 | |||

289 | indices = [0, 0] |
||

290 | ancestors_by_index = [ancestors[node1], ancestors[node2]] |
||

291 | |||

292 | def get_next_in_merged_lists(indices): |
||

293 | ```
"""Returns index of the list containing the next item
``` |
||

294 | ```
``` |
||

295 | ```
Next order refers to the merged order.
``` |
||

296 | ```
Index can be 0 or 1 (or None if exhausted).
``` |
||

297 | ```
"""
``` |
||

298 | index1, index2 = indices |
||

299 | if (index1 >= len(ancestors[node1]) and |
||

300 | ```
index2 >= len(ancestors[node2])):
``` |
||

301 | return None |
||

302 | elif index1 >= len(ancestors[node1]): |
||

303 | return 1 |
||

304 | elif index2 >= len(ancestors[node2]): |
||

305 | return 0 |
||

306 | ```
elif (euler_tour_pos[ancestors[node1][index1]] <
``` |
||

307 | euler_tour_pos[ancestors[node2][index2]]): |
||

308 | return 0 |
||

309 | ```
else:
``` |
||

310 | return 1 |
||

311 | |||

312 | ```
# Find the LCA by iterating through the in-order merge of the two
``` |
||

313 | ```
# nodes of interests' ancestor sets. In principle, we need to
``` |
||

314 | ```
# consider all pairs in the Cartesian product of the ancestor sets,
``` |
||

315 | ```
# but by the restricted min range query reduction we are guaranteed
``` |
||

316 | ```
# that one of the pairs of interest is adjacent in the merged list
``` |
||

317 | ```
# iff one came from each list.
``` |
||

318 | i = get_next_in_merged_lists(indices) |
||

319 | cur = ancestors_by_index[i][indices[i]], i |
||

320 | while i is not None: |
||

321 | prev = cur |
||

322 | ```
indices[i] += 1
``` |
||

323 | i = get_next_in_merged_lists(indices) |
||

324 | if i is not None: |
||

325 | cur = ancestors_by_index[i][indices[i]], i |
||

326 | |||

327 | ```
# Two adjacent entries must not be from the same list
``` |
||

328 | ```
# in order for their tree LCA to be considered.
``` |
||

329 | if cur[1] != prev[1]: |
||

330 | tree_node1, tree_node2 = prev[0], cur[0] |
||

331 | if (tree_node1, tree_node2) in tree_lca: |
||

332 | ans = tree_lca[tree_node1, tree_node2] |
||

333 | ```
else:
``` |
||

334 | ans = tree_lca[tree_node2, tree_node1] |
||

335 | if not dry_run and (best is None or |
||

336 | root_distance[ans] > best_root_distance): |
||

337 | best_root_distance = root_distance[ans] |
||

338 | best = ans |
||

339 | |||

340 | ```
# If the LCA is super_root, there is no LCA in the user's graph.
``` |
||

341 | if not dry_run and (super_root is None or best != super_root): |
||

342 | ```
yield (node1, node2), best
``` |
||

343 | |||

344 | ```
# Generate the spanning tree lca for all pairs. This doesn't make sense to
``` |
||

345 | ```
# do incrementally since we are using a linear time offline algorithm for
``` |
||

346 | ```
# tree lca.
``` |
||

347 | if pairs is None: |
||

348 | ```
# We want all pairs so we'll need the entire tree.
``` |
||

349 | ```
tree_lca = dict(tree_all_pairs_lowest_common_ancestor(spanning_tree,
``` |
||

350 | root)) |
||

351 | ```
else:
``` |
||

352 | ```
# We only need the merged adjacent pairs by seeing which queries the
``` |
||

353 | ```
# algorithm needs then generating them in a single pass.
``` |
||

354 | ```
tree_lca = defaultdict(int)
``` |
||

355 | for _ in _compute_dag_lca_from_tree_values(tree_lca, True): |
||

356 | ```
pass
``` |
||

357 | |||

358 | ```
# Replace the bogus default tree values with the real ones.
``` |
||

359 | for (pair, lca) in tree_all_pairs_lowest_common_ancestor(spanning_tree, |
||

360 | root, |
||

361 | tree_lca): |
||

362 | tree_lca[pair] = lca |
||

363 | |||

364 | ```
# All precomputations complete. Now we just need to give the user the pairs
``` |
||

365 | ```
# they asked for, or all pairs if they want them all.
``` |
||

366 | return _compute_dag_lca_from_tree_values(tree_lca, False) |