Statistics
| Branch: | Revision:

iof-tools / networkxMiCe / networkx-master / networkx / algorithms / lowest_common_ancestors.py @ 5cef0f13

History | View | Annotate | Download (14 KB)

1
# Copyright (C) 2013 by
2
#   Alex Roper <aroper@umich.edu>
3
# Copyright (C) 2017 by
4
#   Aric Hagberg <hagberg@lanl.gov>
5
#   Dan Schult <dschult@colgate.edu>
6
#   Pieter Swart <swart@lanl.gov>
7
#
8
#   All rights reserved.
9
#   BSD license.
10
#
11
# Author:  Alex Roper <aroper@umich.edu>
12
"""Algorithms for finding the lowest common ancestor of trees and DAGs."""
13
from collections import defaultdict
14
from collections.abc import Mapping, Set
15
from itertools import chain, count
16

    
17
import networkx as nx
18
from networkx.utils import arbitrary_element, not_implemented_for, \
19
    UnionFind, generate_unique_node
20

    
21
__all__ = ["all_pairs_lowest_common_ancestor",
22
           "tree_all_pairs_lowest_common_ancestor",
23
           "lowest_common_ancestor"]
24

    
25

    
26
@not_implemented_for("undirected")
27
@not_implemented_for("multigraph")
28
def tree_all_pairs_lowest_common_ancestor(G, root=None, pairs=None):
29
    r"""Yield the lowest common ancestor for sets of pairs in a tree.
30

31
    Parameters
32
    ----------
33
    G : NetworkX directed graph (must be a tree)
34

35
    root : node, optional (default: None)
36
        The root of the subtree to operate on.
37
        If None, assume the entire graph has exactly one source and use that.
38

39
    pairs : iterable or iterator of pairs of nodes, optional (default: None)
40
        The pairs of interest. If None, Defaults to all pairs of nodes
41
        under `root` that have a lowest common ancestor.
42

43
    Returns
44
    -------
45
    lcas : generator of tuples `((u, v), lca)` where `u` and `v` are nodes
46
        in `pairs` and `lca` is their lowest common ancestor.
47

48
    Notes
49
    -----
50
    Only defined on non-null trees represented with directed edges from
51
    parents to children. Uses Tarjan's off-line lowest-common-ancestors
52
    algorithm. Runs in time $O(4 \times (V + E + P))$ time, where 4 is the largest
53
    value of the inverse Ackermann function likely to ever come up in actual
54
    use, and $P$ is the number of pairs requested (or $V^2$ if all are needed).
55

56
    Tarjan, R. E. (1979), "Applications of path compression on balanced trees",
57
    Journal of the ACM 26 (4): 690-715, doi:10.1145/322154.322161.
58

59
    See Also
60
    --------
61
    all_pairs_lowest_common_ancestor (similar routine for general DAGs)
62
    lowest_common_ancestor           (just a single pair for general DAGs)
63
    """
64
    if len(G) == 0:
65
        raise nx.NetworkXPointlessConcept("LCA meaningless on null graphs.")
66
    elif None in G:
67
        raise nx.NetworkXError("None is not a valid node.")
68

    
69
    # Index pairs of interest for efficient lookup from either side.
70
    if pairs is not None:
71
        pair_dict = defaultdict(set)
72
        # See note on all_pairs_lowest_common_ancestor.
73
        if not isinstance(pairs, (Mapping, Set)):
74
            pairs = set(pairs)
75
        for u, v in pairs:
76
            for n in (u, v):
77
                if n not in G:
78
                    msg = "The node %s is not in the digraph." % str(n)
79
                    raise nx.NodeNotFound(msg)
80
            pair_dict[u].add(v)
81
            pair_dict[v].add(u)
82

    
83
    # If root is not specified, find the exactly one node with in degree 0 and
84
    # use it. Raise an error if none are found, or more than one is. Also check
85
    # for any nodes with in degree larger than 1, which would imply G is not a
86
    # tree.
87
    if root is None:
88
        for n, deg in G.in_degree:
89
            if deg == 0:
90
                if root is not None:
91
                    msg = "No root specified and tree has multiple sources."
92
                    raise nx.NetworkXError(msg)
93
                root = n
94
            elif deg > 1:
95
                msg = "Tree LCA only defined on trees; use DAG routine."
96
                raise nx.NetworkXError(msg)
97
    if root is None:
98
        raise nx.NetworkXError("Graph contains a cycle.")
99

    
100
    # Iterative implementation of Tarjan's offline lca algorithm
101
    # as described in CLRS on page 521 (2nd edition)/page 584 (3rd edition)
102
    uf = UnionFind()
103
    ancestors = {}
104
    for node in G:
105
        ancestors[node] = uf[node]
106

    
107
    colors = defaultdict(bool)
108
    for node in nx.dfs_postorder_nodes(G, root):
109
        colors[node] = True
110
        for v in (pair_dict[node] if pairs is not None else G):
111
            if colors[v]:
112
                # If the user requested both directions of a pair, give it.
113
                # Otherwise, just give one.
114
                if pairs is not None and (node, v) in pairs:
115
                    yield (node, v), ancestors[uf[v]]
116
                if pairs is None or (v, node) in pairs:
117
                    yield (v, node), ancestors[uf[v]]
118
        if node != root:
119
            parent = arbitrary_element(G.pred[node])
120
            uf.union(parent, node)
121
            ancestors[uf[parent]] = parent
122

    
123

    
124
@not_implemented_for("undirected")
125
@not_implemented_for("multigraph")
126
def lowest_common_ancestor(G, node1, node2, default=None):
127
    """Compute the lowest common ancestor of the given pair of nodes.
128

129
    Parameters
130
    ----------
131
    G : NetworkX directed graph
132

133
    node1, node2 : nodes in the graph.
134

135
    default : object
136
        Returned if no common ancestor between `node1` and `node2`
137

138
    Returns
139
    -------
140
    The lowest common ancestor of node1 and node2,
141
    or default if they have no common ancestors.
142

143
    Notes
144
    -----
145
    Only defined on non-null directed acyclic graphs.
146
    Takes n log(n) time in the size of the graph.
147
    See `all_pairs_lowest_common_ancestor` when you have
148
    more than one pair of nodes of interest.
149

150
    See Also
151
    --------
152
    tree_all_pairs_lowest_common_ancestor
153
    all_pairs_lowest_common_ancestor
154
    """
155
    ans = list(all_pairs_lowest_common_ancestor(G, pairs=[(node1, node2)]))
156
    if ans:
157
        assert len(ans) == 1
158
        return ans[0][1]
159
    else:
160
        return default
161

    
162

    
163
@not_implemented_for("undirected")
164
@not_implemented_for("multigraph")
165
def all_pairs_lowest_common_ancestor(G, pairs=None):
166
    """Compute the lowest common ancestor for pairs of nodes.
167

168
    Parameters
169
    ----------
170
    G : NetworkX directed graph
171

172
    pairs : iterable of pairs of nodes, optional (default: all pairs)
173
        The pairs of nodes of interest.
174
        If None, will find the LCA of all pairs of nodes.
175

176
    Returns
177
    -------
178
    An iterator over ((node1, node2), lca) where (node1, node2) are
179
    the pairs specified and lca is a lowest common ancestor of the pair.
180
    Note that for the default of all pairs in G, we consider
181
    unordered pairs, e.g. you will not get both (b, a) and (a, b).
182

183
    Notes
184
    -----
185
    Only defined on non-null directed acyclic graphs.
186

187
    Uses the $O(n^3)$ ancestor-list algorithm from:
188
    M. A. Bender, M. Farach-Colton, G. Pemmasani, S. Skiena, P. Sumazin.
189
    "Lowest common ancestors in trees and directed acyclic graphs."
190
    Journal of Algorithms, 57(2): 75-94, 2005.
191

192
    See Also
193
    --------
194
    tree_all_pairs_lowest_common_ancestor
195
    lowest_common_ancestor
196
    """
197
    if not nx.is_directed_acyclic_graph(G):
198
        raise nx.NetworkXError("LCA only defined on directed acyclic graphs.")
199
    elif len(G) == 0:
200
        raise nx.NetworkXPointlessConcept("LCA meaningless on null graphs.")
201
    elif None in G:
202
        raise nx.NetworkXError("None is not a valid node.")
203

    
204
    # The copy isn't ideal, neither is the switch-on-type, but without it users
205
    # passing an iterable will encounter confusing errors, and itertools.tee
206
    # does not appear to handle builtin types efficiently (IE, it materializes
207
    # another buffer rather than just creating listoperators at the same
208
    # offset). The Python documentation notes use of tee is unadvised when one
209
    # is consumed before the other.
210
    #
211
    # This will always produce correct results and avoid unnecessary
212
    # copies in many common cases.
213
    #
214
    if (not isinstance(pairs, (Mapping, Set)) and pairs is not None):
215
        pairs = set(pairs)
216

    
217
    # Convert G into a dag with a single root by adding a node with edges to
218
    # all sources iff necessary.
219
    sources = [n for n, deg in G.in_degree if deg == 0]
220
    if len(sources) == 1:
221
        root = sources[0]
222
        super_root = None
223
    else:
224
        G = G.copy()
225
        super_root = root = generate_unique_node()
226
        for source in sources:
227
            G.add_edge(root, source)
228

    
229
    # Start by computing a spanning tree, and the DAG of all edges not in it.
230
    # We will then use the tree lca algorithm on the spanning tree, and use
231
    # the DAG to figure out the set of tree queries necessary.
232
    spanning_tree = nx.dfs_tree(G, root)
233
    dag = nx.DiGraph((u, v) for u, v in G.edges
234
                     if u not in spanning_tree or v not in spanning_tree[u])
235

    
236
    # Ensure that both the dag and the spanning tree contains all nodes in G,
237
    # even nodes that are disconnected in the dag.
238
    spanning_tree.add_nodes_from(G)
239
    dag.add_nodes_from(G)
240

    
241
    counter = count()
242

    
243
    # Necessary to handle graphs consisting of a single node and no edges.
244
    root_distance = {root: next(counter)}
245

    
246
    for edge in nx.bfs_edges(spanning_tree, root):
247
        for node in edge:
248
            if node not in root_distance:
249
                root_distance[node] = next(counter)
250

    
251
    # Index the position of all nodes in the Euler tour so we can efficiently
252
    # sort lists and merge in tour order.
253
    euler_tour_pos = {}
254
    for node in nx.depth_first_search.dfs_preorder_nodes(G, root):
255
        if node not in euler_tour_pos:
256
            euler_tour_pos[node] = next(counter)
257

    
258
    # Generate the set of all nodes of interest in the pairs.
259
    pairset = set()
260
    if pairs is not None:
261
        pairset = set(chain.from_iterable(pairs))
262

    
263
    for n in pairset:
264
        if n not in G:
265
            msg = "The node %s is not in the digraph." % str(n)
266
            raise nx.NodeNotFound(msg)
267

    
268
    # Generate the transitive closure over the dag (not G) of all nodes, and
269
    # sort each node's closure set by order of first appearance in the Euler
270
    # tour.
271
    ancestors = {}
272
    for v in dag:
273
        if pairs is None or v in pairset:
274
            my_ancestors = nx.dag.ancestors(dag, v)
275
            my_ancestors.add(v)
276
            ancestors[v] = sorted(my_ancestors, key=euler_tour_pos.get)
277

    
278
    def _compute_dag_lca_from_tree_values(tree_lca, dry_run):
279
        """Iterate through the in-order merge for each pair of interest.
280

281
        We do this to answer the user's query, but it is also used to
282
        avoid generating unnecessary tree entries when the user only
283
        needs some pairs.
284
        """
285
        for (node1, node2) in pairs if pairs is not None else tree_lca:
286
            best_root_distance = None
287
            best = None
288

    
289
            indices = [0, 0]
290
            ancestors_by_index = [ancestors[node1], ancestors[node2]]
291

    
292
            def get_next_in_merged_lists(indices):
293
                """Returns index of the list containing the next item
294

295
                Next order refers to the merged order.
296
                Index can be 0 or 1 (or None if exhausted).
297
                """
298
                index1, index2 = indices
299
                if (index1 >= len(ancestors[node1]) and
300
                        index2 >= len(ancestors[node2])):
301
                    return None
302
                elif index1 >= len(ancestors[node1]):
303
                    return 1
304
                elif index2 >= len(ancestors[node2]):
305
                    return 0
306
                elif (euler_tour_pos[ancestors[node1][index1]] <
307
                      euler_tour_pos[ancestors[node2][index2]]):
308
                    return 0
309
                else:
310
                    return 1
311

    
312
            # Find the LCA by iterating through the in-order merge of the two
313
            # nodes of interests' ancestor sets. In principle, we need to
314
            # consider all pairs in the Cartesian product of the ancestor sets,
315
            # but by the restricted min range query reduction we are guaranteed
316
            # that one of the pairs of interest is adjacent in the merged list
317
            # iff one came from each list.
318
            i = get_next_in_merged_lists(indices)
319
            cur = ancestors_by_index[i][indices[i]], i
320
            while i is not None:
321
                prev = cur
322
                indices[i] += 1
323
                i = get_next_in_merged_lists(indices)
324
                if i is not None:
325
                    cur = ancestors_by_index[i][indices[i]], i
326

    
327
                    # Two adjacent entries must not be from the same list
328
                    # in order for their tree LCA to be considered.
329
                    if cur[1] != prev[1]:
330
                        tree_node1, tree_node2 = prev[0], cur[0]
331
                        if (tree_node1, tree_node2) in tree_lca:
332
                            ans = tree_lca[tree_node1, tree_node2]
333
                        else:
334
                            ans = tree_lca[tree_node2, tree_node1]
335
                        if not dry_run and (best is None or
336
                                            root_distance[ans] > best_root_distance):
337
                            best_root_distance = root_distance[ans]
338
                            best = ans
339

    
340
            # If the LCA is super_root, there is no LCA in the user's graph.
341
            if not dry_run and (super_root is None or best != super_root):
342
                yield (node1, node2), best
343

    
344
    # Generate the spanning tree lca for all pairs. This doesn't make sense to
345
    # do incrementally since we are using a linear time offline algorithm for
346
    # tree lca.
347
    if pairs is None:
348
        # We want all pairs so we'll need the entire tree.
349
        tree_lca = dict(tree_all_pairs_lowest_common_ancestor(spanning_tree,
350
                                                              root))
351
    else:
352
        # We only need the merged adjacent pairs by seeing which queries the
353
        # algorithm needs then generating them in a single pass.
354
        tree_lca = defaultdict(int)
355
        for _ in _compute_dag_lca_from_tree_values(tree_lca, True):
356
            pass
357

    
358
        # Replace the bogus default tree values with the real ones.
359
        for (pair, lca) in tree_all_pairs_lowest_common_ancestor(spanning_tree,
360
                                                                 root,
361
                                                                 tree_lca):
362
            tree_lca[pair] = lca
363

    
364
    # All precomputations complete. Now we just need to give the user the pairs
365
    # they asked for, or all pairs if they want them all.
366
    return _compute_dag_lca_from_tree_values(tree_lca, False)