Statistics
| Branch: | Revision:

iof-tools / networkxMiCe / networkx-master / networkx / algorithms / tree / mst.py @ 5cef0f13

History | View | Annotate | Download (20.7 KB)

1
# -*- coding: utf-8 -*-
2
#    Copyright (C) 2017 NetworkX Developers
3
#    Aric Hagberg <hagberg@lanl.gov>
4
#    Dan Schult <dschult@colgate.edu>
5
#    Pieter Swart <swart@lanl.gov>
6
#    Loïc Séguin-C. <loicseguin@gmail.com>
7
#    All rights reserved.
8
#    BSD license.
9
"""
10
Algorithms for calculating min/max spanning trees/forests.
11

12
"""
13
from heapq import heappop, heappush
14
from operator import itemgetter
15
from itertools import count
16
from math import isnan
17

    
18
import networkx as nx
19
from networkx.utils import UnionFind, not_implemented_for
20

    
21
__all__ = [
22
    'minimum_spanning_edges', 'maximum_spanning_edges',
23
    'minimum_spanning_tree', 'maximum_spanning_tree',
24
]
25

    
26

    
27
@not_implemented_for('multigraph')
28
def boruvka_mst_edges(G, minimum=True, weight='weight',
29
                      keys=False, data=True, ignore_nan=False):
30
    """Iterate over edges of a Borůvka's algorithm min/max spanning tree.
31

32
    Parameters
33
    ----------
34
    G : NetworkX Graph
35
        The edges of `G` must have distinct weights,
36
        otherwise the edges may not form a tree.
37

38
    minimum : bool (default: True)
39
        Find the minimum (True) or maximum (False) spanning tree.
40

41
    weight : string (default: 'weight')
42
        The name of the edge attribute holding the edge weights.
43

44
    keys : bool (default: True)
45
        This argument is ignored since this function is not
46
        implemented for multigraphs; it exists only for consistency
47
        with the other minimum spanning tree functions.
48

49
    data : bool (default: True)
50
        Flag for whether to yield edge attribute dicts.
51
        If True, yield edges `(u, v, d)`, where `d` is the attribute dict.
52
        If False, yield edges `(u, v)`.
53

54
    ignore_nan : bool (default: False)
55
        If a NaN is found as an edge weight normally an exception is raised.
56
        If `ignore_nan is True` then that edge is ignored instead.
57

58
    """
59
    # Initialize a forest, assuming initially that it is the discrete
60
    # partition of the nodes of the graph.
61
    forest = UnionFind(G)
62

    
63
    def best_edge(component):
64
        """Returns the optimum (minimum or maximum) edge on the edge
65
        boundary of the given set of nodes.
66

67
        A return value of ``None`` indicates an empty boundary.
68

69
        """
70
        sign = 1 if minimum else -1
71
        minwt = float('inf')
72
        boundary = None
73
        for e in nx.edge_boundary(G, component, data=True):
74
            wt = e[-1].get(weight, 1) * sign
75
            if isnan(wt):
76
                if ignore_nan:
77
                    continue
78
                msg = "NaN found as an edge weight. Edge %s"
79
                raise ValueError(msg % (e,))
80
            if wt < minwt:
81
                minwt = wt
82
                boundary = e
83
        return boundary
84

    
85
    # Determine the optimum edge in the edge boundary of each component
86
    # in the forest.
87
    best_edges = (best_edge(component) for component in forest.to_sets())
88
    best_edges = [edge for edge in best_edges if edge is not None]
89
    # If each entry was ``None``, that means the graph was disconnected,
90
    # so we are done generating the forest.
91
    while best_edges:
92
        # Determine the optimum edge in the edge boundary of each
93
        # component in the forest.
94
        #
95
        # This must be a sequence, not an iterator. In this list, the
96
        # same edge may appear twice, in different orientations (but
97
        # that's okay, since a union operation will be called on the
98
        # endpoints the first time it is seen, but not the second time).
99
        #
100
        # Any ``None`` indicates that the edge boundary for that
101
        # component was empty, so that part of the forest has been
102
        # completed.
103
        #
104
        # TODO This can be parallelized, both in the outer loop over
105
        # each component in the forest and in the computation of the
106
        # minimum. (Same goes for the identical lines outside the loop.)
107
        best_edges = (best_edge(component) for component in forest.to_sets())
108
        best_edges = [edge for edge in best_edges if edge is not None]
109
        # Join trees in the forest using the best edges, and yield that
110
        # edge, since it is part of the spanning tree.
111
        #
112
        # TODO This loop can be parallelized, to an extent (the union
113
        # operation must be atomic).
114
        for u, v, d in best_edges:
115
            if forest[u] != forest[v]:
116
                if data:
117
                    yield u, v, d
118
                else:
119
                    yield u, v
120
                forest.union(u, v)
121

    
122

    
123
def kruskal_mst_edges(G, minimum, weight='weight',
124
                      keys=True, data=True, ignore_nan=False):
125
    """Iterate over edges of a Kruskal's algorithm min/max spanning tree.
126

127
    Parameters
128
    ----------
129
    G : NetworkX Graph
130
        The graph holding the tree of interest.
131

132
    minimum : bool (default: True)
133
        Find the minimum (True) or maximum (False) spanning tree.
134

135
    weight : string (default: 'weight')
136
        The name of the edge attribute holding the edge weights.
137

138
    keys : bool (default: True)
139
        If `G` is a multigraph, `keys` controls whether edge keys ar yielded.
140
        Otherwise `keys` is ignored.
141

142
    data : bool (default: True)
143
        Flag for whether to yield edge attribute dicts.
144
        If True, yield edges `(u, v, d)`, where `d` is the attribute dict.
145
        If False, yield edges `(u, v)`.
146

147
    ignore_nan : bool (default: False)
148
        If a NaN is found as an edge weight normally an exception is raised.
149
        If `ignore_nan is True` then that edge is ignored instead.
150

151
    """
152
    subtrees = UnionFind()
153
    if G.is_multigraph():
154
        edges = G.edges(keys=True, data=True)
155

    
156
        def filter_nan_edges(edges=edges, weight=weight):
157
            sign = 1 if minimum else -1
158
            for u, v, k, d in edges:
159
                wt = d.get(weight, 1) * sign
160
                if isnan(wt):
161
                    if ignore_nan:
162
                        continue
163
                    msg = "NaN found as an edge weight. Edge %s"
164
                    raise ValueError(msg % ((u, v, k, d),))
165
                yield wt, u, v, k, d
166
    else:
167
        edges = G.edges(data=True)
168

    
169
        def filter_nan_edges(edges=edges, weight=weight):
170
            sign = 1 if minimum else -1
171
            for u, v, d in edges:
172
                wt = d.get(weight, 1) * sign
173
                if isnan(wt):
174
                    if ignore_nan:
175
                        continue
176
                    msg = "NaN found as an edge weight. Edge %s"
177
                    raise ValueError(msg % ((u, v, d),))
178
                yield wt, u, v, d
179
    edges = sorted(filter_nan_edges(), key=itemgetter(0))
180
    # Multigraphs need to handle edge keys in addition to edge data.
181
    if G.is_multigraph():
182
        for wt, u, v, k, d in edges:
183
            if subtrees[u] != subtrees[v]:
184
                if keys:
185
                    if data:
186
                        yield u, v, k, d
187
                    else:
188
                        yield u, v, k
189
                else:
190
                    if data:
191
                        yield u, v, d
192
                    else:
193
                        yield u, v
194
                subtrees.union(u, v)
195
    else:
196
        for wt, u, v, d in edges:
197
            if subtrees[u] != subtrees[v]:
198
                if data:
199
                    yield (u, v, d)
200
                else:
201
                    yield (u, v)
202
                subtrees.union(u, v)
203

    
204

    
205
def prim_mst_edges(G, minimum, weight='weight',
206
                   keys=True, data=True, ignore_nan=False):
207
    """Iterate over edges of Prim's algorithm min/max spanning tree.
208

209
    Parameters
210
    ----------
211
    G : NetworkX Graph
212
        The graph holding the tree of interest.
213

214
    minimum : bool (default: True)
215
        Find the minimum (True) or maximum (False) spanning tree.
216

217
    weight : string (default: 'weight')
218
        The name of the edge attribute holding the edge weights.
219

220
    keys : bool (default: True)
221
        If `G` is a multigraph, `keys` controls whether edge keys ar yielded.
222
        Otherwise `keys` is ignored.
223

224
    data : bool (default: True)
225
        Flag for whether to yield edge attribute dicts.
226
        If True, yield edges `(u, v, d)`, where `d` is the attribute dict.
227
        If False, yield edges `(u, v)`.
228

229
    ignore_nan : bool (default: False)
230
        If a NaN is found as an edge weight normally an exception is raised.
231
        If `ignore_nan is True` then that edge is ignored instead.
232

233
    """
234
    is_multigraph = G.is_multigraph()
235
    push = heappush
236
    pop = heappop
237

    
238
    nodes = list(G)
239
    c = count()
240

    
241
    sign = 1 if minimum else -1
242

    
243
    while nodes:
244
        u = nodes.pop(0)
245
        frontier = []
246
        visited = [u]
247
        if is_multigraph:
248
            for v, keydict in G.adj[u].items():
249
                for k, d in keydict.items():
250
                    wt = d.get(weight, 1) * sign
251
                    if isnan(wt):
252
                        if ignore_nan:
253
                            continue
254
                        msg = "NaN found as an edge weight. Edge %s"
255
                        raise ValueError(msg % ((u, v, k, d),))
256
                    push(frontier, (wt, next(c), u, v, k, d))
257
        else:
258
            for v, d in G.adj[u].items():
259
                wt = d.get(weight, 1) * sign
260
                if isnan(wt):
261
                    if ignore_nan:
262
                        continue
263
                    msg = "NaN found as an edge weight. Edge %s"
264
                    raise ValueError(msg % ((u, v, d),))
265
                push(frontier, (wt, next(c), u, v, d))
266
        while frontier:
267
            if is_multigraph:
268
                W, _, u, v, k, d = pop(frontier)
269
            else:
270
                W, _, u, v, d = pop(frontier)
271
            if v in visited:
272
                continue
273
            # Multigraphs need to handle edge keys in addition to edge data.
274
            if is_multigraph and keys:
275
                if data:
276
                    yield u, v, k, d
277
                else:
278
                    yield u, v, k
279
            else:
280
                if data:
281
                    yield u, v, d
282
                else:
283
                    yield u, v
284
            # update frontier
285
            visited.append(v)
286
            nodes.remove(v)
287
            if is_multigraph:
288
                for w, keydict in G.adj[v].items():
289
                    if w in visited:
290
                        continue
291
                    for k2, d2 in keydict.items():
292
                        new_weight = d2.get(weight, 1) * sign
293
                        push(frontier, (new_weight, next(c), v, w, k2, d2))
294
            else:
295
                for w, d2 in G.adj[v].items():
296
                    if w in visited:
297
                        continue
298
                    new_weight = d2.get(weight, 1) * sign
299
                    push(frontier, (new_weight, next(c), v, w, d2))
300

    
301

    
302
ALGORITHMS = {
303
    'boruvka': boruvka_mst_edges,
304
    u'borůvka': boruvka_mst_edges,
305
    'kruskal': kruskal_mst_edges,
306
    'prim': prim_mst_edges
307
}
308

    
309

    
310
@not_implemented_for('directed')
311
def minimum_spanning_edges(G, algorithm='kruskal', weight='weight',
312
                           keys=True, data=True, ignore_nan=False):
313
    """Generate edges in a minimum spanning forest of an undirected
314
    weighted graph.
315

316
    A minimum spanning tree is a subgraph of the graph (a tree)
317
    with the minimum sum of edge weights.  A spanning forest is a
318
    union of the spanning trees for each connected component of the graph.
319

320
    Parameters
321
    ----------
322
    G : undirected Graph
323
       An undirected graph. If `G` is connected, then the algorithm finds a
324
       spanning tree. Otherwise, a spanning forest is found.
325

326
    algorithm : string
327
       The algorithm to use when finding a minimum spanning tree. Valid
328
       choices are 'kruskal', 'prim', or 'boruvka'. The default is 'kruskal'.
329

330
    weight : string
331
       Edge data key to use for weight (default 'weight').
332

333
    keys : bool
334
       Whether to yield edge key in multigraphs in addition to the edge.
335
       If `G` is not a multigraph, this is ignored.
336

337
    data : bool, optional
338
       If True yield the edge data along with the edge.
339

340
    ignore_nan : bool (default: False)
341
        If a NaN is found as an edge weight normally an exception is raised.
342
        If `ignore_nan is True` then that edge is ignored instead.
343

344
    Returns
345
    -------
346
    edges : iterator
347
       An iterator over edges in a maximum spanning tree of `G`.
348
       Edges connecting nodes `u` and `v` are represented as tuples:
349
       `(u, v, k, d)` or `(u, v, k)` or `(u, v, d)` or `(u, v)`
350

351
       If `G` is a multigraph, `keys` indicates whether the edge key `k` will
352
       be reported in the third position in the edge tuple. `data` indicates
353
       whether the edge datadict `d` will appear at the end of the edge tuple.
354

355
       If `G` is not a multigraph, the tuples are `(u, v, d)` if `data` is True
356
       or `(u, v)` if `data` is False.
357

358
    Examples
359
    --------
360
    >>> from networkx.algorithms import tree
361

362
    Find minimum spanning edges by Kruskal's algorithm
363

364
    >>> G = nx.cycle_graph(4)
365
    >>> G.add_edge(0, 3, weight=2)
366
    >>> mst = tree.minimum_spanning_edges(G, algorithm='kruskal', data=False)
367
    >>> edgelist = list(mst)
368
    >>> sorted(edgelist)
369
    [(0, 1), (1, 2), (2, 3)]
370

371
    Find minimum spanning edges by Prim's algorithm
372

373
    >>> G = nx.cycle_graph(4)
374
    >>> G.add_edge(0, 3, weight=2)
375
    >>> mst = tree.minimum_spanning_edges(G, algorithm='prim', data=False)
376
    >>> edgelist = list(mst)
377
    >>> sorted(edgelist)
378
    [(0, 1), (1, 2), (2, 3)]
379

380
    Notes
381
    -----
382
    For Borůvka's algorithm, each edge must have a weight attribute, and
383
    each edge weight must be distinct.
384

385
    For the other algorithms, if the graph edges do not have a weight
386
    attribute a default weight of 1 will be used.
387

388
    Modified code from David Eppstein, April 2006
389
    http://www.ics.uci.edu/~eppstein/PADS/
390

391
    """
392
    try:
393
        algo = ALGORITHMS[algorithm]
394
    except KeyError:
395
        msg = '{} is not a valid choice for an algorithm.'.format(algorithm)
396
        raise ValueError(msg)
397

    
398
    return algo(G, minimum=True, weight=weight, keys=keys, data=data,
399
                ignore_nan=ignore_nan)
400

    
401

    
402
@not_implemented_for('directed')
403
def maximum_spanning_edges(G, algorithm='kruskal', weight='weight',
404
                           keys=True, data=True, ignore_nan=False):
405
    """Generate edges in a maximum spanning forest of an undirected
406
    weighted graph.
407

408
    A maximum spanning tree is a subgraph of the graph (a tree)
409
    with the maximum possible sum of edge weights.  A spanning forest is a
410
    union of the spanning trees for each connected component of the graph.
411

412
    Parameters
413
    ----------
414
    G : undirected Graph
415
       An undirected graph. If `G` is connected, then the algorithm finds a
416
       spanning tree. Otherwise, a spanning forest is found.
417

418
    algorithm : string
419
       The algorithm to use when finding a maximum spanning tree. Valid
420
       choices are 'kruskal', 'prim', or 'boruvka'. The default is 'kruskal'.
421

422
    weight : string
423
       Edge data key to use for weight (default 'weight').
424

425
    keys : bool
426
       Whether to yield edge key in multigraphs in addition to the edge.
427
       If `G` is not a multigraph, this is ignored.
428

429
    data : bool, optional
430
       If True yield the edge data along with the edge.
431

432
    ignore_nan : bool (default: False)
433
        If a NaN is found as an edge weight normally an exception is raised.
434
        If `ignore_nan is True` then that edge is ignored instead.
435

436
    Returns
437
    -------
438
    edges : iterator
439
       An iterator over edges in a maximum spanning tree of `G`.
440
       Edges connecting nodes `u` and `v` are represented as tuples:
441
       `(u, v, k, d)` or `(u, v, k)` or `(u, v, d)` or `(u, v)`
442

443
       If `G` is a multigraph, `keys` indicates whether the edge key `k` will
444
       be reported in the third position in the edge tuple. `data` indicates
445
       whether the edge datadict `d` will appear at the end of the edge tuple.
446

447
       If `G` is not a multigraph, the tuples are `(u, v, d)` if `data` is True
448
       or `(u, v)` if `data` is False.
449

450
    Examples
451
    --------
452
    >>> from networkx.algorithms import tree
453

454
    Find maximum spanning edges by Kruskal's algorithm
455

456
    >>> G = nx.cycle_graph(4)
457
    >>> G.add_edge(0, 3, weight=2)
458
    >>> mst = tree.maximum_spanning_edges(G, algorithm='kruskal', data=False)
459
    >>> edgelist = list(mst)
460
    >>> sorted(edgelist)
461
    [(0, 1), (0, 3), (1, 2)]
462

463
    Find maximum spanning edges by Prim's algorithm
464

465
    >>> G = nx.cycle_graph(4)
466
    >>> G.add_edge(0, 3, weight=2) # assign weight 2 to edge 0-3
467
    >>> mst = tree.maximum_spanning_edges(G, algorithm='prim', data=False)
468
    >>> edgelist = list(mst)
469
    >>> sorted(edgelist)
470
    [(0, 1), (0, 3), (3, 2)]
471

472
    Notes
473
    -----
474
    For Borůvka's algorithm, each edge must have a weight attribute, and
475
    each edge weight must be distinct.
476

477
    For the other algorithms, if the graph edges do not have a weight
478
    attribute a default weight of 1 will be used.
479

480
    Modified code from David Eppstein, April 2006
481
    http://www.ics.uci.edu/~eppstein/PADS/
482
    """
483
    try:
484
        algo = ALGORITHMS[algorithm]
485
    except KeyError:
486
        msg = '{} is not a valid choice for an algorithm.'.format(algorithm)
487
        raise ValueError(msg)
488

    
489
    return algo(G, minimum=False, weight=weight, keys=keys, data=data,
490
                ignore_nan=ignore_nan)
491

    
492

    
493
def minimum_spanning_tree(G, weight='weight', algorithm='kruskal',
494
                          ignore_nan=False):
495
    """Returns a minimum spanning tree or forest on an undirected graph `G`.
496

497
    Parameters
498
    ----------
499
    G : undirected graph
500
        An undirected graph. If `G` is connected, then the algorithm finds a
501
        spanning tree. Otherwise, a spanning forest is found.
502

503
    weight : str
504
       Data key to use for edge weights.
505

506
    algorithm : string
507
       The algorithm to use when finding a minimum spanning tree. Valid
508
       choices are 'kruskal', 'prim', or 'boruvka'. The default is
509
       'kruskal'.
510

511
    ignore_nan : bool (default: False)
512
        If a NaN is found as an edge weight normally an exception is raised.
513
        If `ignore_nan is True` then that edge is ignored instead.
514

515
    Returns
516
    -------
517
    G : NetworkX Graph
518
       A minimum spanning tree or forest.
519

520
    Examples
521
    --------
522
    >>> G = nx.cycle_graph(4)
523
    >>> G.add_edge(0, 3, weight=2)
524
    >>> T = nx.minimum_spanning_tree(G)
525
    >>> sorted(T.edges(data=True))
526
    [(0, 1, {}), (1, 2, {}), (2, 3, {})]
527

528

529
    Notes
530
    -----
531
    For Borůvka's algorithm, each edge must have a weight attribute, and
532
    each edge weight must be distinct.
533

534
    For the other algorithms, if the graph edges do not have a weight
535
    attribute a default weight of 1 will be used.
536

537
    There may be more than one tree with the same minimum or maximum weight.
538
    See :mod:`networkx.tree.recognition` for more detailed definitions.
539

540
    Isolated nodes with self-loops are in the tree as edgeless isolated nodes.
541

542
    """
543
    edges = minimum_spanning_edges(G, algorithm, weight, keys=True,
544
                                   data=True, ignore_nan=ignore_nan)
545
    T = G.__class__()  # Same graph class as G
546
    T.graph.update(G.graph)
547
    T.add_nodes_from(G.nodes.items())
548
    T.add_edges_from(edges)
549
    return T
550

    
551

    
552
def maximum_spanning_tree(G, weight='weight', algorithm='kruskal',
553
                          ignore_nan=False):
554
    """Returns a maximum spanning tree or forest on an undirected graph `G`.
555

556
    Parameters
557
    ----------
558
    G : undirected graph
559
        An undirected graph. If `G` is connected, then the algorithm finds a
560
        spanning tree. Otherwise, a spanning forest is found.
561

562
    weight : str
563
       Data key to use for edge weights.
564

565
    algorithm : string
566
       The algorithm to use when finding a maximum spanning tree. Valid
567
       choices are 'kruskal', 'prim', or 'boruvka'. The default is
568
       'kruskal'.
569

570
    ignore_nan : bool (default: False)
571
        If a NaN is found as an edge weight normally an exception is raised.
572
        If `ignore_nan is True` then that edge is ignored instead.
573

574

575
    Returns
576
    -------
577
    G : NetworkX Graph
578
       A maximum spanning tree or forest.
579

580

581
    Examples
582
    --------
583
    >>> G = nx.cycle_graph(4)
584
    >>> G.add_edge(0, 3, weight=2)
585
    >>> T = nx.maximum_spanning_tree(G)
586
    >>> sorted(T.edges(data=True))
587
    [(0, 1, {}), (0, 3, {'weight': 2}), (1, 2, {})]
588

589

590
    Notes
591
    -----
592
    For Borůvka's algorithm, each edge must have a weight attribute, and
593
    each edge weight must be distinct.
594

595
    For the other algorithms, if the graph edges do not have a weight
596
    attribute a default weight of 1 will be used.
597

598
    There may be more than one tree with the same minimum or maximum weight.
599
    See :mod:`networkx.tree.recognition` for more detailed definitions.
600

601
    Isolated nodes with self-loops are in the tree as edgeless isolated nodes.
602

603
    """
604
    edges = maximum_spanning_edges(G, algorithm, weight, keys=True,
605
                                   data=True, ignore_nan=ignore_nan)
606
    edges = list(edges)
607
    T = G.__class__()  # Same graph class as G
608
    T.graph.update(G.graph)
609
    T.add_nodes_from(G.nodes.items())
610
    T.add_edges_from(edges)
611
    return T