Statistics
| Branch: | Revision:

iof-tools / networkxMiCe / networkx-master / networkx / algorithms / tree / branchings.py @ 5cef0f13

History | View | Annotate | Download (24.6 KB)

1
# encoding: utf-8
2
"""
3
Algorithms for finding optimum branchings and spanning arborescences.
4

5
This implementation is based on:
6

7
    J. Edmonds, Optimum branchings, J. Res. Natl. Bur. Standards 71B (1967),
8
    233–240. URL: http://archive.org/details/jresv71Bn4p233
9

10
"""
11
# TODO: Implement method from Gabow, Galil, Spence and Tarjan:
12
#
13
#@article{
14
#    year={1986},
15
#    issn={0209-9683},
16
#    journal={Combinatorica},
17
#    volume={6},
18
#    number={2},
19
#    doi={10.1007/BF02579168},
20
#    title={Efficient algorithms for finding minimum spanning trees in
21
#        undirected and directed graphs},
22
#    url={https://doi.org/10.1007/BF02579168},
23
#    publisher={Springer-Verlag},
24
#    keywords={68 B 15; 68 C 05},
25
#    author={Gabow, Harold N. and Galil, Zvi and Spencer, Thomas and Tarjan,
26
#        Robert E.},
27
#    pages={109-122},
28
#    language={English}
29
#}
30

    
31
from __future__ import division
32
from __future__ import print_function
33

    
34
import string
35
import random
36
from operator import itemgetter
37

    
38
import networkx as nx
39
from networkx.utils import py_random_state
40

    
41
from .recognition import *
42

    
43
__all__ = [
44
    'branching_weight', 'greedy_branching',
45
    'maximum_branching', 'minimum_branching',
46
    'maximum_spanning_arborescence', 'minimum_spanning_arborescence',
47
    'Edmonds'
48
]
49

    
50
KINDS = set(['max', 'min'])
51

    
52
STYLES = {
53
    'branching': 'branching',
54
    'arborescence': 'arborescence',
55
    'spanning arborescence': 'arborescence'
56
}
57

    
58
INF = float('inf')
59

    
60

    
61
@py_random_state(1)
62
def random_string(L=15, seed=None):
63
    return ''.join([seed.choice(string.ascii_letters) for n in range(L)])
64

    
65

    
66
def _min_weight(weight):
67
    return -weight
68

    
69

    
70
def _max_weight(weight):
71
    return weight
72

    
73

    
74
def branching_weight(G, attr='weight', default=1):
75
    """
76
    Returns the total weight of a branching.
77

78
    """
79
    return sum(edge[2].get(attr, default) for edge in G.edges(data=True))
80

    
81

    
82
@py_random_state(4)
83
def greedy_branching(G, attr='weight', default=1, kind='max', seed=None):
84
    """
85
    Returns a branching obtained through a greedy algorithm.
86

87
    This algorithm is wrong, and cannot give a proper optimal branching.
88
    However, we include it for pedagogical reasons, as it can be helpful to
89
    see what its outputs are.
90

91
    The output is a branching, and possibly, a spanning arborescence. However,
92
    it is not guaranteed to be optimal in either case.
93

94
    Parameters
95
    ----------
96
    G : DiGraph
97
        The directed graph to scan.
98
    attr : str
99
        The attribute to use as weights. If None, then each edge will be
100
        treated equally with a weight of 1.
101
    default : float
102
        When `attr` is not None, then if an edge does not have that attribute,
103
        `default` specifies what value it should take.
104
    kind : str
105
        The type of optimum to search for: 'min' or 'max' greedy branching.
106
    seed : integer, random_state, or None (default)
107
        Indicator of random number generation state.
108
        See :ref:`Randomness<randomness>`.
109

110
    Returns
111
    -------
112
    B : directed graph
113
        The greedily obtained branching.
114

115
    """
116
    if kind not in KINDS:
117
        raise nx.NetworkXException("Unknown value for `kind`.")
118

    
119
    if kind == 'min':
120
        reverse = False
121
    else:
122
        reverse = True
123

    
124
    if attr is None:
125
        # Generate a random string the graph probably won't have.
126
        attr = random_string(seed=seed)
127

    
128
    edges = [(u, v, data.get(attr, default))
129
             for (u, v, data) in G.edges(data=True)]
130

    
131
    # We sort by weight, but also by nodes to normalize behavior across runs.
132
    try:
133
        edges.sort(key=itemgetter(2, 0, 1), reverse=reverse)
134
    except TypeError:
135
        # This will fail in Python 3.x if the nodes are of varying types.
136
        # In that case, we use the arbitrary order.
137
        edges.sort(key=itemgetter(2), reverse=reverse)
138

    
139
    # The branching begins with a forest of no edges.
140
    B = nx.DiGraph()
141
    B.add_nodes_from(G)
142

    
143
    # Now we add edges greedily so long we maintain the branching.
144
    uf = nx.utils.UnionFind()
145
    for i, (u, v, w) in enumerate(edges):
146
        if uf[u] == uf[v]:
147
            # Adding this edge would form a directed cycle.
148
            continue
149
        elif B.in_degree(v) == 1:
150
            # The edge would increase the degree to be greater than one.
151
            continue
152
        else:
153
            # If attr was None, then don't insert weights...
154
            data = {}
155
            if attr is not None:
156
                data[attr] = w
157
            B.add_edge(u, v, **data)
158
            uf.union(u, v)
159

    
160
    return B
161

    
162

    
163
class MultiDiGraph_EdgeKey(nx.MultiDiGraph):
164
    """
165
    MultiDiGraph which assigns unique keys to every edge.
166

167
    Adds a dictionary edge_index which maps edge keys to (u, v, data) tuples.
168

169
    This is not a complete implementation. For Edmonds algorithm, we only use
170
    add_node and add_edge, so that is all that is implemented here. During
171
    additions, any specified keys are ignored---this means that you also
172
    cannot update edge attributes through add_node and add_edge.
173

174
    Why do we need this? Edmonds algorithm requires that we track edges, even
175
    as we change the head and tail of an edge, and even changing the weight
176
    of edges. We must reliably track edges across graph mutations.
177

178
    """
179

    
180
    def __init__(self, incoming_graph_data=None, **attr):
181
        cls = super(MultiDiGraph_EdgeKey, self)
182
        cls.__init__(incoming_graph_data=incoming_graph_data, **attr)
183

    
184
        self._cls = cls
185
        self.edge_index = {}
186

    
187
    def remove_node(self, n):
188
        keys = set([])
189
        for keydict in self.pred[n].values():
190
            keys.update(keydict)
191
        for keydict in self.succ[n].values():
192
            keys.update(keydict)
193

    
194
        for key in keys:
195
            del self.edge_index[key]
196

    
197
        self._cls.remove_node(n)
198

    
199
    def remove_nodes_from(self, nbunch):
200
        for n in nbunch:
201
            self.remove_node(n)
202

    
203
    def add_edge(self, u_for_edge, v_for_edge, key_for_edge, **attr):
204
        """
205
        Key is now required.
206

207
        """
208
        u, v, key = u_for_edge, v_for_edge, key_for_edge
209
        if key in self.edge_index:
210
            uu, vv, _ = self.edge_index[key]
211
            if (u != uu) or (v != vv):
212
                raise Exception("Key {0!r} is already in use.".format(key))
213

    
214
        self._cls.add_edge(u, v, key, **attr)
215
        self.edge_index[key] = (u, v, self.succ[u][v][key])
216

    
217
    def add_edges_from(self, ebunch_to_add, **attr):
218
        for u, v, k, d in ebunch_to_add:
219
            self.add_edge(u, v, k, **d)
220

    
221
    def remove_edge_with_key(self, key):
222
        try:
223
            u, v, _ = self.edge_index[key]
224
        except KeyError:
225
            raise KeyError('Invalid edge key {0!r}'.format(key))
226
        else:
227
            del self.edge_index[key]
228
            self._cls.remove_edge(u, v, key)
229

    
230
    def remove_edges_from(self, ebunch):
231
        raise NotImplementedError
232

    
233

    
234
def get_path(G, u, v):
235
    """
236
    Returns the edge keys of the unique path between u and v.
237

238
    This is not a generic function. G must be a branching and an instance of
239
    MultiDiGraph_EdgeKey.
240

241
    """
242
    nodes = nx.shortest_path(G, u, v)
243
    # We are guaranteed that there is only one edge connected every node
244
    # in the shortest path.
245

    
246
    def first_key(i, vv):
247
        # Needed for 2.x/3.x compatibilitity
248
        keys = G[nodes[i]][vv].keys()
249
        # Normalize behavior
250
        keys = list(keys)
251
        return keys[0]
252

    
253
    edges = [first_key(i, vv) for i, vv in enumerate(nodes[1:])]
254
    return nodes, edges
255

    
256

    
257
class Edmonds(object):
258
    """
259
    Edmonds algorithm for finding optimal branchings and spanning arborescences.
260

261
    """
262

    
263
    def __init__(self, G, seed=None):
264
        self.G_original = G
265

    
266
        # Need to fix this. We need the whole tree.
267
        self.store = True
268

    
269
        # The final answer.
270
        self.edges = []
271

    
272
        # Since we will be creating graphs with new nodes, we need to make
273
        # sure that our node names do not conflict with the real node names.
274
        self.template = random_string(seed=seed) + '_{0}'
275

    
276

    
277
    def _init(self, attr, default, kind, style, preserve_attrs, seed):
278
        if kind not in KINDS:
279
            raise nx.NetworkXException("Unknown value for `kind`.")
280

    
281
        # Store inputs.
282
        self.attr = attr
283
        self.default = default
284
        self.kind = kind
285
        self.style = style
286

    
287
        # Determine how we are going to transform the weights.
288
        if kind == 'min':
289
            self.trans = trans = _min_weight
290
        else:
291
            self.trans = trans = _max_weight
292

    
293
        if attr is None:
294
            # Generate a random attr the graph probably won't have.
295
            attr = random_string(seed=seed)
296

    
297
        # This is the actual attribute used by the algorithm.
298
        self._attr = attr
299

    
300
        # This attribute is used to store whether a particular edge is still
301
        # a candidate. We generate a random attr to remove clashes with
302
        # preserved edges
303
        self.candidate_attr = 'candidate_' + random_string(seed=seed)
304

    
305
        # The object we manipulate at each step is a multidigraph.
306
        self.G = G = MultiDiGraph_EdgeKey()
307
        for key, (u, v, data) in enumerate(self.G_original.edges(data=True)):
308
            d = {attr: trans(data.get(attr, default))}
309

    
310
            if preserve_attrs:
311
                for (d_k, d_v) in data.items():
312
                    if d_k != attr:
313
                        d[d_k] = d_v
314

    
315
            G.add_edge(u, v, key, **d)
316

    
317
        self.level = 0
318

    
319
        # These are the "buckets" from the paper.
320
        #
321
        # As in the paper, G^i are modified versions of the original graph.
322
        # D^i and E^i are nodes and edges of the maximal edges that are
323
        # consistent with G^i. These are dashed edges in figures A-F of the
324
        # paper. In this implementation, we store D^i and E^i together as a
325
        # graph B^i. So we will have strictly more B^i than the paper does.
326
        self.B = MultiDiGraph_EdgeKey()
327
        self.B.edge_index = {}
328
        self.graphs = []        # G^i
329
        self.branchings = []    # B^i
330
        self.uf = nx.utils.UnionFind()
331

    
332
        # A list of lists of edge indexes. Each list is a circuit for graph G^i.
333
        # Note the edge list will not, in general, be a circuit in graph G^0.
334
        self.circuits = []
335
        # Stores the index of the minimum edge in the circuit found in G^i and B^i.
336
        # The ordering of the edges seems to preserve the weight ordering from G^0.
337
        # So even if the circuit does not form a circuit in G^0, it is still true
338
        # that the minimum edge of the circuit in G^i is still the minimum edge
339
        # in circuit G^0 (depsite their weights being different).
340
        self.minedge_circuit = []
341

    
342
    def find_optimum(self, attr='weight', default=1, kind='max',
343
                     style='branching', preserve_attrs=False, seed=None):
344
        """
345
        Returns a branching from G.
346

347
        Parameters
348
        ----------
349
        attr : str
350
            The edge attribute used to in determining optimality.
351
        default : float
352
            The value of the edge attribute used if an edge does not have
353
            the attribute `attr`.
354
        kind : {'min', 'max'}
355
            The type of optimum to search for, either 'min' or 'max'.
356
        style : {'branching', 'arborescence'}
357
            If 'branching', then an optimal branching is found. If `style` is
358
            'arborescence', then a branching is found, such that if the
359
            branching is also an arborescence, then the branching is an
360
            optimal spanning arborescences. A given graph G need not have
361
            an optimal spanning arborescence.
362
        preserve_attrs : bool
363
            If True, preserve the other edge attributes of the original
364
            graph (that are not the one passed to `attr`)
365
        seed : integer, random_state, or None (default)
366
            Indicator of random number generation state.
367
            See :ref:`Randomness<randomness>`.
368

369
        Returns
370
        -------
371
        H : (multi)digraph
372
            The branching.
373

374
        """
375
        self._init(attr, default, kind, style, preserve_attrs, seed)
376
        uf = self.uf
377

    
378
        # This enormous while loop could use some refactoring...
379

    
380
        G, B = self.G, self.B
381
        D = set([])
382
        nodes = iter(list(G.nodes()))
383
        attr = self._attr
384
        G_pred = G.pred
385

    
386
        def desired_edge(v):
387
            """
388
            Find the edge directed toward v with maximal weight.
389

390
            """
391
            edge = None
392
            weight = -INF
393
            for u, _, key, data in G.in_edges(v, data=True, keys=True):
394
                new_weight = data[attr]
395
                if new_weight > weight:
396
                    weight = new_weight
397
                    edge = (u, v, key, new_weight)
398

    
399
            return edge, weight
400

    
401
        while True:
402
            # (I1): Choose a node v in G^i not in D^i.
403
            try:
404
                v = next(nodes)
405
            except StopIteration:
406
                # If there are no more new nodes to consider, then we *should*
407
                # meet the break condition (b) from the paper:
408
                #   (b) every node of G^i is in D^i and E^i is a branching
409
                # Construction guarantees that it's a branching.
410
                assert(len(G) == len(B))
411
                if len(B):
412
                    assert(is_branching(B))
413

    
414
                if self.store:
415
                    self.graphs.append(G.copy())
416
                    self.branchings.append(B.copy())
417

    
418
                    # Add these to keep the lengths equal. Element i is the
419
                    # circuit at level i that was merged to form branching i+1.
420
                    # There is no circuit for the last level.
421
                    self.circuits.append([])
422
                    self.minedge_circuit.append(None)
423
                break
424
            else:
425
                if v in D:
426
                    #print("v in D", v)
427
                    continue
428

    
429
            # Put v into bucket D^i.
430
            #print("Adding node {0}".format(v))
431
            D.add(v)
432
            B.add_node(v)
433

    
434
            edge, weight = desired_edge(v)
435
            #print("Max edge is {0!r}".format(edge))
436
            if edge is None:
437
                # If there is no edge, continue with a new node at (I1).
438
                continue
439
            else:
440
                # Determine if adding the edge to E^i would mean its no longer
441
                # a branching. Presently, v has indegree 0 in B---it is a root.
442
                u = edge[0]
443

    
444
                if uf[u] == uf[v]:
445
                    # Then adding the edge will create a circuit. Then B
446
                    # contains a unique path P from v to u. So condition (a)
447
                    # from the paper does hold. We need to store the circuit
448
                    # for future reference.
449
                    Q_nodes, Q_edges = get_path(B, v, u)
450
                    Q_edges.append(edge[2])
451
                else:
452
                    # Then B with the edge is still a branching and condition
453
                    # (a) from the paper does not hold.
454
                    Q_nodes, Q_edges = None, None
455

    
456
                # Conditions for adding the edge.
457
                # If weight < 0, then it cannot help in finding a maximum branching.
458
                if self.style == 'branching' and weight <= 0:
459
                    acceptable = False
460
                else:
461
                    acceptable = True
462

    
463
                #print("Edge is acceptable: {0}".format(acceptable))
464
                if acceptable:
465
                    dd = {attr: weight}
466
                    B.add_edge(u, v, edge[2], **dd)
467
                    G[u][v][edge[2]][self.candidate_attr] = True
468
                    uf.union(u, v)
469
                    if Q_edges is not None:
470
                        #print("Edge introduced a simple cycle:")
471
                        #print(Q_nodes, Q_edges)
472

    
473
                        # Move to method
474
                        # Previous meaning of u and v is no longer important.
475

    
476
                        # Apply (I2).
477
                        # Get the edge in the cycle with the minimum weight.
478
                        # Also, save the incoming weights for each node.
479
                        minweight = INF
480
                        minedge = None
481
                        Q_incoming_weight = {}
482
                        for edge_key in Q_edges:
483
                            u, v, data = B.edge_index[edge_key]
484
                            w = data[attr]
485
                            Q_incoming_weight[v] = w
486
                            if w < minweight:
487
                                minweight = w
488
                                minedge = edge_key
489

    
490
                        self.circuits.append(Q_edges)
491
                        self.minedge_circuit.append(minedge)
492

    
493
                        if self.store:
494
                            self.graphs.append(G.copy())
495
                        # Always need the branching with circuits.
496
                        self.branchings.append(B.copy())
497

    
498
                        # Now we mutate it.
499
                        new_node = self.template.format(self.level)
500

    
501
                        #print(minweight, minedge, Q_incoming_weight)
502

    
503
                        G.add_node(new_node)
504
                        new_edges = []
505
                        for u, v, key, data in G.edges(data=True, keys=True):
506
                            if u in Q_incoming_weight:
507
                                if v in Q_incoming_weight:
508
                                    # Circuit edge, do nothing for now.
509
                                    # Eventually delete it.
510
                                    continue
511
                                else:
512
                                    # Outgoing edge. Make it from new node
513
                                    dd = data.copy()
514
                                    new_edges.append((new_node, v, key, dd))
515
                            else:
516
                                if v in Q_incoming_weight:
517
                                    # Incoming edge. Change its weight
518
                                    w = data[attr]
519
                                    w += minweight - Q_incoming_weight[v]
520
                                    dd = data.copy()
521
                                    dd[attr] = w
522
                                    new_edges.append((u, new_node, key, dd))
523
                                else:
524
                                    # Outside edge. No modification necessary.
525
                                    continue
526

    
527
                        G.remove_nodes_from(Q_nodes)
528
                        B.remove_nodes_from(Q_nodes)
529
                        D.difference_update(set(Q_nodes))
530

    
531
                        for u, v, key, data in new_edges:
532
                            G.add_edge(u, v, key, **data)
533
                            if self.candidate_attr in data:
534
                                del data[self.candidate_attr]
535
                                B.add_edge(u, v, key, **data)
536
                                uf.union(u, v)
537

    
538
                        nodes = iter(list(G.nodes()))
539
                        self.level += 1
540

    
541
        # (I3) Branch construction.
542
        # print(self.level)
543
        H = self.G_original.__class__()
544

    
545
        def is_root(G, u, edgekeys):
546
            """
547
            Returns True if `u` is a root node in G.
548

549
            Node `u` will be a root node if its in-degree, restricted to the
550
            specified edges, is equal to 0.
551

552
            """
553
            if u not in G:
554
                #print(G.nodes(), u)
555
                raise Exception('{0!r} not in G'.format(u))
556
            for v in G.pred[u]:
557
                for edgekey in G.pred[u][v]:
558
                    if edgekey in edgekeys:
559
                        return False, edgekey
560
            else:
561
                return True, None
562

    
563
        # Start with the branching edges in the last level.
564
        edges = set(self.branchings[self.level].edge_index)
565
        while self.level > 0:
566
            self.level -= 1
567

    
568
            # The current level is i, and we start counting from 0.
569

    
570
            # We need the node at level i+1 that results from merging a circuit
571
            # at level i. randomname_0 is the first merged node and this
572
            # happens at level 1. That is, randomname_0 is a node at level 1
573
            # that results from merging a circuit at level 0.
574
            merged_node = self.template.format(self.level)
575

    
576
            # The circuit at level i that was merged as a node the graph
577
            # at level i+1.
578
            circuit = self.circuits[self.level]
579
            # print
580
            #print(merged_node, self.level, circuit)
581
            #print("before", edges)
582
            # Note, we ask if it is a root in the full graph, not the branching.
583
            # The branching alone doesn't have all the edges.
584

    
585
            isroot, edgekey = is_root(self.graphs[self.level + 1],
586
                                      merged_node, edges)
587
            edges.update(circuit)
588
            if isroot:
589
                minedge = self.minedge_circuit[self.level]
590
                if minedge is None:
591
                    raise Exception
592

    
593
                # Remove the edge in the cycle with minimum weight.
594
                edges.remove(minedge)
595
            else:
596
                # We have identified an edge at next higher level that
597
                # transitions into the merged node at the level. That edge
598
                # transitions to some corresponding node at the current level.
599
                # We want to remove an edge from the cycle that transitions
600
                # into the corresponding node.
601
                #print("edgekey is: ", edgekey)
602
                #print("circuit is: ", circuit)
603
                # The branching at level i
604
                G = self.graphs[self.level]
605
                # print(G.edge_index)
606
                target = G.edge_index[edgekey][1]
607
                for edgekey in circuit:
608
                    u, v, data = G.edge_index[edgekey]
609
                    if v == target:
610
                        break
611
                else:
612
                    raise Exception("Couldn't find edge incoming to merged node.")
613
                #print("not a root. removing {0}".format(edgekey))
614

    
615
                edges.remove(edgekey)
616

    
617
        self.edges = edges
618

    
619
        H.add_nodes_from(self.G_original)
620
        for edgekey in edges:
621
            u, v, d = self.graphs[0].edge_index[edgekey]
622
            dd = {self.attr: self.trans(d[self.attr])}
623

    
624
            # Optionally, preserve the other edge attributes of the original
625
            # graph
626
            if preserve_attrs:
627
                for (key, value) in d.items():
628
                    if key not in [self.attr, self.candidate_attr]:
629
                        dd[key] = value
630

    
631
            # TODO: make this preserve the key.
632
            H.add_edge(u, v, **dd)
633

    
634
        return H
635

    
636

    
637
def maximum_branching(G, attr='weight', default=1, preserve_attrs=False):
638
    ed = Edmonds(G)
639
    B = ed.find_optimum(attr, default, kind='max', style='branching',
640
                        preserve_attrs=preserve_attrs)
641
    return B
642

    
643

    
644
def minimum_branching(G, attr='weight', default=1, preserve_attrs=False):
645
    ed = Edmonds(G)
646
    B = ed.find_optimum(attr, default, kind='min', style='branching',
647
                        preserve_attrs=preserve_attrs)
648
    return B
649

    
650

    
651
def maximum_spanning_arborescence(G, attr='weight', default=1,
652
                                  preserve_attrs=False):
653
    ed = Edmonds(G)
654
    B = ed.find_optimum(attr, default, kind='max', style='arborescence',
655
                        preserve_attrs=preserve_attrs)
656
    if not is_arborescence(B):
657
        msg = 'No maximum spanning arborescence in G.'
658
        raise nx.exception.NetworkXException(msg)
659
    return B
660

    
661

    
662
def minimum_spanning_arborescence(G, attr='weight', default=1,
663
                                  preserve_attrs=False):
664
    ed = Edmonds(G)
665
    B = ed.find_optimum(attr, default, kind='min', style='arborescence',
666
                        preserve_attrs=preserve_attrs)
667
    if not is_arborescence(B):
668
        msg = 'No minimum spanning arborescence in G.'
669
        raise nx.exception.NetworkXException(msg)
670
    return B
671

    
672

    
673
docstring_branching = """
674
Returns a {kind} {style} from G.
675

676
Parameters
677
----------
678
G : (multi)digraph-like
679
    The graph to be searched.
680
attr : str
681
    The edge attribute used to in determining optimality.
682
default : float
683
    The value of the edge attribute used if an edge does not have
684
    the attribute `attr`.
685
preserve_attrs : bool
686
    If True, preserve the other attributes of the original graph (that are not
687
    passed to `attr`)
688

689
Returns
690
-------
691
B : (multi)digraph-like
692
    A {kind} {style}.
693
"""
694

    
695
docstring_arborescence = docstring_branching + """
696
Raises
697
------
698
NetworkXException
699
    If the graph does not contain a {kind} {style}.
700

701
"""
702

    
703
maximum_branching.__doc__ = \
704
    docstring_branching.format(kind='maximum', style='branching')
705

    
706
minimum_branching.__doc__ = \
707
    docstring_branching.format(kind='minimum', style='branching')
708

    
709
maximum_spanning_arborescence.__doc__ = \
710
    docstring_arborescence.format(kind='maximum', style='spanning arborescence')
711

    
712
minimum_spanning_arborescence.__doc__ = \
713
    docstring_arborescence.format(kind='minimum', style='spanning arborescence')