Statistics
| Branch: | Revision:

iof-tools / networkxMiCe / networkx-master / networkx / algorithms / bipartite / matching.py @ 5cef0f13

History | View | Annotate | Download (17.5 KB)

1
# matching.py - bipartite graph maximum matching algorithms
2
#
3
# Copyright 2015 Jeffrey Finkelstein <jeffrey.finkelstein@gmail.com>.
4
#
5
# This file is part of NetworkX.
6
#
7
# NetworkX is distributed under a BSD license; see LICENSE.txt for more
8
# information.
9
#
10
# This module uses material from the Wikipedia article Hopcroft--Karp algorithm
11
# <https://en.wikipedia.org/wiki/Hopcroft%E2%80%93Karp_algorithm>, accessed on
12
# January 3, 2015, which is released under the Creative Commons
13
# Attribution-Share-Alike License 3.0
14
# <http://creativecommons.org/licenses/by-sa/3.0/>. That article includes
15
# pseudocode, which has been translated into the corresponding Python code.
16
#
17
# Portions of this module use code from David Eppstein's Python Algorithms and
18
# Data Structures (PADS) library, which is dedicated to the public domain (for
19
# proof, see <http://www.ics.uci.edu/~eppstein/PADS/ABOUT-PADS.txt>).
20
"""Provides functions for computing a maximum cardinality matching in a
21
bipartite graph.
22

23
If you don't care about the particular implementation of the maximum matching
24
algorithm, simply use the :func:`maximum_matching`. If you do care, you can
25
import one of the named maximum matching algorithms directly.
26

27
For example, to find a maximum matching in the complete bipartite graph with
28
two vertices on the left and three vertices on the right:
29

30
>>> import networkx as nx
31
>>> G = nx.complete_bipartite_graph(2, 3)
32
>>> left, right = nx.bipartite.sets(G)
33
>>> list(left)
34
[0, 1]
35
>>> list(right)
36
[2, 3, 4]
37
>>> nx.bipartite.maximum_matching(G)
38
{0: 2, 1: 3, 2: 0, 3: 1}
39

40
The dictionary returned by :func:`maximum_matching` includes a mapping for
41
vertices in both the left and right vertex sets.
42

43
"""
44
import collections
45
import itertools
46

    
47
from networkx.algorithms.bipartite import sets as bipartite_sets
48
import networkx as nx
49

    
50
__all__ = ['maximum_matching', 'hopcroft_karp_matching', 'eppstein_matching',
51
           'to_vertex_cover']
52

    
53
INFINITY = float('inf')
54

    
55

    
56
def hopcroft_karp_matching(G, top_nodes=None):
57
    """Returns the maximum cardinality matching of the bipartite graph `G`.
58

59
    Parameters
60
    ----------
61
    G : NetworkX graph
62

63
      Undirected bipartite graph
64

65
    top_nodes : container
66

67
      Container with all nodes in one bipartite node set. If not supplied
68
      it will be computed. But if more than one solution exists an exception
69
      will be raised.
70

71
    Returns
72
    -------
73
    matches : dictionary
74

75
      The matching is returned as a dictionary, `matches`, such that
76
      ``matches[v] == w`` if node `v` is matched to node `w`. Unmatched
77
      nodes do not occur as a key in mate.
78

79
    Raises
80
    ------
81
    AmbiguousSolution : Exception
82

83
      Raised if the input bipartite graph is disconnected and no container
84
      with all nodes in one bipartite set is provided. When determining
85
      the nodes in each bipartite set more than one valid solution is
86
      possible if the input graph is disconnected.
87

88
    Notes
89
    -----
90

91
    This function is implemented with the `Hopcroft--Karp matching algorithm
92
    <https://en.wikipedia.org/wiki/Hopcroft%E2%80%93Karp_algorithm>`_ for
93
    bipartite graphs.
94

95
    See :mod:`bipartite documentation <networkx.algorithms.bipartite>`
96
    for further details on how bipartite graphs are handled in NetworkX.
97

98
    See Also
99
    --------
100

101
    eppstein_matching
102

103
    References
104
    ----------
105
    .. [1] John E. Hopcroft and Richard M. Karp. "An n^{5 / 2} Algorithm for
106
       Maximum Matchings in Bipartite Graphs" In: **SIAM Journal of Computing**
107
       2.4 (1973), pp. 225--231. <https://doi.org/10.1137/0202019>.
108

109
    """
110
    # First we define some auxiliary search functions.
111
    #
112
    # If you are a human reading these auxiliary search functions, the "global"
113
    # variables `leftmatches`, `rightmatches`, `distances`, etc. are defined
114
    # below the functions, so that they are initialized close to the initial
115
    # invocation of the search functions.
116
    def breadth_first_search():
117
        for v in left:
118
            if leftmatches[v] is None:
119
                distances[v] = 0
120
                queue.append(v)
121
            else:
122
                distances[v] = INFINITY
123
        distances[None] = INFINITY
124
        while queue:
125
            v = queue.popleft()
126
            if distances[v] < distances[None]:
127
                for u in G[v]:
128
                    if distances[rightmatches[u]] is INFINITY:
129
                        distances[rightmatches[u]] = distances[v] + 1
130
                        queue.append(rightmatches[u])
131
        return distances[None] is not INFINITY
132

    
133
    def depth_first_search(v):
134
        if v is not None:
135
            for u in G[v]:
136
                if distances[rightmatches[u]] == distances[v] + 1:
137
                    if depth_first_search(rightmatches[u]):
138
                        rightmatches[u] = v
139
                        leftmatches[v] = u
140
                        return True
141
            distances[v] = INFINITY
142
            return False
143
        return True
144

    
145
    # Initialize the "global" variables that maintain state during the search.
146
    left, right = bipartite_sets(G, top_nodes)
147
    leftmatches = {v: None for v in left}
148
    rightmatches = {v: None for v in right}
149
    distances = {}
150
    queue = collections.deque()
151

    
152
    # Implementation note: this counter is incremented as pairs are matched but
153
    # it is currently not used elsewhere in the computation.
154
    num_matched_pairs = 0
155
    while breadth_first_search():
156
        for v in left:
157
            if leftmatches[v] is None:
158
                if depth_first_search(v):
159
                    num_matched_pairs += 1
160

    
161
    # Strip the entries matched to `None`.
162
    leftmatches = {k: v for k, v in leftmatches.items() if v is not None}
163
    rightmatches = {k: v for k, v in rightmatches.items() if v is not None}
164

    
165
    # At this point, the left matches and the right matches are inverses of one
166
    # another. In other words,
167
    #
168
    #     leftmatches == {v, k for k, v in rightmatches.items()}
169
    #
170
    # Finally, we combine both the left matches and right matches.
171
    return dict(itertools.chain(leftmatches.items(), rightmatches.items()))
172

    
173

    
174
def eppstein_matching(G, top_nodes=None):
175
    """Returns the maximum cardinality matching of the bipartite graph `G`.
176

177
    Parameters
178
    ----------
179
    G : NetworkX graph
180

181
      Undirected bipartite graph
182

183
    top_nodes : container
184

185
      Container with all nodes in one bipartite node set. If not supplied
186
      it will be computed. But if more than one solution exists an exception
187
      will be raised.
188

189
    Returns
190
    -------
191
    matches : dictionary
192

193
      The matching is returned as a dictionary, `matching`, such that
194
      ``matching[v] == w`` if node `v` is matched to node `w`. Unmatched
195
      nodes do not occur as a key in mate.
196

197
    Raises
198
    ------
199
    AmbiguousSolution : Exception
200

201
      Raised if the input bipartite graph is disconnected and no container
202
      with all nodes in one bipartite set is provided. When determining
203
      the nodes in each bipartite set more than one valid solution is
204
      possible if the input graph is disconnected.
205

206
    Notes
207
    -----
208

209
    This function is implemented with David Eppstein's version of the algorithm
210
    Hopcroft--Karp algorithm (see :func:`hopcroft_karp_matching`), which
211
    originally appeared in the `Python Algorithms and Data Structures library
212
    (PADS) <http://www.ics.uci.edu/~eppstein/PADS/ABOUT-PADS.txt>`_.
213

214
    See :mod:`bipartite documentation <networkx.algorithms.bipartite>`
215
    for further details on how bipartite graphs are handled in NetworkX.
216

217
    See Also
218
    --------
219

220
    hopcroft_karp_matching
221

222
    """
223
    # Due to its original implementation, a directed graph is needed
224
    # so that the two sets of bipartite nodes can be distinguished
225
    left, right = bipartite_sets(G, top_nodes)
226
    G = nx.DiGraph(G.edges(left))
227
    # initialize greedy matching (redundant, but faster than full search)
228
    matching = {}
229
    for u in G:
230
        for v in G[u]:
231
            if v not in matching:
232
                matching[v] = u
233
                break
234
    while True:
235
        # structure residual graph into layers
236
        # pred[u] gives the neighbor in the previous layer for u in U
237
        # preds[v] gives a list of neighbors in the previous layer for v in V
238
        # unmatched gives a list of unmatched vertices in final layer of V,
239
        # and is also used as a flag value for pred[u] when u is in the first
240
        # layer
241
        preds = {}
242
        unmatched = []
243
        pred = {u: unmatched for u in G}
244
        for v in matching:
245
            del pred[matching[v]]
246
        layer = list(pred)
247

    
248
        # repeatedly extend layering structure by another pair of layers
249
        while layer and not unmatched:
250
            newLayer = {}
251
            for u in layer:
252
                for v in G[u]:
253
                    if v not in preds:
254
                        newLayer.setdefault(v, []).append(u)
255
            layer = []
256
            for v in newLayer:
257
                preds[v] = newLayer[v]
258
                if v in matching:
259
                    layer.append(matching[v])
260
                    pred[matching[v]] = v
261
                else:
262
                    unmatched.append(v)
263

    
264
        # did we finish layering without finding any alternating paths?
265
        if not unmatched:
266
            unlayered = {}
267
            for u in G:
268
                # TODO Why is extra inner loop necessary?
269
                for v in G[u]:
270
                    if v not in preds:
271
                        unlayered[v] = None
272
            # TODO Originally, this function returned a three-tuple:
273
            #
274
            #     return (matching, list(pred), list(unlayered))
275
            #
276
            # For some reason, the documentation for this function
277
            # indicated that the second and third elements of the returned
278
            # three-tuple would be the vertices in the left and right vertex
279
            # sets, respectively, that are also in the maximum independent set.
280
            # However, what I think the author meant was that the second
281
            # element is the list of vertices that were unmatched and the third
282
            # element was the list of vertices that were matched. Since that
283
            # seems to be the case, they don't really need to be returned,
284
            # since that information can be inferred from the matching
285
            # dictionary.
286

    
287
            # All the matched nodes must be a key in the dictionary
288
            for key in matching.copy():
289
                matching[matching[key]] = key
290
            return matching
291

    
292
        # recursively search backward through layers to find alternating paths
293
        # recursion returns true if found path, false otherwise
294
        def recurse(v):
295
            if v in preds:
296
                L = preds.pop(v)
297
                for u in L:
298
                    if u in pred:
299
                        pu = pred.pop(u)
300
                        if pu is unmatched or recurse(pu):
301
                            matching[v] = u
302
                            return True
303
            return False
304

    
305
        for v in unmatched:
306
            recurse(v)
307

    
308

    
309
def _is_connected_by_alternating_path(G, v, matched_edges, unmatched_edges,
310
                                      targets):
311
    """Returns True if and only if the vertex `v` is connected to one of
312
    the target vertices by an alternating path in `G`.
313

314
    An *alternating path* is a path in which every other edge is in the
315
    specified maximum matching (and the remaining edges in the path are not in
316
    the matching). An alternating path may have matched edges in the even
317
    positions or in the odd positions, as long as the edges alternate between
318
    'matched' and 'unmatched'.
319

320
    `G` is an undirected bipartite NetworkX graph.
321

322
    `v` is a vertex in `G`.
323

324
    `matched_edges` is a set of edges present in a maximum matching in `G`.
325

326
    `unmatched_edges` is a set of edges not present in a maximum
327
    matching in `G`.
328

329
    `targets` is a set of vertices.
330

331
    """
332
    def _alternating_dfs(u, along_matched=True):
333
        """Returns True if and only if `u` is connected to one of the
334
        targets by an alternating path.
335

336
        `u` is a vertex in the graph `G`.
337

338
        If `along_matched` is True, this step of the depth-first search
339
        will continue only through edges in the given matching. Otherwise, it
340
        will continue only through edges *not* in the given matching.
341

342
        """
343
        if along_matched:
344
            edges = itertools.cycle([matched_edges, unmatched_edges])
345
        else:
346
            edges = itertools.cycle([unmatched_edges, matched_edges])
347
        visited = set()
348
        stack = [(u, iter(G[u]), next(edges))]
349
        while stack:
350
            parent, children, valid_edges = stack[-1]
351
            try:
352
                child = next(children)
353
                if child not in visited:
354
                    if ((parent, child) in valid_edges
355
                            or (child, parent) in valid_edges):
356
                        if child in targets:
357
                            return True
358
                        visited.add(child)
359
                        stack.append((child, iter(G[child]), next(edges)))
360
            except StopIteration:
361
                stack.pop()
362
        return False
363

    
364
    # Check for alternating paths starting with edges in the matching, then
365
    # check for alternating paths starting with edges not in the
366
    # matching.
367
    return (_alternating_dfs(v, along_matched=True) or
368
            _alternating_dfs(v, along_matched=False))
369

    
370

    
371
def _connected_by_alternating_paths(G, matching, targets):
372
    """Returns the set of vertices that are connected to one of the target
373
    vertices by an alternating path in `G` or are themselves a target.
374

375
    An *alternating path* is a path in which every other edge is in the
376
    specified maximum matching (and the remaining edges in the path are not in
377
    the matching). An alternating path may have matched edges in the even
378
    positions or in the odd positions, as long as the edges alternate between
379
    'matched' and 'unmatched'.
380

381
    `G` is an undirected bipartite NetworkX graph.
382

383
    `matching` is a dictionary representing a maximum matching in `G`, as
384
    returned by, for example, :func:`maximum_matching`.
385

386
    `targets` is a set of vertices.
387

388
    """
389
    # Get the set of matched edges and the set of unmatched edges. Only include
390
    # one version of each undirected edge (for example, include edge (1, 2) but
391
    # not edge (2, 1)). Using frozensets as an intermediary step we do not
392
    # require nodes to be orderable.
393
    edge_sets = {frozenset((u, v)) for u, v in matching.items()}
394
    matched_edges = {tuple(edge) for edge in edge_sets}
395
    unmatched_edges = {(u, v) for (u, v) in G.edges()
396
                       if frozenset((u, v)) not in edge_sets}
397

    
398
    return {v for v in G if v in targets or
399
            _is_connected_by_alternating_path(G, v, matched_edges,
400
                                              unmatched_edges, targets)}
401

    
402

    
403
def to_vertex_cover(G, matching, top_nodes=None):
404
    """Returns the minimum vertex cover corresponding to the given maximum
405
    matching of the bipartite graph `G`.
406

407
    Parameters
408
    ----------
409

410
    G : NetworkX graph
411

412
      Undirected bipartite graph
413

414
    matching : dictionary
415

416
      A dictionary whose keys are vertices in `G` and whose values are the
417
      distinct neighbors comprising the maximum matching for `G`, as returned
418
      by, for example, :func:`maximum_matching`. The dictionary *must*
419
      represent the maximum matching.
420

421
    top_nodes : container
422

423
      Container with all nodes in one bipartite node set. If not supplied
424
      it will be computed. But if more than one solution exists an exception
425
      will be raised.
426

427
    Returns
428
    -------
429

430
    vertex_cover : :class:`set`
431

432
      The minimum vertex cover in `G`.
433

434
    Raises
435
    ------
436
    AmbiguousSolution : Exception
437

438
      Raised if the input bipartite graph is disconnected and no container
439
      with all nodes in one bipartite set is provided. When determining
440
      the nodes in each bipartite set more than one valid solution is
441
      possible if the input graph is disconnected.
442

443
    Notes
444
    -----
445

446
    This function is implemented using the procedure guaranteed by `Konig's
447
    theorem
448
    <https://en.wikipedia.org/wiki/K%C3%B6nig%27s_theorem_%28graph_theory%29>`_,
449
    which proves an equivalence between a maximum matching and a minimum vertex
450
    cover in bipartite graphs.
451

452
    Since a minimum vertex cover is the complement of a maximum independent set
453
    for any graph, one can compute the maximum independent set of a bipartite
454
    graph this way:
455

456
    >>> import networkx as nx
457
    >>> G = nx.complete_bipartite_graph(2, 3)
458
    >>> matching = nx.bipartite.maximum_matching(G)
459
    >>> vertex_cover = nx.bipartite.to_vertex_cover(G, matching)
460
    >>> independent_set = set(G) - vertex_cover
461
    >>> print(list(independent_set))
462
    [2, 3, 4]
463

464
    See :mod:`bipartite documentation <networkx.algorithms.bipartite>`
465
    for further details on how bipartite graphs are handled in NetworkX.
466

467
    """
468
    # This is a Python implementation of the algorithm described at
469
    # <https://en.wikipedia.org/wiki/K%C3%B6nig%27s_theorem_%28graph_theory%29#Proof>.
470
    L, R = bipartite_sets(G, top_nodes)
471
    # Let U be the set of unmatched vertices in the left vertex set.
472
    unmatched_vertices = set(G) - set(matching)
473
    U = unmatched_vertices & L
474
    # Let Z be the set of vertices that are either in U or are connected to U
475
    # by alternating paths.
476
    Z = _connected_by_alternating_paths(G, matching, U)
477
    # At this point, every edge either has a right endpoint in Z or a left
478
    # endpoint not in Z. This gives us the vertex cover.
479
    return (L - Z) | (R & Z)
480

    
481

    
482
#: Returns the maximum cardinality matching in the given bipartite graph.
483
#:
484
#: This function is simply an alias for :func:`hopcroft_karp_matching`.
485
maximum_matching = hopcroft_karp_matching