Statistics
| Branch: | Revision:

iof-tools / networkxMiCe / networkx-master / networkx / algorithms / clique.py @ 5cef0f13

History | View | Annotate | Download (18.1 KB)

1
#    Copyright (C) 2004-2019 by
2
#    Aric Hagberg <hagberg@lanl.gov>
3
#    Dan Schult <dschult@colgate.edu>
4
#    Pieter Swart <swart@lanl.gov>
5
#    All rights reserved.
6
#    BSD license.
7
"""Functions for finding and manipulating cliques.
8

9
Finding the largest clique in a graph is NP-complete problem, so most of
10
these algorithms have an exponential running time; for more information,
11
see the Wikipedia article on the clique problem [1]_.
12

13
.. [1] clique problem:: https://en.wikipedia.org/wiki/Clique_problem
14

15
"""
16
from collections import deque
17
from itertools import chain
18
from itertools import combinations
19
from itertools import islice
20
try:
21
    from itertools import ifilter as filter
22
except ImportError:
23
    pass
24
import networkx as nx
25
from networkx.utils import not_implemented_for
26
__author__ = """Dan Schult (dschult@colgate.edu)"""
27
__all__ = ['find_cliques', 'find_cliques_recursive', 'make_max_clique_graph',
28
           'make_clique_bipartite', 'graph_clique_number',
29
           'graph_number_of_cliques', 'node_clique_number',
30
           'number_of_cliques', 'cliques_containing_node',
31
           'enumerate_all_cliques']
32

    
33

    
34
@not_implemented_for('directed')
35
def enumerate_all_cliques(G):
36
    """Returns all cliques in an undirected graph.
37

38
    This function returns an iterator over cliques, each of which is a
39
    list of nodes. The iteration is ordered by cardinality of the
40
    cliques: first all cliques of size one, then all cliques of size
41
    two, etc.
42

43
    Parameters
44
    ----------
45
    G : NetworkX graph
46
        An undirected graph.
47

48
    Returns
49
    -------
50
    iterator
51
        An iterator over cliques, each of which is a list of nodes in
52
        `G`. The cliques are ordered according to size.
53

54
    Notes
55
    -----
56
    To obtain a list of all cliques, use
57
    `list(enumerate_all_cliques(G))`. However, be aware that in the
58
    worst-case, the length of this list can be exponential in the number
59
    of nodes in the graph (for example, when the graph is the complete
60
    graph). This function avoids storing all cliques in memory by only
61
    keeping current candidate node lists in memory during its search.
62

63
    The implementation is adapted from the algorithm by Zhang, et
64
    al. (2005) [1]_ to output all cliques discovered.
65

66
    This algorithm ignores self-loops and parallel edges, since cliques
67
    are not conventionally defined with such edges.
68

69
    References
70
    ----------
71
    .. [1] Yun Zhang, Abu-Khzam, F.N., Baldwin, N.E., Chesler, E.J.,
72
           Langston, M.A., Samatova, N.F.,
73
           "Genome-Scale Computational Approaches to Memory-Intensive
74
           Applications in Systems Biology".
75
           *Supercomputing*, 2005. Proceedings of the ACM/IEEE SC 2005
76
           Conference, pp. 12, 12--18 Nov. 2005.
77
           <https://doi.org/10.1109/SC.2005.29>.
78

79
    """
80
    index = {}
81
    nbrs = {}
82
    for u in G:
83
        index[u] = len(index)
84
        # Neighbors of u that appear after u in the iteration order of G.
85
        nbrs[u] = {v for v in G[u] if v not in index}
86

    
87
    queue = deque(([u], sorted(nbrs[u], key=index.__getitem__)) for u in G)
88
    # Loop invariants:
89
    # 1. len(base) is nondecreasing.
90
    # 2. (base + cnbrs) is sorted with respect to the iteration order of G.
91
    # 3. cnbrs is a set of common neighbors of nodes in base.
92
    while queue:
93
        base, cnbrs = map(list, queue.popleft())
94
        yield base
95
        for i, u in enumerate(cnbrs):
96
            # Use generators to reduce memory consumption.
97
            queue.append((chain(base, [u]),
98
                          filter(nbrs[u].__contains__,
99
                                 islice(cnbrs, i + 1, None))))
100

    
101

    
102
@not_implemented_for('directed')
103
def find_cliques(G):
104
    """Returns all maximal cliques in an undirected graph.
105

106
    For each node *v*, a *maximal clique for v* is a largest complete
107
    subgraph containing *v*. The largest maximal clique is sometimes
108
    called the *maximum clique*.
109

110
    This function returns an iterator over cliques, each of which is a
111
    list of nodes. It is an iterative implementation, so should not
112
    suffer from recursion depth issues.
113

114
    Parameters
115
    ----------
116
    G : NetworkX graph
117
        An undirected graph.
118

119
    Returns
120
    -------
121
    iterator
122
        An iterator over maximal cliques, each of which is a list of
123
        nodes in `G`. The order of cliques is arbitrary.
124

125
    See Also
126
    --------
127
    find_cliques_recursive
128
        A recursive version of the same algorithm.
129

130
    Notes
131
    -----
132
    To obtain a list of all maximal cliques, use
133
    `list(find_cliques(G))`. However, be aware that in the worst-case,
134
    the length of this list can be exponential in the number of nodes in
135
    the graph (for example, when the graph is the complete graph). This
136
    function avoids storing all cliques in memory by only keeping
137
    current candidate node lists in memory during its search.
138

139
    This implementation is based on the algorithm published by Bron and
140
    Kerbosch (1973) [1]_, as adapted by Tomita, Tanaka and Takahashi
141
    (2006) [2]_ and discussed in Cazals and Karande (2008) [3]_. It
142
    essentially unrolls the recursion used in the references to avoid
143
    issues of recursion stack depth (for a recursive implementation, see
144
    :func:`find_cliques_recursive`).
145

146
    This algorithm ignores self-loops and parallel edges, since cliques
147
    are not conventionally defined with such edges.
148

149
    References
150
    ----------
151
    .. [1] Bron, C. and Kerbosch, J.
152
       "Algorithm 457: finding all cliques of an undirected graph".
153
       *Communications of the ACM* 16, 9 (Sep. 1973), 575--577.
154
       <http://portal.acm.org/citation.cfm?doid=362342.362367>
155

156
    .. [2] Etsuji Tomita, Akira Tanaka, Haruhisa Takahashi,
157
       "The worst-case time complexity for generating all maximal
158
       cliques and computational experiments",
159
       *Theoretical Computer Science*, Volume 363, Issue 1,
160
       Computing and Combinatorics,
161
       10th Annual International Conference on
162
       Computing and Combinatorics (COCOON 2004), 25 October 2006, Pages 28--42
163
       <https://doi.org/10.1016/j.tcs.2006.06.015>
164

165
    .. [3] F. Cazals, C. Karande,
166
       "A note on the problem of reporting maximal cliques",
167
       *Theoretical Computer Science*,
168
       Volume 407, Issues 1--3, 6 November 2008, Pages 564--568,
169
       <https://doi.org/10.1016/j.tcs.2008.05.010>
170

171
    """
172
    if len(G) == 0:
173
        return
174

    
175
    adj = {u: {v for v in G[u] if v != u} for u in G}
176
    Q = [None]
177

    
178
    subg = set(G)
179
    cand = set(G)
180
    u = max(subg, key=lambda u: len(cand & adj[u]))
181
    ext_u = cand - adj[u]
182
    stack = []
183

    
184
    try:
185
        while True:
186
            if ext_u:
187
                q = ext_u.pop()
188
                cand.remove(q)
189
                Q[-1] = q
190
                adj_q = adj[q]
191
                subg_q = subg & adj_q
192
                if not subg_q:
193
                    yield Q[:]
194
                else:
195
                    cand_q = cand & adj_q
196
                    if cand_q:
197
                        stack.append((subg, cand, ext_u))
198
                        Q.append(None)
199
                        subg = subg_q
200
                        cand = cand_q
201
                        u = max(subg, key=lambda u: len(cand & adj[u]))
202
                        ext_u = cand - adj[u]
203
            else:
204
                Q.pop()
205
                subg, cand, ext_u = stack.pop()
206
    except IndexError:
207
        pass
208

    
209

    
210
# TODO Should this also be not implemented for directed graphs?
211
def find_cliques_recursive(G):
212
    """Returns all maximal cliques in a graph.
213

214
    For each node *v*, a *maximal clique for v* is a largest complete
215
    subgraph containing *v*. The largest maximal clique is sometimes
216
    called the *maximum clique*.
217

218
    This function returns an iterator over cliques, each of which is a
219
    list of nodes. It is a recursive implementation, so may suffer from
220
    recursion depth issues.
221

222
    Parameters
223
    ----------
224
    G : NetworkX graph
225

226
    Returns
227
    -------
228
    iterator
229
        An iterator over maximal cliques, each of which is a list of
230
        nodes in `G`. The order of cliques is arbitrary.
231

232
    See Also
233
    --------
234
    find_cliques
235
        An iterative version of the same algorithm.
236

237
    Notes
238
    -----
239
    To obtain a list of all maximal cliques, use
240
    `list(find_cliques_recursive(G))`. However, be aware that in the
241
    worst-case, the length of this list can be exponential in the number
242
    of nodes in the graph (for example, when the graph is the complete
243
    graph). This function avoids storing all cliques in memory by only
244
    keeping current candidate node lists in memory during its search.
245

246
    This implementation is based on the algorithm published by Bron and
247
    Kerbosch (1973) [1]_, as adapted by Tomita, Tanaka and Takahashi
248
    (2006) [2]_ and discussed in Cazals and Karande (2008) [3]_. For a
249
    non-recursive implementation, see :func:`find_cliques`.
250

251
    This algorithm ignores self-loops and parallel edges, since cliques
252
    are not conventionally defined with such edges.
253

254
    References
255
    ----------
256
    .. [1] Bron, C. and Kerbosch, J.
257
       "Algorithm 457: finding all cliques of an undirected graph".
258
       *Communications of the ACM* 16, 9 (Sep. 1973), 575--577.
259
       <http://portal.acm.org/citation.cfm?doid=362342.362367>
260

261
    .. [2] Etsuji Tomita, Akira Tanaka, Haruhisa Takahashi,
262
       "The worst-case time complexity for generating all maximal
263
       cliques and computational experiments",
264
       *Theoretical Computer Science*, Volume 363, Issue 1,
265
       Computing and Combinatorics,
266
       10th Annual International Conference on
267
       Computing and Combinatorics (COCOON 2004), 25 October 2006, Pages 28--42
268
       <https://doi.org/10.1016/j.tcs.2006.06.015>
269

270
    .. [3] F. Cazals, C. Karande,
271
       "A note on the problem of reporting maximal cliques",
272
       *Theoretical Computer Science*,
273
       Volume 407, Issues 1--3, 6 November 2008, Pages 564--568,
274
       <https://doi.org/10.1016/j.tcs.2008.05.010>
275

276
    """
277
    if len(G) == 0:
278
        return iter([])
279

    
280
    adj = {u: {v for v in G[u] if v != u} for u in G}
281
    Q = []
282

    
283
    def expand(subg, cand):
284
        u = max(subg, key=lambda u: len(cand & adj[u]))
285
        for q in cand - adj[u]:
286
            cand.remove(q)
287
            Q.append(q)
288
            adj_q = adj[q]
289
            subg_q = subg & adj_q
290
            if not subg_q:
291
                yield Q[:]
292
            else:
293
                cand_q = cand & adj_q
294
                if cand_q:
295
                    for clique in expand(subg_q, cand_q):
296
                        yield clique
297
            Q.pop()
298

    
299
    return expand(set(G), set(G))
300

    
301

    
302
def make_max_clique_graph(G, create_using=None):
303
    """Returns the maximal clique graph of the given graph.
304

305
    The nodes of the maximal clique graph of `G` are the cliques of
306
    `G` and an edge joins two cliques if the cliques are not disjoint.
307

308
    Parameters
309
    ----------
310
    G : NetworkX graph
311

312
    create_using : NetworkX graph constructor, optional (default=nx.Graph)
313
       Graph type to create. If graph instance, then cleared before populated.
314

315
    Returns
316
    -------
317
    NetworkX graph
318
        A graph whose nodes are the cliques of `G` and whose edges
319
        join two cliques if they are not disjoint.
320

321
    Notes
322
    -----
323
    This function behaves like the following code::
324

325
        import networkx as nx
326
        G = nx.make_clique_bipartite(G)
327
        cliques = [v for v in G.nodes() if G.nodes[v]['bipartite'] == 0]
328
        G = nx.bipartite.project(G, cliques)
329
        G = nx.relabel_nodes(G, {-v: v - 1 for v in G})
330

331
    It should be faster, though, since it skips all the intermediate
332
    steps.
333

334
    """
335
    if create_using is None:
336
        B = G.__class__()
337
    else:
338
        B = nx.empty_graph(0, create_using)
339
    cliques = list(enumerate(set(c) for c in find_cliques(G)))
340
    # Add a numbered node for each clique.
341
    B.add_nodes_from(i for i, c in cliques)
342
    # Join cliques by an edge if they share a node.
343
    clique_pairs = combinations(cliques, 2)
344
    B.add_edges_from((i, j) for (i, c1), (j, c2) in clique_pairs if c1 & c2)
345
    return B
346

    
347

    
348
def make_clique_bipartite(G, fpos=None, create_using=None, name=None):
349
    """Returns the bipartite clique graph corresponding to `G`.
350

351
    In the returned bipartite graph, the "bottom" nodes are the nodes of
352
    `G` and the "top" nodes represent the maximal cliques of `G`.
353
    There is an edge from node *v* to clique *C* in the returned graph
354
    if and only if *v* is an element of *C*.
355

356
    Parameters
357
    ----------
358
    G : NetworkX graph
359
        An undirected graph.
360

361
    fpos : bool
362
        If True or not None, the returned graph will have an
363
        additional attribute, `pos`, a dictionary mapping node to
364
        position in the Euclidean plane.
365

366
    create_using : NetworkX graph constructor, optional (default=nx.Graph)
367
       Graph type to create. If graph instance, then cleared before populated.
368

369
    Returns
370
    -------
371
    NetworkX graph
372
        A bipartite graph whose "bottom" set is the nodes of the graph
373
        `G`, whose "top" set is the cliques of `G`, and whose edges
374
        join nodes of `G` to the cliques that contain them.
375

376
        The nodes of the graph `G` have the node attribute
377
        'bipartite' set to 1 and the nodes representing cliques
378
        have the node attribute 'bipartite' set to 0, as is the
379
        convention for bipartite graphs in NetworkX.
380

381
    """
382
    B = nx.empty_graph(0, create_using)
383
    B.clear()
384
    # The "bottom" nodes in the bipartite graph are the nodes of the
385
    # original graph, G.
386
    B.add_nodes_from(G, bipartite=1)
387
    for i, cl in enumerate(find_cliques(G)):
388
        # The "top" nodes in the bipartite graph are the cliques. These
389
        # nodes get negative numbers as labels.
390
        name = -i - 1
391
        B.add_node(name, bipartite=0)
392
        B.add_edges_from((v, name) for v in cl)
393
    return B
394

    
395

    
396
def graph_clique_number(G, cliques=None):
397
    """Returns the clique number of the graph.
398

399
    The *clique number* of a graph is the size of the largest clique in
400
    the graph.
401

402
    Parameters
403
    ----------
404
    G : NetworkX graph
405
        An undirected graph.
406

407
    cliques : list
408
        A list of cliques, each of which is itself a list of nodes. If
409
        not specified, the list of all cliques will be computed, as by
410
        :func:`find_cliques`.
411

412
    Returns
413
    -------
414
    int
415
        The size of the largest clique in `G`.
416

417
    Notes
418
    -----
419
    You should provide `cliques` if you have already computed the list
420
    of maximal cliques, in order to avoid an exponential time search for
421
    maximal cliques.
422

423
    """
424
    if cliques is None:
425
        cliques = find_cliques(G)
426
    if len(G.nodes) < 1:
427
        return 0
428
    return max([len(c) for c in cliques] or [1])
429

    
430

    
431
def graph_number_of_cliques(G, cliques=None):
432
    """Returns the number of maximal cliques in the graph.
433

434
    Parameters
435
    ----------
436
    G : NetworkX graph
437
        An undirected graph.
438

439
    cliques : list
440
        A list of cliques, each of which is itself a list of nodes. If
441
        not specified, the list of all cliques will be computed, as by
442
        :func:`find_cliques`.
443

444
    Returns
445
    -------
446
    int
447
        The number of maximal cliques in `G`.
448

449
    Notes
450
    -----
451
    You should provide `cliques` if you have already computed the list
452
    of maximal cliques, in order to avoid an exponential time search for
453
    maximal cliques.
454

455
    """
456
    if cliques is None:
457
        cliques = list(find_cliques(G))
458
    return len(cliques)
459

    
460

    
461
def node_clique_number(G, nodes=None, cliques=None):
462
    """ Returns the size of the largest maximal clique containing
463
    each given node.
464

465
    Returns a single or list depending on input nodes.
466
    Optional list of cliques can be input if already computed.
467
    """
468
    if cliques is None:
469
        if nodes is not None:
470
            # Use ego_graph to decrease size of graph
471
            if isinstance(nodes, list):
472
                d = {}
473
                for n in nodes:
474
                    H = nx.ego_graph(G, n)
475
                    d[n] = max((len(c) for c in find_cliques(H)))
476
            else:
477
                H = nx.ego_graph(G, nodes)
478
                d = max((len(c) for c in find_cliques(H)))
479
            return d
480
        # nodes is None--find all cliques
481
        cliques = list(find_cliques(G))
482

    
483
    if nodes is None:
484
        nodes = list(G.nodes())   # none, get entire graph
485

    
486
    if not isinstance(nodes, list):   # check for a list
487
        v = nodes
488
        # assume it is a single value
489
        d = max([len(c) for c in cliques if v in c])
490
    else:
491
        d = {}
492
        for v in nodes:
493
            d[v] = max([len(c) for c in cliques if v in c])
494
    return d
495

    
496
    # if nodes is None:                 # none, use entire graph
497
    #     nodes=G.nodes()
498
    # elif  not isinstance(nodes, list):    # check for a list
499
    #     nodes=[nodes]             # assume it is a single value
500

    
501
    # if cliques is None:
502
    #     cliques=list(find_cliques(G))
503
    # d={}
504
    # for v in nodes:
505
    #     d[v]=max([len(c) for c in cliques if v in c])
506

    
507
    # if nodes in G:
508
    #     return d[v] #return single value
509
    # return d
510

    
511

    
512
def number_of_cliques(G, nodes=None, cliques=None):
513
    """Returns the number of maximal cliques for each node.
514

515
    Returns a single or list depending on input nodes.
516
    Optional list of cliques can be input if already computed.
517
    """
518
    if cliques is None:
519
        cliques = list(find_cliques(G))
520

    
521
    if nodes is None:
522
        nodes = list(G.nodes())   # none, get entire graph
523

    
524
    if not isinstance(nodes, list):   # check for a list
525
        v = nodes
526
        # assume it is a single value
527
        numcliq = len([1 for c in cliques if v in c])
528
    else:
529
        numcliq = {}
530
        for v in nodes:
531
            numcliq[v] = len([1 for c in cliques if v in c])
532
    return numcliq
533

    
534

    
535
def cliques_containing_node(G, nodes=None, cliques=None):
536
    """Returns a list of cliques containing the given node.
537

538
    Returns a single list or list of lists depending on input nodes.
539
    Optional list of cliques can be input if already computed.
540
    """
541
    if cliques is None:
542
        cliques = list(find_cliques(G))
543

    
544
    if nodes is None:
545
        nodes = list(G.nodes())   # none, get entire graph
546

    
547
    if not isinstance(nodes, list):   # check for a list
548
        v = nodes
549
        # assume it is a single value
550
        vcliques = [c for c in cliques if v in c]
551
    else:
552
        vcliques = {}
553
        for v in nodes:
554
            vcliques[v] = [c for c in cliques if v in c]
555
    return vcliques