Statistics
| Branch: | Revision:

iof-tools / networkxMiCe / networkx-master / networkx / algorithms / cluster.py @ 5cef0f13

History | View | Annotate | Download (17.8 KB)

1
# -*- coding: utf-8 -*-
2
#
3
#    Copyright (C) 2004-2019 by
4
#    Aric Hagberg <hagberg@lanl.gov>
5
#    Dan Schult <dschult@colgate.edu>
6
#    Pieter Swart <swart@lanl.gov>
7
#    All rights reserved.
8
#    BSD license.
9
"""Algorithms to characterize the number of triangles in a graph."""
10
from __future__ import division
11

    
12
from itertools import chain
13
from itertools import combinations
14
from collections import Counter
15

    
16
import networkx as nx
17
from networkx.utils import not_implemented_for
18

    
19
__author__ = """\n""".join(['Aric Hagberg <aric.hagberg@gmail.com>',
20
                            'Dan Schult (dschult@colgate.edu)',
21
                            'Pieter Swart (swart@lanl.gov)',
22
                            'Jordi Torrents <jtorrents@milnou.net>'])
23

    
24
__all__ = ['triangles', 'average_clustering', 'clustering', 'transitivity',
25
           'square_clustering', 'generalized_degree']
26

    
27

    
28
@not_implemented_for('directed')
29
def triangles(G, nodes=None):
30
    """Compute the number of triangles.
31

32
    Finds the number of triangles that include a node as one vertex.
33

34
    Parameters
35
    ----------
36
    G : graph
37
       A networkx graph
38
    nodes : container of nodes, optional (default= all nodes in G)
39
       Compute triangles for nodes in this container.
40

41
    Returns
42
    -------
43
    out : dictionary
44
       Number of triangles keyed by node label.
45

46
    Examples
47
    --------
48
    >>> G=nx.complete_graph(5)
49
    >>> print(nx.triangles(G,0))
50
    6
51
    >>> print(nx.triangles(G))
52
    {0: 6, 1: 6, 2: 6, 3: 6, 4: 6}
53
    >>> print(list(nx.triangles(G,(0,1)).values()))
54
    [6, 6]
55

56
    Notes
57
    -----
58
    When computing triangles for the entire graph each triangle is counted
59
    three times, once at each node.  Self loops are ignored.
60

61
    """
62
    # If `nodes` represents a single node in the graph, return only its number
63
    # of triangles.
64
    if nodes in G:
65
        return next(_triangles_and_degree_iter(G, nodes))[2] // 2
66
    # Otherwise, `nodes` represents an iterable of nodes, so return a
67
    # dictionary mapping node to number of triangles.
68
    return {v: t // 2 for v, d, t, _ in _triangles_and_degree_iter(G, nodes)}
69

    
70

    
71
@not_implemented_for('multigraph')
72
def _triangles_and_degree_iter(G, nodes=None):
73
    """ Return an iterator of (node, degree, triangles, generalized degree).
74

75
    This double counts triangles so you may want to divide by 2.
76
    See degree(), triangles() and generalized_degree() for definitions
77
    and details.
78

79
    """
80
    if nodes is None:
81
        nodes_nbrs = G.adj.items()
82
    else:
83
        nodes_nbrs = ((n, G[n]) for n in G.nbunch_iter(nodes))
84

    
85
    for v, v_nbrs in nodes_nbrs:
86
        vs = set(v_nbrs) - {v}
87
        gen_degree = Counter(len(vs & (set(G[w]) - {w})) for w in vs)
88
        ntriangles = sum(k * val for k, val in gen_degree.items())
89
        yield (v, len(vs), ntriangles, gen_degree)
90

    
91

    
92
@not_implemented_for('multigraph')
93
def _weighted_triangles_and_degree_iter(G, nodes=None, weight='weight'):
94
    """ Return an iterator of (node, degree, weighted_triangles).
95

96
    Used for weighted clustering.
97

98
    """
99
    if weight is None or G.number_of_edges() == 0:
100
        max_weight = 1
101
    else:
102
        max_weight = max(d.get(weight, 1) for u, v, d in G.edges(data=True))
103
    if nodes is None:
104
        nodes_nbrs = G.adj.items()
105
    else:
106
        nodes_nbrs = ((n, G[n]) for n in G.nbunch_iter(nodes))
107

    
108
    def wt(u, v):
109
        return G[u][v].get(weight, 1) / max_weight
110

    
111
    for i, nbrs in nodes_nbrs:
112
        inbrs = set(nbrs) - {i}
113
        weighted_triangles = 0
114
        seen = set()
115
        for j in inbrs:
116
            seen.add(j)
117
            # This prevents double counting.
118
            jnbrs = set(G[j]) - seen
119
            # Only compute the edge weight once, before the inner inner
120
            # loop.
121
            wij = wt(i, j)
122
            weighted_triangles += sum((wij * wt(j, k) * wt(k, i)) ** (1 / 3)
123
                                      for k in inbrs & jnbrs)
124
        yield (i, len(inbrs), 2 * weighted_triangles)
125

    
126

    
127
@not_implemented_for('multigraph')
128
def _directed_triangles_and_degree_iter(G, nodes=None):
129
    """ Return an iterator of
130
    (node, total_degree, reciprocal_degree, directed_triangles).
131

132
    Used for directed clustering.
133

134
    """
135
    nodes_nbrs = ((n, G._pred[n], G._succ[n]) for n in G.nbunch_iter(nodes))
136

    
137
    for i, preds, succs in nodes_nbrs:
138
        ipreds = set(preds) - {i}
139
        isuccs = set(succs) - {i}
140

    
141
        directed_triangles = 0
142
        for j in chain(ipreds, isuccs):
143
            jpreds = set(G._pred[j]) - {j}
144
            jsuccs = set(G._succ[j]) - {j}
145
            directed_triangles += sum((1 for k in
146
                                       chain((ipreds & jpreds),
147
                                             (ipreds & jsuccs),
148
                                             (isuccs & jpreds),
149
                                             (isuccs & jsuccs))))
150
        dtotal = len(ipreds) + len(isuccs)
151
        dbidirectional = len(ipreds & isuccs)
152
        yield (i, dtotal, dbidirectional, directed_triangles)
153

    
154

    
155
@not_implemented_for('multigraph')
156
def _directed_weighted_triangles_and_degree_iter(G, nodes=None, weight = 'weight'):
157
    """ Return an iterator of
158
    (node, total_degree, reciprocal_degree, directed_weighted_triangles).
159

160
    Used for directed weighted clustering.
161

162
    """
163
    if weight is None or G.number_of_edges() == 0:
164
        max_weight = 1
165
    else:
166
        max_weight = max(d.get(weight, 1) for u, v, d in G.edges(data=True))
167

    
168
    nodes_nbrs = ((n, G._pred[n], G._succ[n]) for n in G.nbunch_iter(nodes))
169

    
170
    def wt(u, v):
171
        return G[u][v].get(weight, 1) / max_weight
172

    
173
    for i, preds, succs in nodes_nbrs:
174
        ipreds = set(preds) - {i}
175
        isuccs = set(succs) - {i}
176

    
177
        directed_triangles = 0
178
        for j in ipreds:
179
            jpreds = set(G._pred[j]) - {j}
180
            jsuccs = set(G._succ[j]) - {j}
181
            directed_triangles += sum((wt(j, i) * wt(k, i) * wt(k, j))**(1 / 3)
182
                                      for k in ipreds & jpreds)
183
            directed_triangles += sum((wt(j, i) * wt(k, i) * wt(j, k))**(1 / 3)
184
                                      for k in ipreds & jsuccs)
185
            directed_triangles += sum((wt(j, i) * wt(i, k) * wt(k, j))**(1 / 3)
186
                                      for k in isuccs & jpreds)
187
            directed_triangles += sum((wt(j, i) * wt(i, k) * wt(j, k))**(1 / 3)
188
                                      for k in isuccs & jsuccs)
189

    
190
        for j in isuccs:
191
            jpreds = set(G._pred[j]) - {j}
192
            jsuccs = set(G._succ[j]) - {j}
193
            directed_triangles += sum((wt(i, j) * wt(k, i) * wt(k, j))**(1 / 3)
194
                                      for k in ipreds & jpreds)
195
            directed_triangles += sum((wt(i, j) * wt(k, i) * wt(j, k))**(1 / 3)
196
                                      for k in ipreds & jsuccs)
197
            directed_triangles += sum((wt(i, j) * wt(i, k) * wt(k, j))**(1 / 3)
198
                                      for k in isuccs & jpreds)
199
            directed_triangles += sum((wt(i, j) * wt(i, k) * wt(j, k))**(1 / 3)
200
                                      for k in isuccs & jsuccs)
201

    
202
        dtotal = len(ipreds) + len(isuccs)
203
        dbidirectional = len(ipreds & isuccs)
204
        yield (i, dtotal, dbidirectional, directed_triangles)
205

    
206

    
207
def average_clustering(G, nodes=None, weight=None, count_zeros=True):
208
    r"""Compute the average clustering coefficient for the graph G.
209

210
    The clustering coefficient for the graph is the average,
211

212
    .. math::
213

214
       C = \frac{1}{n}\sum_{v \in G} c_v,
215

216
    where :math:`n` is the number of nodes in `G`.
217

218
    Parameters
219
    ----------
220
    G : graph
221

222
    nodes : container of nodes, optional (default=all nodes in G)
223
       Compute average clustering for nodes in this container.
224

225
    weight : string or None, optional (default=None)
226
       The edge attribute that holds the numerical value used as a weight.
227
       If None, then each edge has weight 1.
228

229
    count_zeros : bool
230
       If False include only the nodes with nonzero clustering in the average.
231

232
    Returns
233
    -------
234
    avg : float
235
       Average clustering
236

237
    Examples
238
    --------
239
    >>> G=nx.complete_graph(5)
240
    >>> print(nx.average_clustering(G))
241
    1.0
242

243
    Notes
244
    -----
245
    This is a space saving routine; it might be faster
246
    to use the clustering function to get a list and then take the average.
247

248
    Self loops are ignored.
249

250
    References
251
    ----------
252
    .. [1] Generalizations of the clustering coefficient to weighted
253
       complex networks by J. Saramäki, M. Kivelä, J.-P. Onnela,
254
       K. Kaski, and J. Kertész, Physical Review E, 75 027105 (2007).
255
       http://jponnela.com/web_documents/a9.pdf
256
    .. [2] Marcus Kaiser,  Mean clustering coefficients: the role of isolated
257
       nodes and leafs on clustering measures for small-world networks.
258
       https://arxiv.org/abs/0802.2512
259
    """
260
    c = clustering(G, nodes, weight=weight).values()
261
    if not count_zeros:
262
        c = [v for v in c if v > 0]
263
    return sum(c) / len(c)
264

    
265

    
266
def clustering(G, nodes=None, weight=None):
267
    r"""Compute the clustering coefficient for nodes.
268

269
    For unweighted graphs, the clustering of a node :math:`u`
270
    is the fraction of possible triangles through that node that exist,
271

272
    .. math::
273

274
      c_u = \frac{2 T(u)}{deg(u)(deg(u)-1)},
275

276
    where :math:`T(u)` is the number of triangles through node :math:`u` and
277
    :math:`deg(u)` is the degree of :math:`u`.
278

279
    For weighted graphs, there are several ways to define clustering [1]_.
280
    the one used here is defined
281
    as the geometric average of the subgraph edge weights [2]_,
282

283
    .. math::
284

285
       c_u = \frac{1}{deg(u)(deg(u)-1))}
286
             \sum_{vw} (\hat{w}_{uv} \hat{w}_{uw} \hat{w}_{vw})^{1/3}.
287

288
    The edge weights :math:`\hat{w}_{uv}` are normalized by the maximum weight
289
    in the network :math:`\hat{w}_{uv} = w_{uv}/\max(w)`.
290

291
    The value of :math:`c_u` is assigned to 0 if :math:`deg(u) < 2`.
292

293
    For directed graphs, the clustering is similarly defined as the fraction
294
    of all possible directed triangles or geometric average of the subgraph
295
    edge weights for unweighted and weighted directed graph respectively [3]_.
296

297
    .. math::
298

299
       c_u = \frac{1}{deg^{tot}(u)(deg^{tot}(u)-1) - 2deg^{\leftrightarrow}(u)}
300
             T(u),
301

302
    where :math:`T(u)` is the number of directed triangles through node
303
    :math:`u`, :math:`deg^{tot}(u)` is the sum of in degree and out degree of
304
    :math:`u` and :math:`deg^{\leftrightarrow}(u)` is the reciprocal degree of
305
    :math:`u`.
306

307
    Parameters
308
    ----------
309
    G : graph
310

311
    nodes : container of nodes, optional (default=all nodes in G)
312
       Compute clustering for nodes in this container.
313

314
    weight : string or None, optional (default=None)
315
       The edge attribute that holds the numerical value used as a weight.
316
       If None, then each edge has weight 1.
317

318
    Returns
319
    -------
320
    out : float, or dictionary
321
       Clustering coefficient at specified nodes
322

323
    Examples
324
    --------
325
    >>> G=nx.complete_graph(5)
326
    >>> print(nx.clustering(G,0))
327
    1.0
328
    >>> print(nx.clustering(G))
329
    {0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0, 4: 1.0}
330

331
    Notes
332
    -----
333
    Self loops are ignored.
334

335
    References
336
    ----------
337
    .. [1] Generalizations of the clustering coefficient to weighted
338
       complex networks by J. Saramäki, M. Kivelä, J.-P. Onnela,
339
       K. Kaski, and J. Kertész, Physical Review E, 75 027105 (2007).
340
       http://jponnela.com/web_documents/a9.pdf
341
    .. [2] Intensity and coherence of motifs in weighted complex
342
       networks by J. P. Onnela, J. Saramäki, J. Kertész, and K. Kaski,
343
       Physical Review E, 71(6), 065103 (2005).
344
    .. [3] Clustering in complex directed networks by G. Fagiolo,
345
       Physical Review E, 76(2), 026107 (2007).
346
    """
347
    if G.is_directed():
348
        if weight is not None:
349
            td_iter = _directed_weighted_triangles_and_degree_iter(
350
                G, nodes, weight)
351
            clusterc = {v: 0 if t == 0 else t / ((dt * (dt - 1) - 2 * db) * 2)
352
                        for v, dt, db, t in td_iter}
353
        else:
354
            td_iter = _directed_triangles_and_degree_iter(G, nodes)
355
            clusterc = {v: 0 if t == 0 else t / ((dt * (dt - 1) - 2 * db) * 2)
356
                        for v, dt, db, t in td_iter}
357
    else:
358
        if weight is not None:
359
            td_iter = _weighted_triangles_and_degree_iter(G, nodes, weight)
360
            clusterc = {v: 0 if t == 0 else t / (d * (d - 1)) for
361
                        v, d, t in td_iter}
362
        else:
363
            td_iter = _triangles_and_degree_iter(G, nodes)
364
            clusterc = {v: 0 if t == 0 else t / (d * (d - 1)) for
365
                        v, d, t, _ in td_iter}
366
    if nodes in G:
367
        # Return the value of the sole entry in the dictionary.
368
        return clusterc[nodes]
369
    return clusterc
370

    
371

    
372
def transitivity(G):
373
    r"""Compute graph transitivity, the fraction of all possible triangles
374
    present in G.
375

376
    Possible triangles are identified by the number of "triads"
377
    (two edges with a shared vertex).
378

379
    The transitivity is
380

381
    .. math::
382

383
        T = 3\frac{\#triangles}{\#triads}.
384

385
    Parameters
386
    ----------
387
    G : graph
388

389
    Returns
390
    -------
391
    out : float
392
       Transitivity
393

394
    Examples
395
    --------
396
    >>> G = nx.complete_graph(5)
397
    >>> print(nx.transitivity(G))
398
    1.0
399
    """
400
    triangles = sum(t for v, d, t, _ in _triangles_and_degree_iter(G))
401
    contri = sum(d * (d - 1) for v, d, t, _ in _triangles_and_degree_iter(G))
402
    return 0 if triangles == 0 else triangles / contri
403

    
404

    
405
def square_clustering(G, nodes=None):
406
    r""" Compute the squares clustering coefficient for nodes.
407

408
    For each node return the fraction of possible squares that exist at
409
    the node [1]_
410

411
    .. math::
412
       C_4(v) = \frac{ \sum_{u=1}^{k_v}
413
       \sum_{w=u+1}^{k_v} q_v(u,w) }{ \sum_{u=1}^{k_v}
414
       \sum_{w=u+1}^{k_v} [a_v(u,w) + q_v(u,w)]},
415

416
    where :math:`q_v(u,w)` are the number of common neighbors of :math:`u` and
417
    :math:`w` other than :math:`v` (ie squares), and :math:`a_v(u,w) = (k_u -
418
    (1+q_v(u,w)+\theta_{uv}))(k_w - (1+q_v(u,w)+\theta_{uw}))`, where
419
    :math:`\theta_{uw} = 1` if :math:`u` and :math:`w` are connected and 0
420
    otherwise.
421

422
    Parameters
423
    ----------
424
    G : graph
425

426
    nodes : container of nodes, optional (default=all nodes in G)
427
       Compute clustering for nodes in this container.
428

429
    Returns
430
    -------
431
    c4 : dictionary
432
       A dictionary keyed by node with the square clustering coefficient value.
433

434
    Examples
435
    --------
436
    >>> G=nx.complete_graph(5)
437
    >>> print(nx.square_clustering(G,0))
438
    1.0
439
    >>> print(nx.square_clustering(G))
440
    {0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0, 4: 1.0}
441

442
    Notes
443
    -----
444
    While :math:`C_3(v)` (triangle clustering) gives the probability that
445
    two neighbors of node v are connected with each other, :math:`C_4(v)` is
446
    the probability that two neighbors of node v share a common
447
    neighbor different from v. This algorithm can be applied to both
448
    bipartite and unipartite networks.
449

450
    References
451
    ----------
452
    .. [1] Pedro G. Lind, Marta C. González, and Hans J. Herrmann. 2005
453
        Cycles and clustering in bipartite networks.
454
        Physical Review E (72) 056127.
455
    """
456
    if nodes is None:
457
        node_iter = G
458
    else:
459
        node_iter = G.nbunch_iter(nodes)
460
    clustering = {}
461
    for v in node_iter:
462
        clustering[v] = 0
463
        potential = 0
464
        for u, w in combinations(G[v], 2):
465
            squares = len((set(G[u]) & set(G[w])) - set([v]))
466
            clustering[v] += squares
467
            degm = squares + 1
468
            if w in G[u]:
469
                degm += 1
470
            potential += (len(G[u]) - degm) * (len(G[w]) - degm) + squares
471
        if potential > 0:
472
            clustering[v] /= potential
473
    if nodes in G:
474
        # Return the value of the sole entry in the dictionary.
475
        return clustering[nodes]
476
    return clustering
477

    
478

    
479
@not_implemented_for('directed')
480
def generalized_degree(G, nodes=None):
481
    r""" Compute the generalized degree for nodes.
482

483
    For each node, the generalized degree shows how many edges of given
484
    triangle multiplicity the node is connected to. The triangle multiplicity
485
    of an edge is the number of triangles an edge participates in. The
486
    generalized degree of node :math:`i` can be written as a vector
487
    :math:`\mathbf{k}_i=(k_i^{(0)}, \dotsc, k_i^{(N-2)})` where
488
    :math:`k_i^{(j)}` is the number of edges attached to node :math:`i` that
489
    participate in :math:`j` triangles.
490

491
    Parameters
492
    ----------
493
    G : graph
494

495
    nodes : container of nodes, optional (default=all nodes in G)
496
       Compute the generalized degree for nodes in this container.
497

498
    Returns
499
    -------
500
    out : Counter, or dictionary of Counters
501
       Generalized degree of specified nodes. The Counter is keyed by edge
502
       triangle multiplicity.
503

504
    Examples
505
    --------
506
    >>> G=nx.complete_graph(5)
507
    >>> print(nx.generalized_degree(G,0))
508
    Counter({3: 4})
509
    >>> print(nx.generalized_degree(G))
510
    {0: Counter({3: 4}), 1: Counter({3: 4}), 2: Counter({3: 4}), 3: Counter({3: 4}), 4: Counter({3: 4})}
511

512
    To recover the number of triangles attached to a node:
513

514
    >>> k1 = nx.generalized_degree(G,0)
515
    >>> sum([k*v for k,v in k1.items()])/2 == nx.triangles(G,0)
516
    True
517

518
    Notes
519
    -----
520
    In a network of N nodes, the highest triangle multiplicty an edge can have
521
    is N-2.
522

523
    The return value does not include a `zero` entry if no edges of a
524
    particular triangle multiplicity are present.
525

526
    The number of triangles node :math:`i` is attached to can be recovered from
527
    the generalized degree :math:`\mathbf{k}_i=(k_i^{(0)}, \dotsc,
528
    k_i^{(N-2)})` by :math:`(k_i^{(1)}+2k_i^{(2)}+\dotsc +(N-2)k_i^{(N-2)})/2`.
529

530
    References
531
    ----------
532
    .. [1] Networks with arbitrary edge multiplicities by V. Zlatić,
533
        D. Garlaschelli and G. Caldarelli, EPL (Europhysics Letters),
534
        Volume 97, Number 2 (2012).
535
        https://iopscience.iop.org/article/10.1209/0295-5075/97/28005
536
    """
537
    if nodes in G:
538
        return next(_triangles_and_degree_iter(G, nodes))[3]
539
    return {v: gd for v, d, t, gd in _triangles_and_degree_iter(G, nodes)}