Statistics
| Branch: | Revision:

iof-tools / networkxMiCe / networkx-master / networkx / algorithms / centrality / betweenness.py @ 5cef0f13

History | View | Annotate | Download (12.1 KB)

1
# coding=utf8
2
#    Copyright (C) 2004-2019 by
3
#    Aric Hagberg <hagberg@lanl.gov>
4
#    Dan Schult <dschult@colgate.edu>
5
#    Pieter Swart <swart@lanl.gov>
6
#    All rights reserved.
7
#    BSD license.
8
#
9
# Author: Aric Hagberg (hagberg@lanl.gov)
10
"""Betweenness centrality measures."""
11
from __future__ import division
12
from heapq import heappush, heappop
13
from itertools import count
14

    
15
import networkx as nx
16
from networkx.utils import py_random_state
17
from networkx.utils.decorators import not_implemented_for
18

    
19
__all__ = ['betweenness_centrality', 'edge_betweenness_centrality',
20
           'edge_betweenness']
21

    
22

    
23
@py_random_state(5)
24
@not_implemented_for('multigraph')
25
def betweenness_centrality(G, k=None, normalized=True, weight=None,
26
                           endpoints=False, seed=None):
27
    r"""Compute the shortest-path betweenness centrality for nodes.
28

29
    Betweenness centrality of a node $v$ is the sum of the
30
    fraction of all-pairs shortest paths that pass through $v$
31

32
    .. math::
33

34
       c_B(v) =\sum_{s,t \in V} \frac{\sigma(s, t|v)}{\sigma(s, t)}
35

36
    where $V$ is the set of nodes, $\sigma(s, t)$ is the number of
37
    shortest $(s, t)$-paths,  and $\sigma(s, t|v)$ is the number of
38
    those paths  passing through some  node $v$ other than $s, t$.
39
    If $s = t$, $\sigma(s, t) = 1$, and if $v \in {s, t}$,
40
    $\sigma(s, t|v) = 0$ [2]_.
41

42
    Parameters
43
    ----------
44
    G : graph
45
      A NetworkX graph.
46

47
    k : int, optional (default=None)
48
      If k is not None use k node samples to estimate betweenness.
49
      The value of k <= n where n is the number of nodes in the graph.
50
      Higher values give better approximation.
51

52
    normalized : bool, optional
53
      If True the betweenness values are normalized by `2/((n-1)(n-2))`
54
      for graphs, and `1/((n-1)(n-2))` for directed graphs where `n`
55
      is the number of nodes in G.
56

57
    weight : None or string, optional (default=None)
58
      If None, all edge weights are considered equal.
59
      Otherwise holds the name of the edge attribute used as weight.
60

61
    endpoints : bool, optional
62
      If True include the endpoints in the shortest path counts.
63

64
    seed : integer, random_state, or None (default)
65
        Indicator of random number generation state.
66
        See :ref:`Randomness<randomness>`.
67
        Note that this is only used if k is not None.
68

69
    Returns
70
    -------
71
    nodes : dictionary
72
       Dictionary of nodes with betweenness centrality as the value.
73

74
    See Also
75
    --------
76
    edge_betweenness_centrality
77
    load_centrality
78

79
    Notes
80
    -----
81
    The algorithm is from Ulrik Brandes [1]_.
82
    See [4]_ for the original first published version and [2]_ for details on
83
    algorithms for variations and related metrics.
84

85
    For approximate betweenness calculations set k=#samples to use
86
    k nodes ("pivots") to estimate the betweenness values. For an estimate
87
    of the number of pivots needed see [3]_.
88

89
    For weighted graphs the edge weights must be greater than zero.
90
    Zero edge weights can produce an infinite number of equal length
91
    paths between pairs of nodes.
92

93
    References
94
    ----------
95
    .. [1] Ulrik Brandes:
96
       A Faster Algorithm for Betweenness Centrality.
97
       Journal of Mathematical Sociology 25(2):163-177, 2001.
98
       http://www.inf.uni-konstanz.de/algo/publications/b-fabc-01.pdf
99
    .. [2] Ulrik Brandes:
100
       On Variants of Shortest-Path Betweenness
101
       Centrality and their Generic Computation.
102
       Social Networks 30(2):136-145, 2008.
103
       http://www.inf.uni-konstanz.de/algo/publications/b-vspbc-08.pdf
104
    .. [3] Ulrik Brandes and Christian Pich:
105
       Centrality Estimation in Large Networks.
106
       International Journal of Bifurcation and Chaos 17(7):2303-2318, 2007.
107
       http://www.inf.uni-konstanz.de/algo/publications/bp-celn-06.pdf
108
    .. [4] Linton C. Freeman:
109
       A set of measures of centrality based on betweenness.
110
       Sociometry 40: 35–41, 1977
111
       http://moreno.ss.uci.edu/23.pdf
112
    """
113
    betweenness = dict.fromkeys(G, 0.0)  # b[v]=0 for v in G
114
    if k is None:
115
        nodes = G
116
    else:
117
        nodes = seed.sample(G.nodes(), k)
118
    for s in nodes:
119
        # single source shortest paths
120
        if weight is None:  # use BFS
121
            S, P, sigma = _single_source_shortest_path_basic(G, s)
122
        else:  # use Dijkstra's algorithm
123
            S, P, sigma = _single_source_dijkstra_path_basic(G, s, weight)
124
        # accumulation
125
        if endpoints:
126
            betweenness = _accumulate_endpoints(betweenness, S, P, sigma, s)
127
        else:
128
            betweenness = _accumulate_basic(betweenness, S, P, sigma, s)
129
    # rescaling
130
    betweenness = _rescale(betweenness, len(G), normalized=normalized,
131
                           directed=G.is_directed(), k=k, endpoints=endpoints)
132
    return betweenness
133

    
134

    
135
@py_random_state(4)
136
def edge_betweenness_centrality(G, k=None, normalized=True, weight=None,
137
                                seed=None):
138
    r"""Compute betweenness centrality for edges.
139

140
    Betweenness centrality of an edge $e$ is the sum of the
141
    fraction of all-pairs shortest paths that pass through $e$
142

143
    .. math::
144

145
       c_B(e) =\sum_{s,t \in V} \frac{\sigma(s, t|e)}{\sigma(s, t)}
146

147
    where $V$ is the set of nodes, $\sigma(s, t)$ is the number of
148
    shortest $(s, t)$-paths, and $\sigma(s, t|e)$ is the number of
149
    those paths passing through edge $e$ [2]_.
150

151
    Parameters
152
    ----------
153
    G : graph
154
      A NetworkX graph.
155

156
    k : int, optional (default=None)
157
      If k is not None use k node samples to estimate betweenness.
158
      The value of k <= n where n is the number of nodes in the graph.
159
      Higher values give better approximation.
160

161
    normalized : bool, optional
162
      If True the betweenness values are normalized by $2/(n(n-1))$
163
      for graphs, and $1/(n(n-1))$ for directed graphs where $n$
164
      is the number of nodes in G.
165

166
    weight : None or string, optional (default=None)
167
      If None, all edge weights are considered equal.
168
      Otherwise holds the name of the edge attribute used as weight.
169

170
    seed : integer, random_state, or None (default)
171
        Indicator of random number generation state.
172
        See :ref:`Randomness<randomness>`.
173
        Note that this is only used if k is not None.
174

175
    Returns
176
    -------
177
    edges : dictionary
178
       Dictionary of edges with betweenness centrality as the value.
179

180
    See Also
181
    --------
182
    betweenness_centrality
183
    edge_load
184

185
    Notes
186
    -----
187
    The algorithm is from Ulrik Brandes [1]_.
188

189
    For weighted graphs the edge weights must be greater than zero.
190
    Zero edge weights can produce an infinite number of equal length
191
    paths between pairs of nodes.
192

193
    References
194
    ----------
195
    .. [1]  A Faster Algorithm for Betweenness Centrality. Ulrik Brandes,
196
       Journal of Mathematical Sociology 25(2):163-177, 2001.
197
       http://www.inf.uni-konstanz.de/algo/publications/b-fabc-01.pdf
198
    .. [2] Ulrik Brandes: On Variants of Shortest-Path Betweenness
199
       Centrality and their Generic Computation.
200
       Social Networks 30(2):136-145, 2008.
201
       http://www.inf.uni-konstanz.de/algo/publications/b-vspbc-08.pdf
202
    """
203
    betweenness = dict.fromkeys(G, 0.0)  # b[v]=0 for v in G
204
    # b[e]=0 for e in G.edges()
205
    betweenness.update(dict.fromkeys(G.edges(), 0.0))
206
    if k is None:
207
        nodes = G
208
    else:
209
        nodes = seed.sample(G.nodes(), k)
210
    for s in nodes:
211
        # single source shortest paths
212
        if weight is None:  # use BFS
213
            S, P, sigma = _single_source_shortest_path_basic(G, s)
214
        else:  # use Dijkstra's algorithm
215
            S, P, sigma = _single_source_dijkstra_path_basic(G, s, weight)
216
        # accumulation
217
        betweenness = _accumulate_edges(betweenness, S, P, sigma, s)
218
    # rescaling
219
    for n in G:  # remove nodes to only return edges
220
        del betweenness[n]
221
    betweenness = _rescale_e(betweenness, len(G), normalized=normalized,
222
                             directed=G.is_directed())
223
    return betweenness
224

    
225
# obsolete name
226

    
227

    
228
def edge_betweenness(G, k=None, normalized=True, weight=None, seed=None):
229
    return edge_betweenness_centrality(G, k, normalized, weight, seed)
230

    
231

    
232
# helpers for betweenness centrality
233

    
234
def _single_source_shortest_path_basic(G, s):
235
    S = []
236
    P = {}
237
    for v in G:
238
        P[v] = []
239
    sigma = dict.fromkeys(G, 0.0)    # sigma[v]=0 for v in G
240
    D = {}
241
    sigma[s] = 1.0
242
    D[s] = 0
243
    Q = [s]
244
    while Q:   # use BFS to find shortest paths
245
        v = Q.pop(0)
246
        S.append(v)
247
        Dv = D[v]
248
        sigmav = sigma[v]
249
        for w in G[v]:
250
            if w not in D:
251
                Q.append(w)
252
                D[w] = Dv + 1
253
            if D[w] == Dv + 1:   # this is a shortest path, count paths
254
                sigma[w] += sigmav
255
                P[w].append(v)  # predecessors
256
    return S, P, sigma
257

    
258

    
259
def _single_source_dijkstra_path_basic(G, s, weight):
260
    # modified from Eppstein
261
    S = []
262
    P = {}
263
    for v in G:
264
        P[v] = []
265
    sigma = dict.fromkeys(G, 0.0)    # sigma[v]=0 for v in G
266
    D = {}
267
    sigma[s] = 1.0
268
    push = heappush
269
    pop = heappop
270
    seen = {s: 0}
271
    c = count()
272
    Q = []   # use Q as heap with (distance,node id) tuples
273
    push(Q, (0, next(c), s, s))
274
    while Q:
275
        (dist, _, pred, v) = pop(Q)
276
        if v in D:
277
            continue  # already searched this node.
278
        sigma[v] += sigma[pred]  # count paths
279
        S.append(v)
280
        D[v] = dist
281
        for w, edgedata in G[v].items():
282
            vw_dist = dist + edgedata.get(weight, 1)
283
            if w not in D and (w not in seen or vw_dist < seen[w]):
284
                seen[w] = vw_dist
285
                push(Q, (vw_dist, next(c), v, w))
286
                sigma[w] = 0.0
287
                P[w] = [v]
288
            elif vw_dist == seen[w]:  # handle equal paths
289
                sigma[w] += sigma[v]
290
                P[w].append(v)
291
    return S, P, sigma
292

    
293

    
294
def _accumulate_basic(betweenness, S, P, sigma, s):
295
    delta = dict.fromkeys(S, 0)
296
    while S:
297
        w = S.pop()
298
        coeff = (1 + delta[w]) / sigma[w]
299
        for v in P[w]:
300
            delta[v] += sigma[v] * coeff
301
        if w != s:
302
            betweenness[w] += delta[w]
303
    return betweenness
304

    
305

    
306
def _accumulate_endpoints(betweenness, S, P, sigma, s):
307
    betweenness[s] += len(S) - 1
308
    delta = dict.fromkeys(S, 0)
309
    while S:
310
        w = S.pop()
311
        coeff = (1 + delta[w]) / sigma[w]
312
        for v in P[w]:
313
            delta[v] += sigma[v] * coeff
314
        if w != s:
315
            betweenness[w] += delta[w] + 1
316
    return betweenness
317

    
318

    
319
def _accumulate_edges(betweenness, S, P, sigma, s):
320
    delta = dict.fromkeys(S, 0)
321
    while S:
322
        w = S.pop()
323
        coeff = (1 + delta[w]) / sigma[w]
324
        for v in P[w]:
325
            c = sigma[v] * coeff
326
            if (v, w) not in betweenness:
327
                betweenness[(w, v)] += c
328
            else:
329
                betweenness[(v, w)] += c
330
            delta[v] += c
331
        if w != s:
332
            betweenness[w] += delta[w]
333
    return betweenness
334

    
335

    
336
def _rescale(betweenness, n, normalized,
337
             directed=False, k=None, endpoints=False):
338
    if normalized:
339
        if endpoints:
340
            if n < 2:
341
                scale = None  # no normalization
342
            else:
343
                # Scale factor should include endpoint nodes
344
                scale = 1 / (n * (n - 1))
345
        elif n <= 2:
346
            scale = None  # no normalization b=0 for all nodes
347
        else:
348
            scale = 1 / ((n - 1) * (n - 2))
349
    else:  # rescale by 2 for undirected graphs
350
        if not directed:
351
            scale = 0.5
352
        else:
353
            scale = None
354
    if scale is not None:
355
        if k is not None:
356
            scale = scale * n / k
357
        for v in betweenness:
358
            betweenness[v] *= scale
359
    return betweenness
360

    
361

    
362
def _rescale_e(betweenness, n, normalized, directed=False, k=None):
363
    if normalized:
364
        if n <= 1:
365
            scale = None  # no normalization b=0 for all nodes
366
        else:
367
            scale = 1 / (n * (n - 1))
368
    else:  # rescale by 2 for undirected graphs
369
        if not directed:
370
            scale = 0.5
371
        else:
372
            scale = None
373
    if scale is not None:
374
        if k is not None:
375
            scale = scale * n / k
376
        for v in betweenness:
377
            betweenness[v] *= scale
378
    return betweenness