Statistics
| Branch: | Revision:

iof-tools / networkxMiCe / networkx-master / networkx / algorithms / centrality / current_flow_betweenness.py @ 5cef0f13

History | View | Annotate | Download (13.3 KB)

1
#    Copyright (C) 2010-2019 by
2
#    Aric Hagberg <hagberg@lanl.gov>
3
#    Dan Schult <dschult@colgate.edu>
4
#    Pieter Swart <swart@lanl.gov>
5
#    All rights reserved.
6
#    BSD license.
7
#
8
# Author: Aric Hagberg (hagberg@lanl.gov)
9
"""Current-flow betweenness centrality measures."""
10
import networkx as nx
11
from networkx.algorithms.centrality.flow_matrix import *
12
from networkx.utils import (not_implemented_for,
13
                            reverse_cuthill_mckee_ordering,
14
                            py_random_state)
15

    
16
__all__ = ['current_flow_betweenness_centrality',
17
           'approximate_current_flow_betweenness_centrality',
18
           'edge_current_flow_betweenness_centrality']
19

    
20

    
21
@py_random_state(7)
22
@not_implemented_for('directed')
23
def approximate_current_flow_betweenness_centrality(G, normalized=True,
24
                                                    weight=None,
25
                                                    dtype=float, solver='full',
26
                                                    epsilon=0.5, kmax=10000,
27
                                                    seed=None):
28
    r"""Compute the approximate current-flow betweenness centrality for nodes.
29

30
    Approximates the current-flow betweenness centrality within absolute
31
    error of epsilon with high probability [1]_.
32

33

34
    Parameters
35
    ----------
36
    G : graph
37
      A NetworkX graph
38

39
    normalized : bool, optional (default=True)
40
      If True the betweenness values are normalized by 2/[(n-1)(n-2)] where
41
      n is the number of nodes in G.
42

43
    weight : string or None, optional (default=None)
44
      Key for edge data used as the edge weight.
45
      If None, then use 1 as each edge weight.
46

47
    dtype : data type (float)
48
      Default data type for internal matrices.
49
      Set to np.float32 for lower memory consumption.
50

51
    solver : string (default='lu')
52
       Type of linear solver to use for computing the flow matrix.
53
       Options are "full" (uses most memory), "lu" (recommended), and
54
       "cg" (uses least memory).
55

56
    epsilon: float
57
        Absolute error tolerance.
58

59
    kmax: int
60
       Maximum number of sample node pairs to use for approximation.
61

62
    seed : integer, random_state, or None (default)
63
        Indicator of random number generation state.
64
        See :ref:`Randomness<randomness>`.
65

66
    Returns
67
    -------
68
    nodes : dictionary
69
       Dictionary of nodes with betweenness centrality as the value.
70

71
    See Also
72
    --------
73
    current_flow_betweenness_centrality
74

75
    Notes
76
    -----
77
    The running time is $O((1/\epsilon^2)m{\sqrt k} \log n)$
78
    and the space required is $O(m)$ for $n$ nodes and $m$ edges.
79

80
    If the edges have a 'weight' attribute they will be used as
81
    weights in this algorithm.  Unspecified weights are set to 1.
82

83
    References
84
    ----------
85
    .. [1] Ulrik Brandes and Daniel Fleischer:
86
       Centrality Measures Based on Current Flow.
87
       Proc. 22nd Symp. Theoretical Aspects of Computer Science (STACS '05).
88
       LNCS 3404, pp. 533-544. Springer-Verlag, 2005.
89
       http://algo.uni-konstanz.de/publications/bf-cmbcf-05.pdf
90
    """
91
    try:
92
        import numpy as np
93
    except ImportError:
94
        raise ImportError('current_flow_betweenness_centrality requires NumPy ',
95
                          'http://scipy.org/')
96
    try:
97
        from scipy import sparse
98
        from scipy.sparse import linalg
99
    except ImportError:
100
        raise ImportError('current_flow_betweenness_centrality requires SciPy ',
101
                          'http://scipy.org/')
102
    if not nx.is_connected(G):
103
        raise nx.NetworkXError("Graph not connected.")
104
    solvername = {"full": FullInverseLaplacian,
105
                  "lu": SuperLUInverseLaplacian,
106
                  "cg": CGInverseLaplacian}
107
    n = G.number_of_nodes()
108
    ordering = list(reverse_cuthill_mckee_ordering(G))
109
    # make a copy with integer labels according to rcm ordering
110
    # this could be done without a copy if we really wanted to
111
    H = nx.relabel_nodes(G, dict(zip(ordering, range(n))))
112
    L = laplacian_sparse_matrix(H, nodelist=range(n), weight=weight,
113
                                dtype=dtype, format='csc')
114
    C = solvername[solver](L, dtype=dtype)  # initialize solver
115
    betweenness = dict.fromkeys(H, 0.0)
116
    nb = (n - 1.0) * (n - 2.0)  # normalization factor
117
    cstar = n * (n - 1) / nb
118
    l = 1  # parameter in approximation, adjustable
119
    k = l * int(np.ceil((cstar / epsilon)**2 * np.log(n)))
120
    if k > kmax:
121
        msg = 'Number random pairs k>kmax (%d>%d) ' % (k, kmax)
122
        raise nx.NetworkXError(msg, 'Increase kmax or epsilon')
123
    cstar2k = cstar / (2 * k)
124
    for i in range(k):
125
        s, t = seed.sample(range(n), 2)
126
        b = np.zeros(n, dtype=dtype)
127
        b[s] = 1
128
        b[t] = -1
129
        p = C.solve(b)
130
        for v in H:
131
            if v == s or v == t:
132
                continue
133
            for nbr in H[v]:
134
                w = H[v][nbr].get(weight, 1.0)
135
                betweenness[v] += w * np.abs(p[v] - p[nbr]) * cstar2k
136
    if normalized:
137
        factor = 1.0
138
    else:
139
        factor = nb / 2.0
140
    # remap to original node names and "unnormalize" if required
141
    return dict((ordering[k], float(v * factor)) for k, v in betweenness.items())
142

    
143

    
144
@not_implemented_for('directed')
145
def current_flow_betweenness_centrality(G, normalized=True, weight=None,
146
                                        dtype=float, solver='full'):
147
    r"""Compute current-flow betweenness centrality for nodes.
148

149
    Current-flow betweenness centrality uses an electrical current
150
    model for information spreading in contrast to betweenness
151
    centrality which uses shortest paths.
152

153
    Current-flow betweenness centrality is also known as
154
    random-walk betweenness centrality [2]_.
155

156
    Parameters
157
    ----------
158
    G : graph
159
      A NetworkX graph
160

161
    normalized : bool, optional (default=True)
162
      If True the betweenness values are normalized by 2/[(n-1)(n-2)] where
163
      n is the number of nodes in G.
164

165
    weight : string or None, optional (default=None)
166
      Key for edge data used as the edge weight.
167
      If None, then use 1 as each edge weight.
168

169
    dtype : data type (float)
170
      Default data type for internal matrices.
171
      Set to np.float32 for lower memory consumption.
172

173
    solver : string (default='lu')
174
       Type of linear solver to use for computing the flow matrix.
175
       Options are "full" (uses most memory), "lu" (recommended), and
176
       "cg" (uses least memory).
177

178
    Returns
179
    -------
180
    nodes : dictionary
181
       Dictionary of nodes with betweenness centrality as the value.
182

183
    See Also
184
    --------
185
    approximate_current_flow_betweenness_centrality
186
    betweenness_centrality
187
    edge_betweenness_centrality
188
    edge_current_flow_betweenness_centrality
189

190
    Notes
191
    -----
192
    Current-flow betweenness can be computed in  $O(I(n-1)+mn \log n)$
193
    time [1]_, where $I(n-1)$ is the time needed to compute the
194
    inverse Laplacian.  For a full matrix this is $O(n^3)$ but using
195
    sparse methods you can achieve $O(nm{\sqrt k})$ where $k$ is the
196
    Laplacian matrix condition number.
197

198
    The space required is $O(nw)$ where $w$ is the width of the sparse
199
    Laplacian matrix.  Worse case is $w=n$ for $O(n^2)$.
200

201
    If the edges have a 'weight' attribute they will be used as
202
    weights in this algorithm.  Unspecified weights are set to 1.
203

204
    References
205
    ----------
206
    .. [1] Centrality Measures Based on Current Flow.
207
       Ulrik Brandes and Daniel Fleischer,
208
       Proc. 22nd Symp. Theoretical Aspects of Computer Science (STACS '05).
209
       LNCS 3404, pp. 533-544. Springer-Verlag, 2005.
210
       http://algo.uni-konstanz.de/publications/bf-cmbcf-05.pdf
211

212
    .. [2] A measure of betweenness centrality based on random walks,
213
       M. E. J. Newman, Social Networks 27, 39-54 (2005).
214
    """
215
    try:
216
        import numpy as np
217
    except ImportError:
218
        raise ImportError('current_flow_betweenness_centrality requires NumPy ',
219
                          'http://scipy.org/')
220
    try:
221
        import scipy
222
    except ImportError:
223
        raise ImportError('current_flow_betweenness_centrality requires SciPy ',
224
                          'http://scipy.org/')
225
    if not nx.is_connected(G):
226
        raise nx.NetworkXError("Graph not connected.")
227
    n = G.number_of_nodes()
228
    ordering = list(reverse_cuthill_mckee_ordering(G))
229
    # make a copy with integer labels according to rcm ordering
230
    # this could be done without a copy if we really wanted to
231
    H = nx.relabel_nodes(G, dict(zip(ordering, range(n))))
232
    betweenness = dict.fromkeys(H, 0.0)  # b[v]=0 for v in H
233
    for row, (s, t) in flow_matrix_row(H, weight=weight, dtype=dtype,
234
                                       solver=solver):
235
        pos = dict(zip(row.argsort()[::-1], range(n)))
236
        for i in range(n):
237
            betweenness[s] += (i - pos[i]) * row[i]
238
            betweenness[t] += (n - i - 1 - pos[i]) * row[i]
239
    if normalized:
240
        nb = (n - 1.0) * (n - 2.0)  # normalization factor
241
    else:
242
        nb = 2.0
243
    for v in H:
244
        betweenness[v] = float((betweenness[v] - v) * 2.0 / nb)
245
    return dict((ordering[k], v) for k, v in betweenness.items())
246

    
247

    
248
@not_implemented_for('directed')
249
def edge_current_flow_betweenness_centrality(G, normalized=True,
250
                                             weight=None,
251
                                             dtype=float, solver='full'):
252
    r"""Compute current-flow betweenness centrality for edges.
253

254
    Current-flow betweenness centrality uses an electrical current
255
    model for information spreading in contrast to betweenness
256
    centrality which uses shortest paths.
257

258
    Current-flow betweenness centrality is also known as
259
    random-walk betweenness centrality [2]_.
260

261
    Parameters
262
    ----------
263
    G : graph
264
      A NetworkX graph
265

266
    normalized : bool, optional (default=True)
267
      If True the betweenness values are normalized by 2/[(n-1)(n-2)] where
268
      n is the number of nodes in G.
269

270
    weight : string or None, optional (default=None)
271
      Key for edge data used as the edge weight.
272
      If None, then use 1 as each edge weight.
273

274
    dtype : data type (default=float)
275
      Default data type for internal matrices.
276
      Set to np.float32 for lower memory consumption.
277

278
    solver : string (default='lu')
279
       Type of linear solver to use for computing the flow matrix.
280
       Options are "full" (uses most memory), "lu" (recommended), and
281
       "cg" (uses least memory).
282

283
    Returns
284
    -------
285
    nodes : dictionary
286
       Dictionary of edge tuples with betweenness centrality as the value.
287

288
    Raises
289
    ------
290
    NetworkXError
291
        The algorithm does not support DiGraphs.
292
        If the input graph is an instance of DiGraph class, NetworkXError
293
        is raised.
294

295
    See Also
296
    --------
297
    betweenness_centrality
298
    edge_betweenness_centrality
299
    current_flow_betweenness_centrality
300

301
    Notes
302
    -----
303
    Current-flow betweenness can be computed in $O(I(n-1)+mn \log n)$
304
    time [1]_, where $I(n-1)$ is the time needed to compute the
305
    inverse Laplacian.  For a full matrix this is $O(n^3)$ but using
306
    sparse methods you can achieve $O(nm{\sqrt k})$ where $k$ is the
307
    Laplacian matrix condition number.
308

309
    The space required is $O(nw)$ where $w$ is the width of the sparse
310
    Laplacian matrix.  Worse case is $w=n$ for $O(n^2)$.
311

312
    If the edges have a 'weight' attribute they will be used as
313
    weights in this algorithm.  Unspecified weights are set to 1.
314

315
    References
316
    ----------
317
    .. [1] Centrality Measures Based on Current Flow.
318
       Ulrik Brandes and Daniel Fleischer,
319
       Proc. 22nd Symp. Theoretical Aspects of Computer Science (STACS '05).
320
       LNCS 3404, pp. 533-544. Springer-Verlag, 2005.
321
       http://algo.uni-konstanz.de/publications/bf-cmbcf-05.pdf
322

323
    .. [2] A measure of betweenness centrality based on random walks,
324
       M. E. J. Newman, Social Networks 27, 39-54 (2005).
325
    """
326
    from networkx.utils import reverse_cuthill_mckee_ordering
327
    try:
328
        import numpy as np
329
    except ImportError:
330
        raise ImportError('current_flow_betweenness_centrality requires NumPy ',
331
                          'http://scipy.org/')
332
    try:
333
        import scipy
334
    except ImportError:
335
        raise ImportError('current_flow_betweenness_centrality requires SciPy ',
336
                          'http://scipy.org/')
337
    if not nx.is_connected(G):
338
        raise nx.NetworkXError("Graph not connected.")
339
    n = G.number_of_nodes()
340
    ordering = list(reverse_cuthill_mckee_ordering(G))
341
    # make a copy with integer labels according to rcm ordering
342
    # this could be done without a copy if we really wanted to
343
    H = nx.relabel_nodes(G, dict(zip(ordering, range(n))))
344
    edges = (tuple(sorted((u, v))) for u, v in H.edges())
345
    betweenness = dict.fromkeys(edges, 0.0)
346
    if normalized:
347
        nb = (n - 1.0) * (n - 2.0)  # normalization factor
348
    else:
349
        nb = 2.0
350
    for row, (e) in flow_matrix_row(H, weight=weight, dtype=dtype,
351
                                    solver=solver):
352
        pos = dict(zip(row.argsort()[::-1], range(1, n + 1)))
353
        for i in range(n):
354
            betweenness[e] += (i + 1 - pos[i]) * row[i]
355
            betweenness[e] += (n - i - pos[i]) * row[i]
356
        betweenness[e] /= nb
357
    return dict(((ordering[s], ordering[t]), float(v))
358
                for (s, t), v in betweenness.items())
359

    
360

    
361
# fixture for nose tests
362
def setup_module(module):
363
    from nose import SkipTest
364
    try:
365
        import numpy
366
        import scipy
367
    except:
368
        raise SkipTest("NumPy not available")