Statistics
| Branch: | Revision:

root / fiddle / heuristic-betweenness-centrality / betweenness_centrality.py @ 3673c3e8

History | View | Annotate | Download (9.62 KB)

1
# coding=utf8
2
"""
3
Betweenness centrality measures.
4
WITH MODIFICATION by quynhxq - Dec 8, 2015.
5
This version also include the traffic matrix
6
"""
7
#    Copyright (C) 2004-2015 by
8
#    Aric Hagberg <hagberg@lanl.gov>
9
#    Dan Schult <dschult@colgate.edu>
10
#    Pieter Swart <swart@lanl.gov>
11
#    All rights reserved.
12
#    BSD license.
13
from heapq import heappush, heappop
14
from itertools import count
15
import networkx as nx
16
import random
17
__author__ = """Aric Hagberg (hagberg@lanl.gov)"""
18

    
19
__all__ = ['betweenness_centrality',
20
           'edge_betweenness_centrality',
21
           'edge_betweenness']
22

    
23

    
24
def weight_betweenness_centrality(G, traffic_matrix=None, k=None, normalized=True, weight=None, rescale=False,
25
                           seed=None):
26
    r"""Compute the shortest-path betweenness centrality for nodes.
27

28
    Betweenness centrality of a node `v` is the sum of the
29
    fraction of all-pairs shortest paths that pass through `v`:
30

31
    .. math::
32

33
       c_B(v) =\sum_{s,t \in V} \frac{\sigma(s, t|v)}{\sigma(s, t)}
34

35
    where `V` is the set of nodes, `\sigma(s, t)` is the number of
36
    shortest `(s, t)`-paths,  and `\sigma(s, t|v)` is the number of those
37
    paths  passing through some  node `v` other than `s, t`.
38
    If `s = t`, `\sigma(s, t) = 1`, and if `v \in {s, t}`,
39
    `\sigma(s, t|v) = 0` [2]_.
40

41
    Parameters
42
    ----------
43
    G : graph
44
      A NetworkX graph
45

46
    h :
47
    k : int, optional (default=None)
48
      If k is not None use k node samples to estimate betweenness.
49
      The value of k <= n where n is the number of nodes in the graph.
50
      Higher values give better approximation.
51

52
    normalized : bool, optional
53
      If True the betweenness values are normalized by `2/((n-1)(n-2))`
54
      for graphs, and `1/((n-1)(n-2))` for directed graphs where `n`
55
      is the number of nodes in G.
56

57
    weight : None or string, optional
58
      If None, all edge weights are considered equal.
59
      Otherwise holds the name of the edge attribute used as weight.
60

61
    endpoints : bool, optional
62
      If True include the endpoints in the shortest path counts.
63

64
    Returns
65
    -------
66
    nodes : dictionary
67
       Dictionary of nodes with betweenness centrality as the value.
68

69
    See Also
70
    --------
71
    edge_betweenness_centrality
72
    load_centrality
73

74
    Notes
75
    -----
76
    The algorithm is from Rami Puzis [1]_.
77
    See [4]_ for the original first published version and [2]_ for details on
78
    algorithms for variations and related metrics.
79

80
    For the faster way to calculate BC using combinatorial path counting, see [3]_
81

82
    For weighted graphs the edge weights must be greater than zero.
83
    Zero edge weights can produce an infinite number of equal length
84
    paths between pairs of nodes.
85

86
    Check out [5]_ for the details on how to implement the _accumulate_endpoints
87

88
    References
89
    ----------
90
    .. [1] Rami Puzis:
91

92
    .. [2] Ulrik Brandes:
93
       On Variants of Shortest-Path Betweenness
94
       Centrality and their Generic Computation.
95
       Social Networks 30(2):136-145, 2008.
96
       http://www.inf.uni-konstanz.de/algo/publications/b-vspbc-08.pdf
97
    .. [3] Ulrik Brandes and Christian Pich:
98
       A Faster Algorithm for Betweenness Centrality.
99
       Journal of Mathematical Sociology 25(2):163-177, 2001.
100
       http://www.inf.uni-konstanz.de/algo/publications/b-fabc-01.pdf
101
    .. [4] Linton C. Freeman:
102
       A set of measures of centrality based on betweenness.
103
       Sociometry 40: 35–41, 1977
104
       http://moreno.ss.uci.edu/23.pdf
105

106
       [5] Rami Puzis
107
       Optimization of NIDS Placement for Protection of Intercommunicating Critical Infrastructures
108

109
    """
110
    betweenness = dict.fromkeys(G, 0.0)  # b[v]=0 for v in G
111
    if k is None:
112
        nodes = G
113
    else:
114
        random.seed(seed)
115
        nodes = random.sample(G.nodes(), k)
116
    vertices = sorted(G.nodes())
117

    
118
    for s in nodes:
119
        if weight is None:  # use BFS
120
            S, P, sigma = _single_source_shortest_path_basic(G, s)
121
        else:  # use Dijkstra's algorithm
122
            S, P, sigma = _single_source_dijkstra_path_basic(G, s, weight)
123

    
124
        # accumulation
125
        if traffic_matrix:
126
            betweenness = _accumulate_weight(betweenness, S, P, sigma, s, traffic_matrix, vertices)
127
        else:
128
            betweenness = _accumulate_weight_basic(betweenness, S, P, sigma, s)
129

    
130
    if rescale:
131
        betweenness = _rescale(betweenness, len(G),
132
                   normalized=normalized,
133
                   directed=G.is_directed(),
134
                   k=k)
135
    # Dec 9, 2015 - try to remove rescaling
136
    # rescaling
137
    # betweenness = _rescale(betweenness, len(G),
138
    #                        normalized=normalized,
139
    #                        directed=G.is_directed(),
140
    #                        k=k)
141
    return betweenness
142

    
143

    
144
def _single_source_shortest_path_basic(G, s):
145
    S = []
146
    P = {}
147
    for v in G:
148
        P[v] = []
149
    sigma = dict.fromkeys(G, 0.0)    # sigma[v]=0 for v in G
150
    D = {}
151
    sigma[s] = 1.0
152
    D[s] = 0
153
    Q = [s]
154
    while Q:   # use BFS to find shortest paths
155
        # print "sigma = %s" % sigma
156
        v = Q.pop(0)
157
        S.append(v)
158
        Dv = D[v]
159
        sigmav = sigma[v]
160
        for w in G[v]:
161
            if w not in D:
162
                Q.append(w)
163
                D[w] = Dv + 1
164
            if D[w] == Dv + 1:   # this is a shortest path, count paths
165
                sigma[w] += sigmav
166
                P[w].append(v)  # predecessors
167
    return S, P, sigma
168

    
169

    
170
def _single_source_dijkstra_path_basic(G, s, weight='weight'):
171
    # modified from Eppstein
172
    S = []
173
    P = {}
174
    for v in G:
175
        P[v] = []
176
    sigma = dict.fromkeys(G, 0.0)    # sigma[v]=0 for v in G
177
    D = {}
178
    sigma[s] = 1.0
179
    push = heappush
180
    pop = heappop
181
    seen = {s: 0}
182
    c = count()
183
    Q = []   # use Q as heap with (distance,node id) tuples
184
    push(Q, (0, next(c), s, s))
185
    while Q:
186
        (dist, _, pred, v) = pop(Q)
187
        if v in D:
188
            continue  # already searched this node.
189
        sigma[v] += sigma[pred]  # count paths
190
        S.append(v)
191
        D[v] = dist
192
        for w, edgedata in G[v].items():
193
            vw_dist = dist + edgedata.get(weight, 1)
194
            if w not in D and (w not in seen or vw_dist < seen[w]):
195
                seen[w] = vw_dist
196
                push(Q, (vw_dist, next(c), v, w))
197
                sigma[w] = 0.0
198
                P[w] = [v]
199
            elif vw_dist == seen[w]:  # handle equal paths
200
                sigma[w] += sigma[v]
201
                P[w].append(v)
202
    return S, P, sigma
203

    
204

    
205
def _get_value_from_traffic_matrix(nodes, traffic_matrix, x, y):
206
    x_pos = nodes.index(x)
207
    y_pos = nodes.index(y)
208
    try:
209
        return traffic_matrix[x_pos][y_pos]
210
    except:
211
        print "ERROR in get_value_from_traffic_matrix"
212
        return -1
213

    
214
def _accumulate_weight_basic(betweenness, S, P, sigma, s):
215
    r"""Accumlate the BC score.
216

217
    Implements the Algorithm 1 in [1]_ (line 15-21)
218

219
    In this one, the traffic_matrix is not provided. But we can deduce the value:
220
        h(s,v) = 1   if s != v
221
        h(s, v) = 0  if s == v
222

223
    .. [1] Rami Puzis
224
       Optimization of NIDS Placement for Protection of Intercommunicating Critical Infrastructures
225
    """
226
    # betweenness[s] += len(S) - 1
227
    delta = dict.fromkeys(S, 0)
228
    while S:
229
        w = S.pop()
230
        # if w != s:
231
        delta[w] += 1 # this is the h, when w != s, then h(s, w) = 1
232
        coeff = delta[w] / sigma[w]
233
        for v in P[w]:
234
            delta[v] += sigma[v] * coeff
235
        if w != s:
236
            betweenness[w] += delta[w]
237
    return betweenness
238

    
239
def _accumulate_weight(betweenness, S, P, sigma, s, traffic_matrix, nodes):
240
    r"""Accumlate the BC score.
241

242
    Implements the Algorithm 1 in [1]_ (line 15-21)
243

244
    .. [1] Rami Puzis
245
       Optimization of NIDS Placement for Protection of Intercommunicating Critical Infrastructures
246
    """
247
    # betweenness[s] += len(S) - 1
248
    delta = dict.fromkeys(S, 0)
249
    while S:
250
        w = S.pop()
251
        h = _get_value_from_traffic_matrix(nodes, traffic_matrix, s, w)
252
        delta[w] += h
253

    
254
        coeff = delta[w] / sigma[w]
255

    
256
        for v in P[w]:
257
            delta[v] += sigma[v] * coeff
258
        if w != s:
259
            betweenness[w] += delta[w]
260
    return betweenness
261

    
262

    
263
def _accumulate_edges(betweenness, S, P, sigma, s):
264
    delta = dict.fromkeys(S, 0)
265
    while S:
266
        w = S.pop()
267
        coeff = (1.0 + delta[w]) / sigma[w]
268
        for v in P[w]:
269
            c = sigma[v] * coeff
270
            if (v, w) not in betweenness:
271
                betweenness[(w, v)] += c
272
            else:
273
                betweenness[(v, w)] += c
274
            delta[v] += c
275
        if w != s:
276
            betweenness[w] += delta[w]
277
    return betweenness
278

    
279

    
280
def _rescale(betweenness, n, normalized, directed=False, k=None):
281
    if normalized is True:
282
        if n <= 2:
283
            scale = None  # no normalization b=0 for all nodes
284
        else:
285
            scale = 1.0 / ((n - 1) * (n - 2))
286
    else:  # rescale by 2 for undirected graphs
287
        if not directed:
288
            scale = 1.0 / 2.0
289
        else:
290
            scale = None
291
    if scale is not None:
292
        print 'scale = %s' % scale
293
        if k is not None:
294
            scale = scale * n / k
295
        for v in betweenness:
296
            betweenness[v] *= scale
297
    return betweenness
298

    
299

    
300
def _rescale_e(betweenness, n, normalized, directed=False, k=None):
301
    if normalized is True:
302
        if n <= 1:
303
            scale = None  # no normalization b=0 for all nodes
304
        else:
305
            scale = 1.0 / (n * (n - 1))
306
    else:  # rescale by 2 for undirected graphs
307
        if not directed:
308
            scale = 1.0 / 2.0
309
        else:
310
            scale = None
311
    if scale is not None:
312
        if k is not None:
313
            scale = scale * n / k
314
        for v in betweenness:
315
            betweenness[v] *= scale
316
    return betweenness