
root / globecomm / heuristic_bc / betweenness_centrality.py @ bd3d6dca


# coding=utf8
"""
Betweenness centrality measures.
WITH MODIFICATION by quynhxq - Dec 8, 2015.
This version also includes the traffic matrix.
"""
#    Copyright (C) 2004-2015 by
#    Aric Hagberg <hagberg@lanl.gov>
#    Dan Schult <dschult@colgate.edu>
#    Pieter Swart <swart@lanl.gov>
#    All rights reserved.
#    BSD license.
from heapq import heappush, heappop
from itertools import count
import networkx as nx
import random
__author__ = """Aric Hagberg (hagberg@lanl.gov)"""

__all__ = ['betweenness_centrality',
           'edge_betweenness_centrality',
           'edge_betweenness']


def weight_betweenness_centrality(G, traffic_matrix=None, cut_without=0, k=None,
                                  normalized=True, weight=None, rescale=False,
                                  seed=None):
    r"""Compute the shortest-path betweenness centrality for nodes,
    weighted by a traffic matrix when one is given.

    Betweenness centrality of a node `v` is the sum of the
    fraction of all-pairs shortest paths that pass through `v`:

    .. math::

       c_B(v) = \sum_{s,t \in V} \frac{\sigma(s, t|v)}{\sigma(s, t)}

    where `V` is the set of nodes, `\sigma(s, t)` is the number of
    shortest `(s, t)`-paths, and `\sigma(s, t|v)` is the number of those
    paths passing through some node `v` other than `s, t`.
    If `s = t`, `\sigma(s, t) = 1`, and if `v \in {s, t}`,
    `\sigma(s, t|v) = 0` [2]_.

    Parameters
    ----------
    G : graph
      A NetworkX graph.

    traffic_matrix : 2-D matrix (e.g. list of lists), optional (default=None)
      Traffic demand h(s, t) between each pair of nodes, indexed by the
      positions of s and t in ``sorted(G.nodes())``. If None, the plain
      stress-style accumulation is used instead.

    cut_without : int, optional (default=0)
      Passed through to the traffic-matrix accumulation step.

    k : int, optional (default=None)
      If k is not None use k node samples to estimate betweenness.
      The value of k <= n where n is the number of nodes in the graph.
      Higher values give better approximation.

    normalized : bool, optional
      If True the betweenness values are normalized by `2/((n-1)(n-2))`
      for graphs, and `1/((n-1)(n-2))` for directed graphs where `n`
      is the number of nodes in G.

    weight : None or string, optional
      If None, all edge weights are considered equal.
      Otherwise holds the name of the edge attribute used as weight.

    rescale : bool, optional (default=False)
      If True, rescale the final values with ``_rescale``.

    seed : optional
      Seed for the random sampling used when `k` is given.

    Returns
    -------
    nodes : dictionary
       Dictionary of nodes with betweenness centrality as the value.

    See Also
    --------
    edge_betweenness_centrality
    load_centrality

    Notes
    -----
    The algorithm is from Rami Puzis [1]_.
    See [4]_ for the original first published version and [2]_ for details on
    algorithms for variations and related metrics.

    For a faster way to calculate betweenness centrality using combinatorial
    path counting, see [3]_.

    For weighted graphs the edge weights must be greater than zero.
    Zero edge weights can produce an infinite number of equal length
    paths between pairs of nodes.

    See [5]_ for the details on how to implement ``_accumulate_endpoints``.

    References
    ----------
    .. [1] Rami Puzis:
       Optimization of NIDS Placement for Protection of
       Intercommunicating Critical Infrastructures.
    .. [2] Ulrik Brandes:
       On Variants of Shortest-Path Betweenness
       Centrality and their Generic Computation.
       Social Networks 30(2):136-145, 2008.
       http://www.inf.uni-konstanz.de/algo/publications/b-vspbc-08.pdf
    .. [3] Ulrik Brandes:
       A Faster Algorithm for Betweenness Centrality.
       Journal of Mathematical Sociology 25(2):163-177, 2001.
       http://www.inf.uni-konstanz.de/algo/publications/b-fabc-01.pdf
    .. [4] Linton C. Freeman:
       A set of measures of centrality based on betweenness.
       Sociometry 40:35–41, 1977.
       http://moreno.ss.uci.edu/23.pdf
    .. [5] Rami Puzis:
       Optimization of NIDS Placement for Protection of
       Intercommunicating Critical Infrastructures.
    """
    betweenness = dict.fromkeys(G, 0.0)  # b[v]=0 for v in G
    if k is None:
        nodes = G
    else:
        random.seed(seed)
        nodes = random.sample(G.nodes(), k)
    vertices = sorted(G.nodes())

    for s in nodes:
        if weight is None:  # use BFS
            S, P, sigma = _single_source_shortest_path_basic(G, s)
        else:  # use Dijkstra's algorithm
            S, P, sigma = _single_source_dijkstra_path_basic(G, s, weight)

        # accumulation
        if traffic_matrix:
            betweenness = _accumulate_weight(betweenness, S, P, sigma, s, traffic_matrix, cut_without, vertices)
        else:
            betweenness = _accumulate_stress(betweenness, S, P, sigma, s)

    if rescale:
        betweenness = _rescale(betweenness, len(G),
                               normalized=normalized,
                               directed=G.is_directed(),
                               k=k)
    # Dec 9, 2015 - try to remove rescaling
    # rescaling
    # betweenness = _rescale(betweenness, len(G),
    #                        normalized=normalized,
    #                        directed=G.is_directed(),
    #                        k=k)
    return betweenness
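

# A minimal usage sketch, assuming the NetworkX 1.x API used elsewhere in this
# module: build a small path graph, give one unit of demand between every
# distinct ordered pair, and compute the traffic-weighted score; without a
# matrix the function falls back to the stress-style accumulation.
def _example_weight_betweenness():
    G = nx.path_graph(4)  # nodes 0 - 1 - 2 - 3
    n = len(G)
    # h(s, t) = 1 for s != t, 0 on the diagonal, indexed by sorted(G.nodes()).
    traffic_matrix = [[0 if i == j else 1 for j in range(n)] for i in range(n)]
    bc_weighted = weight_betweenness_centrality(G, traffic_matrix=traffic_matrix)
    bc_plain = weight_betweenness_centrality(G)  # uses _accumulate_stress
    return bc_weighted, bc_plain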


def _single_source_shortest_path_basic(G, s):
    # BFS from s: returns S (nodes in order of non-decreasing distance from s),
    # P (shortest-path predecessor lists) and sigma (numbers of shortest paths
    # from s to each node).
    S = []
    P = {}
    for v in G:
        P[v] = []
    sigma = dict.fromkeys(G, 0.0)    # sigma[v]=0 for v in G
    D = {}
    sigma[s] = 1.0
    D[s] = 0
    Q = [s]
    while Q:   # use BFS to find shortest paths
        # print "sigma = %s" % sigma
        v = Q.pop(0)
        S.append(v)
        Dv = D[v]
        sigmav = sigma[v]
        for w in G[v]:
            if w not in D:
                Q.append(w)
                D[w] = Dv + 1
            if D[w] == Dv + 1:   # this is a shortest path, count paths
                sigma[w] += sigmav
                P[w].append(v)  # predecessors
    return S, P, sigma
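

# A minimal sketch of the BFS helper's output, assuming a 4-node cycle:
# node 2 lies at distance 2 from node 0 and is reached along two shortest
# paths, which is reflected in sigma and P.
def _example_shortest_path_counts():
    G = nx.cycle_graph(4)  # edges 0-1, 1-2, 2-3, 3-0
    S, P, sigma = _single_source_shortest_path_basic(G, 0)
    # S lists nodes by non-decreasing distance from 0; sigma[2] == 2.0 and
    # P[2] contains both predecessors, 1 and 3 (in adjacency order).
    return S, P, sigma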


def _single_source_dijkstra_path_basic(G, s, weight='weight'):
    # modified from Eppstein
    # Dijkstra variant of the path counting above, for weighted graphs:
    # returns S, P and sigma as in _single_source_shortest_path_basic.
    S = []
    P = {}
    for v in G:
        P[v] = []
    sigma = dict.fromkeys(G, 0.0)    # sigma[v]=0 for v in G
    D = {}
    sigma[s] = 1.0
    push = heappush
    pop = heappop
    seen = {s: 0}
    c = count()
    Q = []   # use Q as heap with (distance, counter, predecessor, node) tuples
    push(Q, (0, next(c), s, s))
    while Q:
        (dist, _, pred, v) = pop(Q)
        if v in D:
            continue  # already searched this node.
        sigma[v] += sigma[pred]  # count paths
        S.append(v)
        D[v] = dist
        for w, edgedata in G[v].items():
            vw_dist = dist + edgedata.get(weight, 1)
            if w not in D and (w not in seen or vw_dist < seen[w]):
                seen[w] = vw_dist
                push(Q, (vw_dist, next(c), v, w))
                sigma[w] = 0.0
                P[w] = [v]
            elif vw_dist == seen[w]:  # handle equal paths
                sigma[w] += sigma[v]
                P[w].append(v)
    return S, P, sigma


def _get_value_from_traffic_matrix(nodes, traffic_matrix, x, y):
    # Look up the traffic demand h(x, y); nodes is the sorted node list that
    # defines the row/column order of traffic_matrix.
    x_pos = nodes.index(x)
    y_pos = nodes.index(y)
    try:
        return traffic_matrix[x_pos][y_pos]
    except:
        print "ERROR in _get_value_from_traffic_matrix"
        return -1
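

# A minimal sketch of the lookup convention, assuming the node list passed in
# is the same sorted list that weight_betweenness_centrality builds:
def _example_traffic_lookup():
    vertices = ['a', 'b', 'c']
    traffic_matrix = [[0, 5, 2],
                      [5, 0, 1],
                      [2, 1, 0]]
    # Demand from 'a' to 'c' sits at row vertices.index('a') == 0,
    # column vertices.index('c') == 2, so this returns 2.
    return _get_value_from_traffic_matrix(vertices, traffic_matrix, 'a', 'c')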


def _accumulate_stress(betweenness, S, P, sigma, s):
    # Accumulation step in the style of stress centrality: for source s,
    # each node w is credited with sigma[w] * delta[w].
    delta = dict.fromkeys(S, 0)
    while S:
        w = S.pop()
        for v in P[w]:
            delta[v] += (1.0 + delta[w])
        if w != s:
            betweenness[w] += sigma[w] * delta[w]
    return betweenness


def _accumulate_weight_basic(betweenness, S, P, sigma, s):
    r"""Accumulate the BC score.

    Implements Algorithm 1 in [1]_ (lines 15-21).

    Here the traffic_matrix is not provided, but its values can be deduced:
        h(s, v) = 1  if s != v
        h(s, v) = 0  if s == v

    .. [1] Rami Puzis:
       Optimization of NIDS Placement for Protection of Intercommunicating Critical Infrastructures.
    """
    delta = dict.fromkeys(S, 0)
    while S:
        w = S.pop()
        # if w != s:
        delta[w] += 1  # this is h: when w != s, h(s, w) = 1
        coeff = delta[w] / sigma[w]
        for v in P[w]:
            delta[v] += sigma[v] * coeff
        if w != s:
            betweenness[w] += delta[w]
    return betweenness
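

# A minimal worked trace of the implicit h(s, v) = 1 rule, assuming the path
# graph 0 - 1 - 2 with source s = 0, for which the BFS helper yields
# S = [0, 1, 2], P = {0: [], 1: [0], 2: [1]} and sigma = 1 everywhere:
def _example_accumulate_weight_basic():
    betweenness = {0: 0.0, 1: 0.0, 2: 0.0}
    S = [0, 1, 2]
    P = {0: [], 1: [0], 2: [1]}
    sigma = {0: 1.0, 1: 1.0, 2: 1.0}
    # Node 1 is credited once as the target of the pair (0, 1) and once as an
    # intermediate node of (0, 2), so the result is {0: 0.0, 1: 2.0, 2: 1.0}.
    return _accumulate_weight_basic(betweenness, S, P, sigma, 0)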


def _accumulate_weight(betweenness, S, P, sigma, s, traffic_matrix, cut_without, nodes):
    r"""Accumulate the BC score.

    Implements Algorithm 1 in [1]_ (lines 15-21).

    .. [1] Rami Puzis:
       Optimization of NIDS Placement for Protection of Intercommunicating Critical Infrastructures.
    """
    delta = dict.fromkeys(S, 0)
    while S:
        w = S.pop()
        h = _get_value_from_traffic_matrix(nodes, traffic_matrix, s, w)
        betweenness[s] += h
        delta[w] += h

        coeff = delta[w] / sigma[w]

        for v in P[w]:
            delta[v] += sigma[v] * coeff
        if w != s:
            betweenness[w] += delta[w]
    return betweenness


def _accumulate_edges(betweenness, S, P, sigma, s):
    # Accumulation step for edge betweenness: betweenness is keyed by both
    # nodes and edge tuples, and each edge (v, w) on a shortest path from s
    # receives the contribution c.
    delta = dict.fromkeys(S, 0)
    while S:
        w = S.pop()
        coeff = (1.0 + delta[w]) / sigma[w]
        for v in P[w]:
            c = sigma[v] * coeff
            if (v, w) not in betweenness:
                betweenness[(w, v)] += c
            else:
                betweenness[(v, w)] += c
            delta[v] += c
        if w != s:
            betweenness[w] += delta[w]
    return betweenness


def _rescale(betweenness, n, normalized, directed=False, k=None):
    # Rescale node betweenness values: normalize by the number of node pairs,
    # or halve for undirected graphs, and correct for sampling when k is given.
    if normalized is True:
        if n <= 2:
            scale = None  # no normalization b=0 for all nodes
        else:
            scale = 1.0 / ((n - 1) * (n - 2))
    else:  # rescale by 2 for undirected graphs
        if not directed:
            scale = 1.0 / 2.0
        else:
            scale = None
    if scale is not None:
        print 'scale = %s' % scale
        if k is not None:
            scale = scale * n / k
        for v in betweenness:
            betweenness[v] *= scale
    return betweenness
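

# A minimal worked example of the rescaling factor, assuming an undirected
# graph with n = 5 nodes and no sampling (k = None):
def _example_rescale_factor():
    bc = dict.fromkeys(range(5), 1.0)
    # normalized=True uses scale = 1 / ((5 - 1) * (5 - 2)) = 1/12;
    # normalized=False on an undirected graph simply halves every value.
    return _rescale(dict(bc), 5, normalized=True), _rescale(dict(bc), 5, normalized=False)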


def _rescale_e(betweenness, n, normalized, directed=False, k=None):
    # Edge-betweenness variant of _rescale: normalizes by n * (n - 1) instead.
    if normalized is True:
        if n <= 1:
            scale = None  # no normalization b=0 for all nodes
        else:
            scale = 1.0 / (n * (n - 1))
    else:  # rescale by 2 for undirected graphs
        if not directed:
            scale = 1.0 / 2.0
        else:
            scale = None
    if scale is not None:
        if k is not None:
            scale = scale * n / k
        for v in betweenness:
            betweenness[v] *= scale
    return betweenness