Statistics
| Branch: | Revision:

root / fiddle / heuristic-betweenness-centrality / betweenness_centrality.py @ 739fe075

History | View | Annotate | Download (10.9 KB)

1
# coding=utf8
2
"""
3
Betweenness centrality measures.
4
WITH MODIFICATION by quynhxq - Dec 8, 2015.
5
This version also include the traffic matrix
6
"""
7
#    Copyright (C) 2004-2015 by
8
#    Aric Hagberg <hagberg@lanl.gov>
9
#    Dan Schult <dschult@colgate.edu>
10
#    Pieter Swart <swart@lanl.gov>
11
#    All rights reserved.
12
#    BSD license.
13
from heapq import heappush, heappop
14
from itertools import count
15
import networkx as nx
16
import random
17
__author__ = """Aric Hagberg (hagberg@lanl.gov)"""
18

    
19
__all__ = ['betweenness_centrality',
20
           'edge_betweenness_centrality',
21
           'edge_betweenness']
22

    
23

    
24
def weight_betweenness_centrality(G, traffic_matrix=None, k=None, normalized=True, weight=None, rescale=False,
25
                           seed=None):
26
    r"""Compute the shortest-path betweenness centrality for nodes.
27

28
    Betweenness centrality of a node `v` is the sum of the
29
    fraction of all-pairs shortest paths that pass through `v`:
30

31
    .. math::
32

33
       c_B(v) =\sum_{s,t \in V} \frac{\sigma(s, t|v)}{\sigma(s, t)}
34

35
    where `V` is the set of nodes, `\sigma(s, t)` is the number of
36
    shortest `(s, t)`-paths,  and `\sigma(s, t|v)` is the number of those
37
    paths  passing through some  node `v` other than `s, t`.
38
    If `s = t`, `\sigma(s, t) = 1`, and if `v \in {s, t}`,
39
    `\sigma(s, t|v) = 0` [2]_.
40

41
    Parameters
42
    ----------
43
    G : graph
44
      A NetworkX graph
45

46
    h :
47
    k : int, optional (default=None)
48
      If k is not None use k node samples to estimate betweenness.
49
      The value of k <= n where n is the number of nodes in the graph.
50
      Higher values give better approximation.
51

52
    normalized : bool, optional
53
      If True the betweenness values are normalized by `2/((n-1)(n-2))`
54
      for graphs, and `1/((n-1)(n-2))` for directed graphs where `n`
55
      is the number of nodes in G.
56

57
    weight : None or string, optional
58
      If None, all edge weights are considered equal.
59
      Otherwise holds the name of the edge attribute used as weight.
60

61
    endpoints : bool, optional
62
      If True include the endpoints in the shortest path counts.
63

64
    Returns
65
    -------
66
    nodes : dictionary
67
       Dictionary of nodes with betweenness centrality as the value.
68

69
    See Also
70
    --------
71
    edge_betweenness_centrality
72
    load_centrality
73

74
    Notes
75
    -----
76
    The algorithm is from Rami Puzis [1]_.
77
    See [4]_ for the original first published version and [2]_ for details on
78
    algorithms for variations and related metrics.
79

80
    For the faster way to calculate BC using combinatorial path counting, see [3]_
81

82
    For weighted graphs the edge weights must be greater than zero.
83
    Zero edge weights can produce an infinite number of equal length
84
    paths between pairs of nodes.
85

86
    Check out [5]_ for the details on how to implement the _accumulate_endpoints
87

88
    References
89
    ----------
90
    .. [1] Rami Puzis:
91

92
    .. [2] Ulrik Brandes:
93
       On Variants of Shortest-Path Betweenness
94
       Centrality and their Generic Computation.
95
       Social Networks 30(2):136-145, 2008.
96
       http://www.inf.uni-konstanz.de/algo/publications/b-vspbc-08.pdf
97
    .. [3] Ulrik Brandes and Christian Pich:
98
       A Faster Algorithm for Betweenness Centrality.
99
       Journal of Mathematical Sociology 25(2):163-177, 2001.
100
       http://www.inf.uni-konstanz.de/algo/publications/b-fabc-01.pdf
101
    .. [4] Linton C. Freeman:
102
       A set of measures of centrality based on betweenness.
103
       Sociometry 40: 35–41, 1977
104
       http://moreno.ss.uci.edu/23.pdf
105

106
       [5] Rami Puzis
107
       Optimization of NIDS Placement for Protection of Intercommunicating Critical Infrastructures
108

109
    """
110
    betweenness = dict.fromkeys(G, 0.0)  # b[v]=0 for v in G
111
    if k is None:
112
        nodes = G
113
    else:
114
        random.seed(seed)
115
        nodes = random.sample(G.nodes(), k)
116
    vertices = G.nodes()
117

    
118
    # print 'nodes = %s' % nodes
119
    # print 'vertices = %s' % vertices
120
    for s in nodes:
121
        # single source shortest paths
122
        # print '\nxxx _single_source_shortest_path'
123
        if weight is None:  # use BFS
124
            S, P, sigma = _single_source_shortest_path_basic(G, s)
125
        else:  # use Dijkstra's algorithm
126
            S, P, sigma = _single_source_dijkstra_path_basic(G, s, weight)
127

    
128
        # print s
129
        # print S
130
        # print P
131
        # print sigma
132
        # accumulation
133
        if traffic_matrix:
134
            betweenness = _accumulate_weight(betweenness, S, P, sigma, s, traffic_matrix, vertices)
135
        else:
136
            betweenness = _accumulate_weight_basic(betweenness, S, P, sigma, s)
137

    
138
    print '@@@ betweenness = %s' % betweenness
139
    if rescale:
140
        betweenness = _rescale(betweenness, len(G),
141
                   normalized=normalized,
142
                   directed=G.is_directed(),
143
                   k=k)
144
    # Dec 9, 2015 - try to remove rescaling
145
    # rescaling
146
    # betweenness = _rescale(betweenness, len(G),
147
    #                        normalized=normalized,
148
    #                        directed=G.is_directed(),
149
    #                        k=k)
150
    return betweenness
151

    
152

    
153
def _single_source_shortest_path_basic(G, s):
154
    S = []
155
    P = {}
156
    for v in G:
157
        P[v] = []
158
    sigma = dict.fromkeys(G, 0.0)    # sigma[v]=0 for v in G
159
    D = {}
160
    sigma[s] = 1.0
161
    D[s] = 0
162
    Q = [s]
163
    while Q:   # use BFS to find shortest paths
164
        # print "sigma = %s" % sigma
165
        v = Q.pop(0)
166
        S.append(v)
167
        Dv = D[v]
168
        sigmav = sigma[v]
169
        for w in G[v]:
170
            if w not in D:
171
                Q.append(w)
172
                D[w] = Dv + 1
173
            if D[w] == Dv + 1:   # this is a shortest path, count paths
174
                sigma[w] += sigmav
175
                P[w].append(v)  # predecessors
176
    return S, P, sigma
177

    
178

    
179
def _single_source_dijkstra_path_basic(G, s, weight='weight'):
180
    # modified from Eppstein
181
    S = []
182
    P = {}
183
    for v in G:
184
        P[v] = []
185
    sigma = dict.fromkeys(G, 0.0)    # sigma[v]=0 for v in G
186
    D = {}
187
    sigma[s] = 1.0
188
    print sigma
189
    push = heappush
190
    pop = heappop
191
    seen = {s: 0}
192
    c = count()
193
    Q = []   # use Q as heap with (distance,node id) tuples
194
    push(Q, (0, next(c), s, s))
195
    while Q:
196
        (dist, _, pred, v) = pop(Q)
197
        if v in D:
198
            continue  # already searched this node.
199
        sigma[v] += sigma[pred]  # count paths
200
        S.append(v)
201
        D[v] = dist
202
        for w, edgedata in G[v].items():
203
            vw_dist = dist + edgedata.get(weight, 1)
204
            if w not in D and (w not in seen or vw_dist < seen[w]):
205
                seen[w] = vw_dist
206
                push(Q, (vw_dist, next(c), v, w))
207
                sigma[w] = 0.0
208
                P[w] = [v]
209
            elif vw_dist == seen[w]:  # handle equal paths
210
                sigma[w] += sigma[v]
211
                P[w].append(v)
212
    return S, P, sigma
213

    
214

    
215
def _get_value_from_traffic_matrix(nodes, traffic_matrix, x, y):
216
    x_pos = nodes.index(x)
217
    y_pos = nodes.index(y)
218
    try:
219
        return traffic_matrix[x_pos][y_pos]
220
    except:
221
        print "ERROR in get_value_from_traffic_matrix"
222
        return -1
223

    
224
def _accumulate_basic(betweenness, S, P, sigma, s, traffic_matrix, nodes):
225
    delta = dict.fromkeys(S, 0)
226
    while S:
227
        w = S.pop()
228
        print 'w = %s' % w
229
        coeff = (1.0 + delta[w]) / sigma[w]
230
        # coeff = (delta[w]) / sigma[w]
231
        for v in P[w]:
232
            print '\tv = %s' % v
233
            h = _get_value_from_traffic_matrix(nodes, traffic_matrix, s, v)
234
            print '\t h = %s' % h
235
            delta[v] += sigma[v] * coeff + h
236
        print 'delta = %s' % delta
237
        if w != s:
238
            # print h
239
            # betweenness[w] += delta[w] * h
240
            betweenness[w] += delta[w]
241
            print 'betweenness = %s' % betweenness
242
    return betweenness
243

    
244

    
245
def _accumulate_weight_basic(betweenness, S, P, sigma, s):
246
    r"""Accumlate the BC score.
247

248
    Implements the Algorithm 1 in [1]_ (line 15-21)
249

250
    In this one, the traffic_matrix is not provided. But we can deduce the value:
251
        h(s,v) = 1   if s != v
252
        h(s, v) = 0  if s == v
253

254
    .. [1] Rami Puzis
255
       Optimization of NIDS Placement for Protection of Intercommunicating Critical Infrastructures
256
    """
257
    # betweenness[s] += len(S) - 1
258
    delta = dict.fromkeys(S, 0)
259
    while S:
260
        w = S.pop()
261
        if w != s:
262
            delta[w] += 1 # this is the h, when w != s, then h(s, w) = 1
263
        coeff = delta[w] / sigma[w]
264
        for v in P[w]:
265
            delta[v] += sigma[v] * coeff
266
        if w != s:
267
            betweenness[w] += delta[w]
268
            # print 'out betweenness = %s' % betweenness
269
    return betweenness
270

    
271
def _accumulate_weight(betweenness, S, P, sigma, s, traffic_matrix, nodes):
272
    r"""Accumlate the BC score.
273

274
    Implements the Algorithm 1 in [1]_ (line 15-21)
275

276
    .. [1] Rami Puzis
277
       Optimization of NIDS Placement for Protection of Intercommunicating Critical Infrastructures
278
    """
279
    # betweenness[s] += len(S) - 1
280
    delta = dict.fromkeys(S, 0)
281
    while S:
282
        # print 'in betweenness = %s' % betweenness
283
        w = S.pop()
284
        h = _get_value_from_traffic_matrix(nodes, traffic_matrix, s, w)
285
        delta[w] += h
286
        coeff = delta[w] / sigma[w]
287
        # print "  coeff = %s" % coeff
288
        for v in P[w]:
289
            # print "  v = %s" % v
290
            # print "    h = %s" % h
291
            delta[v] += sigma[v] * coeff
292
            # print '  delta = %s' % delta
293
        if w != s:
294
            betweenness[w] += delta[w]
295
            # print 'betweenness = %s' % betweenness
296
    return betweenness
297

    
298

    
299
def _accumulate_edges(betweenness, S, P, sigma, s):
300
    delta = dict.fromkeys(S, 0)
301
    while S:
302
        w = S.pop()
303
        coeff = (1.0 + delta[w]) / sigma[w]
304
        for v in P[w]:
305
            c = sigma[v] * coeff
306
            if (v, w) not in betweenness:
307
                betweenness[(w, v)] += c
308
            else:
309
                betweenness[(v, w)] += c
310
            delta[v] += c
311
        if w != s:
312
            betweenness[w] += delta[w]
313
    return betweenness
314

    
315

    
316
def _rescale(betweenness, n, normalized, directed=False, k=None):
317
    if normalized is True:
318
        if n <= 2:
319
            scale = None  # no normalization b=0 for all nodes
320
        else:
321
            scale = 1.0 / ((n - 1) * (n - 2))
322
    else:  # rescale by 2 for undirected graphs
323
        if not directed:
324
            scale = 1.0 / 2.0
325
        else:
326
            scale = None
327
    if scale is not None:
328
        print 'scale = %s' % scale
329
        if k is not None:
330
            scale = scale * n / k
331
        for v in betweenness:
332
            betweenness[v] *= scale
333
    return betweenness
334

    
335

    
336
def _rescale_e(betweenness, n, normalized, directed=False, k=None):
337
    if normalized is True:
338
        if n <= 1:
339
            scale = None  # no normalization b=0 for all nodes
340
        else:
341
            scale = 1.0 / (n * (n - 1))
342
    else:  # rescale by 2 for undirected graphs
343
        if not directed:
344
            scale = 1.0 / 2.0
345
        else:
346
            scale = None
347
    if scale is not None:
348
        if k is not None:
349
            scale = scale * n / k
350
        for v in betweenness:
351
            betweenness[v] *= scale
352
    return betweenness