Statistics
| Branch: | Revision:

root / fiddle / heuristic-betweenness-centrality / betweenness_centrality.py @ 82c7c698

History | View | Annotate | Download (9.93 KB)

1 181e7c50 Quynh PX Nguyen
# coding=utf8
2
"""
3
Betweenness centrality measures.
4
WITH MODIFICATION by quynhxq - Dec 8, 2015.
5
This version also include the traffic matrix
6
"""
7
#    Copyright (C) 2004-2015 by
8
#    Aric Hagberg <hagberg@lanl.gov>
9
#    Dan Schult <dschult@colgate.edu>
10
#    Pieter Swart <swart@lanl.gov>
11
#    All rights reserved.
12
#    BSD license.
13
from heapq import heappush, heappop
14
from itertools import count
15
import networkx as nx
16
import random
17
__author__ = """Aric Hagberg (hagberg@lanl.gov)"""
18
19
__all__ = ['betweenness_centrality',
20
           'edge_betweenness_centrality',
21
           'edge_betweenness']
22
23
24 3c6ce57c Quynh PX Nguyen
def weight_betweenness_centrality(G, traffic_matrix=None, k=None, normalized=True, weight=None, rescale=False,
25 181e7c50 Quynh PX Nguyen
                           seed=None):
26
    r"""Compute the shortest-path betweenness centrality for nodes.
27

28
    Betweenness centrality of a node `v` is the sum of the
29
    fraction of all-pairs shortest paths that pass through `v`:
30

31
    .. math::
32

33
       c_B(v) =\sum_{s,t \in V} \frac{\sigma(s, t|v)}{\sigma(s, t)}
34

35
    where `V` is the set of nodes, `\sigma(s, t)` is the number of
36
    shortest `(s, t)`-paths,  and `\sigma(s, t|v)` is the number of those
37
    paths  passing through some  node `v` other than `s, t`.
38
    If `s = t`, `\sigma(s, t) = 1`, and if `v \in {s, t}`,
39
    `\sigma(s, t|v) = 0` [2]_.
40

41
    Parameters
42
    ----------
43
    G : graph
44
      A NetworkX graph
45

46
    h :
47
    k : int, optional (default=None)
48
      If k is not None use k node samples to estimate betweenness.
49
      The value of k <= n where n is the number of nodes in the graph.
50
      Higher values give better approximation.
51

52
    normalized : bool, optional
53
      If True the betweenness values are normalized by `2/((n-1)(n-2))`
54
      for graphs, and `1/((n-1)(n-2))` for directed graphs where `n`
55
      is the number of nodes in G.
56

57
    weight : None or string, optional
58
      If None, all edge weights are considered equal.
59
      Otherwise holds the name of the edge attribute used as weight.
60

61
    endpoints : bool, optional
62
      If True include the endpoints in the shortest path counts.
63

64
    Returns
65
    -------
66
    nodes : dictionary
67
       Dictionary of nodes with betweenness centrality as the value.
68

69
    See Also
70
    --------
71
    edge_betweenness_centrality
72
    load_centrality
73

74
    Notes
75
    -----
76 3c6ce57c Quynh PX Nguyen
    The algorithm is from Rami Puzis [1]_.
77 181e7c50 Quynh PX Nguyen
    See [4]_ for the original first published version and [2]_ for details on
78
    algorithms for variations and related metrics.
79

80 3c6ce57c Quynh PX Nguyen
    For the faster way to calculate BC using combinatorial path counting, see [3]_
81 181e7c50 Quynh PX Nguyen

82
    For weighted graphs the edge weights must be greater than zero.
83
    Zero edge weights can produce an infinite number of equal length
84
    paths between pairs of nodes.
85

86 3c6ce57c Quynh PX Nguyen
    Check out [5]_ for the details on how to implement the _accumulate_endpoints
87

88 181e7c50 Quynh PX Nguyen
    References
89
    ----------
90 3c6ce57c Quynh PX Nguyen
    .. [1] Rami Puzis:
91

92 181e7c50 Quynh PX Nguyen
    .. [2] Ulrik Brandes:
93
       On Variants of Shortest-Path Betweenness
94
       Centrality and their Generic Computation.
95
       Social Networks 30(2):136-145, 2008.
96
       http://www.inf.uni-konstanz.de/algo/publications/b-vspbc-08.pdf
97
    .. [3] Ulrik Brandes and Christian Pich:
98 3c6ce57c Quynh PX Nguyen
       A Faster Algorithm for Betweenness Centrality.
99
       Journal of Mathematical Sociology 25(2):163-177, 2001.
100
       http://www.inf.uni-konstanz.de/algo/publications/b-fabc-01.pdf
101 181e7c50 Quynh PX Nguyen
    .. [4] Linton C. Freeman:
102
       A set of measures of centrality based on betweenness.
103
       Sociometry 40: 35–41, 1977
104
       http://moreno.ss.uci.edu/23.pdf
105

106 3c6ce57c Quynh PX Nguyen
       [5] Rami Puzis
107
       Optimization of NIDS Placement for Protection of Intercommunicating Critical Infrastructures
108

109 181e7c50 Quynh PX Nguyen
    """
110
    betweenness = dict.fromkeys(G, 0.0)  # b[v]=0 for v in G
111
    if k is None:
112
        nodes = G
113
    else:
114
        random.seed(seed)
115
        nodes = random.sample(G.nodes(), k)
116 82c7c698 Quynh PX Nguyen
    vertices = sorted(G.nodes())
117 181e7c50 Quynh PX Nguyen
118
    for s in nodes:
119
        if weight is None:  # use BFS
120
            S, P, sigma = _single_source_shortest_path_basic(G, s)
121
        else:  # use Dijkstra's algorithm
122
            S, P, sigma = _single_source_dijkstra_path_basic(G, s, weight)
123
124
        # accumulation
125 3c6ce57c Quynh PX Nguyen
        if traffic_matrix:
126
            betweenness = _accumulate_weight(betweenness, S, P, sigma, s, traffic_matrix, vertices)
127 181e7c50 Quynh PX Nguyen
        else:
128 3c6ce57c Quynh PX Nguyen
            betweenness = _accumulate_weight_basic(betweenness, S, P, sigma, s)
129
130
    if rescale:
131
        betweenness = _rescale(betweenness, len(G),
132
                   normalized=normalized,
133
                   directed=G.is_directed(),
134
                   k=k)
135 181e7c50 Quynh PX Nguyen
    # Dec 9, 2015 - try to remove rescaling
136
    # rescaling
137
    # betweenness = _rescale(betweenness, len(G),
138
    #                        normalized=normalized,
139
    #                        directed=G.is_directed(),
140
    #                        k=k)
141
    return betweenness
142
143
144
def _single_source_shortest_path_basic(G, s):
145
    S = []
146
    P = {}
147
    for v in G:
148
        P[v] = []
149
    sigma = dict.fromkeys(G, 0.0)    # sigma[v]=0 for v in G
150
    D = {}
151
    sigma[s] = 1.0
152
    D[s] = 0
153
    Q = [s]
154
    while Q:   # use BFS to find shortest paths
155 3c6ce57c Quynh PX Nguyen
        # print "sigma = %s" % sigma
156 181e7c50 Quynh PX Nguyen
        v = Q.pop(0)
157
        S.append(v)
158
        Dv = D[v]
159
        sigmav = sigma[v]
160
        for w in G[v]:
161
            if w not in D:
162
                Q.append(w)
163
                D[w] = Dv + 1
164
            if D[w] == Dv + 1:   # this is a shortest path, count paths
165
                sigma[w] += sigmav
166
                P[w].append(v)  # predecessors
167
    return S, P, sigma
168
169
170
def _single_source_dijkstra_path_basic(G, s, weight='weight'):
171
    # modified from Eppstein
172
    S = []
173
    P = {}
174
    for v in G:
175
        P[v] = []
176
    sigma = dict.fromkeys(G, 0.0)    # sigma[v]=0 for v in G
177
    D = {}
178
    sigma[s] = 1.0
179
    push = heappush
180
    pop = heappop
181
    seen = {s: 0}
182
    c = count()
183
    Q = []   # use Q as heap with (distance,node id) tuples
184
    push(Q, (0, next(c), s, s))
185
    while Q:
186
        (dist, _, pred, v) = pop(Q)
187
        if v in D:
188
            continue  # already searched this node.
189
        sigma[v] += sigma[pred]  # count paths
190
        S.append(v)
191
        D[v] = dist
192
        for w, edgedata in G[v].items():
193
            vw_dist = dist + edgedata.get(weight, 1)
194
            if w not in D and (w not in seen or vw_dist < seen[w]):
195
                seen[w] = vw_dist
196
                push(Q, (vw_dist, next(c), v, w))
197
                sigma[w] = 0.0
198
                P[w] = [v]
199
            elif vw_dist == seen[w]:  # handle equal paths
200
                sigma[w] += sigma[v]
201
                P[w].append(v)
202
    return S, P, sigma
203
204
205
def _get_value_from_traffic_matrix(nodes, traffic_matrix, x, y):
206
    x_pos = nodes.index(x)
207
    y_pos = nodes.index(y)
208
    try:
209
        return traffic_matrix[x_pos][y_pos]
210
    except:
211
        print "ERROR in get_value_from_traffic_matrix"
212
        return -1
213
214
def _accumulate_basic(betweenness, S, P, sigma, s, traffic_matrix, nodes):
215
    delta = dict.fromkeys(S, 0)
216
    while S:
217
        w = S.pop()
218
        coeff = (1.0 + delta[w]) / sigma[w]
219
        for v in P[w]:
220
            h = _get_value_from_traffic_matrix(nodes, traffic_matrix, s, v)
221
            delta[v] += sigma[v] * coeff + h
222
        if w != s:
223 3c6ce57c Quynh PX Nguyen
            betweenness[w] += delta[w]
224 181e7c50 Quynh PX Nguyen
225
226 3c6ce57c Quynh PX Nguyen
def _accumulate_weight_basic(betweenness, S, P, sigma, s):
227
    r"""Accumlate the BC score.
228

229
    Implements the Algorithm 1 in [1]_ (line 15-21)
230

231
    In this one, the traffic_matrix is not provided. But we can deduce the value:
232
        h(s,v) = 1   if s != v
233
        h(s, v) = 0  if s == v
234

235
    .. [1] Rami Puzis
236
       Optimization of NIDS Placement for Protection of Intercommunicating Critical Infrastructures
237
    """
238 181e7c50 Quynh PX Nguyen
    delta = dict.fromkeys(S, 0)
239
    while S:
240
        w = S.pop()
241
        if w != s:
242 739fe075 Quynh PX Nguyen
            delta[w] += 1 # this is the h, when w != s, then h(s, w) = 1
243 3c6ce57c Quynh PX Nguyen
        coeff = delta[w] / sigma[w]
244
        for v in P[w]:
245
            delta[v] += sigma[v] * coeff
246
        if w != s:
247
            betweenness[w] += delta[w]
248
    return betweenness
249
250
def _accumulate_weight(betweenness, S, P, sigma, s, traffic_matrix, nodes):
251
    r"""Accumlate the BC score.
252

253
    Implements the Algorithm 1 in [1]_ (line 15-21)
254

255
    .. [1] Rami Puzis
256
       Optimization of NIDS Placement for Protection of Intercommunicating Critical Infrastructures
257
    """
258
    delta = dict.fromkeys(S, 0)
259
    while S:
260
        w = S.pop()
261
        h = _get_value_from_traffic_matrix(nodes, traffic_matrix, s, w)
262
        delta[w] += h
263
        coeff = delta[w] / sigma[w]
264 82c7c698 Quynh PX Nguyen
265 181e7c50 Quynh PX Nguyen
        for v in P[w]:
266
            delta[v] += sigma[v] * coeff
267 3c6ce57c Quynh PX Nguyen
        if w != s:
268
            betweenness[w] += delta[w]
269 181e7c50 Quynh PX Nguyen
    return betweenness
270
271
272
def _accumulate_edges(betweenness, S, P, sigma, s):
273
    delta = dict.fromkeys(S, 0)
274
    while S:
275
        w = S.pop()
276
        coeff = (1.0 + delta[w]) / sigma[w]
277
        for v in P[w]:
278
            c = sigma[v] * coeff
279
            if (v, w) not in betweenness:
280
                betweenness[(w, v)] += c
281
            else:
282
                betweenness[(v, w)] += c
283
            delta[v] += c
284
        if w != s:
285
            betweenness[w] += delta[w]
286
    return betweenness
287
288
289
def _rescale(betweenness, n, normalized, directed=False, k=None):
290
    if normalized is True:
291
        if n <= 2:
292
            scale = None  # no normalization b=0 for all nodes
293
        else:
294
            scale = 1.0 / ((n - 1) * (n - 2))
295
    else:  # rescale by 2 for undirected graphs
296
        if not directed:
297
            scale = 1.0 / 2.0
298
        else:
299
            scale = None
300
    if scale is not None:
301 3c6ce57c Quynh PX Nguyen
        print 'scale = %s' % scale
302 181e7c50 Quynh PX Nguyen
        if k is not None:
303
            scale = scale * n / k
304
        for v in betweenness:
305
            betweenness[v] *= scale
306
    return betweenness
307
308
309
def _rescale_e(betweenness, n, normalized, directed=False, k=None):
310
    if normalized is True:
311
        if n <= 1:
312
            scale = None  # no normalization b=0 for all nodes
313
        else:
314
            scale = 1.0 / (n * (n - 1))
315
    else:  # rescale by 2 for undirected graphs
316
        if not directed:
317
            scale = 1.0 / 2.0
318
        else:
319
            scale = None
320
    if scale is not None:
321
        if k is not None:
322
            scale = scale * n / k
323
        for v in betweenness:
324
            betweenness[v] *= scale
325
    return betweenness