root / globecomm / heuristic_bc / betweenness_centrality.py @ bd3d6dca
History | View | Annotate | Download (9.87 KB)
1 |
# coding=utf8
|
---|---|
2 |
"""
|
3 |
Betweenness centrality measures.
|
4 |
WITH MODIFICATION by quynhxq - Dec 8, 2015.
|
5 |
This version also include the traffic matrix
|
6 |
"""
|
7 |
# Copyright (C) 2004-2015 by
|
8 |
# Aric Hagberg <hagberg@lanl.gov>
|
9 |
# Dan Schult <dschult@colgate.edu>
|
10 |
# Pieter Swart <swart@lanl.gov>
|
11 |
# All rights reserved.
|
12 |
# BSD license.
|
13 |
from heapq import heappush, heappop |
14 |
from itertools import count |
15 |
import networkx as nx |
16 |
import random |
17 |
__author__ = """Aric Hagberg (hagberg@lanl.gov)"""
|
18 |
|
19 |
__all__ = ['betweenness_centrality',
|
20 |
'edge_betweenness_centrality',
|
21 |
'edge_betweenness']
|
22 |
|
23 |
|
24 |
def weight_betweenness_centrality(G, traffic_matrix=None, cut_without=0, k=None, normalized=True, weight=None, rescale=False, |
25 |
seed=None):
|
26 |
r"""Compute the shortest-path betweenness centrality for nodes.
|
27 |
|
28 |
Betweenness centrality of a node `v` is the sum of the
|
29 |
fraction of all-pairs shortest paths that pass through `v`:
|
30 |
|
31 |
.. math::
|
32 |
|
33 |
c_B(v) =\sum_{s,t \in V} \frac{\sigma(s, t|v)}{\sigma(s, t)}
|
34 |
|
35 |
where `V` is the set of nodes, `\sigma(s, t)` is the number of
|
36 |
shortest `(s, t)`-paths, and `\sigma(s, t|v)` is the number of those
|
37 |
paths passing through some node `v` other than `s, t`.
|
38 |
If `s = t`, `\sigma(s, t) = 1`, and if `v \in {s, t}`,
|
39 |
`\sigma(s, t|v) = 0` [2]_.
|
40 |
|
41 |
Parameters
|
42 |
----------
|
43 |
G : graph
|
44 |
A NetworkX graph
|
45 |
|
46 |
h :
|
47 |
k : int, optional (default=None)
|
48 |
If k is not None use k node samples to estimate betweenness.
|
49 |
The value of k <= n where n is the number of nodes in the graph.
|
50 |
Higher values give better approximation.
|
51 |
|
52 |
normalized : bool, optional
|
53 |
If True the betweenness values are normalized by `2/((n-1)(n-2))`
|
54 |
for graphs, and `1/((n-1)(n-2))` for directed graphs where `n`
|
55 |
is the number of nodes in G.
|
56 |
|
57 |
weight : None or string, optional
|
58 |
If None, all edge weights are considered equal.
|
59 |
Otherwise holds the name of the edge attribute used as weight.
|
60 |
|
61 |
endpoints : bool, optional
|
62 |
If True include the endpoints in the shortest path counts.
|
63 |
|
64 |
Returns
|
65 |
-------
|
66 |
nodes : dictionary
|
67 |
Dictionary of nodes with betweenness centrality as the value.
|
68 |
|
69 |
See Also
|
70 |
--------
|
71 |
edge_betweenness_centrality
|
72 |
load_centrality
|
73 |
|
74 |
Notes
|
75 |
-----
|
76 |
The algorithm is from Rami Puzis [1]_.
|
77 |
See [4]_ for the original first published version and [2]_ for details on
|
78 |
algorithms for variations and related metrics.
|
79 |
|
80 |
For the faster way to calculate BC using combinatorial path counting, see [3]_
|
81 |
|
82 |
For weighted graphs the edge weights must be greater than zero.
|
83 |
Zero edge weights can produce an infinite number of equal length
|
84 |
paths between pairs of nodes.
|
85 |
|
86 |
Check out [5]_ for the details on how to implement the _accumulate_endpoints
|
87 |
|
88 |
References
|
89 |
----------
|
90 |
.. [1] Rami Puzis:
|
91 |
|
92 |
.. [2] Ulrik Brandes:
|
93 |
On Variants of Shortest-Path Betweenness
|
94 |
Centrality and their Generic Computation.
|
95 |
Social Networks 30(2):136-145, 2008.
|
96 |
http://www.inf.uni-konstanz.de/algo/publications/b-vspbc-08.pdf
|
97 |
.. [3] Ulrik Brandes and Christian Pich:
|
98 |
A Faster Algorithm for Betweenness Centrality.
|
99 |
Journal of Mathematical Sociology 25(2):163-177, 2001.
|
100 |
http://www.inf.uni-konstanz.de/algo/publications/b-fabc-01.pdf
|
101 |
.. [4] Linton C. Freeman:
|
102 |
A set of measures of centrality based on betweenness.
|
103 |
Sociometry 40: 35–41, 1977
|
104 |
http://moreno.ss.uci.edu/23.pdf
|
105 |
|
106 |
[5] Rami Puzis
|
107 |
Optimization of NIDS Placement for Protection of Intercommunicating Critical Infrastructures
|
108 |
|
109 |
"""
|
110 |
betweenness = dict.fromkeys(G, 0.0) # b[v]=0 for v in G |
111 |
if k is None: |
112 |
nodes = G |
113 |
else:
|
114 |
random.seed(seed) |
115 |
nodes = random.sample(G.nodes(), k) |
116 |
vertices = sorted(G.nodes())
|
117 |
|
118 |
for s in nodes: |
119 |
if weight is None: # use BFS |
120 |
S, P, sigma = _single_source_shortest_path_basic(G, s) |
121 |
else: # use Dijkstra's algorithm |
122 |
S, P, sigma = _single_source_dijkstra_path_basic(G, s, weight) |
123 |
|
124 |
# accumulation
|
125 |
if traffic_matrix:
|
126 |
betweenness = _accumulate_weight(betweenness, S, P, sigma, s, traffic_matrix, cut_without, vertices) |
127 |
else:
|
128 |
betweenness = _accumulate_stress(betweenness, S, P, sigma, s) |
129 |
|
130 |
if rescale:
|
131 |
betweenness = _rescale(betweenness, len(G),
|
132 |
normalized=normalized, |
133 |
directed=G.is_directed(), |
134 |
k=k) |
135 |
# Dec 9, 2015 - try to remove rescaling
|
136 |
# rescaling
|
137 |
# betweenness = _rescale(betweenness, len(G),
|
138 |
# normalized=normalized,
|
139 |
# directed=G.is_directed(),
|
140 |
# k=k)
|
141 |
return betweenness
|
142 |
|
143 |
|
144 |
def _single_source_shortest_path_basic(G, s): |
145 |
S = [] |
146 |
P = {} |
147 |
for v in G: |
148 |
P[v] = [] |
149 |
sigma = dict.fromkeys(G, 0.0) # sigma[v]=0 for v in G |
150 |
D = {} |
151 |
sigma[s] = 1.0
|
152 |
D[s] = 0
|
153 |
Q = [s] |
154 |
while Q: # use BFS to find shortest paths |
155 |
# print "sigma = %s" % sigma
|
156 |
v = Q.pop(0)
|
157 |
S.append(v) |
158 |
Dv = D[v] |
159 |
sigmav = sigma[v] |
160 |
for w in G[v]: |
161 |
if w not in D: |
162 |
Q.append(w) |
163 |
D[w] = Dv + 1
|
164 |
if D[w] == Dv + 1: # this is a shortest path, count paths |
165 |
sigma[w] += sigmav |
166 |
P[w].append(v) # predecessors
|
167 |
return S, P, sigma
|
168 |
|
169 |
|
170 |
def _single_source_dijkstra_path_basic(G, s, weight='weight'): |
171 |
# modified from Eppstein
|
172 |
S = [] |
173 |
P = {} |
174 |
for v in G: |
175 |
P[v] = [] |
176 |
sigma = dict.fromkeys(G, 0.0) # sigma[v]=0 for v in G |
177 |
D = {} |
178 |
sigma[s] = 1.0
|
179 |
push = heappush |
180 |
pop = heappop |
181 |
seen = {s: 0}
|
182 |
c = count() |
183 |
Q = [] # use Q as heap with (distance,node id) tuples
|
184 |
push(Q, (0, next(c), s, s)) |
185 |
while Q:
|
186 |
(dist, _, pred, v) = pop(Q) |
187 |
if v in D: |
188 |
continue # already searched this node. |
189 |
sigma[v] += sigma[pred] # count paths
|
190 |
S.append(v) |
191 |
D[v] = dist |
192 |
for w, edgedata in G[v].items(): |
193 |
vw_dist = dist + edgedata.get(weight, 1)
|
194 |
if w not in D and (w not in seen or vw_dist < seen[w]): |
195 |
seen[w] = vw_dist |
196 |
push(Q, (vw_dist, next(c), v, w))
|
197 |
sigma[w] = 0.0
|
198 |
P[w] = [v] |
199 |
elif vw_dist == seen[w]: # handle equal paths |
200 |
sigma[w] += sigma[v] |
201 |
P[w].append(v) |
202 |
return S, P, sigma
|
203 |
|
204 |
|
205 |
def _get_value_from_traffic_matrix(nodes, traffic_matrix, x, y): |
206 |
x_pos = nodes.index(x) |
207 |
y_pos = nodes.index(y) |
208 |
try:
|
209 |
return traffic_matrix[x_pos][y_pos]
|
210 |
except:
|
211 |
print "ERROR in get_value_from_traffic_matrix" |
212 |
return -1 |
213 |
|
214 |
def _accumulate_stress(betweenness,S,P,sigma,s): |
215 |
delta = dict.fromkeys(S,0) |
216 |
while S:
|
217 |
w = S.pop() |
218 |
for v in P[w]: |
219 |
delta[v] += (1.0+delta[w])
|
220 |
if w != s:
|
221 |
betweenness[w] += sigma[w]*delta[w] |
222 |
return betweenness
|
223 |
|
224 |
def _accumulate_weight_basic(betweenness, S, P, sigma, s): |
225 |
r"""Accumlate the BC score.
|
226 |
|
227 |
Implements the Algorithm 1 in [1]_ (line 15-21)
|
228 |
|
229 |
In this one, the traffic_matrix is not provided. But we can deduce the value:
|
230 |
h(s,v) = 1 if s != v
|
231 |
h(s, v) = 0 if s == v
|
232 |
|
233 |
.. [1] Rami Puzis
|
234 |
Optimization of NIDS Placement for Protection of Intercommunicating Critical Infrastructures
|
235 |
"""
|
236 |
delta = dict.fromkeys(S, 0) |
237 |
while S:
|
238 |
w = S.pop() |
239 |
# if w != s:
|
240 |
delta[w] += 1 # this is the h, when w != s, then h(s, w) = 1 |
241 |
coeff = delta[w] / sigma[w] |
242 |
for v in P[w]: |
243 |
delta[v] += sigma[v] * coeff |
244 |
if w != s:
|
245 |
betweenness[w] += delta[w] |
246 |
return betweenness
|
247 |
|
248 |
def _accumulate_weight(betweenness, S, P, sigma, s, traffic_matrix, cut_without, nodes): |
249 |
r"""Accumlate the BC score.
|
250 |
|
251 |
Implements the Algorithm 1 in [1]_ (line 15-21)
|
252 |
|
253 |
.. [1] Rami Puzis
|
254 |
Optimization of NIDS Placement for Protection of Intercommunicating Critical Infrastructures
|
255 |
"""
|
256 |
delta = dict.fromkeys(S, 0) |
257 |
while S:
|
258 |
w = S.pop() |
259 |
h = _get_value_from_traffic_matrix(nodes, traffic_matrix, s, w) |
260 |
betweenness[s] += h |
261 |
delta[w] += h |
262 |
|
263 |
coeff = delta[w] / sigma[w] |
264 |
|
265 |
for v in P[w]: |
266 |
delta[v] += sigma[v] * coeff |
267 |
if w != s:
|
268 |
betweenness[w] += delta[w] |
269 |
return betweenness
|
270 |
|
271 |
|
272 |
def _accumulate_edges(betweenness, S, P, sigma, s): |
273 |
delta = dict.fromkeys(S, 0) |
274 |
while S:
|
275 |
w = S.pop() |
276 |
coeff = (1.0 + delta[w]) / sigma[w]
|
277 |
for v in P[w]: |
278 |
c = sigma[v] * coeff |
279 |
if (v, w) not in betweenness: |
280 |
betweenness[(w, v)] += c |
281 |
else:
|
282 |
betweenness[(v, w)] += c |
283 |
delta[v] += c |
284 |
if w != s:
|
285 |
betweenness[w] += delta[w] |
286 |
return betweenness
|
287 |
|
288 |
|
289 |
def _rescale(betweenness, n, normalized, directed=False, k=None): |
290 |
if normalized is True: |
291 |
if n <= 2: |
292 |
scale = None # no normalization b=0 for all nodes |
293 |
else:
|
294 |
scale = 1.0 / ((n - 1) * (n - 2)) |
295 |
else: # rescale by 2 for undirected graphs |
296 |
if not directed: |
297 |
scale = 1.0 / 2.0 |
298 |
else:
|
299 |
scale = None
|
300 |
if scale is not None: |
301 |
print 'scale = %s' % scale |
302 |
if k is not None: |
303 |
scale = scale * n / k |
304 |
for v in betweenness: |
305 |
betweenness[v] *= scale |
306 |
return betweenness
|
307 |
|
308 |
|
309 |
def _rescale_e(betweenness, n, normalized, directed=False, k=None): |
310 |
if normalized is True: |
311 |
if n <= 1: |
312 |
scale = None # no normalization b=0 for all nodes |
313 |
else:
|
314 |
scale = 1.0 / (n * (n - 1)) |
315 |
else: # rescale by 2 for undirected graphs |
316 |
if not directed: |
317 |
scale = 1.0 / 2.0 |
318 |
else:
|
319 |
scale = None
|
320 |
if scale is not None: |
321 |
if k is not None: |
322 |
scale = scale * n / k |
323 |
for v in betweenness: |
324 |
betweenness[v] *= scale |
325 |
return betweenness
|