root / fiddle / heuristicbetweennesscentrality / betweenness_centrality.py @ 739fe075
History  View  Annotate  Download (10.9 KB)
1 
# coding=utf8


2 
"""

3 
Betweenness centrality measures.

4 
WITH MODIFICATION by quynhxq  Dec 8, 2015.

5 
This version also include the traffic matrix

6 
"""

7 
# Copyright (C) 2004-2015 by

8 
# Aric Hagberg <hagberg@lanl.gov>

9 
# Dan Schult <dschult@colgate.edu>

10 
# Pieter Swart <swart@lanl.gov>

11 
# All rights reserved.

12 
# BSD license.

13 
import random
from collections import deque
from heapq import heappush, heappop
from itertools import count

import networkx as nx
17 
__author__ = """Aric Hagberg (hagberg@lanl.gov)"""

# NOTE(review): none of the names below is defined in this file (it defines
# weight_betweenness_centrality instead), so `from <module> import *` would
# raise at import time -- confirm against the rest of the project before
# relying on star-imports of this module.
__all__ = ['betweenness_centrality',
           'edge_betweenness_centrality',
           'edge_betweenness']

22  
23  
24 
def weight_betweenness_centrality(G, traffic_matrix=None, k=None, normalized=True, weight=None, rescale=False,
                                  seed=None):
    r"""Compute (optionally traffic-weighted) shortest-path betweenness centrality.

    Betweenness centrality of a node `v` is the sum of the
    fraction of all-pairs shortest paths that pass through `v`:

    .. math::

       c_B(v) = \sum_{s,t \in V} \frac{\sigma(s, t|v)}{\sigma(s, t)}

    where `V` is the set of nodes, `\sigma(s, t)` is the number of
    shortest `(s, t)`-paths, and `\sigma(s, t|v)` is the number of those
    paths passing through some node `v` other than `s, t`.
    If `s = t`, `\sigma(s, t) = 1`, and if `v \in {s, t}`,
    `\sigma(s, t|v) = 0` [2]_.

    Parameters
    ----------
    G : graph
        A NetworkX graph.
    traffic_matrix : list of lists, optional (default=None)
        Matrix indexed by node position in ``G.nodes()``; entry ``[i][j]``
        is the demand h(i, j) used to weight the accumulation.  When None,
        an implicit demand of 1 between every distinct pair is used.
    k : int, optional (default=None)
        If k is not None use k node samples to estimate betweenness.
        The value of k <= n where n is the number of nodes in the graph.
        Higher values give better approximation.
    normalized : bool, optional
        If True the betweenness values are normalized by ``2/((n-1)(n-2))``
        for graphs, and ``1/((n-1)(n-2))`` for directed graphs where n is
        the number of nodes in G.  Only applied when ``rescale`` is True.
    weight : None or string, optional
        If None, all edge weights are considered equal (BFS is used).
        Otherwise holds the name of the edge attribute used as weight
        (Dijkstra's algorithm is used).
    rescale : bool, optional (default=False)
        If True, rescale the accumulated scores via ``_rescale``.
    seed : hashable, optional
        Seed for the random node sampling used when ``k`` is not None.

    Returns
    -------
    nodes : dictionary
        Dictionary of nodes with betweenness centrality as the value.

    See Also
    --------
    edge_betweenness_centrality
    load_centrality

    Notes
    -----
    The accumulation follows Algorithm 1 of Rami Puzis [1]_.  See [2]_ for
    details on algorithms for variations and related metrics, and [3]_ for
    the faster combinatorial path-counting approach.

    For weighted graphs the edge weights must be greater than zero.
    Zero edge weights can produce an infinite number of equal length
    paths between pairs of nodes.

    References
    ----------
    .. [1] Rami Puzis:
       Optimization of NIDS Placement for Protection of
       Intercommunicating Critical Infrastructures.
    .. [2] Ulrik Brandes:
       On Variants of Shortest-Path Betweenness
       Centrality and their Generic Computation.
       Social Networks 30(2):136-145, 2008.
    .. [3] Ulrik Brandes and Christian Pich:
       A Faster Algorithm for Betweenness Centrality.
       Journal of Mathematical Sociology 25(2):163-177, 2001.
    """
    betweenness = dict.fromkeys(G, 0.0)  # b[v]=0 for v in G
    if k is None:
        nodes = G
    else:
        random.seed(seed)
        nodes = random.sample(G.nodes(), k)
    # Ordered node list that indexes the traffic matrix.
    vertices = G.nodes()

    for s in nodes:
        # Single-source shortest paths from s.
        if weight is None:  # use BFS
            S, P, sigma = _single_source_shortest_path_basic(G, s)
        else:  # use Dijkstra's algorithm
            S, P, sigma = _single_source_dijkstra_path_basic(G, s, weight)
        # Accumulation: weighted by the traffic matrix when one is given,
        # otherwise with the implicit unit demand.
        if traffic_matrix:
            betweenness = _accumulate_weight(betweenness, S, P, sigma, s, traffic_matrix, vertices)
        else:
            betweenness = _accumulate_weight_basic(betweenness, S, P, sigma, s)

    if rescale:
        betweenness = _rescale(betweenness, len(G),
                               normalized=normalized,
                               directed=G.is_directed(),
                               k=k)
    return betweenness

151  
152  
153 
def _single_source_shortest_path_basic(G, s): 
154 
S = [] 
155 
P = {} 
156 
for v in G: 
157 
P[v] = [] 
158 
sigma = dict.fromkeys(G, 0.0) # sigma[v]=0 for v in G 
159 
D = {} 
160 
sigma[s] = 1.0

161 
D[s] = 0

162 
Q = [s] 
163 
while Q: # use BFS to find shortest paths 
164 
# print "sigma = %s" % sigma

165 
v = Q.pop(0)

166 
S.append(v) 
167 
Dv = D[v] 
168 
sigmav = sigma[v] 
169 
for w in G[v]: 
170 
if w not in D: 
171 
Q.append(w) 
172 
D[w] = Dv + 1

173 
if D[w] == Dv + 1: # this is a shortest path, count paths 
174 
sigma[w] += sigmav 
175 
P[w].append(v) # predecessors

176 
return S, P, sigma

177  
178  
179 
def _single_source_dijkstra_path_basic(G, s, weight='weight'): 
180 
# modified from Eppstein

181 
S = [] 
182 
P = {} 
183 
for v in G: 
184 
P[v] = [] 
185 
sigma = dict.fromkeys(G, 0.0) # sigma[v]=0 for v in G 
186 
D = {} 
187 
sigma[s] = 1.0

188 
print sigma

189 
push = heappush 
190 
pop = heappop 
191 
seen = {s: 0}

192 
c = count() 
193 
Q = [] # use Q as heap with (distance,node id) tuples

194 
push(Q, (0, next(c), s, s)) 
195 
while Q:

196 
(dist, _, pred, v) = pop(Q) 
197 
if v in D: 
198 
continue # already searched this node. 
199 
sigma[v] += sigma[pred] # count paths

200 
S.append(v) 
201 
D[v] = dist 
202 
for w, edgedata in G[v].items(): 
203 
vw_dist = dist + edgedata.get(weight, 1)

204 
if w not in D and (w not in seen or vw_dist < seen[w]): 
205 
seen[w] = vw_dist 
206 
push(Q, (vw_dist, next(c), v, w))

207 
sigma[w] = 0.0

208 
P[w] = [v] 
209 
elif vw_dist == seen[w]: # handle equal paths 
210 
sigma[w] += sigma[v] 
211 
P[w].append(v) 
212 
return S, P, sigma

213  
214  
215 
def _get_value_from_traffic_matrix(nodes, traffic_matrix, x, y): 
216 
x_pos = nodes.index(x) 
217 
y_pos = nodes.index(y) 
218 
try:

219 
return traffic_matrix[x_pos][y_pos]

220 
except:

221 
print "ERROR in get_value_from_traffic_matrix" 
222 
return 1 
223  
224 
def _accumulate_basic(betweenness, S, P, sigma, s, traffic_matrix, nodes):
    """Accumulate dependencies from source ``s`` (experimental variant).

    Brandes-style back-propagation over the shortest-path DAG rooted at
    ``s``, adding the traffic-matrix value h(s, v) for each predecessor v.

    NOTE(review): this function is not called by
    ``weight_betweenness_centrality`` (which uses ``_accumulate_weight``),
    and it adds h once per predecessor edge rather than once per node as
    ``_accumulate_weight`` does -- presumably an earlier experiment; verify
    before reusing.
    """
    delta = dict.fromkeys(S, 0)
    while S:
        w = S.pop()
        coeff = (1.0 + delta[w]) / sigma[w]
        for v in P[w]:
            h = _get_value_from_traffic_matrix(nodes, traffic_matrix, s, v)
            delta[v] += sigma[v] * coeff + h
        if w != s:
            betweenness[w] += delta[w]
    return betweenness

243  
244  
245 
def _accumulate_weight_basic(betweenness, S, P, sigma, s): 
246 
r"""Accumlate the BC score.

247 

248 
Implements the Algorithm 1 in [1]_ (line 1521)

249 

250 
In this one, the traffic_matrix is not provided. But we can deduce the value:

251 
h(s,v) = 1 if s != v

252 
h(s, v) = 0 if s == v

253 

254 
.. [1] Rami Puzis

255 
Optimization of NIDS Placement for Protection of Intercommunicating Critical Infrastructures

256 
"""

257 
# betweenness[s] += len(S)  1

258 
delta = dict.fromkeys(S, 0) 
259 
while S:

260 
w = S.pop() 
261 
if w != s:

262 
delta[w] += 1 # this is the h, when w != s, then h(s, w) = 1 
263 
coeff = delta[w] / sigma[w] 
264 
for v in P[w]: 
265 
delta[v] += sigma[v] * coeff 
266 
if w != s:

267 
betweenness[w] += delta[w] 
268 
# print 'out betweenness = %s' % betweenness

269 
return betweenness

270  
271 
def _accumulate_weight(betweenness, S, P, sigma, s, traffic_matrix, nodes):
    r"""Accumulate the BC score for source ``s``, weighted by the traffic matrix.

    Implements Algorithm 1 in [1]_ (lines 15-21).

    .. [1] Rami Puzis,
       Optimization of NIDS Placement for Protection of
       Intercommunicating Critical Infrastructures
    """
    delta = {node: 0 for node in S}
    while S:
        # Consume S from the farthest node back toward the source.
        w = S.pop()
        # Demand h(s, w) looked up from the traffic matrix.
        delta[w] += _get_value_from_traffic_matrix(nodes, traffic_matrix, s, w)
        ratio = delta[w] / sigma[w]
        for pred in P[w]:
            delta[pred] += sigma[pred] * ratio
        if w != s:
            betweenness[w] += delta[w]
    return betweenness

297  
298  
299 
def _accumulate_edges(betweenness, S, P, sigma, s): 
300 
delta = dict.fromkeys(S, 0) 
301 
while S:

302 
w = S.pop() 
303 
coeff = (1.0 + delta[w]) / sigma[w]

304 
for v in P[w]: 
305 
c = sigma[v] * coeff 
306 
if (v, w) not in betweenness: 
307 
betweenness[(w, v)] += c 
308 
else:

309 
betweenness[(v, w)] += c 
310 
delta[v] += c 
311 
if w != s:

312 
betweenness[w] += delta[w] 
313 
return betweenness

314  
315  
316 
def _rescale(betweenness, n, normalized, directed=False, k=None): 
317 
if normalized is True: 
318 
if n <= 2: 
319 
scale = None # no normalization b=0 for all nodes 
320 
else:

321 
scale = 1.0 / ((n  1) * (n  2)) 
322 
else: # rescale by 2 for undirected graphs 
323 
if not directed: 
324 
scale = 1.0 / 2.0 
325 
else:

326 
scale = None

327 
if scale is not None: 
328 
print 'scale = %s' % scale 
329 
if k is not None: 
330 
scale = scale * n / k 
331 
for v in betweenness: 
332 
betweenness[v] *= scale 
333 
return betweenness

334  
335  
336 
def _rescale_e(betweenness, n, normalized, directed=False, k=None): 
337 
if normalized is True: 
338 
if n <= 1: 
339 
scale = None # no normalization b=0 for all nodes 
340 
else:

341 
scale = 1.0 / (n * (n  1)) 
342 
else: # rescale by 2 for undirected graphs 
343 
if not directed: 
344 
scale = 1.0 / 2.0 
345 
else:

346 
scale = None

347 
if scale is not None: 
348 
if k is not None: 
349 
scale = scale * n / k 
350 
for v in betweenness: 
351 
betweenness[v] *= scale 
352 
return betweenness
