root / fiddle / heuristicbetweennesscentrality / betweenness_centrality.py @ 82c7c698
History  View  Annotate  Download (9.93 KB)
1 
# coding=utf8


2 
"""

3 
Betweenness centrality measures.

4 
WITH MODIFICATION by quynhxq  Dec 8, 2015.

5 
This version also includes the traffic matrix.

6 
"""

7 
# Copyright (C) 2004-2015 by

8 
# Aric Hagberg <hagberg@lanl.gov>

9 
# Dan Schult <dschult@colgate.edu>

10 
# Pieter Swart <swart@lanl.gov>

11 
# All rights reserved.

12 
# BSD license.

13 
from heapq import heappush, heappop 
14 
from itertools import count 
15 
import networkx as nx 
16 
import random 
17 
__author__ = """Aric Hagberg (hagberg@lanl.gov)"""

18  
19 
__all__ = ['betweenness_centrality',

20 
'edge_betweenness_centrality',

21 
'edge_betweenness']

22  
23  
24 
def weight_betweenness_centrality(G, traffic_matrix=None, k=None, normalized=True, weight=None, rescale=False,
                                  seed=None):
    r"""Compute shortest-path betweenness centrality, optionally weighted by a
    traffic matrix.

    Betweenness centrality of a node `v` is the sum over all pairs `(s, t)`
    of the fraction of shortest `(s, t)`-paths that pass through `v`
    (Brandes' accumulation, extended per Puzis' NIDS-placement paper to
    weight each pair by the traffic demand h(s, t)).

    Parameters
    ----------
    G : graph
        A NetworkX graph.
    traffic_matrix : 2-D sequence, optional
        Pairwise traffic demands, indexed by the sorted node list.  When
        omitted, unit demand h(s, v) = 1 (s != v) is assumed.
    k : int, optional (default=None)
        If not None, use k sampled source nodes to estimate betweenness
        (k <= number of nodes; higher k gives a better approximation).
    normalized : bool, optional
        Passed through to the rescaling step when ``rescale`` is True.
    weight : None or string, optional
        If None, all edges count equally (BFS); otherwise the name of the
        edge attribute used as weight (Dijkstra).  Weights must be > 0.
    rescale : bool, optional
        If True, rescale the raw scores via ``_rescale``.
    seed : hashable, optional
        Seed for the random sampling of sources when ``k`` is given.

    Returns
    -------
    dict
        Node -> betweenness-centrality score.

    See Also
    --------
    edge_betweenness_centrality, load_centrality
    """
    bc = dict.fromkeys(G, 0.0)  # accumulated score per node
    if k is not None:
        random.seed(seed)
        sources = random.sample(G.nodes(), k)
    else:
        sources = G
    # sorted node list indexes rows/columns of the traffic matrix
    ordering = sorted(G.nodes())

    for src in sources:
        # single-source shortest paths: BFS when unweighted, else Dijkstra
        if weight is None:
            stack, preds, num_paths = _single_source_shortest_path_basic(G, src)
        else:
            stack, preds, num_paths = _single_source_dijkstra_path_basic(G, src, weight)

        # accumulation: explicit demands when a traffic matrix is supplied
        if traffic_matrix:
            bc = _accumulate_weight(bc, stack, preds, num_paths, src, traffic_matrix, ordering)
        else:
            bc = _accumulate_weight_basic(bc, stack, preds, num_paths, src)

    if rescale:
        bc = _rescale(bc, len(G),
                      normalized=normalized,
                      directed=G.is_directed(),
                      k=k)
    return bc

142  
143  
144 
def _single_source_shortest_path_basic(G, s): 
145 
S = [] 
146 
P = {} 
147 
for v in G: 
148 
P[v] = [] 
149 
sigma = dict.fromkeys(G, 0.0) # sigma[v]=0 for v in G 
150 
D = {} 
151 
sigma[s] = 1.0

152 
D[s] = 0

153 
Q = [s] 
154 
while Q: # use BFS to find shortest paths 
155 
# print "sigma = %s" % sigma

156 
v = Q.pop(0)

157 
S.append(v) 
158 
Dv = D[v] 
159 
sigmav = sigma[v] 
160 
for w in G[v]: 
161 
if w not in D: 
162 
Q.append(w) 
163 
D[w] = Dv + 1

164 
if D[w] == Dv + 1: # this is a shortest path, count paths 
165 
sigma[w] += sigmav 
166 
P[w].append(v) # predecessors

167 
return S, P, sigma

168  
169  
170 
def _single_source_dijkstra_path_basic(G, s, weight='weight'): 
171 
# modified from Eppstein

172 
S = [] 
173 
P = {} 
174 
for v in G: 
175 
P[v] = [] 
176 
sigma = dict.fromkeys(G, 0.0) # sigma[v]=0 for v in G 
177 
D = {} 
178 
sigma[s] = 1.0

179 
push = heappush 
180 
pop = heappop 
181 
seen = {s: 0}

182 
c = count() 
183 
Q = [] # use Q as heap with (distance,node id) tuples

184 
push(Q, (0, next(c), s, s)) 
185 
while Q:

186 
(dist, _, pred, v) = pop(Q) 
187 
if v in D: 
188 
continue # already searched this node. 
189 
sigma[v] += sigma[pred] # count paths

190 
S.append(v) 
191 
D[v] = dist 
192 
for w, edgedata in G[v].items(): 
193 
vw_dist = dist + edgedata.get(weight, 1)

194 
if w not in D and (w not in seen or vw_dist < seen[w]): 
195 
seen[w] = vw_dist 
196 
push(Q, (vw_dist, next(c), v, w))

197 
sigma[w] = 0.0

198 
P[w] = [v] 
199 
elif vw_dist == seen[w]: # handle equal paths 
200 
sigma[w] += sigma[v] 
201 
P[w].append(v) 
202 
return S, P, sigma

203  
204  
205 
def _get_value_from_traffic_matrix(nodes, traffic_matrix, x, y): 
206 
x_pos = nodes.index(x) 
207 
y_pos = nodes.index(y) 
208 
try:

209 
return traffic_matrix[x_pos][y_pos]

210 
except:

211 
print "ERROR in get_value_from_traffic_matrix" 
212 
return 1 
213  
214 
def _accumulate_basic(betweenness, S, P, sigma, s, traffic_matrix, nodes):
    """Accumulate pair dependencies from source ``s`` into ``betweenness``.

    Variant of Brandes' back-propagation that adds the traffic demand
    h(s, v) for each predecessor edge.  ``S`` (the settle order) is
    consumed by popping it empty.

    Returns the updated ``betweenness`` dict, consistent with every other
    ``_accumulate_*`` helper in this module (the return was missing, which
    would hand ``None`` to a caller using the
    ``betweenness = _accumulate_basic(...)`` pattern).
    """
    delta = dict.fromkeys(S, 0)
    while S:
        w = S.pop()
        coeff = (1.0 + delta[w]) / sigma[w]
        for v in P[w]:
            # NOTE(review): h is added per predecessor edge here, unlike
            # _accumulate_weight which folds h(s, w) into delta[w] before
            # dividing by sigma[w] — confirm this variant is intended.
            h = _get_value_from_traffic_matrix(nodes, traffic_matrix, s, v)
            delta[v] += sigma[v] * coeff + h
        if w != s:
            betweenness[w] += delta[w]
    return betweenness  # fix: was missing a return statement
224  
225  
226 
def _accumulate_weight_basic(betweenness, S, P, sigma, s): 
227 
r"""Accumlate the BC score.

228 

229 
Implements the Algorithm 1 in [1]_ (line 1521)

230 

231 
In this one, the traffic_matrix is not provided. But we can deduce the value:

232 
h(s,v) = 1 if s != v

233 
h(s, v) = 0 if s == v

234 

235 
.. [1] Rami Puzis

236 
Optimization of NIDS Placement for Protection of Intercommunicating Critical Infrastructures

237 
"""

238 
delta = dict.fromkeys(S, 0) 
239 
while S:

240 
w = S.pop() 
241 
if w != s:

242 
delta[w] += 1 # this is the h, when w != s, then h(s, w) = 1 
243 
coeff = delta[w] / sigma[w] 
244 
for v in P[w]: 
245 
delta[v] += sigma[v] * coeff 
246 
if w != s:

247 
betweenness[w] += delta[w] 
248 
return betweenness

249  
250 
def _accumulate_weight(betweenness, S, P, sigma, s, traffic_matrix, nodes):
    r"""Accumulate the BC score using explicit traffic-matrix demands.

    Implements Algorithm 1 (lines 15-21) of Puzis,
    "Optimization of NIDS Placement for Protection of Intercommunicating
    Critical Infrastructures".

    ``nodes`` is the sorted node list that indexes ``traffic_matrix``.
    ``S`` is consumed; the updated ``betweenness`` dict is returned.
    """
    delta = dict.fromkeys(S, 0)
    while S:
        w = S.pop()                 # farthest unprocessed node first
        # fold the demand h(s, w) into w's dependency before sharing it out
        delta[w] += _get_value_from_traffic_matrix(nodes, traffic_matrix, s, w)
        share = delta[w] / sigma[w]
        for pred in P[w]:
            delta[pred] += sigma[pred] * share
        if w != s:
            betweenness[w] += delta[w]
    return betweenness

270  
271  
272 
def _accumulate_edges(betweenness, S, P, sigma, s): 
273 
delta = dict.fromkeys(S, 0) 
274 
while S:

275 
w = S.pop() 
276 
coeff = (1.0 + delta[w]) / sigma[w]

277 
for v in P[w]: 
278 
c = sigma[v] * coeff 
279 
if (v, w) not in betweenness: 
280 
betweenness[(w, v)] += c 
281 
else:

282 
betweenness[(v, w)] += c 
283 
delta[v] += c 
284 
if w != s:

285 
betweenness[w] += delta[w] 
286 
return betweenness

287  
288  
289 
def _rescale(betweenness, n, normalized, directed=False, k=None): 
290 
if normalized is True: 
291 
if n <= 2: 
292 
scale = None # no normalization b=0 for all nodes 
293 
else:

294 
scale = 1.0 / ((n  1) * (n  2)) 
295 
else: # rescale by 2 for undirected graphs 
296 
if not directed: 
297 
scale = 1.0 / 2.0 
298 
else:

299 
scale = None

300 
if scale is not None: 
301 
print 'scale = %s' % scale 
302 
if k is not None: 
303 
scale = scale * n / k 
304 
for v in betweenness: 
305 
betweenness[v] *= scale 
306 
return betweenness

307  
308  
309 
def _rescale_e(betweenness, n, normalized, directed=False, k=None): 
310 
if normalized is True: 
311 
if n <= 1: 
312 
scale = None # no normalization b=0 for all nodes 
313 
else:

314 
scale = 1.0 / (n * (n  1)) 
315 
else: # rescale by 2 for undirected graphs 
316 
if not directed: 
317 
scale = 1.0 / 2.0 
318 
else:

319 
scale = None

320 
if scale is not None: 
321 
if k is not None: 
322 
scale = scale * n / k 
323 
for v in betweenness: 
324 
betweenness[v] *= scale 
325 
return betweenness
