ioftools / networkxMiCe / networkxmaster / networkx / algorithms / centrality / current_flow_betweenness.py @ 5cef0f13
History  View  Annotate  Download (13.3 KB)
1 
# Copyright (C) 20102019 by


2 
# Aric Hagberg <hagberg@lanl.gov>

3 
# Dan Schult <dschult@colgate.edu>

4 
# Pieter Swart <swart@lanl.gov>

5 
# All rights reserved.

6 
# BSD license.

7 
#

8 
# Author: Aric Hagberg (hagberg@lanl.gov)

9 
"""Currentflow betweenness centrality measures."""

10 
import networkx as nx 
11 
from networkx.algorithms.centrality.flow_matrix import * 
12 
from networkx.utils import (not_implemented_for, 
13 
reverse_cuthill_mckee_ordering, 
14 
py_random_state) 
15  
16 
__all__ = ['current_flow_betweenness_centrality',

17 
'approximate_current_flow_betweenness_centrality',

18 
'edge_current_flow_betweenness_centrality']

19  
20  
21 
@py_random_state(7) 
22 
@not_implemented_for('directed') 
23 
def approximate_current_flow_betweenness_centrality(G, normalized=True, 
24 
weight=None,

25 
dtype=float, solver='full', 
26 
epsilon=0.5, kmax=10000, 
27 
seed=None):

28 
r"""Compute the approximate currentflow betweenness centrality for nodes.

29 

30 
Approximates the currentflow betweenness centrality within absolute

31 
error of epsilon with high probability [1]_.

32 

33 

34 
Parameters

35 


36 
G : graph

37 
A NetworkX graph

38 

39 
normalized : bool, optional (default=True)

40 
If True the betweenness values are normalized by 2/[(n1)(n2)] where

41 
n is the number of nodes in G.

42 

43 
weight : string or None, optional (default=None)

44 
Key for edge data used as the edge weight.

45 
If None, then use 1 as each edge weight.

46 

47 
dtype : data type (float)

48 
Default data type for internal matrices.

49 
Set to np.float32 for lower memory consumption.

50 

51 
solver : string (default='lu')

52 
Type of linear solver to use for computing the flow matrix.

53 
Options are "full" (uses most memory), "lu" (recommended), and

54 
"cg" (uses least memory).

55 

56 
epsilon: float

57 
Absolute error tolerance.

58 

59 
kmax: int

60 
Maximum number of sample node pairs to use for approximation.

61 

62 
seed : integer, random_state, or None (default)

63 
Indicator of random number generation state.

64 
See :ref:`Randomness<randomness>`.

65 

66 
Returns

67 


68 
nodes : dictionary

69 
Dictionary of nodes with betweenness centrality as the value.

70 

71 
See Also

72 


73 
current_flow_betweenness_centrality

74 

75 
Notes

76 


77 
The running time is $O((1/\epsilon^2)m{\sqrt k} \log n)$

78 
and the space required is $O(m)$ for $n$ nodes and $m$ edges.

79 

80 
If the edges have a 'weight' attribute they will be used as

81 
weights in this algorithm. Unspecified weights are set to 1.

82 

83 
References

84 


85 
.. [1] Ulrik Brandes and Daniel Fleischer:

86 
Centrality Measures Based on Current Flow.

87 
Proc. 22nd Symp. Theoretical Aspects of Computer Science (STACS '05).

88 
LNCS 3404, pp. 533544. SpringerVerlag, 2005.

89 
http://algo.unikonstanz.de/publications/bfcmbcf05.pdf

90 
"""

91 
try:

92 
import numpy as np 
93 
except ImportError: 
94 
raise ImportError('current_flow_betweenness_centrality requires NumPy ', 
95 
'http://scipy.org/')

96 
try:

97 
from scipy import sparse 
98 
from scipy.sparse import linalg 
99 
except ImportError: 
100 
raise ImportError('current_flow_betweenness_centrality requires SciPy ', 
101 
'http://scipy.org/')

102 
if not nx.is_connected(G): 
103 
raise nx.NetworkXError("Graph not connected.") 
104 
solvername = {"full": FullInverseLaplacian,

105 
"lu": SuperLUInverseLaplacian,

106 
"cg": CGInverseLaplacian}

107 
n = G.number_of_nodes() 
108 
ordering = list(reverse_cuthill_mckee_ordering(G))

109 
# make a copy with integer labels according to rcm ordering

110 
# this could be done without a copy if we really wanted to

111 
H = nx.relabel_nodes(G, dict(zip(ordering, range(n)))) 
112 
L = laplacian_sparse_matrix(H, nodelist=range(n), weight=weight,

113 
dtype=dtype, format='csc')

114 
C = solvername[solver](L, dtype=dtype) # initialize solver

115 
betweenness = dict.fromkeys(H, 0.0) 
116 
nb = (n  1.0) * (n  2.0) # normalization factor 
117 
cstar = n * (n  1) / nb

118 
l = 1 # parameter in approximation, adjustable 
119 
k = l * int(np.ceil((cstar / epsilon)**2 * np.log(n))) 
120 
if k > kmax:

121 
msg = 'Number random pairs k>kmax (%d>%d) ' % (k, kmax)

122 
raise nx.NetworkXError(msg, 'Increase kmax or epsilon') 
123 
cstar2k = cstar / (2 * k)

124 
for i in range(k): 
125 
s, t = seed.sample(range(n), 2) 
126 
b = np.zeros(n, dtype=dtype) 
127 
b[s] = 1

128 
b[t] = 1

129 
p = C.solve(b) 
130 
for v in H: 
131 
if v == s or v == t: 
132 
continue

133 
for nbr in H[v]: 
134 
w = H[v][nbr].get(weight, 1.0)

135 
betweenness[v] += w * np.abs(p[v]  p[nbr]) * cstar2k 
136 
if normalized:

137 
factor = 1.0

138 
else:

139 
factor = nb / 2.0

140 
# remap to original node names and "unnormalize" if required

141 
return dict((ordering[k], float(v * factor)) for k, v in betweenness.items()) 
142  
143  
144 
@not_implemented_for('directed') 
145 
def current_flow_betweenness_centrality(G, normalized=True, weight=None, 
146 
dtype=float, solver='full'): 
147 
r"""Compute currentflow betweenness centrality for nodes.

148 

149 
Currentflow betweenness centrality uses an electrical current

150 
model for information spreading in contrast to betweenness

151 
centrality which uses shortest paths.

152 

153 
Currentflow betweenness centrality is also known as

154 
randomwalk betweenness centrality [2]_.

155 

156 
Parameters

157 


158 
G : graph

159 
A NetworkX graph

160 

161 
normalized : bool, optional (default=True)

162 
If True the betweenness values are normalized by 2/[(n1)(n2)] where

163 
n is the number of nodes in G.

164 

165 
weight : string or None, optional (default=None)

166 
Key for edge data used as the edge weight.

167 
If None, then use 1 as each edge weight.

168 

169 
dtype : data type (float)

170 
Default data type for internal matrices.

171 
Set to np.float32 for lower memory consumption.

172 

173 
solver : string (default='lu')

174 
Type of linear solver to use for computing the flow matrix.

175 
Options are "full" (uses most memory), "lu" (recommended), and

176 
"cg" (uses least memory).

177 

178 
Returns

179 


180 
nodes : dictionary

181 
Dictionary of nodes with betweenness centrality as the value.

182 

183 
See Also

184 


185 
approximate_current_flow_betweenness_centrality

186 
betweenness_centrality

187 
edge_betweenness_centrality

188 
edge_current_flow_betweenness_centrality

189 

190 
Notes

191 


192 
Currentflow betweenness can be computed in $O(I(n1)+mn \log n)$

193 
time [1]_, where $I(n1)$ is the time needed to compute the

194 
inverse Laplacian. For a full matrix this is $O(n^3)$ but using

195 
sparse methods you can achieve $O(nm{\sqrt k})$ where $k$ is the

196 
Laplacian matrix condition number.

197 

198 
The space required is $O(nw)$ where $w$ is the width of the sparse

199 
Laplacian matrix. Worse case is $w=n$ for $O(n^2)$.

200 

201 
If the edges have a 'weight' attribute they will be used as

202 
weights in this algorithm. Unspecified weights are set to 1.

203 

204 
References

205 


206 
.. [1] Centrality Measures Based on Current Flow.

207 
Ulrik Brandes and Daniel Fleischer,

208 
Proc. 22nd Symp. Theoretical Aspects of Computer Science (STACS '05).

209 
LNCS 3404, pp. 533544. SpringerVerlag, 2005.

210 
http://algo.unikonstanz.de/publications/bfcmbcf05.pdf

211 

212 
.. [2] A measure of betweenness centrality based on random walks,

213 
M. E. J. Newman, Social Networks 27, 3954 (2005).

214 
"""

215 
try:

216 
import numpy as np 
217 
except ImportError: 
218 
raise ImportError('current_flow_betweenness_centrality requires NumPy ', 
219 
'http://scipy.org/')

220 
try:

221 
import scipy 
222 
except ImportError: 
223 
raise ImportError('current_flow_betweenness_centrality requires SciPy ', 
224 
'http://scipy.org/')

225 
if not nx.is_connected(G): 
226 
raise nx.NetworkXError("Graph not connected.") 
227 
n = G.number_of_nodes() 
228 
ordering = list(reverse_cuthill_mckee_ordering(G))

229 
# make a copy with integer labels according to rcm ordering

230 
# this could be done without a copy if we really wanted to

231 
H = nx.relabel_nodes(G, dict(zip(ordering, range(n)))) 
232 
betweenness = dict.fromkeys(H, 0.0) # b[v]=0 for v in H 
233 
for row, (s, t) in flow_matrix_row(H, weight=weight, dtype=dtype, 
234 
solver=solver): 
235 
pos = dict(zip(row.argsort()[::1], range(n))) 
236 
for i in range(n): 
237 
betweenness[s] += (i  pos[i]) * row[i] 
238 
betweenness[t] += (n  i  1  pos[i]) * row[i]

239 
if normalized:

240 
nb = (n  1.0) * (n  2.0) # normalization factor 
241 
else:

242 
nb = 2.0

243 
for v in H: 
244 
betweenness[v] = float((betweenness[v]  v) * 2.0 / nb) 
245 
return dict((ordering[k], v) for k, v in betweenness.items()) 
246  
247  
248 
@not_implemented_for('directed') 
249 
def edge_current_flow_betweenness_centrality(G, normalized=True, 
250 
weight=None,

251 
dtype=float, solver='full'): 
252 
r"""Compute currentflow betweenness centrality for edges.

253 

254 
Currentflow betweenness centrality uses an electrical current

255 
model for information spreading in contrast to betweenness

256 
centrality which uses shortest paths.

257 

258 
Currentflow betweenness centrality is also known as

259 
randomwalk betweenness centrality [2]_.

260 

261 
Parameters

262 


263 
G : graph

264 
A NetworkX graph

265 

266 
normalized : bool, optional (default=True)

267 
If True the betweenness values are normalized by 2/[(n1)(n2)] where

268 
n is the number of nodes in G.

269 

270 
weight : string or None, optional (default=None)

271 
Key for edge data used as the edge weight.

272 
If None, then use 1 as each edge weight.

273 

274 
dtype : data type (default=float)

275 
Default data type for internal matrices.

276 
Set to np.float32 for lower memory consumption.

277 

278 
solver : string (default='lu')

279 
Type of linear solver to use for computing the flow matrix.

280 
Options are "full" (uses most memory), "lu" (recommended), and

281 
"cg" (uses least memory).

282 

283 
Returns

284 


285 
nodes : dictionary

286 
Dictionary of edge tuples with betweenness centrality as the value.

287 

288 
Raises

289 


290 
NetworkXError

291 
The algorithm does not support DiGraphs.

292 
If the input graph is an instance of DiGraph class, NetworkXError

293 
is raised.

294 

295 
See Also

296 


297 
betweenness_centrality

298 
edge_betweenness_centrality

299 
current_flow_betweenness_centrality

300 

301 
Notes

302 


303 
Currentflow betweenness can be computed in $O(I(n1)+mn \log n)$

304 
time [1]_, where $I(n1)$ is the time needed to compute the

305 
inverse Laplacian. For a full matrix this is $O(n^3)$ but using

306 
sparse methods you can achieve $O(nm{\sqrt k})$ where $k$ is the

307 
Laplacian matrix condition number.

308 

309 
The space required is $O(nw)$ where $w$ is the width of the sparse

310 
Laplacian matrix. Worse case is $w=n$ for $O(n^2)$.

311 

312 
If the edges have a 'weight' attribute they will be used as

313 
weights in this algorithm. Unspecified weights are set to 1.

314 

315 
References

316 


317 
.. [1] Centrality Measures Based on Current Flow.

318 
Ulrik Brandes and Daniel Fleischer,

319 
Proc. 22nd Symp. Theoretical Aspects of Computer Science (STACS '05).

320 
LNCS 3404, pp. 533544. SpringerVerlag, 2005.

321 
http://algo.unikonstanz.de/publications/bfcmbcf05.pdf

322 

323 
.. [2] A measure of betweenness centrality based on random walks,

324 
M. E. J. Newman, Social Networks 27, 3954 (2005).

325 
"""

326 
from networkx.utils import reverse_cuthill_mckee_ordering 
327 
try:

328 
import numpy as np 
329 
except ImportError: 
330 
raise ImportError('current_flow_betweenness_centrality requires NumPy ', 
331 
'http://scipy.org/')

332 
try:

333 
import scipy 
334 
except ImportError: 
335 
raise ImportError('current_flow_betweenness_centrality requires SciPy ', 
336 
'http://scipy.org/')

337 
if not nx.is_connected(G): 
338 
raise nx.NetworkXError("Graph not connected.") 
339 
n = G.number_of_nodes() 
340 
ordering = list(reverse_cuthill_mckee_ordering(G))

341 
# make a copy with integer labels according to rcm ordering

342 
# this could be done without a copy if we really wanted to

343 
H = nx.relabel_nodes(G, dict(zip(ordering, range(n)))) 
344 
edges = (tuple(sorted((u, v))) for u, v in H.edges()) 
345 
betweenness = dict.fromkeys(edges, 0.0) 
346 
if normalized:

347 
nb = (n  1.0) * (n  2.0) # normalization factor 
348 
else:

349 
nb = 2.0

350 
for row, (e) in flow_matrix_row(H, weight=weight, dtype=dtype, 
351 
solver=solver): 
352 
pos = dict(zip(row.argsort()[::1], range(1, n + 1))) 
353 
for i in range(n): 
354 
betweenness[e] += (i + 1  pos[i]) * row[i]

355 
betweenness[e] += (n  i  pos[i]) * row[i] 
356 
betweenness[e] /= nb 
357 
return dict(((ordering[s], ordering[t]), float(v)) 
358 
for (s, t), v in betweenness.items()) 
359  
360  
361 
# fixture for nose tests

362 
def setup_module(module): 
363 
from nose import SkipTest 
364 
try:

365 
import numpy 
366 
import scipy 
367 
except:

368 
raise SkipTest("NumPy not available") 