# coding=utf8
"""
Betweenness centrality measures.
WITH MODIFICATION by quynhxq - Dec 8, 2015.
This version also include the traffic matrix
"""
# Copyright (C) 2004-2015 by
# Aric Hagberg
# Dan Schult
# Pieter Swart
# All rights reserved.
# BSD license.
from heapq import heappush, heappop
from itertools import count
import networkx as nx
import random
__author__ = """Aric Hagberg (hagberg@lanl.gov)"""
__all__ = ['betweenness_centrality',
'edge_betweenness_centrality',
'edge_betweenness']
def weight_betweenness_centrality(G, traffic_matrix=None, cut_without=0, k=None, normalized=True, weight=None, rescale=False,
seed=None):
r"""Compute the shortest-path betweenness centrality for nodes.
Betweenness centrality of a node `v` is the sum of the
fraction of all-pairs shortest paths that pass through `v`:
.. math::
c_B(v) =\sum_{s,t \in V} \frac{\sigma(s, t|v)}{\sigma(s, t)}
where `V` is the set of nodes, `\sigma(s, t)` is the number of
shortest `(s, t)`-paths, and `\sigma(s, t|v)` is the number of those
paths passing through some node `v` other than `s, t`.
If `s = t`, `\sigma(s, t) = 1`, and if `v \in {s, t}`,
`\sigma(s, t|v) = 0` [2]_.
Parameters
----------
G : graph
A NetworkX graph
h :
k : int, optional (default=None)
If k is not None use k node samples to estimate betweenness.
The value of k <= n where n is the number of nodes in the graph.
Higher values give better approximation.
normalized : bool, optional
If True the betweenness values are normalized by `2/((n-1)(n-2))`
for graphs, and `1/((n-1)(n-2))` for directed graphs where `n`
is the number of nodes in G.
weight : None or string, optional
If None, all edge weights are considered equal.
Otherwise holds the name of the edge attribute used as weight.
endpoints : bool, optional
If True include the endpoints in the shortest path counts.
Returns
-------
nodes : dictionary
Dictionary of nodes with betweenness centrality as the value.
See Also
--------
edge_betweenness_centrality
load_centrality
Notes
-----
The algorithm is from Rami Puzis [1]_.
See [4]_ for the original first published version and [2]_ for details on
algorithms for variations and related metrics.
For the faster way to calculate BC using combinatorial path counting, see [3]_
For weighted graphs the edge weights must be greater than zero.
Zero edge weights can produce an infinite number of equal length
paths between pairs of nodes.
Check out [5]_ for the details on how to implement the _accumulate_endpoints
References
----------
.. [1] Rami Puzis:
.. [2] Ulrik Brandes:
On Variants of Shortest-Path Betweenness
Centrality and their Generic Computation.
Social Networks 30(2):136-145, 2008.
http://www.inf.uni-konstanz.de/algo/publications/b-vspbc-08.pdf
.. [3] Ulrik Brandes and Christian Pich:
A Faster Algorithm for Betweenness Centrality.
Journal of Mathematical Sociology 25(2):163-177, 2001.
http://www.inf.uni-konstanz.de/algo/publications/b-fabc-01.pdf
.. [4] Linton C. Freeman:
A set of measures of centrality based on betweenness.
Sociometry 40: 35–41, 1977
http://moreno.ss.uci.edu/23.pdf
[5] Rami Puzis
Optimization of NIDS Placement for Protection of Intercommunicating Critical Infrastructures
"""
betweenness = dict.fromkeys(G, 0.0) # b[v]=0 for v in G
if k is None:
nodes = G
else:
random.seed(seed)
nodes = random.sample(G.nodes(), k)
vertices = sorted(G.nodes())
for s in nodes:
if weight is None: # use BFS
S, P, sigma = _single_source_shortest_path_basic(G, s)
else: # use Dijkstra's algorithm
S, P, sigma = _single_source_dijkstra_path_basic(G, s, weight)
# accumulation
if traffic_matrix:
betweenness = _accumulate_weight(betweenness, S, P, sigma, s, traffic_matrix, cut_without, vertices)
else:
betweenness = _accumulate_stress(betweenness, S, P, sigma, s)
if rescale:
betweenness = _rescale(betweenness, len(G),
normalized=normalized,
directed=G.is_directed(),
k=k)
# Dec 9, 2015 - try to remove rescaling
# rescaling
# betweenness = _rescale(betweenness, len(G),
# normalized=normalized,
# directed=G.is_directed(),
# k=k)
return betweenness
def _single_source_shortest_path_basic(G, s):
S = []
P = {}
for v in G:
P[v] = []
sigma = dict.fromkeys(G, 0.0) # sigma[v]=0 for v in G
D = {}
sigma[s] = 1.0
D[s] = 0
Q = [s]
while Q: # use BFS to find shortest paths
# print "sigma = %s" % sigma
v = Q.pop(0)
S.append(v)
Dv = D[v]
sigmav = sigma[v]
for w in G[v]:
if w not in D:
Q.append(w)
D[w] = Dv + 1
if D[w] == Dv + 1: # this is a shortest path, count paths
sigma[w] += sigmav
P[w].append(v) # predecessors
return S, P, sigma
def _single_source_dijkstra_path_basic(G, s, weight='weight'):
# modified from Eppstein
S = []
P = {}
for v in G:
P[v] = []
sigma = dict.fromkeys(G, 0.0) # sigma[v]=0 for v in G
D = {}
sigma[s] = 1.0
push = heappush
pop = heappop
seen = {s: 0}
c = count()
Q = [] # use Q as heap with (distance,node id) tuples
push(Q, (0, next(c), s, s))
while Q:
(dist, _, pred, v) = pop(Q)
if v in D:
continue # already searched this node.
sigma[v] += sigma[pred] # count paths
S.append(v)
D[v] = dist
for w, edgedata in G[v].items():
vw_dist = dist + edgedata.get(weight, 1)
if w not in D and (w not in seen or vw_dist < seen[w]):
seen[w] = vw_dist
push(Q, (vw_dist, next(c), v, w))
sigma[w] = 0.0
P[w] = [v]
elif vw_dist == seen[w]: # handle equal paths
sigma[w] += sigma[v]
P[w].append(v)
return S, P, sigma
def _get_value_from_traffic_matrix(nodes, traffic_matrix, x, y):
x_pos = nodes.index(x)
y_pos = nodes.index(y)
try:
return traffic_matrix[x_pos][y_pos]
except:
print "ERROR in get_value_from_traffic_matrix"
return -1
def _accumulate_stress(betweenness,S,P,sigma,s):
delta = dict.fromkeys(S,0)
while S:
w = S.pop()
for v in P[w]:
delta[v] += (1.0+delta[w])
if w != s:
betweenness[w] += sigma[w]*delta[w]
return betweenness
def _accumulate_weight_basic(betweenness, S, P, sigma, s):
r"""Accumlate the BC score.
Implements the Algorithm 1 in [1]_ (line 15-21)
In this one, the traffic_matrix is not provided. But we can deduce the value:
h(s,v) = 1 if s != v
h(s, v) = 0 if s == v
.. [1] Rami Puzis
Optimization of NIDS Placement for Protection of Intercommunicating Critical Infrastructures
"""
delta = dict.fromkeys(S, 0)
while S:
w = S.pop()
# if w != s:
delta[w] += 1 # this is the h, when w != s, then h(s, w) = 1
coeff = delta[w] / sigma[w]
for v in P[w]:
delta[v] += sigma[v] * coeff
if w != s:
betweenness[w] += delta[w]
return betweenness
def _accumulate_weight(betweenness, S, P, sigma, s, traffic_matrix, cut_without, nodes):
r"""Accumlate the BC score.
Implements the Algorithm 1 in [1]_ (line 15-21)
.. [1] Rami Puzis
Optimization of NIDS Placement for Protection of Intercommunicating Critical Infrastructures
"""
delta = dict.fromkeys(S, 0)
while S:
w = S.pop()
h = _get_value_from_traffic_matrix(nodes, traffic_matrix, s, w)
betweenness[s] += h
delta[w] += h
coeff = delta[w] / sigma[w]
for v in P[w]:
delta[v] += sigma[v] * coeff
if w != s:
betweenness[w] += delta[w]
return betweenness
def _accumulate_edges(betweenness, S, P, sigma, s):
delta = dict.fromkeys(S, 0)
while S:
w = S.pop()
coeff = (1.0 + delta[w]) / sigma[w]
for v in P[w]:
c = sigma[v] * coeff
if (v, w) not in betweenness:
betweenness[(w, v)] += c
else:
betweenness[(v, w)] += c
delta[v] += c
if w != s:
betweenness[w] += delta[w]
return betweenness
def _rescale(betweenness, n, normalized, directed=False, k=None):
if normalized is True:
if n <= 2:
scale = None # no normalization b=0 for all nodes
else:
scale = 1.0 / ((n - 1) * (n - 2))
else: # rescale by 2 for undirected graphs
if not directed:
scale = 1.0 / 2.0
else:
scale = None
if scale is not None:
print 'scale = %s' % scale
if k is not None:
scale = scale * n / k
for v in betweenness:
betweenness[v] *= scale
return betweenness
def _rescale_e(betweenness, n, normalized, directed=False, k=None):
if normalized is True:
if n <= 1:
scale = None # no normalization b=0 for all nodes
else:
scale = 1.0 / (n * (n - 1))
else: # rescale by 2 for undirected graphs
if not directed:
scale = 1.0 / 2.0
else:
scale = None
if scale is not None:
if k is not None:
scale = scale * n / k
for v in betweenness:
betweenness[v] *= scale
return betweenness