Revision 3c6ce57c fiddle/heuristic-betweenness-centrality/betweenness_centrality.py

View differences:

fiddle/heuristic-betweenness-centrality/betweenness_centrality.py
21 21
           'edge_betweenness']
22 22

  
23 23

  
24
def betweenness_centrality(G, traffic_matrix, k=None, normalized=True, weight=None,
25
                           endpoints=False,
24
def weight_betweenness_centrality(G, traffic_matrix=None, k=None, normalized=True, weight=None, rescale=False,
26 25
                           seed=None):
27 26
    r"""Compute the shortest-path betweenness centrality for nodes.
28 27

  
......
74 73

  
75 74
    Notes
76 75
    -----
77
    The algorithm is from Ulrik Brandes [1]_.
76
    The algorithm is from Rami Puzis [1]_.
78 77
    See [4]_ for the original first published version and [2]_ for details on
79 78
    algorithms for variations and related metrics.
80 79

  
81
    For approximate betweenness calculations set k=#samples to use
82
    k nodes ("pivots") to estimate the betweenness values. For an estimate
83
    of the number of pivots needed see [3]_.
80
    For the faster way to calculate BC using combinatorial path counting, see [3]_
84 81

  
85 82
    For weighted graphs the edge weights must be greater than zero.
86 83
    Zero edge weights can produce an infinite number of equal length
87 84
    paths between pairs of nodes.
88 85

  
86
    Check out [5]_ for the details on how to implement the _accumulate_endpoints
87

  
89 88
    References
90 89
    ----------
91
    .. [1] Ulrik Brandes:
92
       A Faster Algorithm for Betweenness Centrality.
93
       Journal of Mathematical Sociology 25(2):163-177, 2001.
94
       http://www.inf.uni-konstanz.de/algo/publications/b-fabc-01.pdf
90
    .. [1] Rami Puzis:
91

  
95 92
    .. [2] Ulrik Brandes:
96 93
       On Variants of Shortest-Path Betweenness
97 94
       Centrality and their Generic Computation.
98 95
       Social Networks 30(2):136-145, 2008.
99 96
       http://www.inf.uni-konstanz.de/algo/publications/b-vspbc-08.pdf
100 97
    .. [3] Ulrik Brandes and Christian Pich:
101
       Centrality Estimation in Large Networks.
102
       International Journal of Bifurcation and Chaos 17(7):2303-2318, 2007.
103
       http://www.inf.uni-konstanz.de/algo/publications/bp-celn-06.pdf
98
       A Faster Algorithm for Betweenness Centrality.
99
       Journal of Mathematical Sociology 25(2):163-177, 2001.
100
       http://www.inf.uni-konstanz.de/algo/publications/b-fabc-01.pdf
104 101
    .. [4] Linton C. Freeman:
105 102
       A set of measures of centrality based on betweenness.
106 103
       Sociometry 40: 35–41, 1977
107 104
       http://moreno.ss.uci.edu/23.pdf
108 105

  
106
       [5] Rami Puzis
107
       Optimization of NIDS Placement for Protection of Intercommunicating Critical Infrastructures
108

  
109 109
    """
110 110
    betweenness = dict.fromkeys(G, 0.0)  # b[v]=0 for v in G
111 111
    if k is None:
......
115 115
        nodes = random.sample(G.nodes(), k)
116 116
    vertices = G.nodes()
117 117

  
118
    print 'nodes = %s' % nodes
119
    print 'vertices = %s' % vertices
118
    # print 'nodes = %s' % nodes
119
    # print 'vertices = %s' % vertices
120 120
    for s in nodes:
121 121
        # single source shortest paths
122
        # print '\nxxx _single_source_shortest_path'
122 123
        if weight is None:  # use BFS
123 124
            S, P, sigma = _single_source_shortest_path_basic(G, s)
124 125
        else:  # use Dijkstra's algorithm
125 126
            S, P, sigma = _single_source_dijkstra_path_basic(G, s, weight)
126 127

  
127
        print '\nxxx _single_source_shortest_path'
128
        print s
129
        print S
130
        print P
131
        print sigma
128
        # print s
129
        # print S
130
        # print P
131
        # print sigma
132 132
        # accumulation
133
        if endpoints:
134
            betweenness = _accumulate_endpoints(betweenness, S, P, sigma, s, traffic_matrix, vertices)
133
        if traffic_matrix:
134
            betweenness = _accumulate_weight(betweenness, S, P, sigma, s, traffic_matrix, vertices)
135 135
        else:
136
            betweenness = _accumulate_basic(betweenness, S, P, sigma, s, traffic_matrix, vertices)
136
            betweenness = _accumulate_weight_basic(betweenness, S, P, sigma, s)
137

  
138
    print 'betweenness = %s' % betweenness
139
    if rescale:
140
        betweenness = _rescale(betweenness, len(G),
141
                   normalized=normalized,
142
                   directed=G.is_directed(),
143
                   k=k)
137 144
    # Dec 9, 2015 - try to remove rescaling
138 145
    # rescaling
139 146
    # betweenness = _rescale(betweenness, len(G),
......
143 150
    return betweenness
144 151

  
145 152

  
146
def edge_betweenness_centrality(G, k=None, normalized=True, weight=None,
147
                                seed=None):
148
    r"""Compute betweenness centrality for edges.
149

  
150
    Betweenness centrality of an edge `e` is the sum of the
151
    fraction of all-pairs shortest paths that pass through `e`:
152

  
153
    .. math::
154

  
155
       c_B(e) =\sum_{s,t \in V} \frac{\sigma(s, t|e)}{\sigma(s, t)}
156

  
157
    where `V` is the set of nodes,`\sigma(s, t)` is the number of
158
    shortest `(s, t)`-paths, and `\sigma(s, t|e)` is the number of
159
    those paths passing through edge `e` [2]_.
160

  
161
    Parameters
162
    ----------
163
    G : graph
164
      A NetworkX graph
165

  
166
    k : int, optional (default=None)
167
      If k is not None use k node samples to estimate betweenness.
168
      The value of k <= n where n is the number of nodes in the graph.
169
      Higher values give better approximation.
170

  
171
    normalized : bool, optional
172
      If True the betweenness values are normalized by `2/(n(n-1))`
173
      for graphs, and `1/(n(n-1))` for directed graphs where `n`
174
      is the number of nodes in G.
175

  
176
    weight : None or string, optional
177
      If None, all edge weights are considered equal.
178
      Otherwise holds the name of the edge attribute used as weight.
179

  
180
    Returns
181
    -------
182
    edges : dictionary
183
       Dictionary of edges with betweenness centrality as the value.
184

  
185
    See Also
186
    --------
187
    betweenness_centrality
188
    edge_load
189

  
190
    Notes
191
    -----
192
    The algorithm is from Ulrik Brandes [1]_.
193

  
194
    For weighted graphs the edge weights must be greater than zero.
195
    Zero edge weights can produce an infinite number of equal length
196
    paths between pairs of nodes.
197

  
198
    References
199
    ----------
200
    .. [1]  A Faster Algorithm for Betweenness Centrality. Ulrik Brandes,
201
       Journal of Mathematical Sociology 25(2):163-177, 2001.
202
       http://www.inf.uni-konstanz.de/algo/publications/b-fabc-01.pdf
203
    .. [2] Ulrik Brandes: On Variants of Shortest-Path Betweenness
204
       Centrality and their Generic Computation.
205
       Social Networks 30(2):136-145, 2008.
206
       http://www.inf.uni-konstanz.de/algo/publications/b-vspbc-08.pdf
207
    """
208
    betweenness = dict.fromkeys(G, 0.0)  # b[v]=0 for v in G
209
    # b[e]=0 for e in G.edges()
210
    betweenness.update(dict.fromkeys(G.edges(), 0.0))
211
    if k is None:
212
        nodes = G
213
    else:
214
        random.seed(seed)
215
        nodes = random.sample(G.nodes(), k)
216
    for s in nodes:
217
        # single source shortest paths
218
        if weight is None:  # use BFS
219
            S, P, sigma = _single_source_shortest_path_basic(G, s)
220
        else:  # use Dijkstra's algorithm
221
            S, P, sigma = _single_source_dijkstra_path_basic(G, s, weight)
222
        # accumulation
223
        betweenness = _accumulate_edges(betweenness, S, P, sigma, s)
224

  
225
    # rescaling
226
    for n in G:  # remove nodes to only return edges
227
        del betweenness[n]
228

  
229
    betweenness = _rescale_e(betweenness, len(G),
230
                             normalized=normalized,
231
                             directed=G.is_directed())
232
    return betweenness
233

  
234
# obsolete name
235

  
236

  
237
def edge_betweenness(G, k=None, normalized=True, weight=None, seed=None):
238
    return edge_betweenness_centrality(G, k, normalized, weight, seed)
239

  
240

  
241
# helpers for betweenness centrality
242

  
243 153
def _single_source_shortest_path_basic(G, s):
244 154
    S = []
245 155
    P = {}
......
251 161
    D[s] = 0
252 162
    Q = [s]
253 163
    while Q:   # use BFS to find shortest paths
164
        # print "sigma = %s" % sigma
254 165
        v = Q.pop(0)
255 166
        S.append(v)
256 167
        Dv = D[v]
......
274 185
    sigma = dict.fromkeys(G, 0.0)    # sigma[v]=0 for v in G
275 186
    D = {}
276 187
    sigma[s] = 1.0
188
    print sigma
277 189
    push = heappush
278 190
    pop = heappop
279 191
    seen = {s: 0}
......
325 237
        if w != s:
326 238
            # print h
327 239
            # betweenness[w] += delta[w] * h
328
            betweenness[w] += delta[w] * 1
240
            betweenness[w] += delta[w]
329 241
            print 'betweenness = %s' % betweenness
330 242
    return betweenness
331 243

  
332 244

  
333
def _accumulate_endpoints(betweenness, S, P, sigma, s, traffic_matrix, nodes):
334
    betweenness[s] += len(S) - 1
245
def _accumulate_weight_basic(betweenness, S, P, sigma, s):
246
    r"""Accumlate the BC score.
247

  
248
    Implements the Algorithm 1 in [1]_ (line 15-21)
249

  
250
    In this one, the traffic_matrix is not provided. But we can deduce the value:
251
        h(s,v) = 1   if s != v
252
        h(s, v) = 0  if s == v
253

  
254
    .. [1] Rami Puzis
255
       Optimization of NIDS Placement for Protection of Intercommunicating Critical Infrastructures
256
    """
257
    # betweenness[s] += len(S) - 1
335 258
    delta = dict.fromkeys(S, 0)
336 259
    while S:
337 260
        w = S.pop()
338
        coeff = (1.0 + delta[w]) / sigma[w]
339
        # coeff = (delta[w]) / sigma[w]
340 261
        if w != s:
341
            h = _get_value_from_traffic_matrix(nodes, traffic_matrix, w, s)
342
            # h = 1
343
            # betweenness[w] += (delta[w] + 1) + h
344
            betweenness[w] += delta[w] + h
345
            print 'betweenness = %s' % betweenness
262
            delta[w] += 1
263
        coeff = delta[w] / sigma[w]
264
        for v in P[w]:
265
            delta[v] += sigma[v] * coeff
266
        if w != s:
267
            betweenness[w] += delta[w]
268
            # print 'out betweenness = %s' % betweenness
269
    return betweenness
270

  
271
def _accumulate_weight(betweenness, S, P, sigma, s, traffic_matrix, nodes):
272
    r"""Accumlate the BC score.
273

  
274
    Implements the Algorithm 1 in [1]_ (line 15-21)
275

  
276
    .. [1] Rami Puzis
277
       Optimization of NIDS Placement for Protection of Intercommunicating Critical Infrastructures
278
    """
279
    # betweenness[s] += len(S) - 1
280
    delta = dict.fromkeys(S, 0)
281
    while S:
282
        # print 'in betweenness = %s' % betweenness
283
        w = S.pop()
284
        h = _get_value_from_traffic_matrix(nodes, traffic_matrix, s, w)
285
        delta[w] += h
286
        coeff = delta[w] / sigma[w]
287
        # print "  coeff = %s" % coeff
346 288
        for v in P[w]:
289
            # print "  v = %s" % v
290
            # print "    h = %s" % h
347 291
            delta[v] += sigma[v] * coeff
292
            # print '  delta = %s' % delta
293
        if w != s:
294
            betweenness[w] += delta[w]
295
            # print 'betweenness = %s' % betweenness
348 296
    return betweenness
349 297

  
350 298

  
......
377 325
        else:
378 326
            scale = None
379 327
    if scale is not None:
328
        print 'scale = %s' % scale
380 329
        if k is not None:
381 330
            scale = scale * n / k
382 331
        for v in betweenness:

Also available in: Unified diff