## Revision 3c6ce57c fiddle/heuristic-betweenness-centrality/betweenness_centrality.py

View differences:

fiddle/heuristic-betweenness-centrality/betweenness_centrality.py
21 21
           'edge_betweenness']

22 22

23 23

24
def betweenness_centrality(G, traffic_matrix, k=None, normalized=True, weight=None,

25
                           endpoints=False,

24
def weight_betweenness_centrality(G, traffic_matrix=None, k=None, normalized=True, weight=None, rescale=False,

26 25
                           seed=None):

27 26
    r"""Compute the shortest-path betweenness centrality for nodes.

28 27

......
74 73

75 74
    Notes

76 75
    -----

77
    The algorithm is from Ulrik Brandes [1]_.

76
    The algorithm is from Rami Puzis [1]_.

78 77
    See [4]_ for the original first published version and [2]_ for details on

79 78
    algorithms for variations and related metrics.

80 79

81
    For approximate betweenness calculations set k=#samples to use

82
    k nodes ("pivots") to estimate the betweenness values. For an estimate

83
    of the number of pivots needed see [3]_.

80
    For the faster way to calculate BC using combinatorial path counting, see [3]_

84 81

85 82
    For weighted graphs the edge weights must be greater than zero.

86 83
    Zero edge weights can produce an infinite number of equal length

87 84
    paths between pairs of nodes.

88 85

86
    Check out [5]_ for the details on how to implement the _accumulate_endpoints

87

89 88
    References

90 89
    ----------

91
    .. [1] Ulrik Brandes:

92
       A Faster Algorithm for Betweenness Centrality.

93
       Journal of Mathematical Sociology 25(2):163-177, 2001.

94
       http://www.inf.uni-konstanz.de/algo/publications/b-fabc-01.pdf

90
    .. [1] Rami Puzis:

91

95 92
    .. [2] Ulrik Brandes:

96 93
       On Variants of Shortest-Path Betweenness

97 94
       Centrality and their Generic Computation.

98 95
       Social Networks 30(2):136-145, 2008.

99 96
       http://www.inf.uni-konstanz.de/algo/publications/b-vspbc-08.pdf

100 97
    .. [3] Ulrik Brandes and Christian Pich:

101
       Centrality Estimation in Large Networks.

102
       International Journal of Bifurcation and Chaos 17(7):2303-2318, 2007.

103
       http://www.inf.uni-konstanz.de/algo/publications/bp-celn-06.pdf

98
       A Faster Algorithm for Betweenness Centrality.

99
       Journal of Mathematical Sociology 25(2):163-177, 2001.

100
       http://www.inf.uni-konstanz.de/algo/publications/b-fabc-01.pdf

104 101
    .. [4] Linton C. Freeman:

105 102
       A set of measures of centrality based on betweenness.

106 103
       Sociometry 40: 35–41, 1977

107 104
       http://moreno.ss.uci.edu/23.pdf

108 105

106
       [5] Rami Puzis

107
       Optimization of NIDS Placement for Protection of Intercommunicating Critical Infrastructures

108

109 109
    """

110 110
    betweenness = dict.fromkeys(G, 0.0)  # b[v]=0 for v in G

111 111
    if k is None:

......
115 115
        nodes = random.sample(G.nodes(), k)

116 116
    vertices = G.nodes()

117 117

118
    print 'nodes = %s' % nodes

119
    print 'vertices = %s' % vertices

118
    # print 'nodes = %s' % nodes

119
    # print 'vertices = %s' % vertices

120 120
    for s in nodes:

121 121
        # single source shortest paths

122
        # print '\nxxx _single_source_shortest_path'

122 123
        if weight is None:  # use BFS

123 124
            S, P, sigma = _single_source_shortest_path_basic(G, s)

124 125
        else:  # use Dijkstra's algorithm

125 126
            S, P, sigma = _single_source_dijkstra_path_basic(G, s, weight)

126 127

127
        print '\nxxx _single_source_shortest_path'

128
        print s

129
        print S

130
        print P

131
        print sigma

128
        # print s

129
        # print S

130
        # print P

131
        # print sigma

132 132
        # accumulation

133
        if endpoints:

134
            betweenness = _accumulate_endpoints(betweenness, S, P, sigma, s, traffic_matrix, vertices)

133
        if traffic_matrix:

134
            betweenness = _accumulate_weight(betweenness, S, P, sigma, s, traffic_matrix, vertices)

135 135
        else:

136
            betweenness = _accumulate_basic(betweenness, S, P, sigma, s, traffic_matrix, vertices)

136
            betweenness = _accumulate_weight_basic(betweenness, S, P, sigma, s)

137

138
    print 'betweenness = %s' % betweenness

139
    if rescale:

140
        betweenness = _rescale(betweenness, len(G),

141
                   normalized=normalized,

142
                   directed=G.is_directed(),

143
                   k=k)

137 144
    # Dec 9, 2015 - try to remove rescaling

138 145
    # rescaling

139 146
    # betweenness = _rescale(betweenness, len(G),

......
143 150
    return betweenness

144 151

145 152

146
def edge_betweenness_centrality(G, k=None, normalized=True, weight=None,

147
                                seed=None):

148
    r"""Compute betweenness centrality for edges.

149

150
    Betweenness centrality of an edge e is the sum of the

151
    fraction of all-pairs shortest paths that pass through e:

152

153
    .. math::

154

155
       c_B(e) =\sum_{s,t \in V} \frac{\sigma(s, t|e)}{\sigma(s, t)}

156

157
    where V is the set of nodes,\sigma(s, t) is the number of

158
    shortest (s, t)-paths, and \sigma(s, t|e) is the number of

159
    those paths passing through edge e [2]_.

160

161
    Parameters

162
    ----------

163
    G : graph

164
      A NetworkX graph

165

166
    k : int, optional (default=None)

167
      If k is not None use k node samples to estimate betweenness.

168
      The value of k <= n where n is the number of nodes in the graph.

169
      Higher values give better approximation.

170

171
    normalized : bool, optional

172
      If True the betweenness values are normalized by 2/(n(n-1))

173
      for graphs, and 1/(n(n-1)) for directed graphs where n

174
      is the number of nodes in G.

175

176
    weight : None or string, optional

177
      If None, all edge weights are considered equal.

178
      Otherwise holds the name of the edge attribute used as weight.

179

180
    Returns

181
    -------

182
    edges : dictionary

183
       Dictionary of edges with betweenness centrality as the value.

184

185
    See Also

186
    --------

187
    betweenness_centrality

188
    edge_load

189

190
    Notes

191
    -----

192
    The algorithm is from Ulrik Brandes [1]_.

193

194
    For weighted graphs the edge weights must be greater than zero.

195
    Zero edge weights can produce an infinite number of equal length

196
    paths between pairs of nodes.

197

198
    References

199
    ----------

200
    .. [1]  A Faster Algorithm for Betweenness Centrality. Ulrik Brandes,

201
       Journal of Mathematical Sociology 25(2):163-177, 2001.

202
       http://www.inf.uni-konstanz.de/algo/publications/b-fabc-01.pdf

203
    .. [2] Ulrik Brandes: On Variants of Shortest-Path Betweenness

204
       Centrality and their Generic Computation.

205
       Social Networks 30(2):136-145, 2008.

206
       http://www.inf.uni-konstanz.de/algo/publications/b-vspbc-08.pdf

207
    """

208
    betweenness = dict.fromkeys(G, 0.0)  # b[v]=0 for v in G

209
    # b[e]=0 for e in G.edges()

210
    betweenness.update(dict.fromkeys(G.edges(), 0.0))

211
    if k is None:

212
        nodes = G

213
    else:

214
        random.seed(seed)

215
        nodes = random.sample(G.nodes(), k)

216
    for s in nodes:

217
        # single source shortest paths

218
        if weight is None:  # use BFS

219
            S, P, sigma = _single_source_shortest_path_basic(G, s)

220
        else:  # use Dijkstra's algorithm

221
            S, P, sigma = _single_source_dijkstra_path_basic(G, s, weight)

222
        # accumulation

223
        betweenness = _accumulate_edges(betweenness, S, P, sigma, s)

224

225
    # rescaling

226
    for n in G:  # remove nodes to only return edges

227
        del betweenness[n]

228

229
    betweenness = _rescale_e(betweenness, len(G),

230
                             normalized=normalized,

231
                             directed=G.is_directed())

232
    return betweenness

233

234
# obsolete name

235

236

237
def edge_betweenness(G, k=None, normalized=True, weight=None, seed=None):

238
    return edge_betweenness_centrality(G, k, normalized, weight, seed)

239

240

241
# helpers for betweenness centrality

242

243 153
def _single_source_shortest_path_basic(G, s):

244 154
    S = []

245 155
    P = {}

......
251 161
    D[s] = 0

252 162
    Q = [s]

253 163
    while Q:   # use BFS to find shortest paths

164
        # print "sigma = %s" % sigma

254 165
        v = Q.pop(0)

255 166
        S.append(v)

256 167
        Dv = D[v]

......
274 185
    sigma = dict.fromkeys(G, 0.0)    # sigma[v]=0 for v in G

275 186
    D = {}

276 187
    sigma[s] = 1.0

188
    print sigma

277 189
    push = heappush

278 190
    pop = heappop

279 191
    seen = {s: 0}

......
325 237
        if w != s:

326 238
            # print h

327 239
            # betweenness[w] += delta[w] * h

328
            betweenness[w] += delta[w] * 1

240
            betweenness[w] += delta[w]

329 241
            print 'betweenness = %s' % betweenness

330 242
    return betweenness

331 243

332 244

333
def _accumulate_endpoints(betweenness, S, P, sigma, s, traffic_matrix, nodes):

334
    betweenness[s] += len(S) - 1

245
def _accumulate_weight_basic(betweenness, S, P, sigma, s):

246
    r"""Accumlate the BC score.

247

248
    Implements the Algorithm 1 in [1]_ (line 15-21)

249

250
    In this one, the traffic_matrix is not provided. But we can deduce the value:

251
        h(s,v) = 1   if s != v

252
        h(s, v) = 0  if s == v

253

254
    .. [1] Rami Puzis

255
       Optimization of NIDS Placement for Protection of Intercommunicating Critical Infrastructures

256
    """

257
    # betweenness[s] += len(S) - 1

335 258
    delta = dict.fromkeys(S, 0)

336 259
    while S:

337 260
        w = S.pop()

338
        coeff = (1.0 + delta[w]) / sigma[w]

339
        # coeff = (delta[w]) / sigma[w]

340 261
        if w != s:

341
            h = _get_value_from_traffic_matrix(nodes, traffic_matrix, w, s)

342
            # h = 1

343
            # betweenness[w] += (delta[w] + 1) + h

344
            betweenness[w] += delta[w] + h

345
            print 'betweenness = %s' % betweenness

262
            delta[w] += 1

263
        coeff = delta[w] / sigma[w]

264
        for v in P[w]:

265
            delta[v] += sigma[v] * coeff

266
        if w != s:

267
            betweenness[w] += delta[w]

268
            # print 'out betweenness = %s' % betweenness

269
    return betweenness

270

271
def _accumulate_weight(betweenness, S, P, sigma, s, traffic_matrix, nodes):

272
    r"""Accumlate the BC score.

273

274
    Implements the Algorithm 1 in [1]_ (line 15-21)

275

276
    .. [1] Rami Puzis

277
       Optimization of NIDS Placement for Protection of Intercommunicating Critical Infrastructures

278
    """

279
    # betweenness[s] += len(S) - 1

280
    delta = dict.fromkeys(S, 0)

281
    while S:

282
        # print 'in betweenness = %s' % betweenness

283
        w = S.pop()

284
        h = _get_value_from_traffic_matrix(nodes, traffic_matrix, s, w)

285
        delta[w] += h

286
        coeff = delta[w] / sigma[w]

287
        # print "  coeff = %s" % coeff

346 288
        for v in P[w]:

289
            # print "  v = %s" % v

290
            # print "    h = %s" % h

347 291
            delta[v] += sigma[v] * coeff

292
            # print '  delta = %s' % delta

293
        if w != s:

294
            betweenness[w] += delta[w]

295
            # print 'betweenness = %s' % betweenness

348 296
    return betweenness

349 297

350 298

......
377 325
        else:

378 326
            scale = None

379 327
    if scale is not None:

328
        print 'scale = %s' % scale

380 329
        if k is not None:

381 330
            scale = scale * n / k

382 331
        for v in betweenness:


Also available in: Unified diff