Revision bd3d6dca

View differences:

globecomm/analyse_snapshot.py
import os
import sys
from collections import defaultdict
import numpy as np
import scipy
from scipy import stats
import matplotlib.pyplot as plt

from pdb import set_trace as debugger

class Experiment():
    def __init__(self, num_of_nodes, num_of_snapshots):
        self.scores = np.zeros((num_of_snapshots, num_of_nodes))
        self.index_of_snapshot = 0
        self.num_of_nodes = num_of_nodes
        self.num_of_snapshots = num_of_snapshots

    def add_new_result(self, in_filepath):
        if self.index_of_snapshot == self.num_of_snapshots:
            print "ERROR: there are more input files than the declared number of snapshots"
            sys.exit()

        data = np.loadtxt(in_filepath, delimiter=',', dtype={
                'names': ('node_id', 'score'),
                'formats': ('i4', 'f4')
            })

        for row in data:
            self.update_score(row[0], row[1])

        self.index_of_snapshot += 1

    def update_score(self, node_id, score):
        self.scores[self.index_of_snapshot][node_id] = score

    def summarize(self):
        return np.average(self.scores, axis=1)

    def spearman_rank_correlation_coef(self):
        self.time_diff = defaultdict(list)
        for j in range(self.num_of_snapshots):
            for i in range(j, self.num_of_snapshots):
                diff = scipy.stats.spearmanr(self.scores[j], self.scores[i])
                self.time_diff[i-j].append(diff[0])

        max_key = max(self.time_diff.keys()) + 1
        min_diff = [0 for i in range(max_key)]
        max_diff = [0 for i in range(max_key)]
        mean_diff = [0 for i in range(max_key)]

        for i, value in self.time_diff.iteritems():
            min_diff[i] = np.min(value)
            max_diff[i] = np.max(value)
            mean_diff[i] = np.mean(value)

        # Plot
        x_range = sorted(self.time_diff.keys())
        plt.plot(x_range, min_diff, label='min')
        plt.plot(x_range, mean_diff, label='mean')
        plt.plot(x_range, max_diff, label='max')

        plt.ylabel('Spearman rank correlation coefficient')
        plt.xlabel('time diff (snapshots apart)')
        plt.legend()
        plt.title('FFGraz')
        plt.show()


def all_files_for_network(network_name, dir):
    files = []
    for file in os.listdir(dir):
        prefix = file.split('_')[0]
        if prefix == network_name:
            files.append(os.path.join(dir, file))

    return files

def main():
    dir = 'output'
    network = 'FFGraz'
    files = all_files_for_network(network, dir)
    num_of_snapshots = len(files)
    num_of_nodes = 200
    exp = Experiment(num_of_nodes, num_of_snapshots)
    for file in files:
        exp.add_new_result(file)

    print exp.summarize()
    exp.spearman_rank_correlation_coef()

if __name__ == '__main__':
    main()
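
For reference, add_new_result() expects each snapshot file to be comma-separated "node_id, score" rows, i.e. the format the HBC scripts write out. A minimal sketch of driving the Experiment class by hand (the file paths and scores below are invented for illustration, and an output/ directory is assumed to exist):

# Hypothetical snapshot files in the np.loadtxt() layout parsed above.
from analyse_snapshot import Experiment

with open('output/FFGraz_1.hbcout', 'w') as f:
    f.write('0, 0.5\n1, 0.1\n2, 0.4\n')
with open('output/FFGraz_2.hbcout', 'w') as f:
    f.write('0, 0.6\n1, 0.1\n2, 0.3\n')

exp = Experiment(num_of_nodes=3, num_of_snapshots=2)
exp.add_new_result('output/FFGraz_1.hbcout')
exp.add_new_result('output/FFGraz_2.hbcout')
print exp.summarize()                  # per-snapshot average score
exp.spearman_rank_correlation_coef()   # plots min/mean/max vs. snapshot lag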
globecomm/docs/README.md
1
The input folder contains graph edge files:
2

  
3
input_graphs
4
    |--- FFGraz_1.edges
5
    |--- FFGraz_2.edges
6
    |--- ninux_1.edges
7
    |--- <graph name>_<snapshot id>
8

  
9
The experiment will use HBC to calculate betweenness centrality for every *.edges files. The filename contains information that needed to classify the BC calculation.
10

  
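
A small sketch of how such a filename could be parsed (the helper parse_edges_filename is ours, not part of the repo; it mirrors the prefix split done in all_files_for_network() in analyse_snapshot.py):

import os

def parse_edges_filename(path):
    base = os.path.splitext(os.path.basename(path))[0]  # "FFGraz_1"
    name, snapshot_id = base.split('_')                 # ("FFGraz", "1")
    return name, int(snapshot_id)

print parse_edges_filename('input_graphs/FFGraz_1.edges')  # ('FFGraz', 1)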
globecomm/heuristic_bc/betweenness_centrality.py
# coding=utf8
"""
Betweenness centrality measures.
WITH MODIFICATION by quynhxq - Dec 8, 2015.
This version also includes the traffic matrix.
"""
#    Copyright (C) 2004-2015 by
#    Aric Hagberg <hagberg@lanl.gov>
#    Dan Schult <dschult@colgate.edu>
#    Pieter Swart <swart@lanl.gov>
#    All rights reserved.
#    BSD license.
from heapq import heappush, heappop
from itertools import count
import networkx as nx
import random
__author__ = """Aric Hagberg (hagberg@lanl.gov)"""

__all__ = ['betweenness_centrality',
           'edge_betweenness_centrality',
           'edge_betweenness']


def weight_betweenness_centrality(G, traffic_matrix=None, cut_without=0, k=None, normalized=True, weight=None, rescale=False,
                           seed=None):
    r"""Compute the shortest-path betweenness centrality for nodes.

    Betweenness centrality of a node `v` is the sum of the
    fraction of all-pairs shortest paths that pass through `v`:

    .. math::

       c_B(v) = \sum_{s,t \in V} \frac{\sigma(s, t|v)}{\sigma(s, t)}

    where `V` is the set of nodes, `\sigma(s, t)` is the number of
    shortest `(s, t)`-paths, and `\sigma(s, t|v)` is the number of those
    paths passing through some node `v` other than `s, t`.
    If `s = t`, `\sigma(s, t) = 1`, and if `v \in {s, t}`,
    `\sigma(s, t|v) = 0` [2]_.

    Parameters
    ----------
    G : graph
      A NetworkX graph

    traffic_matrix : list of lists, optional (default=None)
      Pairwise communication intensity h(s, t), indexed by the sorted
      node list. If given, BC is accumulated with these weights;
      otherwise stress accumulation is used.

    cut_without : int, optional (default=0)
      Number of vertices of the original graph that lie outside the
      current bi-connected component.

    k : int, optional (default=None)
      If k is not None use k node samples to estimate betweenness.
      The value of k <= n where n is the number of nodes in the graph.
      Higher values give better approximation.

    normalized : bool, optional
      If True the betweenness values are normalized by `2/((n-1)(n-2))`
      for graphs, and `1/((n-1)(n-2))` for directed graphs where `n`
      is the number of nodes in G.

    weight : None or string, optional
      If None, all edge weights are considered equal.
      Otherwise holds the name of the edge attribute used as weight.

    rescale : bool, optional
      If True the scores are rescaled by `_rescale` before returning.

    Returns
    -------
    nodes : dictionary
       Dictionary of nodes with betweenness centrality as the value.

    See Also
    --------
    edge_betweenness_centrality
    load_centrality

    Notes
    -----
    The algorithm is from Rami Puzis [1]_.
    See [4]_ for the original first published version and [2]_ for details on
    algorithms for variations and related metrics.

    For a faster way to calculate BC using combinatorial path counting, see [3]_.

    For weighted graphs the edge weights must be greater than zero.
    Zero edge weights can produce an infinite number of equal length
    paths between pairs of nodes.

    Check out [5]_ for the details on how to implement _accumulate_endpoints.

    References
    ----------
    .. [1] Rami Puzis:
       Optimization of NIDS Placement for Protection of
       Intercommunicating Critical Infrastructures.
    .. [2] Ulrik Brandes:
       On Variants of Shortest-Path Betweenness
       Centrality and their Generic Computation.
       Social Networks 30(2):136-145, 2008.
       http://www.inf.uni-konstanz.de/algo/publications/b-vspbc-08.pdf
    .. [3] Ulrik Brandes and Christian Pich:
       A Faster Algorithm for Betweenness Centrality.
       Journal of Mathematical Sociology 25(2):163-177, 2001.
       http://www.inf.uni-konstanz.de/algo/publications/b-fabc-01.pdf
    .. [4] Linton C. Freeman:
       A set of measures of centrality based on betweenness.
       Sociometry 40: 35–41, 1977
       http://moreno.ss.uci.edu/23.pdf
    .. [5] Rami Puzis:
       Optimization of NIDS Placement for Protection of
       Intercommunicating Critical Infrastructures.
    """
    betweenness = dict.fromkeys(G, 0.0)  # b[v]=0 for v in G
    if k is None:
        nodes = G
    else:
        random.seed(seed)
        nodes = random.sample(G.nodes(), k)
    vertices = sorted(G.nodes())

    for s in nodes:
        if weight is None:  # use BFS
            S, P, sigma = _single_source_shortest_path_basic(G, s)
        else:  # use Dijkstra's algorithm
            S, P, sigma = _single_source_dijkstra_path_basic(G, s, weight)

        # accumulation
        if traffic_matrix:
            betweenness = _accumulate_weight(betweenness, S, P, sigma, s, traffic_matrix, cut_without, vertices)
        else:
            betweenness = _accumulate_stress(betweenness, S, P, sigma, s)

    if rescale:
        betweenness = _rescale(betweenness, len(G),
                   normalized=normalized,
                   directed=G.is_directed(),
                   k=k)
    # Dec 9, 2015 - try to remove rescaling
    # rescaling
    # betweenness = _rescale(betweenness, len(G),
    #                        normalized=normalized,
    #                        directed=G.is_directed(),
    #                        k=k)
    return betweenness


def _single_source_shortest_path_basic(G, s):
    S = []
    P = {}
    for v in G:
        P[v] = []
    sigma = dict.fromkeys(G, 0.0)    # sigma[v]=0 for v in G
    D = {}
    sigma[s] = 1.0
    D[s] = 0
    Q = [s]
    while Q:   # use BFS to find shortest paths
        # print "sigma = %s" % sigma
        v = Q.pop(0)
        S.append(v)
        Dv = D[v]
        sigmav = sigma[v]
        for w in G[v]:
            if w not in D:
                Q.append(w)
                D[w] = Dv + 1
            if D[w] == Dv + 1:   # this is a shortest path, count paths
                sigma[w] += sigmav
                P[w].append(v)  # predecessors
    return S, P, sigma


def _single_source_dijkstra_path_basic(G, s, weight='weight'):
    # modified from Eppstein
    S = []
    P = {}
    for v in G:
        P[v] = []
    sigma = dict.fromkeys(G, 0.0)    # sigma[v]=0 for v in G
    D = {}
    sigma[s] = 1.0
    push = heappush
    pop = heappop
    seen = {s: 0}
    c = count()
    Q = []   # use Q as heap with (distance,node id) tuples
    push(Q, (0, next(c), s, s))
    while Q:
        (dist, _, pred, v) = pop(Q)
        if v in D:
            continue  # already searched this node.
        sigma[v] += sigma[pred]  # count paths
        S.append(v)
        D[v] = dist
        for w, edgedata in G[v].items():
            vw_dist = dist + edgedata.get(weight, 1)
            if w not in D and (w not in seen or vw_dist < seen[w]):
                seen[w] = vw_dist
                push(Q, (vw_dist, next(c), v, w))
                sigma[w] = 0.0
                P[w] = [v]
            elif vw_dist == seen[w]:  # handle equal paths
                sigma[w] += sigma[v]
                P[w].append(v)
    return S, P, sigma


def _get_value_from_traffic_matrix(nodes, traffic_matrix, x, y):
    x_pos = nodes.index(x)
    y_pos = nodes.index(y)
    try:
        return traffic_matrix[x_pos][y_pos]
    except (ValueError, IndexError):
        print "ERROR in _get_value_from_traffic_matrix"
        return -1

def _accumulate_stress(betweenness, S, P, sigma, s):
    delta = dict.fromkeys(S, 0)
    while S:
        w = S.pop()
        for v in P[w]:
            delta[v] += (1.0 + delta[w])
        if w != s:
            betweenness[w] += sigma[w] * delta[w]
    return betweenness

def _accumulate_weight_basic(betweenness, S, P, sigma, s):
    r"""Accumulate the BC score.

    Implements Algorithm 1 in [1]_ (lines 15-21)

    Here the traffic_matrix is not provided, but we can deduce the value:
        h(s, v) = 1  if s != v
        h(s, v) = 0  if s == v

    .. [1] Rami Puzis
       Optimization of NIDS Placement for Protection of Intercommunicating Critical Infrastructures
    """
    delta = dict.fromkeys(S, 0)
    while S:
        w = S.pop()
        # if w != s:
        delta[w] += 1  # this is the h: when w != s, then h(s, w) = 1
        coeff = delta[w] / sigma[w]
        for v in P[w]:
            delta[v] += sigma[v] * coeff
        if w != s:
            betweenness[w] += delta[w]
    return betweenness

def _accumulate_weight(betweenness, S, P, sigma, s, traffic_matrix, cut_without, nodes):
    r"""Accumulate the BC score.

    Implements Algorithm 1 in [1]_ (lines 15-21)

    .. [1] Rami Puzis
       Optimization of NIDS Placement for Protection of Intercommunicating Critical Infrastructures
    """
    delta = dict.fromkeys(S, 0)
    while S:
        w = S.pop()
        h = _get_value_from_traffic_matrix(nodes, traffic_matrix, s, w)
        betweenness[s] += h
        delta[w] += h

        coeff = delta[w] / sigma[w]

        for v in P[w]:
            delta[v] += sigma[v] * coeff
        if w != s:
            betweenness[w] += delta[w]
    return betweenness


def _accumulate_edges(betweenness, S, P, sigma, s):
    delta = dict.fromkeys(S, 0)
    while S:
        w = S.pop()
        coeff = (1.0 + delta[w]) / sigma[w]
        for v in P[w]:
            c = sigma[v] * coeff
            if (v, w) not in betweenness:
                betweenness[(w, v)] += c
            else:
                betweenness[(v, w)] += c
            delta[v] += c
        if w != s:
            betweenness[w] += delta[w]
    return betweenness


def _rescale(betweenness, n, normalized, directed=False, k=None):
    if normalized is True:
        if n <= 2:
            scale = None  # no normalization b=0 for all nodes
        else:
            scale = 1.0 / ((n - 1) * (n - 2))
    else:  # rescale by 2 for undirected graphs
        if not directed:
            scale = 1.0 / 2.0
        else:
            scale = None
    if scale is not None:
        print 'scale = %s' % scale
        if k is not None:
            scale = scale * n / k
        for v in betweenness:
            betweenness[v] *= scale
    return betweenness


def _rescale_e(betweenness, n, normalized, directed=False, k=None):
    if normalized is True:
        if n <= 1:
            scale = None  # no normalization b=0 for all nodes
        else:
            scale = 1.0 / (n * (n - 1))
    else:  # rescale by 2 for undirected graphs
        if not directed:
            scale = 1.0 / 2.0
        else:
            scale = None
    if scale is not None:
        if k is not None:
            scale = scale * n / k
        for v in betweenness:
            betweenness[v] *= scale
    return betweenness
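
A usage sketch for the modified routine (the 4-node path graph and the uniform traffic matrix are illustrative; when traffic_matrix is None the call falls back to _accumulate_stress):

import networkx as nx
import betweenness_centrality as centrality

G = nx.path_graph(4)   # 0 - 1 - 2 - 3

# Uniform intensity h(s, t) = 1 for s != t, 0 on the diagonal,
# indexed by sorted(G.nodes()) as _get_value_from_traffic_matrix() expects.
n = nx.number_of_nodes(G)
h = [[0 if i == j else 1 for j in range(n)] for i in range(n)]

print centrality.weight_betweenness_centrality(G, traffic_matrix=h)
print centrality.weight_betweenness_centrality(G)  # stress accumulation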
globecomm/heuristic_bc/centrality_biconnected.py
# Implement the section IV in Puzis 2012 paper
# Heuristic Betweenness Centrality - Partitioning to Bi-connected Components

import os
import sys
import pprint
import networkx as nx
import betweenness_centrality as centrality
import utility
from pdb import set_trace as debugger

MAIN_CODE_DIR = os.environ.get('MAIN_CODE_DIR', '')

class HeuristicBetweennessCentrality():
    def __init__(self, graph):

        if not nx.is_connected(graph):
            print "Graph is not connected"
            sys.exit()
        else:
            print "Graph is connected"

        self.subgraphs = list(nx.biconnected_component_subgraphs(graph))
        self.bicomponents = list(nx.biconnected_components(graph))
        self.cutpoints = set(nx.articulation_points(graph))
        self.num_vertices = nx.number_of_nodes(graph)
        self.link_weight = LinkWeight(graph, self.bicomponents, self.cutpoints)
        self.traffic_matrix = TrafficMatrix(self.bicomponents, self.cutpoints, self.link_weight)

        self.bc_components = list()
        self.calculate_bc_non_cutpoint()
        self.calculate_bc_cutpoint()

        self.bc = dict()
        self.finalize()

    def calculate_bc_non_cutpoint(self):
        """BC for non cutpoint vertices
        """
        for i, subgraph in enumerate(self.subgraphs):
            traffic_matrix = self.traffic_matrix[i]
            cut_without = self.num_vertices - nx.number_of_nodes(subgraph)
            results = centrality.weight_betweenness_centrality(subgraph, traffic_matrix, cut_without)
            self.bc_components.append(results)

    def calculate_bc_cutpoint(self):
        self.bc_cutpoints = dict()

        bc_inter = dict()
        for v in self.cutpoints:
            inter = 0
            for i, comp in enumerate(self.bicomponents):
                if v in comp:
                    inter += self.link_weight.get_link_weight(i, v
                        ) * self.link_weight.get_reverse_link_weight(i, v)
            bc_inter[v] = inter

        print 'XXXX bc_components = %s' % self.bc_components
        print 'XXXX inter = %s' % bc_inter

        for v in self.cutpoints:
            bc_locally = 0
            for i, comp in enumerate(self.bicomponents):
                if v in comp:
                    bc_locally += self.bc_components[i][v]
            self.bc_cutpoints[v] = bc_locally - bc_inter[v]
            # TODO: do not subtract the bc_inter
            # self.bc_cutpoints[v] = bc_locally

    def finalize(self):
        # Add the bc for non cutpoint vertices
        for bc_component in self.bc_components:
            for key, value in bc_component.iteritems():
                if key not in self.bc:
                    self.bc[key] = value

        # Add the bc for cutpoint vertices
        for key, value in self.bc_cutpoints.iteritems():
            self.bc[key] = value

        # Rescale the bc according to the original graph
        factor = 1.0 / ((self.num_vertices - 1) * (self.num_vertices - 2))
        # TODO: check the scaling factor, how much should it be?
        # factor = 2.0 / (self.num_vertices*self.num_vertices - 3 * self.num_vertices + 2)
        for key, value in self.bc.iteritems():
            self.bc[key] = value * factor

    def write(self, file_suffix=''):
        filepath = '/output/heuristic_%s.csv' % file_suffix
        with open(MAIN_CODE_DIR + filepath, 'w') as output:
            for node, centrality in self.bc.iteritems():
                output.write('%s, %s\n' % (node, centrality))

    def __str__(self):
        return str(self.bc)


class TrafficMatrix():
    def __init__(self, bicomponents, cutpoints, link_weight):
        self.bicomponents = bicomponents
        self.cutpoints = cutpoints
        self.num_components = len(bicomponents)
        self.link_weight = link_weight

        self.h = list()
        self.generate_empty_traffic_matrix()
        self.generate_traffic_matrix()

    def generate_empty_traffic_matrix(self):
        for i in range(self.num_components):
            l = len(self.bicomponents[i])
            matrix = [[1 for x in range(l)] for y in range(l)]
            # update the main diagonal
            for x in range(l):
                matrix[x][x] = 0

            self.h.append(matrix)

    def generate_traffic_matrix(self):
        # Update the value when one vertex is a cut-point and the other vertex is not
        for i, components in enumerate(self.bicomponents):
            normal_points = components.difference(self.cutpoints)
            cutpoints = self.cutpoints.intersection(components)

            for cutpoint in cutpoints:
                for normal_point in normal_points:
                    communication_intensity = self.link_weight.get_reverse_link_weight(i, cutpoint) + 1
                    self.update(i, cutpoint, normal_point, communication_intensity)

        # Update the value when both vertices are cut-points
        for i, components in enumerate(self.bicomponents):
            cutpoints = list(self.cutpoints.intersection(components))
            len_cutpoints = len(cutpoints)
            if len_cutpoints > 1:
                for k in range(len_cutpoints - 1):
                    for l in range(1, len_cutpoints):
                        if k == l:
                            continue
                        communication_intensity = (
                            self.link_weight.get_reverse_link_weight(i, cutpoints[k]) + 1) * (
                            self.link_weight.get_reverse_link_weight(i, cutpoints[l]) + 1
                        )
                        self.update(i, cutpoints[k], cutpoints[l], communication_intensity)

    def simple_update(self, comp_index, x_pos, y_pos, value):
        self.h[comp_index][x_pos][y_pos] = value
        # to keep the symmetric property of the traffic matrix
        self.h[comp_index][y_pos][x_pos] = value

    def update(self, comp_index, x, y, value):
        comp = sorted(self.bicomponents[comp_index])
        try:
            x_pos = list(comp).index(x)
            y_pos = list(comp).index(y)
        except ValueError:
            debugger()

        self.simple_update(comp_index, x_pos, y_pos, value)

    def __str__(self):
        return str(self.h)

    def __getitem__(self, key):
        return self.h[key]


class LinkWeight():
    def __init__(self, graph, bicomponents, cutpoints):
        self.num_vertices = nx.number_of_nodes(graph)
        self.bicomponents = bicomponents
        self.num_components = len(bicomponents)
        self.cutpoints = cutpoints

        self.Dv_B = [dict() for i in range(self.num_components)]  # link weight
        self.compute_component_tree_weight()

        self.reverse_Dv_B = [dict() for i in range(self.num_components)]  # reverse link weight
        self.generate_reverse_link_weight()

    def _components_sharing_cutpoint(self, B_cutpoints, point):
        indices = list()
        for i, cp in enumerate(B_cutpoints):
            if point in cp:
                indices.append(i)

        return indices

    def get_link_weight(self, comp_index, point):
        Dv_B_comp = self.Dv_B[comp_index]

        if point in Dv_B_comp:
            return Dv_B_comp[point]
        else:
            return 0

    def set_link_weight(self, comp_index, point, value):
        self.Dv_B[comp_index][point] = value

    def get_reverse_link_weight(self, comp_index, point):
        reverse_Dv_B_comp = self.reverse_Dv_B[comp_index]

        if point in reverse_Dv_B_comp:
            return reverse_Dv_B_comp[point]
        else:
            return 0

    def generate_reverse_link_weight(self):
        for i, Dv_B_i in enumerate(self.Dv_B):
            for key, value in Dv_B_i.iteritems():
                self.reverse_Dv_B[i][key] = self.num_vertices - 1 - self.get_link_weight(i, key)

    def compute_component_tree_weight(self):
        """Follows exactly the Algorithm 1 [Puzis 2012]
        """
        B_cutpoints = list()  # the cutpoints contained in each component B
        for comp in self.bicomponents:
            points = comp.intersection(self.cutpoints)
            B_cutpoints.append(points)

        Q = self._inititalize_component_tree_weight()
        while Q:
            pair = Q.pop(0)
            if pair['type'] == 'component_vertex_pair':
                B_index = pair['value'][0]
                v = pair['value'][1]
                size = len(self.bicomponents[B_index]) - 1
                all_cutpoints = self.bicomponents[B_index].intersection(self.cutpoints)
                all_cutpoints.remove(v)

                for cp in all_cutpoints:
                    if self.get_link_weight(B_index, cp) != -1:
                        size += self.num_vertices - self.get_link_weight(B_index, cp) - 1

                link_weight = size
                self._verify_link_weight(B_index, v, link_weight)
                self.set_link_weight(B_index, v, link_weight)

                # update Q
                Q = self._find_unknown_weight_wrt_cutpoint(v, Q)

            if pair['type'] == 'vertex_component_pair':
                size = 0
                B_index = pair['value'][0]
                v = pair['value'][1]
                shared_comp_indices = self._components_sharing_cutpoint(B_cutpoints, v)
                shared_comp_indices.remove(B_index)

                for i in shared_comp_indices:
                    if self.get_link_weight(i, v) != -1:
                        size += self.get_link_weight(i, v)

                link_weight = self.num_vertices - 1 - size
                self._verify_link_weight(B_index, v, link_weight)
                self.set_link_weight(B_index, v, link_weight)

                # update Q
                Q = self._find_unknown_weight_wrt_component(B_index, Q)

    def _verify_link_weight(self, B_index, v, value):
        """If the old value exists in self.Dv_B, then it must be equal to the new value.

        Otherwise, do nothing.
        """
        old_value = self.get_link_weight(B_index, v)

        if old_value != -1:  # -1 is unknown
            if old_value != value:
                print "BUGS FOUND in _verify_link_weight()"
                sys.exit()


    def _inititalize_component_tree_weight(self):
        Q = []
        for i, comp in enumerate(self.bicomponents):
            current_cutpoints = self.cutpoints.intersection(comp)
            if len(current_cutpoints) == 1:
                Q.append({
                    'type': 'component_vertex_pair',
                    'value': (i, list(current_cutpoints)[0])  # (B_i, cutpoint) = (i-th component, the cutpoint name)
                    })
            for cp in current_cutpoints:
                self.set_link_weight(i, cp, -1)
        return Q

    def _find_unknown_weight_wrt_cutpoint(self, cutpoint, Q):
        # Cut-point v such that the weights of all but one of its links in T are already computed.
        num_of_uncomputed_weight = 0
        uncomputed_component_index = []

        for i, Dv_B_comp in enumerate(self.Dv_B):
            if cutpoint in Dv_B_comp:
                if Dv_B_comp[cutpoint] == -1:
                    num_of_uncomputed_weight += 1
                    uncomputed_component_index.append(i)
                if num_of_uncomputed_weight > 1:
                    break

        if num_of_uncomputed_weight == 1:
            pair = {
                'type': 'vertex_component_pair',
                'value': (uncomputed_component_index.pop(), cutpoint)
            }
            Q.append(pair)
        return Q

    def _find_unknown_weight_wrt_component(self, comp_index, Q):
        # Component B such that the weights of all but one of its links in T are already computed.
        Dv_B_comp = self.Dv_B[comp_index]
        values = Dv_B_comp.values()

        # Check if the -1 value appears only once
        minus_one_value = [x for x in values if x == -1]
        if len(minus_one_value) == 1:
            for cp, value in Dv_B_comp.iteritems():
                if value == -1:
                    pair = {
                        'type': 'component_vertex_pair',
                        'value': (comp_index, cp)
                    }
                    Q.append(pair)
        return Q

    def __str__(self):
        return str(self.Dv_B)
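
The decomposition this module builds on can be inspected directly with networkx. A toy "bowtie" graph where vertex 2 is the cut-point joining two triangles (output order may vary):

import networkx as nx

G = nx.Graph([(0, 1), (1, 2), (2, 0), (2, 3), (3, 4), (4, 2)])

print list(nx.biconnected_components(G))   # [{0, 1, 2}, {2, 3, 4}]
print set(nx.articulation_points(G))       # set([2])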
globecomm/heuristic_bc/heuristic_betweenness_centrality.py
# Implement the section IV in Puzis 2012 paper
# Heuristic Betweenness Centrality - Partitioning to Bi-connected Components

import os
import sys
import pprint
import networkx as nx
import betweenness_centrality as centrality
import utility
from pdb import set_trace as debugger

MAIN_CODE_DIR = os.environ.get('MAIN_CODE_DIR', '')

class HeuristicBetweennessCentrality():
    def __init__(self, graph):

        if not nx.is_connected(graph):
            print "Graph is not connected"
            sys.exit()

        self.subgraphs = list(nx.biconnected_component_subgraphs(graph))
        self.bicomponents = list(nx.biconnected_components(graph))
        self.cutpoints = set(nx.articulation_points(graph))
        self.num_vertices = nx.number_of_nodes(graph)
        self.link_weight = LinkWeight(graph, self.bicomponents, self.cutpoints)
        self.traffic_matrix = TrafficMatrix(self.bicomponents, self.cutpoints, self.link_weight)

        self.bc_components = list()
        self.calculate_bc_non_cutpoint()
        self.calculate_bc_cutpoint()

        self.bc = dict()
        self.finalize()

    def calculate_bc_non_cutpoint(self):
        """BC for non cutpoint vertices
        """
        for i, subgraph in enumerate(self.subgraphs):
            traffic_matrix = self.traffic_matrix[i]
            cut_without = self.num_vertices - nx.number_of_nodes(subgraph)
            results = centrality.weight_betweenness_centrality(subgraph, traffic_matrix, cut_without)
            self.bc_components.append(results)

    def calculate_bc_cutpoint(self):
        self.bc_cutpoints = dict()

        bc_inter = dict()
        for v in self.cutpoints:
            inter = 0
            for i, comp in enumerate(self.bicomponents):
                if v in comp:
                    inter += self.link_weight.get_link_weight(i, v
                        ) * self.link_weight.get_reverse_link_weight(i, v)
            bc_inter[v] = inter

        for v in self.cutpoints:
            bc_locally = 0
            for i, comp in enumerate(self.bicomponents):
                if v in comp:
                    bc_locally += self.bc_components[i][v]
            self.bc_cutpoints[v] = bc_locally - bc_inter[v]
            # TODO: do not subtract the bc_inter
            # self.bc_cutpoints[v] = bc_locally

    def finalize(self):
        # Add the bc for non cutpoint vertices
        for bc_component in self.bc_components:
            for key, value in bc_component.iteritems():
                if key not in self.bc:
                    self.bc[key] = value

        # Add the bc for cutpoint vertices
        for key, value in self.bc_cutpoints.iteritems():
            self.bc[key] = value

        # Rescale the bc according to the original graph
        factor = 1.0 / ((self.num_vertices - 1) * (self.num_vertices - 2))
        # TODO: check the scaling factor, how much should it be?
        # factor = 2.0 / (self.num_vertices*self.num_vertices - 3 * self.num_vertices + 2)
        for key, value in self.bc.iteritems():
            self.bc[key] = value * factor

    def __str__(self):
        return str(self.bc)

    def write(self, filepath):
        print "HBC score is written to %s" % filepath
        with open(filepath, 'w') as output:
            for node, centrality in self.bc.iteritems():
                output.write('%s, %s\n' % (node, centrality))

class TrafficMatrix():
    def __init__(self, bicomponents, cutpoints, link_weight):
        self.bicomponents = bicomponents
        self.cutpoints = cutpoints
        self.num_components = len(bicomponents)
        self.link_weight = link_weight

        self.h = list()
        self.generate_empty_traffic_matrix()
        self.generate_traffic_matrix()

    def generate_empty_traffic_matrix(self):
        for i in range(self.num_components):
            l = len(self.bicomponents[i])
            matrix = [[1 for x in range(l)] for y in range(l)]
            # update the main diagonal
            for x in range(l):
                matrix[x][x] = 0

            self.h.append(matrix)

    def generate_traffic_matrix(self):
        # Update the value when one vertex is a cut-point and the other vertex is not
        for i, components in enumerate(self.bicomponents):
            normal_points = components.difference(self.cutpoints)
            cutpoints = self.cutpoints.intersection(components)

            for cutpoint in cutpoints:
                for normal_point in normal_points:
                    communication_intensity = self.link_weight.get_reverse_link_weight(i, cutpoint) + 1
                    self.update(i, cutpoint, normal_point, communication_intensity)

        # Update the value when both vertices are cut-points
        for i, components in enumerate(self.bicomponents):
            cutpoints = list(self.cutpoints.intersection(components))
            len_cutpoints = len(cutpoints)
            if len_cutpoints > 1:
                for k in range(len_cutpoints - 1):
                    for l in range(1, len_cutpoints):
                        if k == l:
                            continue
                        communication_intensity = (
                            self.link_weight.get_reverse_link_weight(i, cutpoints[k]) + 1) * (
                            self.link_weight.get_reverse_link_weight(i, cutpoints[l]) + 1
                        )
                        self.update(i, cutpoints[k], cutpoints[l], communication_intensity)

    def simple_update(self, comp_index, x_pos, y_pos, value):
        self.h[comp_index][x_pos][y_pos] = value
        # to keep the symmetric property of the traffic matrix
        self.h[comp_index][y_pos][x_pos] = value

    def update(self, comp_index, x, y, value):
        comp = sorted(self.bicomponents[comp_index])
        try:
            x_pos = list(comp).index(x)
            y_pos = list(comp).index(y)
        except ValueError:
            debugger()

        self.simple_update(comp_index, x_pos, y_pos, value)

    def __str__(self):
        return str(self.h)

    def __getitem__(self, key):
        return self.h[key]


class LinkWeight():
    def __init__(self, graph, bicomponents, cutpoints):
        self.num_vertices = nx.number_of_nodes(graph)
        self.bicomponents = bicomponents
        self.num_components = len(bicomponents)
        self.cutpoints = cutpoints

        self.Dv_B = [dict() for i in range(self.num_components)]  # link weight
        self.compute_component_tree_weight()

        self.reverse_Dv_B = [dict() for i in range(self.num_components)]  # reverse link weight
        self.generate_reverse_link_weight()

    def _components_sharing_cutpoint(self, B_cutpoints, point):
        indices = list()
        for i, cp in enumerate(B_cutpoints):
            if point in cp:
                indices.append(i)

        return indices

    def get_link_weight(self, comp_index, point):
        Dv_B_comp = self.Dv_B[comp_index]

        if point in Dv_B_comp:
            return Dv_B_comp[point]
        else:
            return 0

    def set_link_weight(self, comp_index, point, value):
        self.Dv_B[comp_index][point] = value

    def get_reverse_link_weight(self, comp_index, point):
        reverse_Dv_B_comp = self.reverse_Dv_B[comp_index]

        if point in reverse_Dv_B_comp:
            return reverse_Dv_B_comp[point]
        else:
            return 0

    def generate_reverse_link_weight(self):
        for i, Dv_B_i in enumerate(self.Dv_B):
            for key, value in Dv_B_i.iteritems():
                self.reverse_Dv_B[i][key] = self.num_vertices - 1 - self.get_link_weight(i, key)

    def compute_component_tree_weight(self):
        """Follows exactly the Algorithm 1 [Puzis 2012]
        """
        B_cutpoints = list()  # the cutpoints contained in each component B
        for comp in self.bicomponents:
            points = comp.intersection(self.cutpoints)
            B_cutpoints.append(points)

        Q = self._inititalize_component_tree_weight()
        while Q:
            pair = Q.pop(0)
            if pair['type'] == 'component_vertex_pair':
                B_index = pair['value'][0]
                v = pair['value'][1]
                size = len(self.bicomponents[B_index]) - 1
                all_cutpoints = self.bicomponents[B_index].intersection(self.cutpoints)
                all_cutpoints.remove(v)

                for cp in all_cutpoints:
                    if self.get_link_weight(B_index, cp) != -1:
                        size += self.num_vertices - self.get_link_weight(B_index, cp) - 1

                link_weight = size
                self._verify_link_weight(B_index, v, link_weight)
                self.set_link_weight(B_index, v, link_weight)

                # update Q
                Q = self._find_unknown_weight_wrt_cutpoint(v, Q)

            if pair['type'] == 'vertex_component_pair':
                size = 0
                B_index = pair['value'][0]
                v = pair['value'][1]
                shared_comp_indices = self._components_sharing_cutpoint(B_cutpoints, v)
                shared_comp_indices.remove(B_index)

                for i in shared_comp_indices:
                    if self.get_link_weight(i, v) != -1:
                        size += self.get_link_weight(i, v)

                link_weight = self.num_vertices - 1 - size
                self._verify_link_weight(B_index, v, link_weight)
                self.set_link_weight(B_index, v, link_weight)

                # update Q
                Q = self._find_unknown_weight_wrt_component(B_index, Q)

    def _verify_link_weight(self, B_index, v, value):
        """If the old value exists in self.Dv_B, then it must be equal to the new value.

        Otherwise, do nothing.
        """
        old_value = self.get_link_weight(B_index, v)

        if old_value != -1:  # -1 is unknown
            if old_value != value:
                print "BUGS FOUND in _verify_link_weight()"
                sys.exit()


    def _inititalize_component_tree_weight(self):
        Q = []
        for i, comp in enumerate(self.bicomponents):
            current_cutpoints = self.cutpoints.intersection(comp)
            if len(current_cutpoints) == 1:
                Q.append({
                    'type': 'component_vertex_pair',
                    'value': (i, list(current_cutpoints)[0])  # (B_i, cutpoint) = (i-th component, the cutpoint name)
                    })
            for cp in current_cutpoints:
                self.set_link_weight(i, cp, -1)
        return Q

    def _find_unknown_weight_wrt_cutpoint(self, cutpoint, Q):
        # Cut-point v such that the weights of all but one of its links in T are already computed.
        num_of_uncomputed_weight = 0
        uncomputed_component_index = []

        for i, Dv_B_comp in enumerate(self.Dv_B):
            if cutpoint in Dv_B_comp:
                if Dv_B_comp[cutpoint] == -1:
                    num_of_uncomputed_weight += 1
                    uncomputed_component_index.append(i)
                if num_of_uncomputed_weight > 1:
                    break

        if num_of_uncomputed_weight == 1:
            pair = {
                'type': 'vertex_component_pair',
                'value': (uncomputed_component_index.pop(), cutpoint)
            }
            Q.append(pair)
        return Q

    def _find_unknown_weight_wrt_component(self, comp_index, Q):
        # Component B such that the weights of all but one of its links in T are already computed.
        Dv_B_comp = self.Dv_B[comp_index]
        values = Dv_B_comp.values()

        # Check if the -1 value appears only once
        minus_one_value = [x for x in values if x == -1]
        if len(minus_one_value) == 1:
            for cp, value in Dv_B_comp.iteritems():
                if value == -1:
                    pair = {
                        'type': 'component_vertex_pair',
                        'value': (comp_index, cp)
                    }
                    Q.append(pair)
        return Q

    def __str__(self):
        return str(self.Dv_B)
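
A usage sketch for this class (same toy bowtie graph as above; the exact numbers depend on the scaling questions flagged in finalize(), and the output path is hypothetical):

import networkx as nx
from heuristic_betweenness_centrality import HeuristicBetweennessCentrality

G = nx.Graph([(0, 1), (1, 2), (2, 0), (2, 3), (3, 4), (4, 2)])

hbc = HeuristicBetweennessCentrality(G)
print hbc                                            # heuristic scores per node
print nx.betweenness_centrality(G, normalized=True)  # exact Brandes BC, for comparison
hbc.write('output/bowtie.hbcout')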
globecomm/heuristic_bc/utility.py
import json
import os
import networkx as nx
import matplotlib.pyplot as plt
import betweenness_centrality as centrality

MAIN_CODE_DIR = os.environ.get('MAIN_CODE_DIR', '')

def initialize():
    """
    Create all the necessary folders
    """
    output_dir = os.path.join(MAIN_CODE_DIR, 'output')
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    visualization_dir = os.path.join(output_dir, 'visualization')
    if not os.path.isdir(visualization_dir):
        os.makedirs(visualization_dir)

def weight_betweenness_centrality(graph, file_suffix):
    score = centrality.weight_betweenness_centrality(graph, rescale=True)
    filepath = '/output/weight_basic_%s.csv' % file_suffix

    with open(MAIN_CODE_DIR + filepath, 'w') as output:
        for node, bc in score.iteritems():
            output.write('%s, %s\n' % (node, bc))

def brandes_betweenness_centrality(graph, file_suffix):
    score = nx.betweenness_centrality(graph, endpoints=False, normalized=False)
    # score = nx.betweenness_centrality(graph, endpoints=True)
    filepath = '/output/networkx_%s.csv' % file_suffix

    with open(MAIN_CODE_DIR + filepath, 'w') as output:
        for node, bc in score.iteritems():
            output.write('%s, %s\n' % (node, bc))

def brandes_betweenness_centrality_endpoints(graph, file_suffix):
    score = nx.betweenness_centrality(graph, endpoints=True, normalized=True)
    filepath = '/output/networkx_%s.csv' % file_suffix

    with open(MAIN_CODE_DIR + filepath, 'w') as output:
        for node, bc in score.iteritems():
            output.write('%s, %s\n' % (node, bc))

def read_complex_json(filepath):
    """Returns a graph object
    """
    edges = []
    with open(filepath) as f:
        doc = json.loads(f.read())
        topologies = doc["topology"]
        for connection in topologies:
            source = connection['lastHopIP']
            target = connection['destinationIP']
            cost = connection['neighborLinkQuality']
            edges.append((source, target, {'weight': cost}))

    graph = nx.Graph()
    print "Edges = %i\n" % len(edges)
    graph.add_edges_from(edges)
    return graph


def plot_graph(output_filepath, graph, title=""):
    plt.figure(1, figsize=(15, 10))
    plt.title(title)
    pos = nx.spring_layout(graph)
    nx.draw(graph, pos, node_size=1000, with_labels=True)
    nx.draw_networkx_labels(graph, pos, font_size=20, font_family='sans-serif')
    plt.savefig(output_filepath)
    plt.close()
    # plt.show()
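
read_complex_json() assumes a topology JSON document shaped like the sketch below (the IP addresses and link qualities are invented for illustration):

import json
from utility import read_complex_json

doc = {
    "topology": [
        {"lastHopIP": "10.0.0.1", "destinationIP": "10.0.0.2", "neighborLinkQuality": 1.0},
        {"lastHopIP": "10.0.0.2", "destinationIP": "10.0.0.3", "neighborLinkQuality": 0.8},
    ]
}

with open('topology.json', 'w') as f:
    f.write(json.dumps(doc))

graph = read_complex_json('topology.json')  # 2 edges, link quality as weight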
globecomm/main.py
from os import listdir
from os.path import isfile, join, splitext, basename
import sys
import networkx as nx

sys.path.append("heuristic_bc/")
from heuristic_betweenness_centrality import HeuristicBetweennessCentrality as HBC

from pdb import set_trace as debugger


GRAPH_DIRS = '/home/quynh/Thesis/quynhnguyen-ms/experiment_input_graphs/CNGraphs/snapshots'

def get_all_files_from_dir(dir):
    return [join(dir, f) for f in listdir(dir) if isfile(join(dir, f))]


if __name__ == '__main__':
    onlyfiles = get_all_files_from_dir(GRAPH_DIRS)

    for in_filepath in onlyfiles:
        filename = splitext(basename(in_filepath))[0]
        graph = nx.read_weighted_edgelist(in_filepath)
        hbc = HBC(graph)
        out_filepath = join('output/', filename + '.hbcout')
        hbc.write(out_filepath)
globecomm/script.sh
python main.py
python analyse_snapshot.py
