Statistics
| Branch: | Revision:

root / fiddle / heuristic-betweenness-centrality / centrality_biconnected.py @ 739fe075

History | View | Annotate | Download (15.9 KB)

1
# Implement the section IV in Puzis 2012 paper
2
# Heuristic Betweenness Centrality - Partitioning to Bi-connected Components
3

    
4
import os
5
import sys
6
import pprint
7
import networkx as nx
8
import betweenness_centrality as centrality
9
import utility
10
from pdb import set_trace as debugger
11

    
12
MAIN_CODE_DIR = os.environ.get('MAIN_CODE_DIR', '')
13

    
14

    
15
class HeuristicBetweennessCentrality():
16
    def __init__(self, subgraphs, bicomponents, cutpoints, num_vertices, link_weight, traffic_matrix):
17
        self.subgraphs = subgraphs
18
        self.bicomponents = bicomponents
19
        self.cutpoints = cutpoints
20
        self.num_vertices = num_vertices
21
        self.link_weight = link_weight
22
        self.traffic_matrix = traffic_matrix
23

    
24
        self.bc_components = list()
25
        self.calculate_bc_non_cutpoint()
26
        self.calculate_bc_cutpoint()
27

    
28
        self.bc = dict()
29
        self.finalize()
30

    
31
    def calculate_bc_non_cutpoint(self):
32
        """BC for non cutpoint
33
        """
34
        for i, subgraphs in enumerate(self.subgraphs):
35
            traffic_matrix = self.traffic_matrix[i]
36
            results = centrality.weight_betweenness_centrality(subgraphs, traffic_matrix)
37
            self.bc_components.append(results)
38

    
39
            # print results
40

    
41
    def calculate_bc_cutpoint(self):
42
        self.bc_cutpoints = dict()
43

    
44
        bc_inter = dict()
45
        for v in self.cutpoints:
46
            inter = 0
47
            for i, comp in enumerate(self.bicomponents):
48
                if v in comp:
49
                    inter += self.link_weight.get_link_weight(i, v
50
                        ) * self.link_weight.get_reverse_link_weight(i, v)
51
            bc_inter[v] = inter
52

    
53
        print 'XXXX bc_components = %s' % self.bc_components
54
        print 'XXXX inter = %s' % bc_inter
55

    
56
        for v in self.cutpoints:
57
            bc_locally = 0
58
            for i, comp in enumerate(self.bicomponents):
59
                if v in comp:
60
                    # debugger()
61
                    bc_locally += self.bc_components[i][v]
62
            print 'locally = %s' % bc_locally
63
            # self.bc_cutpoints[v] = bc_locally - bc_inter[v]
64
            # TODO: do not minus the bc_inter
65
            self.bc_cutpoints[v] = bc_locally
66

    
67
    def finalize(self):
68
        # Add the bc for non cutpoint vertices
69
        for bc_component in self.bc_components:
70
            for key, value in bc_component.iteritems():
71
                if key not in self.bc:
72
                    self.bc[key] = value
73

    
74
        # Add the bc for cutpoint vertices
75
        for key, value in self.bc_cutpoints.iteritems():
76
            self.bc[key] = value
77

    
78
        print '*** betweenness = %s' % self.bc
79
        # Rescale the bc according to the original graph
80
        factor = 1.0 / ((self.num_vertices - 1) * (self.num_vertices - 2))
81
        # factor = 2.0 / (self.num_vertices*self.num_vertices - 3 * self.num_vertices + 2)
82
        for key, value in self.bc.iteritems():
83
            self.bc[key] = value * factor
84

    
85
    def write(self, file_suffix=''):
86
        filepath = '/output/heuristic_%s.csv'  % file_suffix
87
        with open(MAIN_CODE_DIR + filepath, 'w') as output:
88
            for node, centrality in self.bc.iteritems():
89
                output.write('%s, %s\n' % (node, centrality))
90

    
91
    def __str__(self):
92
        return str(self.bc)
93

    
94

    
95
class TrafficMatrix():
96
    def __init__(self, bicomponents, cutpoints, link_weight):
97
        self.bicomponents = bicomponents
98
        self.cutpoints = cutpoints
99
        self.num_components = len(bicomponents)
100
        self.link_weight = link_weight
101

    
102
        self.h = list()
103
        self.generate_empty_traffic_matrix()
104
        self.generate_traffic_matrix()
105

    
106
    def generate_empty_traffic_matrix(self):
107
        for i in range(self.num_components):
108
            l = len(self.bicomponents[i])
109
            matrix = [[1 for x in range(l)] for y in range(l)]
110
            # update the main diagonal
111
            for x in range(l):
112
                matrix[x][x] = 0
113

    
114
            self.h.append(matrix)
115

    
116
    def generate_traffic_matrix(self):
117
        # Update the value when one vertex is a cut-point, another vertex is not a cut-point
118
        for i, components in enumerate(self.bicomponents):
119
            normal_points = components.difference(self.cutpoints)
120
            cutpoints = self.cutpoints.intersection(components)
121

    
122
            for cutpoint in cutpoints:
123
                for normal_point in normal_points:
124
                    communication_intensity = self.link_weight.get_reverse_link_weight(i, cutpoint) + 1
125
                    self.update(i, cutpoint, normal_point, communication_intensity)
126

    
127
        # Update the value when both vertices are cut-points
128
        for i, components in enumerate(self.bicomponents):
129
            cutpoints = list(self.cutpoints.intersection(components))
130
            len_cutpoints = len(cutpoints)
131
            if len_cutpoints > 1:
132
                for k in range(len_cutpoints - 1):
133
                    for l in range(1, len_cutpoints):
134
                        if k == l:
135
                            continue
136
                        communication_intensity = (
137
                            self.link_weight.get_reverse_link_weight(i, cutpoints[k]) + 1) * (
138
                            self.link_weight.get_reverse_link_weight(i, cutpoints[l]) + 1
139
                        )
140
                        self.update(i, cutpoints[k], cutpoints[l], communication_intensity)
141

    
142
    def simple_update(self, comp_index, x_pos, y_pos, value):
143
        self.h[comp_index][x_pos][y_pos] = value
144
        # to keep the symmetric property of Traffic Matrix
145
        self.h[comp_index][y_pos][x_pos] = value
146

    
147
    def update(self, comp_index, x, y, value):
148
        comp = self.bicomponents[comp_index]
149
        try:
150
            x_pos = list(comp).index(x)
151
            y_pos = list(comp).index(y)
152
        except:
153
            debugger()
154
            a = 2
155

    
156
        self.simple_update(comp_index, x_pos, y_pos, value)
157

    
158
    def __str__(self):
159
        return str(self.h)
160

    
161
    def __getitem__(self, key):
162
        return self.h[key]
163

    
164

    
165
class LinkWeight():
166
    def __init__(self, graph, bicomponents, cutpoints):
167
        self.num_vertices = nx.number_of_nodes(graph)
168
        self.bicomponents = bicomponents
169
        self.num_components = len(bicomponents)
170
        self.cutpoints = cutpoints
171

    
172
        self.Dv_B = [dict() for i in range(self.num_components)] # link weight
173
        self.compute_component_tree_weight()
174

    
175
        self.reverse_Dv_B = [dict() for i in range(self.num_components)] # reverse link weight
176
        self.generate_reverse_link_weight()
177

    
178
    def _components_sharing_cutpoint(self, B_cutpoints, point):
179
        indices = list()
180
        for i, cp in enumerate(B_cutpoints):
181
            # debugger()
182
            if point in cp:
183
                indices.append(i)
184

    
185
        return indices
186

    
187
    def get_link_weight(self, comp_index, point):
188
        Dv_B_comp = self.Dv_B[comp_index]
189

    
190
        if point in Dv_B_comp:
191
            return Dv_B_comp[point]
192
        else:
193
            return 0
194

    
195
    def set_link_weight(self, comp_index, point, value):
196
        self.Dv_B[comp_index][point] = value
197

    
198
    def get_reverse_link_weight(self, comp_index, point):
199
        reverse_Dv_B_comp = self.reverse_Dv_B[comp_index]
200

    
201
        if point in reverse_Dv_B_comp:
202
            return reverse_Dv_B_comp[point]
203
        else:
204
            return 0
205

    
206
    def generate_link_weight(self):
207
        # How many cutpoints does this component have
208
        B_plus_v = list()
209
        B_cutpoints = list() # number of cutpoints in component B
210
        for comp in self.bicomponents:
211
            points = comp.intersection(self.cutpoints)
212
            B_plus_v.append(comp.difference(points))
213
            B_cutpoints.append(points)
214

    
215
        len_B_plus_v = [len(x) for x in B_plus_v]
216
        len_B_cutpoints = [len(x) for x in B_cutpoints]
217

    
218
        # Calculate the Dv_B
219

    
220
        # For the leaf in the block-cut tree
221
        level = 1
222
        for (i, cp) in enumerate(B_cutpoints):
223
            if len_B_cutpoints[i] == level:
224
                point = list(cp)[0] # there is only 1 element
225
                self.Dv_B[i][point] = len_B_plus_v[i]
226

    
227
        print "Link Weight - For all the leaves: %s" % self.Dv_B
228

    
229
        # For other nodes in the block-cut tree
230
        level += 1
231
        while level <= max(len_B_cutpoints):
232
            if level == 3:
233
                debugger()
234
            for (i, cp) in enumerate(B_cutpoints):
235
                if len_B_cutpoints[i] == level:
236

    
237
                    for point in cp:
238
                        # 1st way
239
                        shared_comp_indices = self._components_sharing_cutpoint(B_cutpoints, point)
240
                        shared_comp_indices.remove(i)
241
                        weight_shared_comp = list()
242

    
243
                        for index in shared_comp_indices:
244
                            weight_shared_comp.append(self.get_link_weight(index, point))
245

    
246
                        weight = self.num_vertices - 1 - sum(weight_shared_comp)
247

    
248
                        self.Dv_B[i][point] = weight
249

    
250
            print "Link Weight - For level %s: %s" % (level, self.Dv_B)
251

    
252
            level += 1
253

    
254
    def generate_reverse_link_weight(self):
255
        for i, Dv_B_i in enumerate(self.Dv_B):
256
            for key, value in Dv_B_i.iteritems():
257
                self.reverse_Dv_B[i][key] = self.num_vertices - 1 - self.get_link_weight(i, key)
258

    
259
    def compute_component_tree_weight(self):
260
        """Follows exactly the Algorithm 1 [Puzis 2012]
261
        """
262
        B_cutpoints = list() # number of cutpoints in component B
263
        for comp in self.bicomponents:
264
            points = comp.intersection(self.cutpoints)
265
            B_cutpoints.append(points)
266

    
267
        Q = self._inititalize_component_tree_weight()
268
        while Q:
269
            pair = Q.pop(0)
270
            if pair['type'] == 'component_vertex_pair':
271
                B_index = pair['value'][0]
272
                v = pair['value'][1]
273
                size = len(self.bicomponents[B_index]) - 1;
274
                all_cutpoints = self.bicomponents[B_index].intersection(self.cutpoints)
275
                all_cutpoints.remove(v)
276

    
277
                for cp in all_cutpoints:
278
                    if self.get_link_weight(B_index, cp) != -1:
279
                        size += self.num_vertices - self.get_link_weight(B_index, cp) - 1
280
                        # size += self.num_vertices - self.get_link_weight(B_index, cp)
281

    
282
                link_weight = size
283
                self._verify_link_weight(B_index, v, link_weight)
284
                self.set_link_weight(B_index, v, link_weight)
285

    
286
                # update Q
287
                Q = self._find_unknown_weight_wrt_cutpoint(v, Q)
288

    
289
            if pair['type'] == 'vertex_component_pair':
290
                size = 0
291
                # size = 1
292
                B_index = pair['value'][0]
293
                v = pair['value'][1]
294
                shared_comp_indices = self._components_sharing_cutpoint(B_cutpoints, v)
295
                shared_comp_indices.remove(B_index)
296

    
297

    
298
                for i in shared_comp_indices:
299
                    if self.get_link_weight(i, v) != - 1:
300
                        size += self.get_link_weight(i, v)
301

    
302
                link_weight = self.num_vertices - 1 - size
303
                self._verify_link_weight(B_index, v, link_weight)
304
                self.set_link_weight(B_index, v, link_weight)
305

    
306
                # update Q
307
                Q = self._find_unknown_weight_wrt_component(B_index, Q)
308

    
309
    def _verify_link_weight(self, B_index, v, value):
310
        """ If the old_value exist in self.Dv_B, then it must be equal to new value
311

312
        Otherwise, do nothing
313
        """
314
        print "VERIFICATION FOR (%s, %s)" % (B_index, v)
315
        old_value = self.get_link_weight(B_index, v)
316
        # -1 is unknown
317
        if old_value != -1:
318
            if old_value != value:
319
                print "BUGS FOUND in _verify_link_weight()"
320
                sys.exit()
321

    
322

    
323
    def _inititalize_component_tree_weight(self):
324
        Q = []
325
        for i, comp in enumerate(self.bicomponents):
326
            current_cutpoints = self.cutpoints.intersection(comp)
327
            if len(current_cutpoints) == 1:
328
                Q.append({
329
                    'type': 'component_vertex_pair',
330
                    'value': (i, list(current_cutpoints)[0]) # (B_i, cutpoint) = (i-th component, the cutpoint name)
331
                    })
332
            for cp in current_cutpoints:
333
                self.set_link_weight(i, cp, -1)
334
        return Q
335

    
336
    def _find_unknown_weight_wrt_cutpoint(self, cutpoint, Q):
337
        # Cut-point v such that the weights of all but one of its links in T are already computed.
338
        num_of_uncomputed_weight = 0
339
        uncomputed_component_index = []
340

    
341
        for i, Dv_B_comp in enumerate(self.Dv_B):
342
            if cutpoint in Dv_B_comp:
343
                if Dv_B_comp[cutpoint] == -1:
344
                    num_of_uncomputed_weight += 1
345
                    uncomputed_component_index.append(i)
346
        if num_of_uncomputed_weight == 1:
347
            pair = {
348
                'type': 'vertex_component_pair',
349
                'value': (uncomputed_component_index.pop(), cutpoint)
350
            }
351
            Q.append(pair)
352
        return Q
353

    
354
    def _find_unknown_weight_wrt_component(self, comp_index, Q):
355
        # Component B such that weights of all but one of its links in T are already computed.
356
        Dv_B_comp = self.Dv_B[comp_index]
357
        values = Dv_B_comp.values()
358

    
359
        # Check if -1 value appear only 1 time
360
        flag = False
361
        minus_one_value = [x for x in values if x == -1]
362
        if len(minus_one_value) == 1:
363
            for cp, value in Dv_B_comp.iteritems():
364
                if value == -1:
365
                    pair = {
366
                        'type': 'component_vertex_pair',
367
                        'value': (comp_index, cp)
368
                    }
369
                    Q.append(pair)
370
        return Q
371

    
372
    def __str__(self):
373
        return str(self.Dv_B)
374

    
375
def analyse_biconnected_component(graph):
376
    bicomponents = list(nx.biconnected_components(graph))
377
    print bicomponents
378
    print [len(x) for x in bicomponents]
379

    
380
def find_heuristic_bc(filepath):
381
    filename = os.splitext(os.path.basename(filepath))[0]
382

    
383
    pp = pprint.PrettyPrinter(indent=4)
384

    
385
    graph = nx.read_weighted_edgelist(filepath)
386

    
387
    if not nx.is_connected(graph):
388
        print "Graph is not connected"
389
        # sys.exit()
390
    else:
391
        print "Graph is connected"
392

    
393

    
394
    # Find biconnected components
395
    analyse_biconnected_component(graph)
396

    
397
    subgraphs = list(nx.biconnected_component_subgraphs(graph))
398
    bicomponents = list(nx.biconnected_components(graph))
399
    component_edges = list(nx.biconnected_component_edges(graph))
400
    cutpoints = set(nx.articulation_points(graph))
401

    
402
    num_vertices = nx.number_of_nodes(graph)
403

    
404
    lw = LinkWeight(graph, bicomponents, cutpoints)
405
    print 'link weight: '
406
    print(lw)
407

    
408
    tm = TrafficMatrix(bicomponents, cutpoints, lw)
409
    print 'traffic matrix:'
410
    pp.pprint(tm.h)
411

    
412
    bc = HeuristicBetweennessCentrality(subgraphs, bicomponents, cutpoints, num_vertices, lw, tm)
413
    bc.write(file_suffix)
414
    print bc
415

    
416
if __name__ == '__main__':
417
    utility.initialize()
418
    input_dir = os.path.join(MAIN_CODE_DIR, 'input')
419

    
420
    # filepath = MAIN_CODE_DIR + '/input/simple_house.edges'
421
    # filepath = MAIN_CODE_DIR + '/input/simple2.edges'
422
    # filepath = MAIN_CODE_DIR + '/input/simple.edges'
423
    # filepath = MAIN_CODE_DIR + '/input/ninux_simplified.edges'
424
    # filepath = MAIN_CODE_DIR + '/input/ninux_simplified_connected.edges'
425
    filepath = MAIN_CODE_DIR + '/input/ninux_30_1.edges'
426
    # filepath = MAIN_CODE_DIR + '/input/ninux_connected.edges'
427
    # filepath = MAIN_CODE_DIR + '/input/ninux_unweighted.edges'
428
    # filepath = MAIN_CODE_DIR + '/input/ninux_unweighted_simplified.edges'
429
    # filepath = MAIN_CODE_DIR + '/input/ninux_unweighted_simplified_connected.edges'
430
    # filepath = MAIN_CODE_DIR + '/input/simple3.edges'
431
    # filepath = MAIN_CODE_DIR + '/input/simple4.edges'
432
    # filepath = MAIN_CODE_DIR + '/input/simple5.edges'
433
    file_suffix = 'edge_list'
434

    
435
    # # Weight betweenness centrality
436
    # utility.weight_betweenness_centrality(filepath, file_suffix)
437

    
438
    # # Brandess betweenness centrality
439
    # utility.brandes_betweenness_centrality(filepath, file_suffix)
440

    
441
    # # Heuristic BC
442
    # find_heuristic_bc(filepath)
443