Statistics
| Branch: | Revision:

## root / globecomm / heuristic_bc / heuristic_betweenness_centrality.py @ bd3d6dca

 1 ```# Implement the section IV in Puzis 2012 paper ``` ```# straight_lineent the section IV in Puzis 2012 paper ``` ```# Heuristic Betweenness Centrality - Partitioning to Bi-connected Components ``` ```import os ``` ```import sys ``` ```import pprint ``` ```import networkx as nx ``` ```import betweenness_centrality as centrality ``` ```import utility ``` ```from pdb import set_trace as debugger ``` ```MAIN_CODE_DIR = os.environ.get('MAIN_CODE_DIR', '') ``` ```class HeuristicBetweennessCentrality(): ``` ``` def __init__(self, graph): ``` ``` if not nx.is_connected(graph): ``` ``` print "Graph is not connected" ``` ``` sys.exit() ``` ``` self.subgraphs = list(nx.biconnected_component_subgraphs(graph)) ``` ``` self.bicomponents = list(nx.biconnected_components(graph)) ``` ``` self.cutpoints = set(nx.articulation_points(graph)) ``` ``` self.num_vertices = nx.number_of_nodes(graph) ``` ``` self.link_weight = LinkWeight(graph, self.bicomponents, self.cutpoints) ``` ``` self.traffic_matrix = TrafficMatrix(self.bicomponents, self.cutpoints, self.link_weight) ``` ``` self.bc_components = list() ``` ``` self.calculate_bc_non_cutpoint() ``` ``` self.calculate_bc_cutpoint() ``` ``` self.bc = dict() ``` ``` self.finalize() ``` ``` def calculate_bc_non_cutpoint(self): ``` ``` """BC for non cutpoint ``` ``` """ ``` ``` for i, subgraph in enumerate(self.subgraphs): ``` ``` traffic_matrix = self.traffic_matrix[i] ``` ``` cut_without = self.num_vertices - nx.number_of_nodes(subgraph) ``` ``` results = centrality.weight_betweenness_centrality(subgraph, traffic_matrix, cut_without) ``` ``` self.bc_components.append(results) ``` ``` def calculate_bc_cutpoint(self): ``` ``` self.bc_cutpoints = dict() ``` ``` bc_inter = dict() ``` ``` for v in self.cutpoints: ``` ``` inter = 0 ``` ``` for i, comp in enumerate(self.bicomponents): ``` ``` if v in comp: ``` ``` inter += self.link_weight.get_link_weight(i, v ``` ``` ) * self.link_weight.get_reverse_link_weight(i, v) ``` ``` bc_inter[v] = inter ``` ``` for v in self.cutpoints: ``` ``` bc_locally = 0 ``` ``` for i, comp in enumerate(self.bicomponents): ``` ``` if v in comp: ``` ``` bc_locally += self.bc_components[i][v] ``` ``` self.bc_cutpoints[v] = bc_locally - bc_inter[v] ``` ``` # TODO: do not minus the bc_inter ``` ``` # self.bc_cutpoints[v] = bc_locally ``` ``` def finalize(self): ``` ``` # Add the bc for non cutpoint vertices ``` ``` for bc_component in self.bc_components: ``` ``` for key, value in bc_component.iteritems(): ``` ``` if key not in self.bc: ``` ``` self.bc[key] = value ``` ``` # Add the bc for cutpoint vertices ``` ``` for key, value in self.bc_cutpoints.iteritems(): ``` ``` self.bc[key] = value ``` ``` # Rescale the bc according to the original graph ``` ``` factor = 1.0 / ((self.num_vertices - 1) * (self.num_vertices - 2)) ``` ``` # TODO: check the scaling factor, how much should it be? ``` ``` # factor = 2.0 / (self.num_vertices*self.num_vertices - 3 * self.num_vertices + 2) ``` ``` for key, value in self.bc.iteritems(): ``` ``` self.bc[key] = value * factor ``` ``` def __str__(self): ``` ``` return str(self.bc) ``` ``` def write(self, filepath): ``` ``` print "HBC score is written to %s" % filepath ``` ``` with open(filepath, 'w') as output: ``` ``` for node, centrality in self.bc.iteritems(): ``` ``` output.write('%s, %s\n' % (node, centrality)) ``` ```class TrafficMatrix(): ``` ``` def __init__(self, bicomponents, cutpoints, link_weight): ``` ``` self.bicomponents = bicomponents ``` ``` self.cutpoints = cutpoints ``` ``` self.num_components = len(bicomponents) ``` ``` self.link_weight = link_weight ``` ``` self.h = list() ``` ``` self.generate_empty_traffic_matrix() ``` ``` self.generate_traffic_matrix() ``` ``` def generate_empty_traffic_matrix(self): ``` ``` for i in range(self.num_components): ``` ``` l = len(self.bicomponents[i]) ``` ``` matrix = [[1 for x in range(l)] for y in range(l)] ``` ``` # update the main diagonal ``` ``` for x in range(l): ``` ``` matrix[x][x] = 0 ``` ``` self.h.append(matrix) ``` ``` def generate_traffic_matrix(self): ``` ``` # Update the value when one vertex is a cut-point, another vertex is not a cut-point ``` ``` for i, components in enumerate(self.bicomponents): ``` ``` normal_points = components.difference(self.cutpoints) ``` ``` cutpoints = self.cutpoints.intersection(components) ``` ``` for cutpoint in cutpoints: ``` ``` for normal_point in normal_points: ``` ``` communication_intensity = self.link_weight.get_reverse_link_weight(i, cutpoint) + 1 ``` ``` self.update(i, cutpoint, normal_point, communication_intensity) ``` ``` # Update the value when both vertices are cut-points ``` ``` for i, components in enumerate(self.bicomponents): ``` ``` cutpoints = list(self.cutpoints.intersection(components)) ``` ``` len_cutpoints = len(cutpoints) ``` ``` if len_cutpoints > 1: ``` ``` for k in range(len_cutpoints - 1): ``` ``` for l in range(1, len_cutpoints): ``` ``` if k == l: ``` ``` continue ``` ``` communication_intensity = ( ``` ``` self.link_weight.get_reverse_link_weight(i, cutpoints[k]) + 1) * ( ``` ``` self.link_weight.get_reverse_link_weight(i, cutpoints[l]) + 1 ``` ``` ) ``` ``` self.update(i, cutpoints[k], cutpoints[l], communication_intensity) ``` ``` def simple_update(self, comp_index, x_pos, y_pos, value): ``` ``` self.h[comp_index][x_pos][y_pos] = value ``` ``` # to keep the symmetric property of Traffic Matrix ``` ``` self.h[comp_index][y_pos][x_pos] = value ``` ``` def update(self, comp_index, x, y, value): ``` ``` comp = sorted(self.bicomponents[comp_index]) ``` ``` try: ``` ``` x_pos = list(comp).index(x) ``` ``` y_pos = list(comp).index(y) ``` ``` except: ``` ``` debugger() ``` ``` a = 2 ``` ``` self.simple_update(comp_index, x_pos, y_pos, value) ``` ``` def __str__(self): ``` ``` return str(self.h) ``` ``` def __getitem__(self, key): ``` ``` return self.h[key] ``` ```class LinkWeight(): ``` ``` def __init__(self, graph, bicomponents, cutpoints): ``` ``` self.num_vertices = nx.number_of_nodes(graph) ``` ``` self.bicomponents = bicomponents ``` ``` self.num_components = len(bicomponents) ``` ``` self.cutpoints = cutpoints ``` ``` self.Dv_B = [dict() for i in range(self.num_components)] # link weight ``` ``` self.compute_component_tree_weight() ``` ``` self.reverse_Dv_B = [dict() for i in range(self.num_components)] # reverse link weight ``` ``` self.generate_reverse_link_weight() ``` ``` def _components_sharing_cutpoint(self, B_cutpoints, point): ``` ``` indices = list() ``` ``` for i, cp in enumerate(B_cutpoints): ``` ``` if point in cp: ``` ``` indices.append(i) ``` ``` return indices ``` ``` def get_link_weight(self, comp_index, point): ``` ``` Dv_B_comp = self.Dv_B[comp_index] ``` ``` if point in Dv_B_comp: ``` ``` return Dv_B_comp[point] ``` ``` else: ``` ``` return 0 ``` ``` def set_link_weight(self, comp_index, point, value): ``` ``` self.Dv_B[comp_index][point] = value ``` ``` def get_reverse_link_weight(self, comp_index, point): ``` ``` reverse_Dv_B_comp = self.reverse_Dv_B[comp_index] ``` ``` if point in reverse_Dv_B_comp: ``` ``` return reverse_Dv_B_comp[point] ``` ``` else: ``` ``` return 0 ``` ``` def generate_reverse_link_weight(self): ``` ``` for i, Dv_B_i in enumerate(self.Dv_B): ``` ``` for key, value in Dv_B_i.iteritems(): ``` ``` self.reverse_Dv_B[i][key] = self.num_vertices - 1 - self.get_link_weight(i, key) ``` ``` def compute_component_tree_weight(self): ``` ``` """Follows exactly the Algorithm 1 [Puzis 2012] ``` ``` """ ``` ``` B_cutpoints = list() # number of cutpoints in component B ``` ``` for comp in self.bicomponents: ``` ``` points = comp.intersection(self.cutpoints) ``` ``` B_cutpoints.append(points) ``` ``` Q = self._inititalize_component_tree_weight() ``` ``` while Q: ``` ``` pair = Q.pop(0) ``` ``` if pair['type'] == 'component_vertex_pair': ``` ``` B_index = pair['value'][0] ``` ``` v = pair['value'][1] ``` ``` size = len(self.bicomponents[B_index]) - 1; ``` ``` all_cutpoints = self.bicomponents[B_index].intersection(self.cutpoints) ``` ``` all_cutpoints.remove(v) ``` ``` for cp in all_cutpoints: ``` ``` if self.get_link_weight(B_index, cp) != -1: ``` ``` size += self.num_vertices - self.get_link_weight(B_index, cp) - 1 ``` ``` link_weight = size ``` ``` self._verify_link_weight(B_index, v, link_weight) ``` ``` self.set_link_weight(B_index, v, link_weight) ``` ``` # update Q ``` ``` Q = self._find_unknown_weight_wrt_cutpoint(v, Q) ``` ``` if pair['type'] == 'vertex_component_pair': ``` ``` size = 0 ``` ``` B_index = pair['value'][0] ``` ``` v = pair['value'][1] ``` ``` shared_comp_indices = self._components_sharing_cutpoint(B_cutpoints, v) ``` ``` shared_comp_indices.remove(B_index) ``` ``` for i in shared_comp_indices: ``` ``` if self.get_link_weight(i, v) != - 1: ``` ``` size += self.get_link_weight(i, v) ``` ``` link_weight = self.num_vertices - 1 - size ``` ``` self._verify_link_weight(B_index, v, link_weight) ``` ``` self.set_link_weight(B_index, v, link_weight) ``` ``` # update Q ``` ``` Q = self._find_unknown_weight_wrt_component(B_index, Q) ``` ``` def _verify_link_weight(self, B_index, v, value): ``` ``` """ If the old_value exist in self.Dv_B, then it must be equal to new value ``` ``` ``` ``` Otherwise, do nothing ``` ``` """ ``` ``` old_value = self.get_link_weight(B_index, v) ``` ``` if old_value != -1: # -1 is unknown ``` ``` if old_value != value: ``` ``` print "BUGS FOUND in _verify_link_weight()" ``` ``` sys.exit() ``` ``` def _inititalize_component_tree_weight(self): ``` ``` Q = [] ``` ``` for i, comp in enumerate(self.bicomponents): ``` ``` current_cutpoints = self.cutpoints.intersection(comp) ``` ``` if len(current_cutpoints) == 1: ``` ``` Q.append({ ``` ``` 'type': 'component_vertex_pair', ``` ``` 'value': (i, list(current_cutpoints)[0]) # (B_i, cutpoint) = (i-th component, the cutpoint name) ``` ``` }) ``` ``` for cp in current_cutpoints: ``` ``` self.set_link_weight(i, cp, -1) ``` ``` return Q ``` ``` def _find_unknown_weight_wrt_cutpoint(self, cutpoint, Q): ``` ``` # Cut-point v such that the weights of all but one of its links in T are already computed. ``` ``` num_of_uncomputed_weight = 0 ``` ``` uncomputed_component_index = [] ``` ``` for i, Dv_B_comp in enumerate(self.Dv_B): ``` ``` if cutpoint in Dv_B_comp: ``` ``` if Dv_B_comp[cutpoint] == -1: ``` ``` num_of_uncomputed_weight += 1 ``` ``` uncomputed_component_index.append(i) ``` ``` if num_of_uncomputed_weight > 1: ``` ``` break ``` ``` if num_of_uncomputed_weight == 1: ``` ``` pair = { ``` ``` 'type': 'vertex_component_pair', ``` ``` 'value': (uncomputed_component_index.pop(), cutpoint) ``` ``` } ``` ``` Q.append(pair) ``` ``` return Q ``` ``` def _find_unknown_weight_wrt_component(self, comp_index, Q): ``` ``` # Component B such that weights of all but one of its links in T are already computed. ``` ``` Dv_B_comp = self.Dv_B[comp_index] ``` ``` values = Dv_B_comp.values() ``` ``` # Check if -1 value appear only 1 time ``` ``` flag = False ``` ``` minus_one_value = [x for x in values if x == -1] ``` ``` if len(minus_one_value) == 1: ``` ``` for cp, value in Dv_B_comp.iteritems(): ``` ``` if value == -1: ``` ``` pair = { ``` ``` 'type': 'component_vertex_pair', ``` ``` 'value': (comp_index, cp) ``` ``` } ``` ``` Q.append(pair) ``` ``` return Q ``` ``` def __str__(self): ``` ``` return str(self.Dv_B) ```