## root / globecomm / heuristic_bc / heuristic_betweenness_centrality.py @ fac6e5a4

History | View | Annotate | Download (11.8 KB)

1 | bd3d6dca | Quynh PX Nguyen | ```
# Implement the section IV in Puzis 2012 paper
``` |
---|---|---|---|

2 | ```
# straight_lineent the section IV in Puzis 2012 paper
``` |
||

3 | ```
# Heuristic Betweenness Centrality - Partitioning to Bi-connected Components
``` |
||

4 | |||

5 | import os |
||

6 | import sys |
||

7 | import pprint |
||

8 | import networkx as nx |
||

9 | import betweenness_centrality as centrality |
||

10 | import utility |
||

11 | from pdb import set_trace as debugger |
||

12 | |||

13 | MAIN_CODE_DIR = os.environ.get('MAIN_CODE_DIR', '') |
||

14 | |||

15 | class HeuristicBetweennessCentrality(): |
||

16 | def __init__(self, graph): |
||

17 | |||

18 | if not nx.is_connected(graph): |
||

19 | print "Graph is not connected" |
||

20 | sys.exit() |
||

21 | |||

22 | self.subgraphs = list(nx.biconnected_component_subgraphs(graph)) |
||

23 | self.bicomponents = list(nx.biconnected_components(graph)) |
||

24 | self.cutpoints = set(nx.articulation_points(graph)) |
||

25 | ```
self.num_vertices = nx.number_of_nodes(graph)
``` |
||

26 | self.link_weight = LinkWeight(graph, self.bicomponents, self.cutpoints) |
||

27 | self.traffic_matrix = TrafficMatrix(self.bicomponents, self.cutpoints, self.link_weight) |
||

28 | |||

29 | self.bc_components = list() |
||

30 | ```
self.calculate_bc_non_cutpoint()
``` |
||

31 | ```
self.calculate_bc_cutpoint()
``` |
||

32 | |||

33 | self.bc = dict() |
||

34 | ```
self.finalize()
``` |
||

35 | |||

36 | def calculate_bc_non_cutpoint(self): |
||

37 | ```
"""BC for non cutpoint
``` |
||

38 | ```
"""
``` |
||

39 | for i, subgraph in enumerate(self.subgraphs): |
||

40 | ```
traffic_matrix = self.traffic_matrix[i]
``` |
||

41 | ```
cut_without = self.num_vertices - nx.number_of_nodes(subgraph)
``` |
||

42 | results = centrality.weight_betweenness_centrality(subgraph, traffic_matrix, cut_without) |
||

43 | ```
self.bc_components.append(results)
``` |
||

44 | |||

45 | def calculate_bc_cutpoint(self): |
||

46 | self.bc_cutpoints = dict() |
||

47 | |||

48 | ```
bc_inter = dict()
``` |
||

49 | for v in self.cutpoints: |
||

50 | ```
inter = 0
``` |
||

51 | for i, comp in enumerate(self.bicomponents): |
||

52 | if v in comp: |
||

53 | ```
inter += self.link_weight.get_link_weight(i, v
``` |
||

54 | ```
) * self.link_weight.get_reverse_link_weight(i, v)
``` |
||

55 | bc_inter[v] = inter |
||

56 | |||

57 | for v in self.cutpoints: |
||

58 | ```
bc_locally = 0
``` |
||

59 | for i, comp in enumerate(self.bicomponents): |
||

60 | if v in comp: |
||

61 | ```
bc_locally += self.bc_components[i][v]
``` |
||

62 | ```
self.bc_cutpoints[v] = bc_locally - bc_inter[v]
``` |
||

63 | ```
# TODO: do not minus the bc_inter
``` |
||

64 | ```
# self.bc_cutpoints[v] = bc_locally
``` |
||

65 | |||

66 | def finalize(self): |
||

67 | ```
# Add the bc for non cutpoint vertices
``` |
||

68 | for bc_component in self.bc_components: |
||

69 | for key, value in bc_component.iteritems(): |
||

70 | if key not in self.bc: |
||

71 | ```
self.bc[key] = value
``` |
||

72 | |||

73 | ```
# Add the bc for cutpoint vertices
``` |
||

74 | for key, value in self.bc_cutpoints.iteritems(): |
||

75 | ```
self.bc[key] = value
``` |
||

76 | |||

77 | ```
# Rescale the bc according to the original graph
``` |
||

78 | factor = 1.0 / ((self.num_vertices - 1) * (self.num_vertices - 2)) |
||

79 | ```
# TODO: check the scaling factor, how much should it be?
``` |
||

80 | ```
# factor = 2.0 / (self.num_vertices*self.num_vertices - 3 * self.num_vertices + 2)
``` |
||

81 | for key, value in self.bc.iteritems(): |
||

82 | ```
self.bc[key] = value * factor
``` |
||

83 | |||

84 | def __str__(self): |
||

85 | return str(self.bc) |
||

86 | |||

87 | def write(self, filepath): |
||

88 | print "HBC score is written to %s" % filepath |
||

89 | with open(filepath, 'w') as output: |
||

90 | for node, centrality in self.bc.iteritems(): |
||

91 | ```
output.write('%s, %s\n' % (node, centrality))
``` |
||

92 | |||

93 | class TrafficMatrix(): |
||

94 | def __init__(self, bicomponents, cutpoints, link_weight): |
||

95 | ```
self.bicomponents = bicomponents
``` |
||

96 | ```
self.cutpoints = cutpoints
``` |
||

97 | self.num_components = len(bicomponents) |
||

98 | ```
self.link_weight = link_weight
``` |
||

99 | |||

100 | self.h = list() |
||

101 | ```
self.generate_empty_traffic_matrix()
``` |
||

102 | ```
self.generate_traffic_matrix()
``` |
||

103 | |||

104 | def generate_empty_traffic_matrix(self): |
||

105 | for i in range(self.num_components): |
||

106 | l = len(self.bicomponents[i]) |
||

107 | matrix = [[1 for x in range(l)] for y in range(l)] |
||

108 | ```
# update the main diagonal
``` |
||

109 | for x in range(l): |
||

110 | ```
matrix[x][x] = 0
``` |
||

111 | |||

112 | ```
self.h.append(matrix)
``` |
||

113 | |||

114 | def generate_traffic_matrix(self): |
||

115 | ```
# Update the value when one vertex is a cut-point, another vertex is not a cut-point
``` |
||

116 | for i, components in enumerate(self.bicomponents): |
||

117 | ```
normal_points = components.difference(self.cutpoints)
``` |
||

118 | ```
cutpoints = self.cutpoints.intersection(components)
``` |
||

119 | |||

120 | for cutpoint in cutpoints: |
||

121 | for normal_point in normal_points: |
||

122 | communication_intensity = self.link_weight.get_reverse_link_weight(i, cutpoint) + 1 |
||

123 | ```
self.update(i, cutpoint, normal_point, communication_intensity)
``` |
||

124 | |||

125 | ```
# Update the value when both vertices are cut-points
``` |
||

126 | for i, components in enumerate(self.bicomponents): |
||

127 | cutpoints = list(self.cutpoints.intersection(components)) |
||

128 | ```
len_cutpoints = len(cutpoints)
``` |
||

129 | if len_cutpoints > 1: |
||

130 | for k in range(len_cutpoints - 1): |
||

131 | for l in range(1, len_cutpoints): |
||

132 | ```
if k == l:
``` |
||

133 | ```
continue
``` |
||

134 | communication_intensity = ( |
||

135 | self.link_weight.get_reverse_link_weight(i, cutpoints[k]) + 1) * ( |
||

136 | self.link_weight.get_reverse_link_weight(i, cutpoints[l]) + 1 |
||

137 | ) |
||

138 | ```
self.update(i, cutpoints[k], cutpoints[l], communication_intensity)
``` |
||

139 | |||

140 | def simple_update(self, comp_index, x_pos, y_pos, value): |
||

141 | ```
self.h[comp_index][x_pos][y_pos] = value
``` |
||

142 | ```
# to keep the symmetric property of Traffic Matrix
``` |
||

143 | ```
self.h[comp_index][y_pos][x_pos] = value
``` |
||

144 | |||

145 | def update(self, comp_index, x, y, value): |
||

146 | comp = sorted(self.bicomponents[comp_index]) |
||

147 | ```
try:
``` |
||

148 | ```
x_pos = list(comp).index(x)
``` |
||

149 | ```
y_pos = list(comp).index(y)
``` |
||

150 | ```
except:
``` |
||

151 | debugger() |
||

152 | ```
a = 2
``` |
||

153 | |||

154 | ```
self.simple_update(comp_index, x_pos, y_pos, value)
``` |
||

155 | |||

156 | def __str__(self): |
||

157 | return str(self.h) |
||

158 | |||

159 | def __getitem__(self, key): |
||

160 | return self.h[key] |
||

161 | |||

162 | |||

163 | class LinkWeight(): |
||

164 | def __init__(self, graph, bicomponents, cutpoints): |
||

165 | ```
self.num_vertices = nx.number_of_nodes(graph)
``` |
||

166 | ```
self.bicomponents = bicomponents
``` |
||

167 | self.num_components = len(bicomponents) |
||

168 | ```
self.cutpoints = cutpoints
``` |
||

169 | |||

170 | self.Dv_B = [dict() for i in range(self.num_components)] # link weight |
||

171 | ```
self.compute_component_tree_weight()
``` |
||

172 | |||

173 | self.reverse_Dv_B = [dict() for i in range(self.num_components)] # reverse link weight |
||

174 | ```
self.generate_reverse_link_weight()
``` |
||

175 | |||

176 | def _components_sharing_cutpoint(self, B_cutpoints, point): |
||

177 | ```
indices = list()
``` |
||

178 | for i, cp in enumerate(B_cutpoints): |
||

179 | if point in cp: |
||

180 | indices.append(i) |
||

181 | |||

182 | ```
return indices
``` |
||

183 | |||

184 | def get_link_weight(self, comp_index, point): |
||

185 | ```
Dv_B_comp = self.Dv_B[comp_index]
``` |
||

186 | |||

187 | if point in Dv_B_comp: |
||

188 | ```
return Dv_B_comp[point]
``` |
||

189 | ```
else:
``` |
||

190 | return 0 |
||

191 | |||

192 | def set_link_weight(self, comp_index, point, value): |
||

193 | ```
self.Dv_B[comp_index][point] = value
``` |
||

194 | |||

195 | def get_reverse_link_weight(self, comp_index, point): |
||

196 | ```
reverse_Dv_B_comp = self.reverse_Dv_B[comp_index]
``` |
||

197 | |||

198 | if point in reverse_Dv_B_comp: |
||

199 | ```
return reverse_Dv_B_comp[point]
``` |
||

200 | ```
else:
``` |
||

201 | return 0 |
||

202 | |||

203 | def generate_reverse_link_weight(self): |
||

204 | for i, Dv_B_i in enumerate(self.Dv_B): |
||

205 | for key, value in Dv_B_i.iteritems(): |
||

206 | self.reverse_Dv_B[i][key] = self.num_vertices - 1 - self.get_link_weight(i, key) |
||

207 | |||

208 | def compute_component_tree_weight(self): |
||

209 | ```
"""Follows exactly the Algorithm 1 [Puzis 2012]
``` |
||

210 | ```
"""
``` |
||

211 | B_cutpoints = list() # number of cutpoints in component B |
||

212 | for comp in self.bicomponents: |
||

213 | ```
points = comp.intersection(self.cutpoints)
``` |
||

214 | B_cutpoints.append(points) |
||

215 | |||

216 | ```
Q = self._inititalize_component_tree_weight()
``` |
||

217 | ```
while Q:
``` |
||

218 | ```
pair = Q.pop(0)
``` |
||

219 | if pair['type'] == 'component_vertex_pair': |
||

220 | B_index = pair['value'][0] |
||

221 | v = pair['value'][1] |
||

222 | size = len(self.bicomponents[B_index]) - 1; |
||

223 | all_cutpoints = self.bicomponents[B_index].intersection(self.cutpoints) |
||

224 | all_cutpoints.remove(v) |
||

225 | |||

226 | for cp in all_cutpoints: |
||

227 | if self.get_link_weight(B_index, cp) != -1: |
||

228 | size += self.num_vertices - self.get_link_weight(B_index, cp) - 1 |
||

229 | |||

230 | link_weight = size |
||

231 | ```
self._verify_link_weight(B_index, v, link_weight)
``` |
||

232 | ```
self.set_link_weight(B_index, v, link_weight)
``` |
||

233 | |||

234 | ```
# update Q
``` |
||

235 | ```
Q = self._find_unknown_weight_wrt_cutpoint(v, Q)
``` |
||

236 | |||

237 | if pair['type'] == 'vertex_component_pair': |
||

238 | ```
size = 0
``` |
||

239 | B_index = pair['value'][0] |
||

240 | v = pair['value'][1] |
||

241 | ```
shared_comp_indices = self._components_sharing_cutpoint(B_cutpoints, v)
``` |
||

242 | shared_comp_indices.remove(B_index) |
||

243 | |||

244 | |||

245 | for i in shared_comp_indices: |
||

246 | if self.get_link_weight(i, v) != - 1: |
||

247 | ```
size += self.get_link_weight(i, v)
``` |
||

248 | |||

249 | link_weight = self.num_vertices - 1 - size |
||

250 | ```
self._verify_link_weight(B_index, v, link_weight)
``` |
||

251 | ```
self.set_link_weight(B_index, v, link_weight)
``` |
||

252 | |||

253 | ```
# update Q
``` |
||

254 | ```
Q = self._find_unknown_weight_wrt_component(B_index, Q)
``` |
||

255 | |||

256 | def _verify_link_weight(self, B_index, v, value): |
||

257 | ```
""" If the old_value exist in self.Dv_B, then it must be equal to new value
``` |
||

258 | ```
``` |
||

259 | ```
Otherwise, do nothing
``` |
||

260 | ```
"""
``` |
||

261 | ```
old_value = self.get_link_weight(B_index, v)
``` |
||

262 | |||

263 | if old_value != -1: # -1 is unknown |
||

264 | ```
if old_value != value:
``` |
||

265 | print "BUGS FOUND in _verify_link_weight()" |
||

266 | sys.exit() |
||

267 | |||

268 | |||

269 | def _inititalize_component_tree_weight(self): |
||

270 | Q = [] |
||

271 | for i, comp in enumerate(self.bicomponents): |
||

272 | ```
current_cutpoints = self.cutpoints.intersection(comp)
``` |
||

273 | if len(current_cutpoints) == 1: |
||

274 | Q.append({ |
||

275 | 'type': 'component_vertex_pair', |
||

276 | 'value': (i, list(current_cutpoints)[0]) # (B_i, cutpoint) = (i-th component, the cutpoint name) |
||

277 | }) |
||

278 | for cp in current_cutpoints: |
||

279 | self.set_link_weight(i, cp, -1) |
||

280 | ```
return Q
``` |
||

281 | |||

282 | def _find_unknown_weight_wrt_cutpoint(self, cutpoint, Q): |
||

283 | ```
# Cut-point v such that the weights of all but one of its links in T are already computed.
``` |
||

284 | ```
num_of_uncomputed_weight = 0
``` |
||

285 | uncomputed_component_index = [] |
||

286 | |||

287 | for i, Dv_B_comp in enumerate(self.Dv_B): |
||

288 | if cutpoint in Dv_B_comp: |
||

289 | if Dv_B_comp[cutpoint] == -1: |
||

290 | ```
num_of_uncomputed_weight += 1
``` |
||

291 | uncomputed_component_index.append(i) |
||

292 | if num_of_uncomputed_weight > 1: |
||

293 | ```
break
``` |
||

294 | |||

295 | if num_of_uncomputed_weight == 1: |
||

296 | pair = { |
||

297 | 'type': 'vertex_component_pair', |
||

298 | ```
'value': (uncomputed_component_index.pop(), cutpoint)
``` |
||

299 | } |
||

300 | Q.append(pair) |
||

301 | ```
return Q
``` |
||

302 | |||

303 | def _find_unknown_weight_wrt_component(self, comp_index, Q): |
||

304 | ```
# Component B such that weights of all but one of its links in T are already computed.
``` |
||

305 | ```
Dv_B_comp = self.Dv_B[comp_index]
``` |
||

306 | values = Dv_B_comp.values() |
||

307 | |||

308 | ```
# Check if -1 value appear only 1 time
``` |
||

309 | ```
flag = False
``` |
||

310 | minus_one_value = [x for x in values if x == -1] |
||

311 | if len(minus_one_value) == 1: |
||

312 | for cp, value in Dv_B_comp.iteritems(): |
||

313 | if value == -1: |
||

314 | pair = { |
||

315 | 'type': 'component_vertex_pair', |
||

316 | ```
'value': (comp_index, cp)
``` |
||

317 | } |
||

318 | Q.append(pair) |
||

319 | ```
return Q
``` |
||

320 | |||

321 | def __str__(self): |
||

322 | return str(self.Dv_B) |