ioftools / networkxMiCe / networkxmaster / networkx / algorithms / clique.py @ 5cef0f13
History  View  Annotate  Download (18.1 KB)
1 
# Copyright (C) 20042019 by


2 
# Aric Hagberg <hagberg@lanl.gov>

3 
# Dan Schult <dschult@colgate.edu>

4 
# Pieter Swart <swart@lanl.gov>

5 
# All rights reserved.

6 
# BSD license.

7 
"""Functions for finding and manipulating cliques.

8 

9 
Finding the largest clique in a graph is NPcomplete problem, so most of

10 
these algorithms have an exponential running time; for more information,

11 
see the Wikipedia article on the clique problem [1]_.

12 

13 
.. [1] clique problem:: https://en.wikipedia.org/wiki/Clique_problem

14 

15 
"""

16 
from collections import deque 
17 
from itertools import chain 
18 
from itertools import combinations 
19 
from itertools import islice 
20 
try:

21 
from itertools import ifilter as filter 
22 
except ImportError: 
23 
pass

24 
import networkx as nx 
25 
from networkx.utils import not_implemented_for 
26 
__author__ = """Dan Schult (dschult@colgate.edu)"""

27 
__all__ = ['find_cliques', 'find_cliques_recursive', 'make_max_clique_graph', 
28 
'make_clique_bipartite', 'graph_clique_number', 
29 
'graph_number_of_cliques', 'node_clique_number', 
30 
'number_of_cliques', 'cliques_containing_node', 
31 
'enumerate_all_cliques']

32  
33  
34 
@not_implemented_for('directed') 
35 
def enumerate_all_cliques(G): 
36 
"""Returns all cliques in an undirected graph.

37 

38 
This function returns an iterator over cliques, each of which is a

39 
list of nodes. The iteration is ordered by cardinality of the

40 
cliques: first all cliques of size one, then all cliques of size

41 
two, etc.

42 

43 
Parameters

44 


45 
G : NetworkX graph

46 
An undirected graph.

47 

48 
Returns

49 


50 
iterator

51 
An iterator over cliques, each of which is a list of nodes in

52 
`G`. The cliques are ordered according to size.

53 

54 
Notes

55 


56 
To obtain a list of all cliques, use

57 
`list(enumerate_all_cliques(G))`. However, be aware that in the

58 
worstcase, the length of this list can be exponential in the number

59 
of nodes in the graph (for example, when the graph is the complete

60 
graph). This function avoids storing all cliques in memory by only

61 
keeping current candidate node lists in memory during its search.

62 

63 
The implementation is adapted from the algorithm by Zhang, et

64 
al. (2005) [1]_ to output all cliques discovered.

65 

66 
This algorithm ignores selfloops and parallel edges, since cliques

67 
are not conventionally defined with such edges.

68 

69 
References

70 


71 
.. [1] Yun Zhang, AbuKhzam, F.N., Baldwin, N.E., Chesler, E.J.,

72 
Langston, M.A., Samatova, N.F.,

73 
"GenomeScale Computational Approaches to MemoryIntensive

74 
Applications in Systems Biology".

75 
*Supercomputing*, 2005. Proceedings of the ACM/IEEE SC 2005

76 
Conference, pp. 12, 1218 Nov. 2005.

77 
<https://doi.org/10.1109/SC.2005.29>.

78 

79 
"""

80 
index = {} 
81 
nbrs = {} 
82 
for u in G: 
83 
index[u] = len(index)

84 
# Neighbors of u that appear after u in the iteration order of G.

85 
nbrs[u] = {v for v in G[u] if v not in index} 
86  
87 
queue = deque(([u], sorted(nbrs[u], key=index.__getitem__)) for u in G) 
88 
# Loop invariants:

89 
# 1. len(base) is nondecreasing.

90 
# 2. (base + cnbrs) is sorted with respect to the iteration order of G.

91 
# 3. cnbrs is a set of common neighbors of nodes in base.

92 
while queue:

93 
base, cnbrs = map(list, queue.popleft()) 
94 
yield base

95 
for i, u in enumerate(cnbrs): 
96 
# Use generators to reduce memory consumption.

97 
queue.append((chain(base, [u]), 
98 
filter(nbrs[u].__contains__,

99 
islice(cnbrs, i + 1, None)))) 
100  
101  
102 
@not_implemented_for('directed') 
103 
def find_cliques(G): 
104 
"""Returns all maximal cliques in an undirected graph.

105 

106 
For each node *v*, a *maximal clique for v* is a largest complete

107 
subgraph containing *v*. The largest maximal clique is sometimes

108 
called the *maximum clique*.

109 

110 
This function returns an iterator over cliques, each of which is a

111 
list of nodes. It is an iterative implementation, so should not

112 
suffer from recursion depth issues.

113 

114 
Parameters

115 


116 
G : NetworkX graph

117 
An undirected graph.

118 

119 
Returns

120 


121 
iterator

122 
An iterator over maximal cliques, each of which is a list of

123 
nodes in `G`. The order of cliques is arbitrary.

124 

125 
See Also

126 


127 
find_cliques_recursive

128 
A recursive version of the same algorithm.

129 

130 
Notes

131 


132 
To obtain a list of all maximal cliques, use

133 
`list(find_cliques(G))`. However, be aware that in the worstcase,

134 
the length of this list can be exponential in the number of nodes in

135 
the graph (for example, when the graph is the complete graph). This

136 
function avoids storing all cliques in memory by only keeping

137 
current candidate node lists in memory during its search.

138 

139 
This implementation is based on the algorithm published by Bron and

140 
Kerbosch (1973) [1]_, as adapted by Tomita, Tanaka and Takahashi

141 
(2006) [2]_ and discussed in Cazals and Karande (2008) [3]_. It

142 
essentially unrolls the recursion used in the references to avoid

143 
issues of recursion stack depth (for a recursive implementation, see

144 
:func:`find_cliques_recursive`).

145 

146 
This algorithm ignores selfloops and parallel edges, since cliques

147 
are not conventionally defined with such edges.

148 

149 
References

150 


151 
.. [1] Bron, C. and Kerbosch, J.

152 
"Algorithm 457: finding all cliques of an undirected graph".

153 
*Communications of the ACM* 16, 9 (Sep. 1973), 575577.

154 
<http://portal.acm.org/citation.cfm?doid=362342.362367>

155 

156 
.. [2] Etsuji Tomita, Akira Tanaka, Haruhisa Takahashi,

157 
"The worstcase time complexity for generating all maximal

158 
cliques and computational experiments",

159 
*Theoretical Computer Science*, Volume 363, Issue 1,

160 
Computing and Combinatorics,

161 
10th Annual International Conference on

162 
Computing and Combinatorics (COCOON 2004), 25 October 2006, Pages 2842

163 
<https://doi.org/10.1016/j.tcs.2006.06.015>

164 

165 
.. [3] F. Cazals, C. Karande,

166 
"A note on the problem of reporting maximal cliques",

167 
*Theoretical Computer Science*,

168 
Volume 407, Issues 13, 6 November 2008, Pages 564568,

169 
<https://doi.org/10.1016/j.tcs.2008.05.010>

170 

171 
"""

172 
if len(G) == 0: 
173 
return

174  
175 
adj = {u: {v for v in G[u] if v != u} for u in G} 
176 
Q = [None]

177  
178 
subg = set(G)

179 
cand = set(G)

180 
u = max(subg, key=lambda u: len(cand & adj[u])) 
181 
ext_u = cand  adj[u] 
182 
stack = [] 
183  
184 
try:

185 
while True: 
186 
if ext_u:

187 
q = ext_u.pop() 
188 
cand.remove(q) 
189 
Q[1] = q

190 
adj_q = adj[q] 
191 
subg_q = subg & adj_q 
192 
if not subg_q: 
193 
yield Q[:]

194 
else:

195 
cand_q = cand & adj_q 
196 
if cand_q:

197 
stack.append((subg, cand, ext_u)) 
198 
Q.append(None)

199 
subg = subg_q 
200 
cand = cand_q 
201 
u = max(subg, key=lambda u: len(cand & adj[u])) 
202 
ext_u = cand  adj[u] 
203 
else:

204 
Q.pop() 
205 
subg, cand, ext_u = stack.pop() 
206 
except IndexError: 
207 
pass

208  
209  
210 
# TODO Should this also be not implemented for directed graphs?

211 
def find_cliques_recursive(G): 
212 
"""Returns all maximal cliques in a graph.

213 

214 
For each node *v*, a *maximal clique for v* is a largest complete

215 
subgraph containing *v*. The largest maximal clique is sometimes

216 
called the *maximum clique*.

217 

218 
This function returns an iterator over cliques, each of which is a

219 
list of nodes. It is a recursive implementation, so may suffer from

220 
recursion depth issues.

221 

222 
Parameters

223 


224 
G : NetworkX graph

225 

226 
Returns

227 


228 
iterator

229 
An iterator over maximal cliques, each of which is a list of

230 
nodes in `G`. The order of cliques is arbitrary.

231 

232 
See Also

233 


234 
find_cliques

235 
An iterative version of the same algorithm.

236 

237 
Notes

238 


239 
To obtain a list of all maximal cliques, use

240 
`list(find_cliques_recursive(G))`. However, be aware that in the

241 
worstcase, the length of this list can be exponential in the number

242 
of nodes in the graph (for example, when the graph is the complete

243 
graph). This function avoids storing all cliques in memory by only

244 
keeping current candidate node lists in memory during its search.

245 

246 
This implementation is based on the algorithm published by Bron and

247 
Kerbosch (1973) [1]_, as adapted by Tomita, Tanaka and Takahashi

248 
(2006) [2]_ and discussed in Cazals and Karande (2008) [3]_. For a

249 
nonrecursive implementation, see :func:`find_cliques`.

250 

251 
This algorithm ignores selfloops and parallel edges, since cliques

252 
are not conventionally defined with such edges.

253 

254 
References

255 


256 
.. [1] Bron, C. and Kerbosch, J.

257 
"Algorithm 457: finding all cliques of an undirected graph".

258 
*Communications of the ACM* 16, 9 (Sep. 1973), 575577.

259 
<http://portal.acm.org/citation.cfm?doid=362342.362367>

260 

261 
.. [2] Etsuji Tomita, Akira Tanaka, Haruhisa Takahashi,

262 
"The worstcase time complexity for generating all maximal

263 
cliques and computational experiments",

264 
*Theoretical Computer Science*, Volume 363, Issue 1,

265 
Computing and Combinatorics,

266 
10th Annual International Conference on

267 
Computing and Combinatorics (COCOON 2004), 25 October 2006, Pages 2842

268 
<https://doi.org/10.1016/j.tcs.2006.06.015>

269 

270 
.. [3] F. Cazals, C. Karande,

271 
"A note on the problem of reporting maximal cliques",

272 
*Theoretical Computer Science*,

273 
Volume 407, Issues 13, 6 November 2008, Pages 564568,

274 
<https://doi.org/10.1016/j.tcs.2008.05.010>

275 

276 
"""

277 
if len(G) == 0: 
278 
return iter([]) 
279  
280 
adj = {u: {v for v in G[u] if v != u} for u in G} 
281 
Q = [] 
282  
283 
def expand(subg, cand): 
284 
u = max(subg, key=lambda u: len(cand & adj[u])) 
285 
for q in cand  adj[u]: 
286 
cand.remove(q) 
287 
Q.append(q) 
288 
adj_q = adj[q] 
289 
subg_q = subg & adj_q 
290 
if not subg_q: 
291 
yield Q[:]

292 
else:

293 
cand_q = cand & adj_q 
294 
if cand_q:

295 
for clique in expand(subg_q, cand_q): 
296 
yield clique

297 
Q.pop() 
298  
299 
return expand(set(G), set(G)) 
300  
301  
302 
def make_max_clique_graph(G, create_using=None): 
303 
"""Returns the maximal clique graph of the given graph.

304 

305 
The nodes of the maximal clique graph of `G` are the cliques of

306 
`G` and an edge joins two cliques if the cliques are not disjoint.

307 

308 
Parameters

309 


310 
G : NetworkX graph

311 

312 
create_using : NetworkX graph constructor, optional (default=nx.Graph)

313 
Graph type to create. If graph instance, then cleared before populated.

314 

315 
Returns

316 


317 
NetworkX graph

318 
A graph whose nodes are the cliques of `G` and whose edges

319 
join two cliques if they are not disjoint.

320 

321 
Notes

322 


323 
This function behaves like the following code::

324 

325 
import networkx as nx

326 
G = nx.make_clique_bipartite(G)

327 
cliques = [v for v in G.nodes() if G.nodes[v]['bipartite'] == 0]

328 
G = nx.bipartite.project(G, cliques)

329 
G = nx.relabel_nodes(G, {v: v  1 for v in G})

330 

331 
It should be faster, though, since it skips all the intermediate

332 
steps.

333 

334 
"""

335 
if create_using is None: 
336 
B = G.__class__() 
337 
else:

338 
B = nx.empty_graph(0, create_using)

339 
cliques = list(enumerate(set(c) for c in find_cliques(G))) 
340 
# Add a numbered node for each clique.

341 
B.add_nodes_from(i for i, c in cliques) 
342 
# Join cliques by an edge if they share a node.

343 
clique_pairs = combinations(cliques, 2)

344 
B.add_edges_from((i, j) for (i, c1), (j, c2) in clique_pairs if c1 & c2) 
345 
return B

346  
347  
348 
def make_clique_bipartite(G, fpos=None, create_using=None, name=None): 
349 
"""Returns the bipartite clique graph corresponding to `G`.

350 

351 
In the returned bipartite graph, the "bottom" nodes are the nodes of

352 
`G` and the "top" nodes represent the maximal cliques of `G`.

353 
There is an edge from node *v* to clique *C* in the returned graph

354 
if and only if *v* is an element of *C*.

355 

356 
Parameters

357 


358 
G : NetworkX graph

359 
An undirected graph.

360 

361 
fpos : bool

362 
If True or not None, the returned graph will have an

363 
additional attribute, `pos`, a dictionary mapping node to

364 
position in the Euclidean plane.

365 

366 
create_using : NetworkX graph constructor, optional (default=nx.Graph)

367 
Graph type to create. If graph instance, then cleared before populated.

368 

369 
Returns

370 


371 
NetworkX graph

372 
A bipartite graph whose "bottom" set is the nodes of the graph

373 
`G`, whose "top" set is the cliques of `G`, and whose edges

374 
join nodes of `G` to the cliques that contain them.

375 

376 
The nodes of the graph `G` have the node attribute

377 
'bipartite' set to 1 and the nodes representing cliques

378 
have the node attribute 'bipartite' set to 0, as is the

379 
convention for bipartite graphs in NetworkX.

380 

381 
"""

382 
B = nx.empty_graph(0, create_using)

383 
B.clear() 
384 
# The "bottom" nodes in the bipartite graph are the nodes of the

385 
# original graph, G.

386 
B.add_nodes_from(G, bipartite=1)

387 
for i, cl in enumerate(find_cliques(G)): 
388 
# The "top" nodes in the bipartite graph are the cliques. These

389 
# nodes get negative numbers as labels.

390 
name = i  1

391 
B.add_node(name, bipartite=0)

392 
B.add_edges_from((v, name) for v in cl) 
393 
return B

394  
395  
396 
def graph_clique_number(G, cliques=None): 
397 
"""Returns the clique number of the graph.

398 

399 
The *clique number* of a graph is the size of the largest clique in

400 
the graph.

401 

402 
Parameters

403 


404 
G : NetworkX graph

405 
An undirected graph.

406 

407 
cliques : list

408 
A list of cliques, each of which is itself a list of nodes. If

409 
not specified, the list of all cliques will be computed, as by

410 
:func:`find_cliques`.

411 

412 
Returns

413 


414 
int

415 
The size of the largest clique in `G`.

416 

417 
Notes

418 


419 
You should provide `cliques` if you have already computed the list

420 
of maximal cliques, in order to avoid an exponential time search for

421 
maximal cliques.

422 

423 
"""

424 
if cliques is None: 
425 
cliques = find_cliques(G) 
426 
if len(G.nodes) < 1: 
427 
return 0 
428 
return max([len(c) for c in cliques] or [1]) 
429  
430  
431 
def graph_number_of_cliques(G, cliques=None): 
432 
"""Returns the number of maximal cliques in the graph.

433 

434 
Parameters

435 


436 
G : NetworkX graph

437 
An undirected graph.

438 

439 
cliques : list

440 
A list of cliques, each of which is itself a list of nodes. If

441 
not specified, the list of all cliques will be computed, as by

442 
:func:`find_cliques`.

443 

444 
Returns

445 


446 
int

447 
The number of maximal cliques in `G`.

448 

449 
Notes

450 


451 
You should provide `cliques` if you have already computed the list

452 
of maximal cliques, in order to avoid an exponential time search for

453 
maximal cliques.

454 

455 
"""

456 
if cliques is None: 
457 
cliques = list(find_cliques(G))

458 
return len(cliques) 
459  
460  
461 
def node_clique_number(G, nodes=None, cliques=None): 
462 
""" Returns the size of the largest maximal clique containing

463 
each given node.

464 

465 
Returns a single or list depending on input nodes.

466 
Optional list of cliques can be input if already computed.

467 
"""

468 
if cliques is None: 
469 
if nodes is not None: 
470 
# Use ego_graph to decrease size of graph

471 
if isinstance(nodes, list): 
472 
d = {} 
473 
for n in nodes: 
474 
H = nx.ego_graph(G, n) 
475 
d[n] = max((len(c) for c in find_cliques(H))) 
476 
else:

477 
H = nx.ego_graph(G, nodes) 
478 
d = max((len(c) for c in find_cliques(H))) 
479 
return d

480 
# nodes is Nonefind all cliques

481 
cliques = list(find_cliques(G))

482  
483 
if nodes is None: 
484 
nodes = list(G.nodes()) # none, get entire graph 
485  
486 
if not isinstance(nodes, list): # check for a list 
487 
v = nodes 
488 
# assume it is a single value

489 
d = max([len(c) for c in cliques if v in c]) 
490 
else:

491 
d = {} 
492 
for v in nodes: 
493 
d[v] = max([len(c) for c in cliques if v in c]) 
494 
return d

495  
496 
# if nodes is None: # none, use entire graph

497 
# nodes=G.nodes()

498 
# elif not isinstance(nodes, list): # check for a list

499 
# nodes=[nodes] # assume it is a single value

500  
501 
# if cliques is None:

502 
# cliques=list(find_cliques(G))

503 
# d={}

504 
# for v in nodes:

505 
# d[v]=max([len(c) for c in cliques if v in c])

506  
507 
# if nodes in G:

508 
# return d[v] #return single value

509 
# return d

510  
511  
512 
def number_of_cliques(G, nodes=None, cliques=None): 
513 
"""Returns the number of maximal cliques for each node.

514 

515 
Returns a single or list depending on input nodes.

516 
Optional list of cliques can be input if already computed.

517 
"""

518 
if cliques is None: 
519 
cliques = list(find_cliques(G))

520  
521 
if nodes is None: 
522 
nodes = list(G.nodes()) # none, get entire graph 
523  
524 
if not isinstance(nodes, list): # check for a list 
525 
v = nodes 
526 
# assume it is a single value

527 
numcliq = len([1 for c in cliques if v in c]) 
528 
else:

529 
numcliq = {} 
530 
for v in nodes: 
531 
numcliq[v] = len([1 for c in cliques if v in c]) 
532 
return numcliq

533  
534  
535 
def cliques_containing_node(G, nodes=None, cliques=None): 
536 
"""Returns a list of cliques containing the given node.

537 

538 
Returns a single list or list of lists depending on input nodes.

539 
Optional list of cliques can be input if already computed.

540 
"""

541 
if cliques is None: 
542 
cliques = list(find_cliques(G))

543  
544 
if nodes is None: 
545 
nodes = list(G.nodes()) # none, get entire graph 
546  
547 
if not isinstance(nodes, list): # check for a list 
548 
v = nodes 
549 
# assume it is a single value

550 
vcliques = [c for c in cliques if v in c] 
551 
else:

552 
vcliques = {} 
553 
for v in nodes: 
554 
vcliques[v] = [c for c in cliques if v in c] 
555 
return vcliques
