ioftools / networkxMiCe / networkxmaster / networkx / algorithms / community / centrality.py @ 5cef0f13
History  View  Annotate  Download (6.64 KB)
1 
# * coding: utf8 *


2 
# centrality.py  functions for computing communities using centrality notions

3 
#

4 
# Copyright 2015, 2016 NetworkX developers.

5 
#

6 
# This file is part of NetworkX.

7 
#

8 
# NetworkX is distributed under a BSD license; see LICENSE.txt for more

9 
# information.

10 
"""Functions for computing communities based on centrality notions."""

11  
12 
import networkx as nx 
13  
14 
__all__ = ['girvan_newman']

15  
16  
17 
def girvan_newman(G, most_valuable_edge=None): 
18 
"""Finds communities in a graph using the Girvan–Newman method.

19 

20 
Parameters

21 


22 
G : NetworkX graph

23 

24 
most_valuable_edge : function

25 
Function that takes a graph as input and outputs an edge. The

26 
edge returned by this function will be recomputed and removed at

27 
each iteration of the algorithm.

28 

29 
If not specified, the edge with the highest

30 
:func:`networkx.edge_betweenness_centrality` will be used.

31 

32 
Returns

33 


34 
iterator

35 
Iterator over tuples of sets of nodes in `G`. Each set of node

36 
is a community, each tuple is a sequence of communities at a

37 
particular level of the algorithm.

38 

39 
Examples

40 


41 
To get the first pair of communities::

42 

43 
>>> G = nx.path_graph(10)

44 
>>> comp = girvan_newman(G)

45 
>>> tuple(sorted(c) for c in next(comp))

46 
([0, 1, 2, 3, 4], [5, 6, 7, 8, 9])

47 

48 
To get only the first *k* tuples of communities, use

49 
:func:`itertools.islice`::

50 

51 
>>> import itertools

52 
>>> G = nx.path_graph(8)

53 
>>> k = 2

54 
>>> comp = girvan_newman(G)

55 
>>> for communities in itertools.islice(comp, k):

56 
... print(tuple(sorted(c) for c in communities)) # doctest: +SKIP

57 
...

58 
([0, 1, 2, 3], [4, 5, 6, 7])

59 
([0, 1], [2, 3], [4, 5, 6, 7])

60 

61 
To stop getting tuples of communities once the number of communities

62 
is greater than *k*, use :func:`itertools.takewhile`::

63 

64 
>>> import itertools

65 
>>> G = nx.path_graph(8)

66 
>>> k = 4

67 
>>> comp = girvan_newman(G)

68 
>>> limited = itertools.takewhile(lambda c: len(c) <= k, comp)

69 
>>> for communities in limited:

70 
... print(tuple(sorted(c) for c in communities)) # doctest: +SKIP

71 
...

72 
([0, 1, 2, 3], [4, 5, 6, 7])

73 
([0, 1], [2, 3], [4, 5, 6, 7])

74 
([0, 1], [2, 3], [4, 5], [6, 7])

75 

76 
To just choose an edge to remove based on the weight::

77 

78 
>>> from operator import itemgetter

79 
>>> G = nx.path_graph(10)

80 
>>> edges = G.edges()

81 
>>> nx.set_edge_attributes(G, {(u, v): v for u, v in edges}, 'weight')

82 
>>> def heaviest(G):

83 
... u, v, w = max(G.edges(data='weight'), key=itemgetter(2))

84 
... return (u, v)

85 
...

86 
>>> comp = girvan_newman(G, most_valuable_edge=heaviest)

87 
>>> tuple(sorted(c) for c in next(comp))

88 
([0, 1, 2, 3, 4, 5, 6, 7, 8], [9])

89 

90 
To utilize edge weights when choosing an edge with, for example, the

91 
highest betweenness centrality::

92 

93 
>>> from networkx import edge_betweenness_centrality as betweenness

94 
>>> def most_central_edge(G):

95 
... centrality = betweenness(G, weight='weight')

96 
... return max(centrality, key=centrality.get)

97 
...

98 
>>> G = nx.path_graph(10)

99 
>>> comp = girvan_newman(G, most_valuable_edge=most_central_edge)

100 
>>> tuple(sorted(c) for c in next(comp))

101 
([0, 1, 2, 3, 4], [5, 6, 7, 8, 9])

102 

103 
To specify a different ranking algorithm for edges, use the

104 
`most_valuable_edge` keyword argument::

105 

106 
>>> from networkx import edge_betweenness_centrality

107 
>>> from random import random

108 
>>> def most_central_edge(G):

109 
... centrality = edge_betweenness_centrality(G)

110 
... max_cent = max(centrality.values())

111 
... # Scale the centrality values so they are between 0 and 1,

112 
... # and add some random noise.

113 
... centrality = {e: c / max_cent for e, c in centrality.items()}

114 
... # Add some random noise.

115 
... centrality = {e: c + random() for e, c in centrality.items()}

116 
... return max(centrality, key=centrality.get)

117 
...

118 
>>> G = nx.path_graph(10)

119 
>>> comp = girvan_newman(G, most_valuable_edge=most_central_edge)

120 

121 
Notes

122 


123 
The Girvan–Newman algorithm detects communities by progressively

124 
removing edges from the original graph. The algorithm removes the

125 
"most valuable" edge, traditionally the edge with the highest

126 
betweenness centrality, at each step. As the graph breaks down into

127 
pieces, the tightly knit community structure is exposed and the

128 
result can be depicted as a dendrogram.

129 

130 
"""

131 
# If the graph is already empty, simply return its connected

132 
# components.

133 
if G.number_of_edges() == 0: 
134 
yield tuple(nx.connected_components(G)) 
135 
return

136 
# If no function is provided for computing the most valuable edge,

137 
# use the edge betweenness centrality.

138 
if most_valuable_edge is None: 
139 
def most_valuable_edge(G): 
140 
"""Returns the edge with the highest betweenness centrality

141 
in the graph `G`.

142 

143 
"""

144 
# We have guaranteed that the graph is nonempty, so this

145 
# dictionary will never be empty.

146 
betweenness = nx.edge_betweenness_centrality(G) 
147 
return max(betweenness, key=betweenness.get) 
148 
# The copy of G here must include the edge weight data.

149 
g = G.copy().to_undirected() 
150 
# Selfloops must be removed because their removal has no effect on

151 
# the connected components of the graph.

152 
g.remove_edges_from(nx.selfloop_edges(g)) 
153 
while g.number_of_edges() > 0: 
154 
yield _without_most_central_edges(g, most_valuable_edge)

155  
156  
157 
def _without_most_central_edges(G, most_valuable_edge): 
158 
"""Returns the connected components of the graph that results from

159 
repeatedly removing the most "valuable" edge in the graph.

160 

161 
`G` must be a nonempty graph. This function modifies the graph `G`

162 
inplace; that is, it removes edges on the graph `G`.

163 

164 
`most_valuable_edge` is a function that takes the graph `G` as input

165 
(or a subgraph with one or more edges of `G` removed) and returns an

166 
edge. That edge will be removed and this process will be repeated

167 
until the number of connected components in the graph increases.

168 

169 
"""

170 
original_num_components = nx.number_connected_components(G) 
171 
num_new_components = original_num_components 
172 
while num_new_components <= original_num_components:

173 
edge = most_valuable_edge(G) 
174 
G.remove_edge(*edge) 
175 
new_components = tuple(nx.connected_components(G))

176 
num_new_components = len(new_components)

177 
return new_components
