Statistics
| Branch: | Revision:

iof-tools / networkxMiCe / networkx-master / networkx / algorithms / community / centrality.py @ 5cef0f13

History | View | Annotate | Download (6.64 KB)

1
# -*- coding: utf-8 -*-
2
# centrality.py - functions for computing communities using centrality notions
3
#
4
# Copyright 2015, 2016 NetworkX developers.
5
#
6
# This file is part of NetworkX.
7
#
8
# NetworkX is distributed under a BSD license; see LICENSE.txt for more
9
# information.
10
"""Functions for computing communities based on centrality notions."""
11

    
12
import networkx as nx
13

    
14
__all__ = ['girvan_newman']
15

    
16

    
17
def girvan_newman(G, most_valuable_edge=None):
18
    """Finds communities in a graph using the Girvan–Newman method.
19

20
    Parameters
21
    ----------
22
    G : NetworkX graph
23

24
    most_valuable_edge : function
25
        Function that takes a graph as input and outputs an edge. The
26
        edge returned by this function will be recomputed and removed at
27
        each iteration of the algorithm.
28

29
        If not specified, the edge with the highest
30
        :func:`networkx.edge_betweenness_centrality` will be used.
31

32
    Returns
33
    -------
34
    iterator
35
        Iterator over tuples of sets of nodes in `G`. Each set of node
36
        is a community, each tuple is a sequence of communities at a
37
        particular level of the algorithm.
38

39
    Examples
40
    --------
41
    To get the first pair of communities::
42

43
        >>> G = nx.path_graph(10)
44
        >>> comp = girvan_newman(G)
45
        >>> tuple(sorted(c) for c in next(comp))
46
        ([0, 1, 2, 3, 4], [5, 6, 7, 8, 9])
47

48
    To get only the first *k* tuples of communities, use
49
    :func:`itertools.islice`::
50

51
        >>> import itertools
52
        >>> G = nx.path_graph(8)
53
        >>> k = 2
54
        >>> comp = girvan_newman(G)
55
        >>> for communities in itertools.islice(comp, k):
56
        ...     print(tuple(sorted(c) for c in communities)) # doctest: +SKIP
57
        ...
58
        ([0, 1, 2, 3], [4, 5, 6, 7])
59
        ([0, 1], [2, 3], [4, 5, 6, 7])
60

61
    To stop getting tuples of communities once the number of communities
62
    is greater than *k*, use :func:`itertools.takewhile`::
63

64
        >>> import itertools
65
        >>> G = nx.path_graph(8)
66
        >>> k = 4
67
        >>> comp = girvan_newman(G)
68
        >>> limited = itertools.takewhile(lambda c: len(c) <= k, comp)
69
        >>> for communities in limited:
70
        ...     print(tuple(sorted(c) for c in communities)) # doctest: +SKIP
71
        ...
72
        ([0, 1, 2, 3], [4, 5, 6, 7])
73
        ([0, 1], [2, 3], [4, 5, 6, 7])
74
        ([0, 1], [2, 3], [4, 5], [6, 7])
75

76
    To just choose an edge to remove based on the weight::
77

78
        >>> from operator import itemgetter
79
        >>> G = nx.path_graph(10)
80
        >>> edges = G.edges()
81
        >>> nx.set_edge_attributes(G, {(u, v): v for u, v in edges}, 'weight')
82
        >>> def heaviest(G):
83
        ...     u, v, w = max(G.edges(data='weight'), key=itemgetter(2))
84
        ...     return (u, v)
85
        ...
86
        >>> comp = girvan_newman(G, most_valuable_edge=heaviest)
87
        >>> tuple(sorted(c) for c in next(comp))
88
        ([0, 1, 2, 3, 4, 5, 6, 7, 8], [9])
89

90
    To utilize edge weights when choosing an edge with, for example, the
91
    highest betweenness centrality::
92

93
        >>> from networkx import edge_betweenness_centrality as betweenness
94
        >>> def most_central_edge(G):
95
        ...     centrality = betweenness(G, weight='weight')
96
        ...     return max(centrality, key=centrality.get)
97
        ...
98
        >>> G = nx.path_graph(10)
99
        >>> comp = girvan_newman(G, most_valuable_edge=most_central_edge)
100
        >>> tuple(sorted(c) for c in next(comp))
101
        ([0, 1, 2, 3, 4], [5, 6, 7, 8, 9])
102

103
    To specify a different ranking algorithm for edges, use the
104
    `most_valuable_edge` keyword argument::
105

106
        >>> from networkx import edge_betweenness_centrality
107
        >>> from random import random
108
        >>> def most_central_edge(G):
109
        ...     centrality = edge_betweenness_centrality(G)
110
        ...     max_cent = max(centrality.values())
111
        ...     # Scale the centrality values so they are between 0 and 1,
112
        ...     # and add some random noise.
113
        ...     centrality = {e: c / max_cent for e, c in centrality.items()}
114
        ...     # Add some random noise.
115
        ...     centrality = {e: c + random() for e, c in centrality.items()}
116
        ...     return max(centrality, key=centrality.get)
117
        ...
118
        >>> G = nx.path_graph(10)
119
        >>> comp = girvan_newman(G, most_valuable_edge=most_central_edge)
120

121
    Notes
122
    -----
123
    The Girvan–Newman algorithm detects communities by progressively
124
    removing edges from the original graph. The algorithm removes the
125
    "most valuable" edge, traditionally the edge with the highest
126
    betweenness centrality, at each step. As the graph breaks down into
127
    pieces, the tightly knit community structure is exposed and the
128
    result can be depicted as a dendrogram.
129

130
    """
131
    # If the graph is already empty, simply return its connected
132
    # components.
133
    if G.number_of_edges() == 0:
134
        yield tuple(nx.connected_components(G))
135
        return
136
    # If no function is provided for computing the most valuable edge,
137
    # use the edge betweenness centrality.
138
    if most_valuable_edge is None:
139
        def most_valuable_edge(G):
140
            """Returns the edge with the highest betweenness centrality
141
            in the graph `G`.
142

143
            """
144
            # We have guaranteed that the graph is non-empty, so this
145
            # dictionary will never be empty.
146
            betweenness = nx.edge_betweenness_centrality(G)
147
            return max(betweenness, key=betweenness.get)
148
    # The copy of G here must include the edge weight data.
149
    g = G.copy().to_undirected()
150
    # Self-loops must be removed because their removal has no effect on
151
    # the connected components of the graph.
152
    g.remove_edges_from(nx.selfloop_edges(g))
153
    while g.number_of_edges() > 0:
154
        yield _without_most_central_edges(g, most_valuable_edge)
155

    
156

    
157
def _without_most_central_edges(G, most_valuable_edge):
158
    """Returns the connected components of the graph that results from
159
    repeatedly removing the most "valuable" edge in the graph.
160

161
    `G` must be a non-empty graph. This function modifies the graph `G`
162
    in-place; that is, it removes edges on the graph `G`.
163

164
    `most_valuable_edge` is a function that takes the graph `G` as input
165
    (or a subgraph with one or more edges of `G` removed) and returns an
166
    edge. That edge will be removed and this process will be repeated
167
    until the number of connected components in the graph increases.
168

169
    """
170
    original_num_components = nx.number_connected_components(G)
171
    num_new_components = original_num_components
172
    while num_new_components <= original_num_components:
173
        edge = most_valuable_edge(G)
174
        G.remove_edge(*edge)
175
        new_components = tuple(nx.connected_components(G))
176
        num_new_components = len(new_components)
177
    return new_components