Statistics
| Branch: | Revision:

iof-tools / networkxMiCe / networkx-master / networkx / algorithms / connectivity / kcomponents.py @ 5cef0f13

History | View | Annotate | Download (8.17 KB)

1
# -*- coding: utf-8 -*-
2
"""
3
Moody and White algorithm for k-components
4
"""
5
from collections import defaultdict
6
from itertools import combinations
7
from operator import itemgetter
8

    
9
import networkx as nx
10
from networkx.utils import not_implemented_for
11
# Define the default maximum flow function.
12
from networkx.algorithms.flow import edmonds_karp
13
default_flow_func = edmonds_karp
14

    
15
__author__ = '\n'.join(['Jordi Torrents <jtorrents@milnou.net>'])
16

    
17
__all__ = ['k_components']
18

    
19

    
20
@not_implemented_for('directed')
21
def k_components(G, flow_func=None):
22
    r"""Returns the k-component structure of a graph G.
23

24
    A `k`-component is a maximal subgraph of a graph G that has, at least,
25
    node connectivity `k`: we need to remove at least `k` nodes to break it
26
    into more components. `k`-components have an inherent hierarchical
27
    structure because they are nested in terms of connectivity: a connected
28
    graph can contain several 2-components, each of which can contain
29
    one or more 3-components, and so forth.
30

31
    Parameters
32
    ----------
33
    G : NetworkX graph
34

35
    flow_func : function
36
        Function to perform the underlying flow computations. Default value
37
        :meth:`edmonds_karp`. This function performs better in sparse graphs with
38
        right tailed degree distributions. :meth:`shortest_augmenting_path` will
39
        perform better in denser graphs.
40

41
    Returns
42
    -------
43
    k_components : dict
44
        Dictionary with all connectivity levels `k` in the input Graph as keys
45
        and a list of sets of nodes that form a k-component of level `k` as
46
        values.
47

48
    Raises
49
    ------
50
    NetworkXNotImplemented:
51
        If the input graph is directed.
52

53
    Examples
54
    --------
55
    >>> # Petersen graph has 10 nodes and it is triconnected, thus all
56
    >>> # nodes are in a single component on all three connectivity levels
57
    >>> G = nx.petersen_graph()
58
    >>> k_components = nx.k_components(G)
59

60
    Notes
61
    -----
62
    Moody and White [1]_ (appendix A) provide an algorithm for identifying
63
    k-components in a graph, which is based on Kanevsky's algorithm [2]_
64
    for finding all minimum-size node cut-sets of a graph (implemented in
65
    :meth:`all_node_cuts` function):
66

67
        1. Compute node connectivity, k, of the input graph G.
68

69
        2. Identify all k-cutsets at the current level of connectivity using
70
           Kanevsky's algorithm.
71

72
        3. Generate new graph components based on the removal of
73
           these cutsets. Nodes in a cutset belong to both sides
74
           of the induced cut.
75

76
        4. If the graph is neither complete nor trivial, return to 1;
77
           else end.
78

79
    This implementation also uses some heuristics (see [3]_ for details)
80
    to speed up the computation.
81

82
    See also
83
    --------
84
    node_connectivity
85
    all_node_cuts
86
    biconnected_components : special case of this function when k=2
87
    k_edge_components : similar to this function, but uses edge-connectivity
88
        instead of node-connectivity
89

90
    References
91
    ----------
92
    .. [1]  Moody, J. and D. White (2003). Social cohesion and embeddedness:
93
            A hierarchical conception of social groups.
94
            American Sociological Review 68(1), 103--28.
95
            http://www2.asanet.org/journals/ASRFeb03MoodyWhite.pdf
96

97
    .. [2]  Kanevsky, A. (1993). Finding all minimum-size separating vertex
98
            sets in a graph. Networks 23(6), 533--541.
99
            http://onlinelibrary.wiley.com/doi/10.1002/net.3230230604/abstract
100

101
    .. [3]  Torrents, J. and F. Ferraro (2015). Structural Cohesion:
102
            Visualization and Heuristics for Fast Computation.
103
            https://arxiv.org/pdf/1503.04476v1
104

105
    """
106
    # Dictionary with connectivity level (k) as keys and a list of
107
    # sets of nodes that form a k-component as values. Note that
108
    # k-compoents can overlap (but only k - 1 nodes).
109
    k_components = defaultdict(list)
110
    # Define default flow function
111
    if flow_func is None:
112
        flow_func = default_flow_func
113
    # Bicomponents as a base to check for higher order k-components
114
    for component in nx.connected_components(G):
115
        # isolated nodes have connectivity 0
116
        comp = set(component)
117
        if len(comp) > 1:
118
            k_components[1].append(comp)
119
    bicomponents = list(nx.biconnected_component_subgraphs(G))
120
    for bicomponent in bicomponents:
121
        bicomp = set(bicomponent)
122
        # avoid considering dyads as bicomponents
123
        if len(bicomp) > 2:
124
            k_components[2].append(bicomp)
125
    for B in bicomponents:
126
        if len(B) <= 2:
127
            continue
128
        k = nx.node_connectivity(B, flow_func=flow_func)
129
        if k > 2:
130
            k_components[k].append(set(B.nodes()))
131
        # Perform cuts in a DFS like order.
132
        cuts = list(nx.all_node_cuts(B, k=k, flow_func=flow_func))
133
        stack = [(k, _generate_partition(B, cuts, k))]
134
        while stack:
135
            (parent_k, partition) = stack[-1]
136
            try:
137
                nodes = next(partition)
138
                C = B.subgraph(nodes)
139
                this_k = nx.node_connectivity(C, flow_func=flow_func)
140
                if this_k > parent_k and this_k > 2:
141
                    k_components[this_k].append(set(C.nodes()))
142
                cuts = list(nx.all_node_cuts(C, k=this_k, flow_func=flow_func))
143
                if cuts:
144
                    stack.append((this_k, _generate_partition(C, cuts, this_k)))
145
            except StopIteration:
146
                stack.pop()
147

    
148
    # This is necessary because k-components may only be reported at their
149
    # maximum k level. But we want to return a dictionary in which keys are
150
    # connectivity levels and values list of sets of components, without
151
    # skipping any connectivity level. Also, it's possible that subsets of
152
    # an already detected k-component appear at a level k. Checking for this
153
    # in the while loop above penalizes the common case. Thus we also have to
154
    # _consolidate all connectivity levels in _reconstruct_k_components.
155
    return _reconstruct_k_components(k_components)
156

    
157

    
158
def _consolidate(sets, k):
159
    """Merge sets that share k or more elements.
160

161
    See: http://rosettacode.org/wiki/Set_consolidation
162

163
    The iterative python implementation posted there is
164
    faster than this because of the overhead of building a
165
    Graph and calling nx.connected_components, but it's not
166
    clear for us if we can use it in NetworkX because there
167
    is no licence for the code.
168

169
    """
170
    G = nx.Graph()
171
    nodes = {i: s for i, s in enumerate(sets)}
172
    G.add_nodes_from(nodes)
173
    G.add_edges_from((u, v) for u, v in combinations(nodes, 2)
174
                     if len(nodes[u] & nodes[v]) >= k)
175
    for component in nx.connected_components(G):
176
        yield set.union(*[nodes[n] for n in component])
177

    
178

    
179
def _generate_partition(G, cuts, k):
180
    def has_nbrs_in_partition(G, node, partition):
181
        for n in G[node]:
182
            if n in partition:
183
                return True
184
        return False
185
    components = []
186
    nodes = ({n for n, d in G.degree() if d > k} -
187
             {n for cut in cuts for n in cut})
188
    H = G.subgraph(nodes)
189
    for cc in nx.connected_components(H):
190
        component = set(cc)
191
        for cut in cuts:
192
            for node in cut:
193
                if has_nbrs_in_partition(G, node, cc):
194
                    component.add(node)
195
        if len(component) < G.order():
196
            components.append(component)
197
    for component in _consolidate(components, k + 1):
198
        yield component
199

    
200

    
201
def _reconstruct_k_components(k_comps):
202
    result = dict()
203
    max_k = max(k_comps)
204
    for k in reversed(range(1, max_k + 1)):
205
        if k == max_k:
206
            result[k] = list(_consolidate(k_comps[k], k))
207
        elif k not in k_comps:
208
            result[k] = list(_consolidate(result[k + 1], k))
209
        else:
210
            nodes_at_k = set.union(*k_comps[k])
211
            to_add = [c for c in result[k + 1] if any(n not in nodes_at_k for n in c)]
212
            if to_add:
213
                result[k] = list(_consolidate(k_comps[k] + to_add, k))
214
            else:
215
                result[k] = list(_consolidate(k_comps[k], k))
216
    return result
217

    
218

    
219
def build_k_number_dict(kcomps):
220
    result = {}
221
    for k, comps in sorted(kcomps.items(), key=itemgetter(0)):
222
        for comp in comps:
223
            for node in comp:
224
                result[node] = k
225
    return result