Statistics
| Branch: | Revision:

iof-tools / networkxMiCe / networkx-master / networkx / algorithms / approximation / kcomponents.py @ 5cef0f13

History | View | Annotate | Download (13.1 KB)

1
""" Fast approximation for k-component structure
2
"""
3
#    Copyright (C) 2015 by
4
#    Jordi Torrents <jtorrents@milnou.net>
5
#    All rights reserved.
6
#    BSD license.
7
import itertools
8
from collections import defaultdict
9
from collections.abc import Mapping
10

    
11
import networkx as nx
12
from networkx.exception import NetworkXError
13
from networkx.utils import not_implemented_for
14

    
15
from networkx.algorithms.approximation import local_node_connectivity
16
from networkx.algorithms.connectivity import \
17
    local_node_connectivity as exact_local_node_connectivity
18

    
19

    
20
__author__ = """\n""".join(['Jordi Torrents <jtorrents@milnou.net>'])
21

    
22
__all__ = ['k_components']
23

    
24

    
25
not_implemented_for('directed')
26

    
27

    
28
def k_components(G, min_density=0.95):
29
    r"""Returns the approximate k-component structure of a graph G.
30

31
    A `k`-component is a maximal subgraph of a graph G that has, at least,
32
    node connectivity `k`: we need to remove at least `k` nodes to break it
33
    into more components. `k`-components have an inherent hierarchical
34
    structure because they are nested in terms of connectivity: a connected
35
    graph can contain several 2-components, each of which can contain
36
    one or more 3-components, and so forth.
37

38
    This implementation is based on the fast heuristics to approximate
39
    the `k`-component structure of a graph [1]_. Which, in turn, it is based on
40
    a fast approximation algorithm for finding good lower bounds of the number
41
    of node independent paths between two nodes [2]_.
42

43
    Parameters
44
    ----------
45
    G : NetworkX graph
46
        Undirected graph
47

48
    min_density : Float
49
        Density relaxation threshold. Default value 0.95
50

51
    Returns
52
    -------
53
    k_components : dict
54
        Dictionary with connectivity level `k` as key and a list of
55
        sets of nodes that form a k-component of level `k` as values.
56

57

58
    Examples
59
    --------
60
    >>> # Petersen graph has 10 nodes and it is triconnected, thus all
61
    >>> # nodes are in a single component on all three connectivity levels
62
    >>> from networkx.algorithms import approximation as apxa
63
    >>> G = nx.petersen_graph()
64
    >>> k_components = apxa.k_components(G)
65

66
    Notes
67
    -----
68
    The logic of the approximation algorithm for computing the `k`-component
69
    structure [1]_ is based on repeatedly applying simple and fast algorithms
70
    for `k`-cores and biconnected components in order to narrow down the
71
    number of pairs of nodes over which we have to compute White and Newman's
72
    approximation algorithm for finding node independent paths [2]_. More
73
    formally, this algorithm is based on Whitney's theorem, which states
74
    an inclusion relation among node connectivity, edge connectivity, and
75
    minimum degree for any graph G. This theorem implies that every
76
    `k`-component is nested inside a `k`-edge-component, which in turn,
77
    is contained in a `k`-core. Thus, this algorithm computes node independent
78
    paths among pairs of nodes in each biconnected part of each `k`-core,
79
    and repeats this procedure for each `k` from 3 to the maximal core number
80
    of a node in the input graph.
81

82
    Because, in practice, many nodes of the core of level `k` inside a
83
    bicomponent actually are part of a component of level k, the auxiliary
84
    graph needed for the algorithm is likely to be very dense. Thus, we use
85
    a complement graph data structure (see `AntiGraph`) to save memory.
86
    AntiGraph only stores information of the edges that are *not* present
87
    in the actual auxiliary graph. When applying algorithms to this
88
    complement graph data structure, it behaves as if it were the dense
89
    version.
90

91
    See also
92
    --------
93
    k_components
94

95
    References
96
    ----------
97
    .. [1]  Torrents, J. and F. Ferraro (2015) Structural Cohesion:
98
            Visualization and Heuristics for Fast Computation.
99
            https://arxiv.org/pdf/1503.04476v1
100

101
    .. [2]  White, Douglas R., and Mark Newman (2001) A Fast Algorithm for
102
            Node-Independent Paths. Santa Fe Institute Working Paper #01-07-035
103
            http://eclectic.ss.uci.edu/~drwhite/working.pdf
104

105
    .. [3]  Moody, J. and D. White (2003). Social cohesion and embeddedness:
106
            A hierarchical conception of social groups.
107
            American Sociological Review 68(1), 103--28.
108
            http://www2.asanet.org/journals/ASRFeb03MoodyWhite.pdf
109

110
    """
111
    # Dictionary with connectivity level (k) as keys and a list of
112
    # sets of nodes that form a k-component as values
113
    k_components = defaultdict(list)
114
    # make a few functions local for speed
115
    node_connectivity = local_node_connectivity
116
    k_core = nx.k_core
117
    core_number = nx.core_number
118
    biconnected_components = nx.biconnected_components
119
    density = nx.density
120
    combinations = itertools.combinations
121
    # Exact solution for k = {1,2}
122
    # There is a linear time algorithm for triconnectivity, if we had an
123
    # implementation available we could start from k = 4.
124
    for component in nx.connected_components(G):
125
        # isolated nodes have connectivity 0
126
        comp = set(component)
127
        if len(comp) > 1:
128
            k_components[1].append(comp)
129
    for bicomponent in nx.biconnected_components(G):
130
        # avoid considering dyads as bicomponents
131
        bicomp = set(bicomponent)
132
        if len(bicomp) > 2:
133
            k_components[2].append(bicomp)
134
    # There is no k-component of k > maximum core number
135
    # \kappa(G) <= \lambda(G) <= \delta(G)
136
    g_cnumber = core_number(G)
137
    max_core = max(g_cnumber.values())
138
    for k in range(3, max_core + 1):
139
        C = k_core(G, k, core_number=g_cnumber)
140
        for nodes in biconnected_components(C):
141
            # Build a subgraph SG induced by the nodes that are part of
142
            # each biconnected component of the k-core subgraph C.
143
            if len(nodes) < k:
144
                continue
145
            SG = G.subgraph(nodes)
146
            # Build auxiliary graph
147
            H = _AntiGraph()
148
            H.add_nodes_from(SG.nodes())
149
            for u, v in combinations(SG, 2):
150
                K = node_connectivity(SG, u, v, cutoff=k)
151
                if k > K:
152
                    H.add_edge(u, v)
153
            for h_nodes in biconnected_components(H):
154
                if len(h_nodes) <= k:
155
                    continue
156
                SH = H.subgraph(h_nodes)
157
                for Gc in _cliques_heuristic(SG, SH, k, min_density):
158
                    for k_nodes in biconnected_components(Gc):
159
                        Gk = nx.k_core(SG.subgraph(k_nodes), k)
160
                        if len(Gk) <= k:
161
                            continue
162
                        k_components[k].append(set(Gk))
163
    return k_components
164

    
165

    
166
def _cliques_heuristic(G, H, k, min_density):
167
    h_cnumber = nx.core_number(H)
168
    for i, c_value in enumerate(sorted(set(h_cnumber.values()), reverse=True)):
169
        cands = set(n for n, c in h_cnumber.items() if c == c_value)
170
        # Skip checking for overlap for the highest core value
171
        if i == 0:
172
            overlap = False
173
        else:
174
            overlap = set.intersection(*[
175
                set(x for x in H[n] if x not in cands)
176
                for n in cands])
177
        if overlap and len(overlap) < k:
178
            SH = H.subgraph(cands | overlap)
179
        else:
180
            SH = H.subgraph(cands)
181
        sh_cnumber = nx.core_number(SH)
182
        SG = nx.k_core(G.subgraph(SH), k)
183
        while not (_same(sh_cnumber) and nx.density(SH) >= min_density):
184
            #!! This subgraph must be writable => .copy()
185
            SH = H.subgraph(SG).copy()
186
            if len(SH) <= k:
187
                break
188
            sh_cnumber = nx.core_number(SH)
189
            sh_deg = dict(SH.degree())
190
            min_deg = min(sh_deg.values())
191
            SH.remove_nodes_from(n for n, d in sh_deg.items() if d == min_deg)
192
            SG = nx.k_core(G.subgraph(SH), k)
193
        else:
194
            yield SG
195

    
196

    
197
def _same(measure, tol=0):
198
    vals = set(measure.values())
199
    if (max(vals) - min(vals)) <= tol:
200
        return True
201
    return False
202

    
203

    
204
class _AntiGraph(nx.Graph):
205
    """
206
    Class for complement graphs.
207

208
    The main goal is to be able to work with big and dense graphs with
209
    a low memory foodprint.
210

211
    In this class you add the edges that *do not exist* in the dense graph,
212
    the report methods of the class return the neighbors, the edges and
213
    the degree as if it was the dense graph. Thus it's possible to use
214
    an instance of this class with some of NetworkX functions. In this
215
    case we only use k-core, connected_components, and biconnected_components.
216
    """
217

    
218
    all_edge_dict = {'weight': 1}
219

    
220
    def single_edge_dict(self):
221
        return self.all_edge_dict
222
    edge_attr_dict_factory = single_edge_dict
223

    
224
    def __getitem__(self, n):
225
        """Returns a dict of neighbors of node n in the dense graph.
226

227
        Parameters
228
        ----------
229
        n : node
230
           A node in the graph.
231

232
        Returns
233
        -------
234
        adj_dict : dictionary
235
           The adjacency dictionary for nodes connected to n.
236

237
        """
238
        all_edge_dict = self.all_edge_dict
239
        return {node: all_edge_dict for node in
240
                set(self._adj) - set(self._adj[n]) - set([n])}
241

    
242
    def neighbors(self, n):
243
        """Returns an iterator over all neighbors of node n in the
244
           dense graph.
245
        """
246
        try:
247
            return iter(set(self._adj) - set(self._adj[n]) - set([n]))
248
        except KeyError:
249
            raise NetworkXError("The node %s is not in the graph." % (n,))
250

    
251
    class AntiAtlasView(Mapping):
252
        """An adjacency inner dict for AntiGraph"""
253

    
254
        def __init__(self, graph, node):
255
            self._graph = graph
256
            self._atlas = graph._adj[node]
257
            self._node = node
258

    
259
        def __len__(self):
260
            return len(self._graph) - len(self._atlas) - 1
261

    
262
        def __iter__(self):
263
            return (n for n in self._graph if n not in self._atlas and n != self._node)
264

    
265
        def __getitem__(self, nbr):
266
            nbrs = set(self._graph._adj) - set(self._atlas) - set([self._node])
267
            if nbr in nbrs:
268
                return self._graph.all_edge_dict
269
            raise KeyError(nbr)
270

    
271
    class AntiAdjacencyView(AntiAtlasView):
272
        """An adjacency outer dict for AntiGraph"""
273

    
274
        def __init__(self, graph):
275
            self._graph = graph
276
            self._atlas = graph._adj
277

    
278
        def __len__(self):
279
            return len(self._atlas)
280

    
281
        def __iter__(self):
282
            return iter(self._graph)
283

    
284
        def __getitem__(self, node):
285
            if node not in self._graph:
286
                raise KeyError(node)
287
            return self._graph.AntiAtlasView(self._graph, node)
288

    
289
    @property
290
    def adj(self):
291
        return self.AntiAdjacencyView(self)
292

    
293
    def subgraph(self, nodes):
294
        """This subgraph method returns a full AntiGraph. Not a View"""
295
        nodes = set(nodes)
296
        G = _AntiGraph()
297
        G.add_nodes_from(nodes)
298
        for n in G:
299
            Gnbrs = G.adjlist_inner_dict_factory()
300
            G._adj[n] = Gnbrs
301
            for nbr, d in self._adj[n].items():
302
                if nbr in G._adj:
303
                    Gnbrs[nbr] = d
304
                    G._adj[nbr][n] = d
305
        G.graph = self.graph
306
        return G
307

    
308
    class AntiDegreeView(nx.reportviews.DegreeView):
309
        def __iter__(self):
310
            all_nodes = set(self._succ)
311
            for n in self._nodes:
312
                nbrs = all_nodes - set(self._succ[n]) - set([n])
313
                yield (n, len(nbrs))
314

    
315
        def __getitem__(self, n):
316
            nbrs = set(self._succ) - set(self._succ[n]) - set([n])
317
            # AntiGraph is a ThinGraph so all edges have weight 1
318
            return len(nbrs) + (n in nbrs)
319

    
320
    @property
321
    def degree(self):
322
        """Returns an iterator for (node, degree) and degree for single node.
323

324
        The node degree is the number of edges adjacent to the node.
325

326
        Parameters
327
        ----------
328
        nbunch : iterable container, optional (default=all nodes)
329
            A container of nodes.  The container will be iterated
330
            through once.
331

332
        weight : string or None, optional (default=None)
333
           The edge attribute that holds the numerical value used
334
           as a weight.  If None, then each edge has weight 1.
335
           The degree is the sum of the edge weights adjacent to the node.
336

337
        Returns
338
        -------
339
        deg:
340
            Degree of the node, if a single node is passed as argument.
341
        nd_iter : an iterator
342
            The iterator returns two-tuples of (node, degree).
343

344
        See Also
345
        --------
346
        degree
347

348
        Examples
349
        --------
350
        >>> G = nx.path_graph(4)
351
        >>> G.degree(0) # node 0 with degree 1
352
        1
353
        >>> list(G.degree([0,1]))
354
        [(0, 1), (1, 2)]
355

356
        """
357
        return self.AntiDegreeView(self)
358

    
359
    def adjacency(self):
360
        """Returns an iterator of (node, adjacency set) tuples for all nodes
361
           in the dense graph.
362

363
        This is the fastest way to look at every edge.
364
        For directed graphs, only outgoing adjacencies are included.
365

366
        Returns
367
        -------
368
        adj_iter : iterator
369
           An iterator of (node, adjacency set) for all nodes in
370
           the graph.
371

372
        """
373
        for n in self._adj:
374
            yield (n, set(self._adj) - set(self._adj[n]) - set([n]))