Statistics
| Branch: | Revision:

iof-tools / networkxMiCe / networkx-master / networkx / algorithms / bipartite / cluster.py @ 5cef0f13

History | View | Annotate | Download (7 KB)

1
#-*- coding: utf-8 -*-
2
#    Copyright (C) 2011 by
3
#    Jordi Torrents <jtorrents@milnou.net>
4
#    Aric Hagberg <hagberg@lanl.gov>
5
#    All rights reserved.
6
#    BSD license.
7
import itertools
8
import networkx as nx
9
__author__ = """\n""".join(['Jordi Torrents <jtorrents@milnou.net>',
10
                            'Aric Hagberg (hagberg@lanl.gov)'])
11
__all__ = ['clustering',
12
           'average_clustering',
13
           'latapy_clustering',
14
           'robins_alexander_clustering']
15

    
16
# functions for computing clustering of pairs
17

    
18

    
19
def cc_dot(nu, nv):
20
    return float(len(nu & nv)) / len(nu | nv)
21

    
22

    
23
def cc_max(nu, nv):
24
    return float(len(nu & nv)) / max(len(nu), len(nv))
25

    
26

    
27
def cc_min(nu, nv):
28
    return float(len(nu & nv)) / min(len(nu), len(nv))
29

    
30

    
31
modes = {'dot': cc_dot,
32
         'min': cc_min,
33
         'max': cc_max}
34

    
35

    
36
def latapy_clustering(G, nodes=None, mode='dot'):
37
    r"""Compute a bipartite clustering coefficient for nodes.
38

39
    The bipartie clustering coefficient is a measure of local density
40
    of connections defined as [1]_:
41

42
    .. math::
43

44
       c_u = \frac{\sum_{v \in N(N(u))} c_{uv} }{|N(N(u))|}
45

46
    where `N(N(u))` are the second order neighbors of `u` in `G` excluding `u`, 
47
    and `c_{uv}` is the pairwise clustering coefficient between nodes 
48
    `u` and `v`.
49

50
    The mode selects the function for `c_{uv}` which can be:
51

52
    `dot`: 
53

54
    .. math::
55

56
       c_{uv}=\frac{|N(u)\cap N(v)|}{|N(u) \cup N(v)|}
57

58
    `min`: 
59

60
    .. math::
61

62
       c_{uv}=\frac{|N(u)\cap N(v)|}{min(|N(u)|,|N(v)|)}
63

64
    `max`: 
65

66
    .. math::
67

68
       c_{uv}=\frac{|N(u)\cap N(v)|}{max(|N(u)|,|N(v)|)}
69

70

71
    Parameters
72
    ----------
73
    G : graph
74
        A bipartite graph
75

76
    nodes : list or iterable (optional)
77
        Compute bipartite clustering for these nodes. The default 
78
        is all nodes in G.
79

80
    mode : string
81
        The pariwise bipartite clustering method to be used in the computation.
82
        It must be "dot", "max", or "min". 
83

84
    Returns
85
    -------
86
    clustering : dictionary
87
        A dictionary keyed by node with the clustering coefficient value.
88

89

90
    Examples
91
    --------
92
    >>> from networkx.algorithms import bipartite
93
    >>> G = nx.path_graph(4) # path graphs are bipartite
94
    >>> c = bipartite.clustering(G) 
95
    >>> c[0]
96
    0.5
97
    >>> c = bipartite.clustering(G,mode='min') 
98
    >>> c[0]
99
    1.0
100

101
    See Also
102
    --------
103
    robins_alexander_clustering
104
    square_clustering
105
    average_clustering
106

107
    References
108
    ----------
109
    .. [1] Latapy, Matthieu, Clémence Magnien, and Nathalie Del Vecchio (2008).
110
       Basic notions for the analysis of large two-mode networks. 
111
       Social Networks 30(1), 31--48.
112
    """
113
    if not nx.algorithms.bipartite.is_bipartite(G):
114
        raise nx.NetworkXError("Graph is not bipartite")
115

    
116
    try:
117
        cc_func = modes[mode]
118
    except KeyError:
119
        raise nx.NetworkXError(
120
            "Mode for bipartite clustering must be: dot, min or max")
121

    
122
    if nodes is None:
123
        nodes = G
124
    ccs = {}
125
    for v in nodes:
126
        cc = 0.0
127
        nbrs2 = set([u for nbr in G[v] for u in G[nbr]]) - set([v])
128
        for u in nbrs2:
129
            cc += cc_func(set(G[u]), set(G[v]))
130
        if cc > 0.0:  # len(nbrs2)>0
131
            cc /= len(nbrs2)
132
        ccs[v] = cc
133
    return ccs
134

    
135

    
136
clustering = latapy_clustering
137

    
138

    
139
def average_clustering(G, nodes=None, mode='dot'):
140
    r"""Compute the average bipartite clustering coefficient.
141

142
    A clustering coefficient for the whole graph is the average, 
143

144
    .. math::
145

146
       C = \frac{1}{n}\sum_{v \in G} c_v,
147

148
    where `n` is the number of nodes in `G`.
149

150
    Similar measures for the two bipartite sets can be defined [1]_
151

152
    .. math::
153

154
       C_X = \frac{1}{|X|}\sum_{v \in X} c_v,
155

156
    where `X` is a bipartite set of `G`.
157

158
    Parameters
159
    ----------
160
    G : graph
161
        a bipartite graph
162

163
    nodes : list or iterable, optional
164
        A container of nodes to use in computing the average.  
165
        The nodes should be either the entire graph (the default) or one of the 
166
        bipartite sets.
167

168
    mode : string
169
        The pariwise bipartite clustering method. 
170
        It must be "dot", "max", or "min" 
171

172
    Returns
173
    -------
174
    clustering : float
175
       The average bipartite clustering for the given set of nodes or the 
176
       entire graph if no nodes are specified.
177

178
    Examples
179
    --------
180
    >>> from networkx.algorithms import bipartite
181
    >>> G=nx.star_graph(3) # star graphs are bipartite
182
    >>> bipartite.average_clustering(G) 
183
    0.75
184
    >>> X,Y=bipartite.sets(G)
185
    >>> bipartite.average_clustering(G,X) 
186
    0.0
187
    >>> bipartite.average_clustering(G,Y) 
188
    1.0
189

190
    See Also
191
    --------
192
    clustering
193

194
    Notes    
195
    -----
196
    The container of nodes passed to this function must contain all of the nodes
197
    in one of the bipartite sets ("top" or "bottom") in order to compute 
198
    the correct average bipartite clustering coefficients.
199
    See :mod:`bipartite documentation <networkx.algorithms.bipartite>`
200
    for further details on how bipartite graphs are handled in NetworkX.
201

202

203
    References
204
    ----------
205
    .. [1] Latapy, Matthieu, Clémence Magnien, and Nathalie Del Vecchio (2008).
206
        Basic notions for the analysis of large two-mode networks. 
207
        Social Networks 30(1), 31--48.
208
    """
209
    if nodes is None:
210
        nodes = G
211
    ccs = latapy_clustering(G, nodes=nodes, mode=mode)
212
    return float(sum(ccs[v] for v in nodes)) / len(nodes)
213

    
214

    
215
def robins_alexander_clustering(G):
216
    r"""Compute the bipartite clustering of G.
217

218
    Robins and Alexander [1]_ defined bipartite clustering coefficient as
219
    four times the number of four cycles `C_4` divided by the number of
220
    three paths `L_3` in a bipartite graph:
221

222
    .. math::
223

224
       CC_4 = \frac{4 * C_4}{L_3}
225

226
    Parameters
227
    ----------
228
    G : graph
229
        a bipartite graph
230

231
    Returns
232
    -------
233
    clustering : float
234
       The Robins and Alexander bipartite clustering for the input graph.
235

236
    Examples
237
    --------
238
    >>> from networkx.algorithms import bipartite
239
    >>> G = nx.davis_southern_women_graph()
240
    >>> print(round(bipartite.robins_alexander_clustering(G), 3))
241
    0.468
242

243
    See Also
244
    --------
245
    latapy_clustering
246
    square_clustering
247

248
    References
249
    ----------
250
    .. [1] Robins, G. and M. Alexander (2004). Small worlds among interlocking 
251
           directors: Network structure and distance in bipartite graphs. 
252
           Computational & Mathematical Organization Theory 10(1), 69–94.
253

254
    """
255
    if G.order() < 4 or G.size() < 3:
256
        return 0
257
    L_3 = _threepaths(G)
258
    if L_3 == 0:
259
        return 0
260
    C_4 = _four_cycles(G)
261
    return (4. * C_4) / L_3
262

    
263

    
264
def _four_cycles(G):
265
    cycles = 0
266
    for v in G:
267
        for u, w in itertools.combinations(G[v], 2):
268
            cycles += len((set(G[u]) & set(G[w])) - set([v]))
269
    return cycles / 4
270

    
271

    
272
def _threepaths(G):
273
    paths = 0
274
    for v in G:
275
        for u in G[v]:
276
            for w in set(G[u]) - set([v]):
277
                paths += len(set(G[w]) - set([v, u]))
278
    # Divide by two because we count each three path twice
279
    # one for each possible starting point
280
    return paths / 2