Statistics
| Branch: | Revision:

iof-tools / networkxMiCe / networkx-master / networkx / linalg / attrmatrix.py @ 5cef0f13

History | View | Annotate | Download (15.5 KB)

1
"""
2
    Functions for constructing matrix-like objects from graph attributes.
3
"""
4

    
5
__all__ = ['attr_matrix', 'attr_sparse_matrix']
6

    
7
import networkx as nx
8

    
9

    
10
def _node_value(G, node_attr):
11
    """Returns a function that returns a value from G.nodes[u].
12

13
    We return a function expecting a node as its sole argument. Then, in the
14
    simplest scenario, the returned function will return G.nodes[u][node_attr].
15
    However, we also handle the case when `node_attr` is None or when it is a
16
    function itself.
17

18
    Parameters
19
    ----------
20
    G : graph
21
        A NetworkX graph
22

23
    node_attr : {None, str, callable}
24
        Specification of how the value of the node attribute should be obtained
25
        from the node attribute dictionary.
26

27
    Returns
28
    -------
29
    value : function
30
        A function expecting a node as its sole argument. The function will
31
        returns a value from G.nodes[u] that depends on `edge_attr`.
32

33
    """
34
    if node_attr is None:
35
        def value(u): return u
36
    elif not hasattr(node_attr, '__call__'):
37
        # assume it is a key for the node attribute dictionary
38
        def value(u): return G.nodes[u][node_attr]
39
    else:
40
        # Advanced:  Allow users to specify something else.
41
        #
42
        # For example,
43
        #     node_attr = lambda u: G.nodes[u].get('size', .5) * 3
44
        #
45
        value = node_attr
46

    
47
    return value
48

    
49

    
50
def _edge_value(G, edge_attr):
51
    """Returns a function that returns a value from G[u][v].
52

53
    Suppose there exists an edge between u and v.  Then we return a function
54
    expecting u and v as arguments.  For Graph and DiGraph, G[u][v] is
55
    the edge attribute dictionary, and the function (essentially) returns
56
    G[u][v][edge_attr].  However, we also handle cases when `edge_attr` is None
57
    and when it is a function itself. For MultiGraph and MultiDiGraph, G[u][v]
58
    is a dictionary of all edges between u and v.  In this case, the returned
59
    function sums the value of `edge_attr` for every edge between u and v.
60

61
    Parameters
62
    ----------
63
    G : graph
64
       A NetworkX graph
65

66
    edge_attr : {None, str, callable}
67
        Specification of how the value of the edge attribute should be obtained
68
        from the edge attribute dictionary, G[u][v].  For multigraphs, G[u][v]
69
        is a dictionary of all the edges between u and v.  This allows for
70
        special treatment of multiedges.
71

72
    Returns
73
    -------
74
    value : function
75
        A function expecting two nodes as parameters. The nodes should
76
        represent the from- and to- node of an edge. The function will
77
        return a value from G[u][v] that depends on `edge_attr`.
78

79
    """
80

    
81
    if edge_attr is None:
82
        # topological count of edges
83

    
84
        if G.is_multigraph():
85
            def value(u, v): return len(G[u][v])
86
        else:
87
            def value(u, v): return 1
88

    
89
    elif not hasattr(edge_attr, '__call__'):
90
        # assume it is a key for the edge attribute dictionary
91

    
92
        if edge_attr == 'weight':
93
            # provide a default value
94
            if G.is_multigraph():
95
                def value(u, v): return sum([d.get(edge_attr, 1) for d in G[u][v].values()])
96
            else:
97
                def value(u, v): return G[u][v].get(edge_attr, 1)
98
        else:
99
            # otherwise, the edge attribute MUST exist for each edge
100
            if G.is_multigraph():
101
                def value(u, v): return sum([d[edge_attr] for d in G[u][v].values()])
102
            else:
103
                def value(u, v): return G[u][v][edge_attr]
104

    
105
    else:
106
        # Advanced:  Allow users to specify something else.
107
        #
108
        # Alternative default value:
109
        #     edge_attr = lambda u,v: G[u][v].get('thickness', .5)
110
        #
111
        # Function on an attribute:
112
        #     edge_attr = lambda u,v: abs(G[u][v]['weight'])
113
        #
114
        # Handle Multi(Di)Graphs differently:
115
        #     edge_attr = lambda u,v: numpy.prod([d['size'] for d in G[u][v].values()])
116
        #
117
        # Ignore multiple edges
118
        #     edge_attr = lambda u,v: 1 if len(G[u][v]) else 0
119
        #
120
        value = edge_attr
121

    
122
    return value
123

    
124

    
125
def attr_matrix(G, edge_attr=None, node_attr=None, normalized=False,
126
                rc_order=None, dtype=None, order=None):
127
    """Returns a NumPy matrix using attributes from G.
128

129
    If only `G` is passed in, then the adjacency matrix is constructed.
130

131
    Let A be a discrete set of values for the node attribute `node_attr`. Then
132
    the elements of A represent the rows and columns of the constructed matrix.
133
    Now, iterate through every edge e=(u,v) in `G` and consider the value
134
    of the edge attribute `edge_attr`.  If ua and va are the values of the
135
    node attribute `node_attr` for u and v, respectively, then the value of
136
    the edge attribute is added to the matrix element at (ua, va).
137

138
    Parameters
139
    ----------
140
    G : graph
141
        The NetworkX graph used to construct the NumPy matrix.
142

143
    edge_attr : str, optional
144
        Each element of the matrix represents a running total of the
145
        specified edge attribute for edges whose node attributes correspond
146
        to the rows/cols of the matirx. The attribute must be present for
147
        all edges in the graph. If no attribute is specified, then we
148
        just count the number of edges whose node attributes correspond
149
        to the matrix element.
150

151
    node_attr : str, optional
152
        Each row and column in the matrix represents a particular value
153
        of the node attribute.  The attribute must be present for all nodes
154
        in the graph. Note, the values of this attribute should be reliably
155
        hashable. So, float values are not recommended. If no attribute is
156
        specified, then the rows and columns will be the nodes of the graph.
157

158
    normalized : bool, optional
159
        If True, then each row is normalized by the summation of its values.
160

161
    rc_order : list, optional
162
        A list of the node attribute values. This list specifies the ordering
163
        of rows and columns of the array. If no ordering is provided, then
164
        the ordering will be random (and also, a return value).
165

166
    Other Parameters
167
    ----------------
168
    dtype : NumPy data-type, optional
169
        A valid NumPy dtype used to initialize the array. Keep in mind certain
170
        dtypes can yield unexpected results if the array is to be normalized.
171
        The parameter is passed to numpy.zeros(). If unspecified, the NumPy
172
        default is used.
173

174
    order : {'C', 'F'}, optional
175
        Whether to store multidimensional data in C- or Fortran-contiguous
176
        (row- or column-wise) order in memory. This parameter is passed to
177
        numpy.zeros(). If unspecified, the NumPy default is used.
178

179
    Returns
180
    -------
181
    M : NumPy matrix
182
        The attribute matrix.
183

184
    ordering : list
185
        If `rc_order` was specified, then only the matrix is returned.
186
        However, if `rc_order` was None, then the ordering used to construct
187
        the matrix is returned as well.
188

189
    Examples
190
    --------
191
    Construct an adjacency matrix:
192

193
    >>> G = nx.Graph()
194
    >>> G.add_edge(0, 1, thickness=1, weight=3)
195
    >>> G.add_edge(0, 2, thickness=2)
196
    >>> G.add_edge(1, 2, thickness=3)
197
    >>> nx.attr_matrix(G, rc_order=[0, 1, 2])
198
    matrix([[0., 1., 1.],
199
            [1., 0., 1.],
200
            [1., 1., 0.]])
201

202
    Alternatively, we can obtain the matrix describing edge thickness.
203

204
    >>> nx.attr_matrix(G, edge_attr='thickness', rc_order=[0, 1, 2])
205
    matrix([[0., 1., 2.],
206
            [1., 0., 3.],
207
            [2., 3., 0.]])
208

209
    We can also color the nodes and ask for the probability distribution over
210
    all edges (u,v) describing:
211

212
        Pr(v has color Y | u has color X)
213

214
    >>> G.nodes[0]['color'] = 'red'
215
    >>> G.nodes[1]['color'] = 'red'
216
    >>> G.nodes[2]['color'] = 'blue'
217
    >>> rc = ['red', 'blue']
218
    >>> nx.attr_matrix(G, node_attr='color', normalized=True, rc_order=rc)
219
    matrix([[0.33333333, 0.66666667],
220
            [1.        , 0.        ]])
221

222
    For example, the above tells us that for all edges (u,v):
223

224
        Pr( v is red  | u is red)  = 1/3
225
        Pr( v is blue | u is red)  = 2/3
226

227
        Pr( v is red  | u is blue) = 1
228
        Pr( v is blue | u is blue) = 0
229

230
    Finally, we can obtain the total weights listed by the node colors.
231

232
    >>> nx.attr_matrix(G, edge_attr='weight', node_attr='color', rc_order=rc)
233
    matrix([[3., 2.],
234
            [2., 0.]])
235

236
    Thus, the total weight over all edges (u,v) with u and v having colors:
237

238
        (red, red)   is 3   # the sole contribution is from edge (0,1)
239
        (red, blue)  is 2   # contributions from edges (0,2) and (1,2)
240
        (blue, red)  is 2   # same as (red, blue) since graph is undirected
241
        (blue, blue) is 0   # there are no edges with blue endpoints
242

243
    """
244
    try:
245
        import numpy as np
246
    except ImportError:
247
        raise ImportError(
248
            "attr_matrix() requires numpy: http://scipy.org/ ")
249

    
250
    edge_value = _edge_value(G, edge_attr)
251
    node_value = _node_value(G, node_attr)
252

    
253
    if rc_order is None:
254
        ordering = list(set([node_value(n) for n in G]))
255
    else:
256
        ordering = rc_order
257

    
258
    N = len(ordering)
259
    undirected = not G.is_directed()
260
    index = dict(zip(ordering, range(N)))
261
    M = np.zeros((N, N), dtype=dtype, order=order)
262

    
263
    seen = set([])
264
    for u, nbrdict in G.adjacency():
265
        for v in nbrdict:
266
            # Obtain the node attribute values.
267
            i, j = index[node_value(u)], index[node_value(v)]
268
            if v not in seen:
269
                M[i, j] += edge_value(u, v)
270
                if undirected:
271
                    M[j, i] = M[i, j]
272

    
273
        if undirected:
274
            seen.add(u)
275

    
276
    if normalized:
277
        M /= M.sum(axis=1).reshape((N, 1))
278

    
279
    M = np.asmatrix(M)
280

    
281
    if rc_order is None:
282
        return M, ordering
283
    else:
284
        return M
285

    
286

    
287
def attr_sparse_matrix(G, edge_attr=None, node_attr=None,
288
                       normalized=False, rc_order=None, dtype=None):
289
    """Returns a SciPy sparse matrix using attributes from G.
290

291
    If only `G` is passed in, then the adjacency matrix is constructed.
292

293
    Let A be a discrete set of values for the node attribute `node_attr`. Then
294
    the elements of A represent the rows and columns of the constructed matrix.
295
    Now, iterate through every edge e=(u,v) in `G` and consider the value
296
    of the edge attribute `edge_attr`.  If ua and va are the values of the
297
    node attribute `node_attr` for u and v, respectively, then the value of
298
    the edge attribute is added to the matrix element at (ua, va).
299

300
    Parameters
301
    ----------
302
    G : graph
303
        The NetworkX graph used to construct the NumPy matrix.
304

305
    edge_attr : str, optional
306
        Each element of the matrix represents a running total of the
307
        specified edge attribute for edges whose node attributes correspond
308
        to the rows/cols of the matirx. The attribute must be present for
309
        all edges in the graph. If no attribute is specified, then we
310
        just count the number of edges whose node attributes correspond
311
        to the matrix element.
312

313
    node_attr : str, optional
314
        Each row and column in the matrix represents a particular value
315
        of the node attribute.  The attribute must be present for all nodes
316
        in the graph. Note, the values of this attribute should be reliably
317
        hashable. So, float values are not recommended. If no attribute is
318
        specified, then the rows and columns will be the nodes of the graph.
319

320
    normalized : bool, optional
321
        If True, then each row is normalized by the summation of its values.
322

323
    rc_order : list, optional
324
        A list of the node attribute values. This list specifies the ordering
325
        of rows and columns of the array. If no ordering is provided, then
326
        the ordering will be random (and also, a return value).
327

328
    Other Parameters
329
    ----------------
330
    dtype : NumPy data-type, optional
331
        A valid NumPy dtype used to initialize the array. Keep in mind certain
332
        dtypes can yield unexpected results if the array is to be normalized.
333
        The parameter is passed to numpy.zeros(). If unspecified, the NumPy
334
        default is used.
335

336
    Returns
337
    -------
338
    M : SciPy sparse matrix
339
        The attribute matrix.
340

341
    ordering : list
342
        If `rc_order` was specified, then only the matrix is returned.
343
        However, if `rc_order` was None, then the ordering used to construct
344
        the matrix is returned as well.
345

346
    Examples
347
    --------
348
    Construct an adjacency matrix:
349

350
    >>> G = nx.Graph()
351
    >>> G.add_edge(0,1,thickness=1,weight=3)
352
    >>> G.add_edge(0,2,thickness=2)
353
    >>> G.add_edge(1,2,thickness=3)
354
    >>> M = nx.attr_sparse_matrix(G, rc_order=[0,1,2])
355
    >>> M.todense()
356
    matrix([[0., 1., 1.],
357
            [1., 0., 1.],
358
            [1., 1., 0.]])
359

360
    Alternatively, we can obtain the matrix describing edge thickness.
361

362
    >>> M = nx.attr_sparse_matrix(G, edge_attr='thickness', rc_order=[0,1,2])
363
    >>> M.todense()
364
    matrix([[0., 1., 2.],
365
            [1., 0., 3.],
366
            [2., 3., 0.]])
367

368
    We can also color the nodes and ask for the probability distribution over
369
    all edges (u,v) describing:
370

371
        Pr(v has color Y | u has color X)
372

373
    >>> G.nodes[0]['color'] = 'red'
374
    >>> G.nodes[1]['color'] = 'red'
375
    >>> G.nodes[2]['color'] = 'blue'
376
    >>> rc = ['red', 'blue']
377
    >>> M = nx.attr_sparse_matrix(G, node_attr='color', \
378
                                  normalized=True, rc_order=rc)
379
    >>> M.todense()
380
    matrix([[0.33333333, 0.66666667],
381
            [1.        , 0.        ]])
382

383
    For example, the above tells us that for all edges (u,v):
384

385
        Pr( v is red  | u is red)  = 1/3
386
        Pr( v is blue | u is red)  = 2/3
387

388
        Pr( v is red  | u is blue) = 1
389
        Pr( v is blue | u is blue) = 0
390

391
    Finally, we can obtain the total weights listed by the node colors.
392

393
    >>> M = nx.attr_sparse_matrix(G, edge_attr='weight',\
394
                                  node_attr='color', rc_order=rc)
395
    >>> M.todense()
396
    matrix([[3., 2.],
397
            [2., 0.]])
398

399
    Thus, the total weight over all edges (u,v) with u and v having colors:
400

401
        (red, red)   is 3   # the sole contribution is from edge (0,1)
402
        (red, blue)  is 2   # contributions from edges (0,2) and (1,2)
403
        (blue, red)  is 2   # same as (red, blue) since graph is undirected
404
        (blue, blue) is 0   # there are no edges with blue endpoints
405

406
    """
407
    try:
408
        import numpy as np
409
        from scipy import sparse
410
    except ImportError:
411
        raise ImportError(
412
            "attr_sparse_matrix() requires scipy: http://scipy.org/ ")
413

    
414
    edge_value = _edge_value(G, edge_attr)
415
    node_value = _node_value(G, node_attr)
416

    
417
    if rc_order is None:
418
        ordering = list(set([node_value(n) for n in G]))
419
    else:
420
        ordering = rc_order
421

    
422
    N = len(ordering)
423
    undirected = not G.is_directed()
424
    index = dict(zip(ordering, range(N)))
425
    M = sparse.lil_matrix((N, N), dtype=dtype)
426

    
427
    seen = set([])
428
    for u, nbrdict in G.adjacency():
429
        for v in nbrdict:
430
            # Obtain the node attribute values.
431
            i, j = index[node_value(u)], index[node_value(v)]
432
            if v not in seen:
433
                M[i, j] += edge_value(u, v)
434
                if undirected:
435
                    M[j, i] = M[i, j]
436

    
437
        if undirected:
438
            seen.add(u)
439

    
440
    if normalized:
441
        norms = np.asarray(M.sum(axis=1)).ravel()
442
        for i, norm in enumerate(norms):
443
            M[i, :] /= norm
444

    
445
    if rc_order is None:
446
        return M, ordering
447
    else:
448
        return M
449

    
450

    
451
# fixture for nose tests
452
def setup_module(module):
453
    from nose import SkipTest
454
    try:
455
        import numpy
456
    except:
457
        raise SkipTest("NumPy not available")
458
    try:
459
        import scipy
460
    except:
461
        raise SkipTest("SciPy not available")