Statistics
| Branch: | Revision:

iof-tools / networkxMiCe / networkx-master / networkx / convert_matrix.py @ 5cef0f13

History | View | Annotate | Download (43 KB)

1
#    Copyright (C) 2006-2019 by
2
#    Aric Hagberg <hagberg@lanl.gov>
3
#    Dan Schult <dschult@colgate.edu>
4
#    Pieter Swart <swart@lanl.gov>
5
#    All rights reserved.
6
#    BSD license.
7
"""Functions to convert NetworkX graphs to and from numpy/scipy matrices.
8

9
The preferred way of converting data to a NetworkX graph is through the
10
graph constructor.  The constructor calls the to_networkx_graph() function
11
which attempts to guess the input type and convert it automatically.
12

13
Examples
14
--------
15
Create a 10 node random graph from a numpy matrix
16

17
>>> import numpy as np
18
>>> a = np.random.randint(0, 2, size=(10, 10))
19
>>> D = nx.DiGraph(a)
20

21
or equivalently
22

23
>>> D = nx.to_networkx_graph(a, create_using=nx.DiGraph)
24

25
See Also
26
--------
27
nx_agraph, nx_pydot
28
"""
29

    
30
import itertools
31
import networkx as nx
32
from networkx.utils import not_implemented_for
33

    
34
__all__ = ['from_numpy_matrix', 'to_numpy_matrix',
35
           'from_pandas_adjacency', 'to_pandas_adjacency',
36
           'from_pandas_edgelist', 'to_pandas_edgelist',
37
           'to_numpy_recarray',
38
           'from_scipy_sparse_matrix', 'to_scipy_sparse_matrix',
39
           'from_numpy_array', 'to_numpy_array']
40

    
41

    
42
def to_pandas_adjacency(G, nodelist=None, dtype=None, order=None,
43
                        multigraph_weight=sum, weight='weight', nonedge=0.0):
44
    """Returns the graph adjacency matrix as a Pandas DataFrame.
45

46
    Parameters
47
    ----------
48
    G : graph
49
        The NetworkX graph used to construct the Pandas DataFrame.
50

51
    nodelist : list, optional
52
       The rows and columns are ordered according to the nodes in `nodelist`.
53
       If `nodelist` is None, then the ordering is produced by G.nodes().
54

55
    multigraph_weight : {sum, min, max}, optional
56
        An operator that determines how weights in multigraphs are handled.
57
        The default is to sum the weights of the multiple edges.
58

59
    weight : string or None, optional
60
        The edge attribute that holds the numerical value used for
61
        the edge weight.  If an edge does not have that attribute, then the
62
        value 1 is used instead.
63

64
    nonedge : float, optional
65
        The matrix values corresponding to nonedges are typically set to zero.
66
        However, this could be undesirable if there are matrix values
67
        corresponding to actual edges that also have the value zero. If so,
68
        one might prefer nonedges to have some other value, such as nan.
69

70
    Returns
71
    -------
72
    df : Pandas DataFrame
73
       Graph adjacency matrix
74

75
    Notes
76
    -----
77
    The DataFrame entries are assigned to the weight edge attribute. When
78
    an edge does not have a weight attribute, the value of the entry is set to
79
    the number 1.  For multiple (parallel) edges, the values of the entries
80
    are determined by the 'multigraph_weight' parameter.  The default is to
81
    sum the weight attributes for each of the parallel edges.
82

83
    When `nodelist` does not contain every node in `G`, the matrix is built
84
    from the subgraph of `G` that is induced by the nodes in `nodelist`.
85

86
    The convention used for self-loop edges in graphs is to assign the
87
    diagonal matrix entry value to the weight attribute of the edge
88
    (or the number 1 if the edge has no weight attribute).  If the
89
    alternate convention of doubling the edge weight is desired the
90
    resulting Pandas DataFrame can be modified as follows:
91

92
    >>> import pandas as pd
93
    >>> pd.options.display.max_columns = 20
94
    >>> import numpy as np
95
    >>> G = nx.Graph([(1, 1)])
96
    >>> df = nx.to_pandas_adjacency(G, dtype=int)
97
    >>> df
98
       1
99
    1  1
100
    >>> df.values[np.diag_indices_from(df)] *= 2
101
    >>> df
102
       1
103
    1  2
104

105
    Examples
106
    --------
107
    >>> G = nx.MultiDiGraph()
108
    >>> G.add_edge(0, 1, weight=2)
109
    0
110
    >>> G.add_edge(1, 0)
111
    0
112
    >>> G.add_edge(2, 2, weight=3)
113
    0
114
    >>> G.add_edge(2, 2)
115
    1
116
    >>> nx.to_pandas_adjacency(G, nodelist=[0, 1, 2], dtype=int)
117
       0  1  2
118
    0  0  2  0
119
    1  1  0  0
120
    2  0  0  4
121

122
    """
123
    import pandas as pd
124
    M = to_numpy_matrix(G, nodelist=nodelist, dtype=dtype, order=order,
125
                        multigraph_weight=multigraph_weight, weight=weight,
126
                        nonedge=nonedge)
127
    if nodelist is None:
128
        nodelist = list(G)
129
    return pd.DataFrame(data=M, index=nodelist, columns=nodelist)
130

    
131

    
132
def from_pandas_adjacency(df, create_using=None):
133
    r"""Returns a graph from Pandas DataFrame.
134

135
    The Pandas DataFrame is interpreted as an adjacency matrix for the graph.
136

137
    Parameters
138
    ----------
139
    df : Pandas DataFrame
140
      An adjacency matrix representation of a graph
141

142
    create_using : NetworkX graph constructor, optional (default=nx.Graph)
143
       Graph type to create. If graph instance, then cleared before populated.
144

145
    Notes
146
    -----
147
    If the numpy matrix has a single data type for each matrix entry it
148
    will be converted to an appropriate Python data type.
149

150
    If the numpy matrix has a user-specified compound data type the names
151
    of the data fields will be used as attribute keys in the resulting
152
    NetworkX graph.
153

154
    See Also
155
    --------
156
    to_pandas_adjacency
157

158
    Examples
159
    --------
160
    Simple integer weights on edges:
161

162
    >>> import pandas as pd
163
    >>> pd.options.display.max_columns = 20
164
    >>> df = pd.DataFrame([[1, 1], [2, 1]])
165
    >>> df
166
       0  1
167
    0  1  1
168
    1  2  1
169
    >>> G = nx.from_pandas_adjacency(df)
170
    >>> G.name = 'Graph from pandas adjacency matrix'
171
    >>> print(nx.info(G))
172
    Name: Graph from pandas adjacency matrix
173
    Type: Graph
174
    Number of nodes: 2
175
    Number of edges: 3
176
    Average degree:   3.0000
177

178
    """
179

    
180
    try:
181
        df = df[df.index]
182
    except:
183
        msg = "%s not in columns"
184
        missing = list(set(df.index).difference(set(df.columns)))
185
        raise nx.NetworkXError("Columns must match Indices.", msg % missing)
186

    
187
    A = df.values
188
    G = from_numpy_matrix(A, create_using=create_using)
189

    
190
    nx.relabel.relabel_nodes(G, dict(enumerate(df.columns)), copy=False)
191
    return G
192

    
193

    
194
def to_pandas_edgelist(G, source='source', target='target', nodelist=None,
195
                       dtype=None, order=None):
196
    """Returns the graph edge list as a Pandas DataFrame.
197

198
    Parameters
199
    ----------
200
    G : graph
201
        The NetworkX graph used to construct the Pandas DataFrame.
202

203
    source : str or int, optional
204
        A valid column name (string or integer) for the source nodes (for the
205
        directed case).
206

207
    target : str or int, optional
208
        A valid column name (string or integer) for the target nodes (for the
209
        directed case).
210

211
    nodelist : list, optional
212
       Use only nodes specified in nodelist
213

214
    Returns
215
    -------
216
    df : Pandas DataFrame
217
       Graph edge list
218

219
    Examples
220
    --------
221
    >>> G = nx.Graph([('A', 'B', {'cost': 1, 'weight': 7}),
222
    ...               ('C', 'E', {'cost': 9, 'weight': 10})])
223
    >>> df = nx.to_pandas_edgelist(G, nodelist=['A', 'C'])
224
    >>> df[['source', 'target', 'cost', 'weight']]
225
      source target  cost  weight
226
    0      A      B     1       7
227
    1      C      E     9      10
228

229
    """
230
    import pandas as pd
231
    if nodelist is None:
232
        edgelist = G.edges(data=True)
233
    else:
234
        edgelist = G.edges(nodelist, data=True)
235
    source_nodes = [s for s, t, d in edgelist]
236
    target_nodes = [t for s, t, d in edgelist]
237
    all_keys = set().union(*(d.keys() for s, t, d in edgelist))
238
    edge_attr = {k: [d.get(k, float("nan")) for s, t, d in edgelist]
239
                 for k in all_keys}
240
    edgelistdict = {source: source_nodes, target: target_nodes}
241
    edgelistdict.update(edge_attr)
242
    return pd.DataFrame(edgelistdict)
243

    
244

    
245
def from_pandas_edgelist(df, source='source', target='target', edge_attr=None,
246
                         create_using=None):
247
    """Returns a graph from Pandas DataFrame containing an edge list.
248

249
    The Pandas DataFrame should contain at least two columns of node names and
250
    zero or more columns of edge attributes. Each row will be processed as one
251
    edge instance.
252

253
    Note: This function iterates over DataFrame.values, which is not
254
    guaranteed to retain the data type across columns in the row. This is only
255
    a problem if your row is entirely numeric and a mix of ints and floats. In
256
    that case, all values will be returned as floats. See the
257
    DataFrame.iterrows documentation for an example.
258

259
    Parameters
260
    ----------
261
    df : Pandas DataFrame
262
        An edge list representation of a graph
263

264
    source : str or int
265
        A valid column name (string or integer) for the source nodes (for the
266
        directed case).
267

268
    target : str or int
269
        A valid column name (string or integer) for the target nodes (for the
270
        directed case).
271

272
    edge_attr : str or int, iterable, True
273
        A valid column name (str or integer) or list of column names that will
274
        be used to retrieve items from the row and add them to the graph as
275
        edge attributes. If `True`, all of the remaining columns will be added.
276

277
    create_using : NetworkX graph constructor, optional (default=nx.Graph)
278
       Graph type to create. If graph instance, then cleared before populated.
279

280
    See Also
281
    --------
282
    to_pandas_edgelist
283

284
    Examples
285
    --------
286
    Simple integer weights on edges:
287

288
    >>> import pandas as pd
289
    >>> pd.options.display.max_columns = 20
290
    >>> import numpy as np
291
    >>> rng = np.random.RandomState(seed=5)
292
    >>> ints = rng.randint(1, 11, size=(3,2))
293
    >>> a = ['A', 'B', 'C']
294
    >>> b = ['D', 'A', 'E']
295
    >>> df = pd.DataFrame(ints, columns=['weight', 'cost'])
296
    >>> df[0] = a
297
    >>> df['b'] = b
298
    >>> df[['weight', 'cost', 0, 'b']]
299
       weight  cost  0  b
300
    0       4     7  A  D
301
    1       7     1  B  A
302
    2      10     9  C  E
303
    >>> G = nx.from_pandas_edgelist(df, 0, 'b', ['weight', 'cost'])
304
    >>> G['E']['C']['weight']
305
    10
306
    >>> G['E']['C']['cost']
307
    9
308
    >>> edges = pd.DataFrame({'source': [0, 1, 2],
309
    ...                       'target': [2, 2, 3],
310
    ...                       'weight': [3, 4, 5],
311
    ...                       'color': ['red', 'blue', 'blue']})
312
    >>> G = nx.from_pandas_edgelist(edges, edge_attr=True)
313
    >>> G[0][2]['color']
314
    'red'
315

316
    """
317
    g = nx.empty_graph(0, create_using)
318

    
319
    if edge_attr is None:
320
        g.add_edges_from(zip(df[source], df[target]))
321
        return g
322

    
323
    # Additional columns requested
324
    if edge_attr is True:
325
        cols = [c for c in df.columns if c is not source and c is not target]
326
    elif isinstance(edge_attr, (list, tuple)):
327
        cols = edge_attr
328
    else:
329
        cols = [edge_attr]
330

    
331
    try:
332
        eattrs = zip(*[df[col] for col in cols])
333
    except (KeyError, TypeError) as e:
334
        msg = "Invalid edge_attr argument: %s" % edge_attr
335
        raise nx.NetworkXError(msg)
336
    for s, t, attrs in zip(df[source], df[target], eattrs):
337

    
338
        g.add_edge(s, t)
339

    
340
        if g.is_multigraph():
341
            key = max(g[s][t])  # default keys just count so max is most recent
342
            g[s][t][key].update((attr, val) for attr, val in zip(cols, attrs))
343
        else:
344
            g[s][t].update((attr, val) for attr, val in zip(cols, attrs))
345

    
346
    return g
347

    
348

    
349
def to_numpy_matrix(G, nodelist=None, dtype=None, order=None,
350
                    multigraph_weight=sum, weight='weight', nonedge=0.0):
351
    """Returns the graph adjacency matrix as a NumPy matrix.
352

353
    Parameters
354
    ----------
355
    G : graph
356
        The NetworkX graph used to construct the NumPy matrix.
357

358
    nodelist : list, optional
359
        The rows and columns are ordered according to the nodes in `nodelist`.
360
        If `nodelist` is None, then the ordering is produced by G.nodes().
361

362
    dtype : NumPy data type, optional
363
        A valid single NumPy data type used to initialize the array.
364
        This must be a simple type such as int or numpy.float64 and
365
        not a compound data type (see to_numpy_recarray)
366
        If None, then the NumPy default is used.
367

368
    order : {'C', 'F'}, optional
369
        Whether to store multidimensional data in C- or Fortran-contiguous
370
        (row- or column-wise) order in memory. If None, then the NumPy default
371
        is used.
372

373
    multigraph_weight : {sum, min, max}, optional
374
        An operator that determines how weights in multigraphs are handled.
375
        The default is to sum the weights of the multiple edges.
376

377
    weight : string or None optional (default = 'weight')
378
        The edge attribute that holds the numerical value used for
379
        the edge weight. If an edge does not have that attribute, then the
380
        value 1 is used instead.
381

382
    nonedge : float (default = 0.0)
383
        The matrix values corresponding to nonedges are typically set to zero.
384
        However, this could be undesirable if there are matrix values
385
        corresponding to actual edges that also have the value zero. If so,
386
        one might prefer nonedges to have some other value, such as nan.
387

388
    Returns
389
    -------
390
    M : NumPy matrix
391
        Graph adjacency matrix
392

393
    See Also
394
    --------
395
    to_numpy_recarray, from_numpy_matrix
396

397
    Notes
398
    -----
399
    The matrix entries are assigned to the weight edge attribute. When
400
    an edge does not have a weight attribute, the value of the entry is set to
401
    the number 1.  For multiple (parallel) edges, the values of the entries
402
    are determined by the `multigraph_weight` parameter.  The default is to
403
    sum the weight attributes for each of the parallel edges.
404

405
    When `nodelist` does not contain every node in `G`, the matrix is built
406
    from the subgraph of `G` that is induced by the nodes in `nodelist`.
407

408
    The convention used for self-loop edges in graphs is to assign the
409
    diagonal matrix entry value to the weight attribute of the edge
410
    (or the number 1 if the edge has no weight attribute).  If the
411
    alternate convention of doubling the edge weight is desired the
412
    resulting Numpy matrix can be modified as follows:
413

414
    >>> import numpy as np
415
    >>> G = nx.Graph([(1, 1)])
416
    >>> A = nx.to_numpy_matrix(G)
417
    >>> A
418
    matrix([[1.]])
419
    >>> A.A[np.diag_indices_from(A)] *= 2
420
    >>> A
421
    matrix([[2.]])
422

423
    Examples
424
    --------
425
    >>> G = nx.MultiDiGraph()
426
    >>> G.add_edge(0, 1, weight=2)
427
    0
428
    >>> G.add_edge(1, 0)
429
    0
430
    >>> G.add_edge(2, 2, weight=3)
431
    0
432
    >>> G.add_edge(2, 2)
433
    1
434
    >>> nx.to_numpy_matrix(G, nodelist=[0, 1, 2])
435
    matrix([[0., 2., 0.],
436
            [1., 0., 0.],
437
            [0., 0., 4.]])
438

439
    """
440
    import numpy as np
441

    
442
    A = to_numpy_array(G, nodelist=nodelist, dtype=dtype, order=order,
443
                       multigraph_weight=multigraph_weight, weight=weight,
444
                       nonedge=nonedge)
445
    M = np.asmatrix(A, dtype=dtype)
446
    return M
447

    
448

    
449
def from_numpy_matrix(A, parallel_edges=False, create_using=None):
450
    """Returns a graph from numpy matrix.
451

452
    The numpy matrix is interpreted as an adjacency matrix for the graph.
453

454
    Parameters
455
    ----------
456
    A : numpy matrix
457
        An adjacency matrix representation of a graph
458

459
    parallel_edges : Boolean
460
        If True, `create_using` is a multigraph, and `A` is an
461
        integer matrix, then entry *(i, j)* in the matrix is interpreted as the
462
        number of parallel edges joining vertices *i* and *j* in the graph.
463
        If False, then the entries in the adjacency matrix are interpreted as
464
        the weight of a single edge joining the vertices.
465

466
    create_using : NetworkX graph constructor, optional (default=nx.Graph)
467
       Graph type to create. If graph instance, then cleared before populated.
468

469
    Notes
470
    -----
471
    If `create_using` is :class:`networkx.MultiGraph` or
472
    :class:`networkx.MultiDiGraph`, `parallel_edges` is True, and the
473
    entries of `A` are of type :class:`int`, then this function returns a
474
    multigraph (constructed from `create_using`) with parallel edges.
475

476
    If `create_using` indicates an undirected multigraph, then only the edges
477
    indicated by the upper triangle of the matrix `A` will be added to the
478
    graph.
479

480
    If the numpy matrix has a single data type for each matrix entry it
481
    will be converted to an appropriate Python data type.
482

483
    If the numpy matrix has a user-specified compound data type the names
484
    of the data fields will be used as attribute keys in the resulting
485
    NetworkX graph.
486

487
    See Also
488
    --------
489
    to_numpy_matrix, to_numpy_recarray
490

491
    Examples
492
    --------
493
    Simple integer weights on edges:
494

495
    >>> import numpy as np
496
    >>> A = np.matrix([[1, 1], [2, 1]])
497
    >>> G = nx.from_numpy_matrix(A)
498

499
    If `create_using` indicates a multigraph and the matrix has only integer
500
    entries and `parallel_edges` is False, then the entries will be treated
501
    as weights for edges joining the nodes (without creating parallel edges):
502

503
    >>> A = np.matrix([[1, 1], [1, 2]])
504
    >>> G = nx.from_numpy_matrix(A, create_using=nx.MultiGraph)
505
    >>> G[1][1]
506
    AtlasView({0: {'weight': 2}})
507

508
    If `create_using` indicates a multigraph and the matrix has only integer
509
    entries and `parallel_edges` is True, then the entries will be treated
510
    as the number of parallel edges joining those two vertices:
511

512
    >>> A = np.matrix([[1, 1], [1, 2]])
513
    >>> temp = nx.MultiGraph()
514
    >>> G = nx.from_numpy_matrix(A, parallel_edges=True, create_using=temp)
515
    >>> G[1][1]
516
    AtlasView({0: {'weight': 1}, 1: {'weight': 1}})
517

518
    User defined compound data type on edges:
519

520
    >>> dt = [('weight', float), ('cost', int)]
521
    >>> A = np.matrix([[(1.0, 2)]], dtype=dt)
522
    >>> G = nx.from_numpy_matrix(A)
523
    >>> list(G.edges())
524
    [(0, 0)]
525
    >>> G[0][0]['cost']
526
    2
527
    >>> G[0][0]['weight']
528
    1.0
529

530
    """
531
    # This should never fail if you have created a numpy matrix with numpy...
532
    import numpy as np
533
    kind_to_python_type = {'f': float,
534
                           'i': int,
535
                           'u': int,
536
                           'b': bool,
537
                           'c': complex,
538
                           'S': str,
539
                           'V': 'void'}
540
    try:  # Python 3.x
541
        blurb = chr(1245)  # just to trigger the exception
542
        kind_to_python_type['U'] = str
543
    except ValueError:  # Python 2.7
544
        kind_to_python_type['U'] = unicode
545
    G = nx.empty_graph(0, create_using)
546
    n, m = A.shape
547
    if n != m:
548
        raise nx.NetworkXError("Adjacency matrix is not square.",
549
                               "nx,ny=%s" % (A.shape,))
550
    dt = A.dtype
551
    try:
552
        python_type = kind_to_python_type[dt.kind]
553
    except:
554
        raise TypeError("Unknown numpy data type: %s" % dt)
555

    
556
    # Make sure we get even the isolated nodes of the graph.
557
    G.add_nodes_from(range(n))
558
    # Get a list of all the entries in the matrix with nonzero entries. These
559
    # coordinates will become the edges in the graph.
560
    edges = map(lambda e: (int(e[0]), int(e[1])),
561
                zip(*(np.asarray(A).nonzero())))
562
    # handle numpy constructed data type
563
    if python_type is 'void':
564
        # Sort the fields by their offset, then by dtype, then by name.
565
        fields = sorted((offset, dtype, name) for name, (dtype, offset) in
566
                        A.dtype.fields.items())
567
        triples = ((u, v, {name: kind_to_python_type[dtype.kind](val)
568
                           for (_, dtype, name), val in zip(fields, A[u, v])})
569
                   for u, v in edges)
570
    # If the entries in the adjacency matrix are integers, the graph is a
571
    # multigraph, and parallel_edges is True, then create parallel edges, each
572
    # with weight 1, for each entry in the adjacency matrix. Otherwise, create
573
    # one edge for each positive entry in the adjacency matrix and set the
574
    # weight of that edge to be the entry in the matrix.
575
    elif python_type is int and G.is_multigraph() and parallel_edges:
576
        chain = itertools.chain.from_iterable
577
        # The following line is equivalent to:
578
        #
579
        #     for (u, v) in edges:
580
        #         for d in range(A[u, v]):
581
        #             G.add_edge(u, v, weight=1)
582
        #
583
        triples = chain(((u, v, dict(weight=1)) for d in range(A[u, v]))
584
                        for (u, v) in edges)
585
    else:  # basic data type
586
        triples = ((u, v, dict(weight=python_type(A[u, v])))
587
                   for u, v in edges)
588
    # If we are creating an undirected multigraph, only add the edges from the
589
    # upper triangle of the matrix. Otherwise, add all the edges. This relies
590
    # on the fact that the vertices created in the
591
    # `_generated_weighted_edges()` function are actually the row/column
592
    # indices for the matrix `A`.
593
    #
594
    # Without this check, we run into a problem where each edge is added twice
595
    # when `G.add_edges_from()` is invoked below.
596
    if G.is_multigraph() and not G.is_directed():
597
        triples = ((u, v, d) for u, v, d in triples if u <= v)
598
    G.add_edges_from(triples)
599
    return G
600

    
601

    
602
@not_implemented_for('multigraph')
603
def to_numpy_recarray(G, nodelist=None, dtype=None, order=None):
604
    """Returns the graph adjacency matrix as a NumPy recarray.
605

606
    Parameters
607
    ----------
608
    G : graph
609
        The NetworkX graph used to construct the NumPy matrix.
610

611
    nodelist : list, optional
612
       The rows and columns are ordered according to the nodes in `nodelist`.
613
       If `nodelist` is None, then the ordering is produced by G.nodes().
614

615
    dtype : NumPy data-type, optional
616
        A valid NumPy named dtype used to initialize the NumPy recarray.
617
        The data type names are assumed to be keys in the graph edge attribute
618
        dictionary.
619

620
    order : {'C', 'F'}, optional
621
        Whether to store multidimensional data in C- or Fortran-contiguous
622
        (row- or column-wise) order in memory. If None, then the NumPy default
623
        is used.
624

625
    Returns
626
    -------
627
    M : NumPy recarray
628
       The graph with specified edge data as a Numpy recarray
629

630
    Notes
631
    -----
632
    When `nodelist` does not contain every node in `G`, the matrix is built
633
    from the subgraph of `G` that is induced by the nodes in `nodelist`.
634

635
    Examples
636
    --------
637
    >>> G = nx.Graph()
638
    >>> G.add_edge(1, 2, weight=7.0, cost=5)
639
    >>> A = nx.to_numpy_recarray(G, dtype=[('weight', float), ('cost', int)])
640
    >>> print(A.weight)
641
    [[0. 7.]
642
     [7. 0.]]
643
    >>> print(A.cost)
644
    [[0 5]
645
     [5 0]]
646

647
    """
648
    if dtype is None:
649
        dtype = [('weight', float)]
650
    import numpy as np
651
    if nodelist is None:
652
        nodelist = list(G)
653
    nodeset = set(nodelist)
654
    if len(nodelist) != len(nodeset):
655
        msg = "Ambiguous ordering: `nodelist` contained duplicates."
656
        raise nx.NetworkXError(msg)
657
    nlen = len(nodelist)
658
    undirected = not G.is_directed()
659
    index = dict(zip(nodelist, range(nlen)))
660
    M = np.zeros((nlen, nlen), dtype=dtype, order=order)
661

    
662
    names = M.dtype.names
663
    for u, v, attrs in G.edges(data=True):
664
        if (u in nodeset) and (v in nodeset):
665
            i, j = index[u], index[v]
666
            values = tuple([attrs[n] for n in names])
667
            M[i, j] = values
668
            if undirected:
669
                M[j, i] = M[i, j]
670

    
671
    return M.view(np.recarray)
672

    
673

    
674
def to_scipy_sparse_matrix(G, nodelist=None, dtype=None,
675
                           weight='weight', format='csr'):
676
    """Returns the graph adjacency matrix as a SciPy sparse matrix.
677

678
    Parameters
679
    ----------
680
    G : graph
681
        The NetworkX graph used to construct the NumPy matrix.
682

683
    nodelist : list, optional
684
       The rows and columns are ordered according to the nodes in `nodelist`.
685
       If `nodelist` is None, then the ordering is produced by G.nodes().
686

687
    dtype : NumPy data-type, optional
688
        A valid NumPy dtype used to initialize the array. If None, then the
689
        NumPy default is used.
690

691
    weight : string or None   optional (default='weight')
692
        The edge attribute that holds the numerical value used for
693
        the edge weight.  If None then all edge weights are 1.
694

695
    format : str in {'bsr', 'csr', 'csc', 'coo', 'lil', 'dia', 'dok'}
696
        The type of the matrix to be returned (default 'csr').  For
697
        some algorithms different implementations of sparse matrices
698
        can perform better.  See [1]_ for details.
699

700
    Returns
701
    -------
702
    M : SciPy sparse matrix
703
       Graph adjacency matrix.
704

705
    Notes
706
    -----
707
    The matrix entries are populated using the edge attribute held in
708
    parameter weight. When an edge does not have that attribute, the
709
    value of the entry is 1.
710

711
    For multiple edges the matrix values are the sums of the edge weights.
712

713
    When `nodelist` does not contain every node in `G`, the matrix is built
714
    from the subgraph of `G` that is induced by the nodes in `nodelist`.
715

716
    Uses coo_matrix format. To convert to other formats specify the
717
    format= keyword.
718

719
    The convention used for self-loop edges in graphs is to assign the
720
    diagonal matrix entry value to the weight attribute of the edge
721
    (or the number 1 if the edge has no weight attribute).  If the
722
    alternate convention of doubling the edge weight is desired the
723
    resulting Scipy sparse matrix can be modified as follows:
724

725
    >>> import scipy as sp
726
    >>> G = nx.Graph([(1, 1)])
727
    >>> A = nx.to_scipy_sparse_matrix(G)
728
    >>> print(A.todense())
729
    [[1]]
730
    >>> A.setdiag(A.diagonal() * 2)
731
    >>> print(A.todense())
732
    [[2]]
733

734
    Examples
735
    --------
736
    >>> G = nx.MultiDiGraph()
737
    >>> G.add_edge(0, 1, weight=2)
738
    0
739
    >>> G.add_edge(1, 0)
740
    0
741
    >>> G.add_edge(2, 2, weight=3)
742
    0
743
    >>> G.add_edge(2, 2)
744
    1
745
    >>> S = nx.to_scipy_sparse_matrix(G, nodelist=[0, 1, 2])
746
    >>> print(S.todense())
747
    [[0 2 0]
748
     [1 0 0]
749
     [0 0 4]]
750

751
    References
752
    ----------
753
    .. [1] Scipy Dev. References, "Sparse Matrices",
754
       https://docs.scipy.org/doc/scipy/reference/sparse.html
755
    """
756
    from scipy import sparse
757
    if nodelist is None:
758
        nodelist = list(G)
759
    nlen = len(nodelist)
760
    if nlen == 0:
761
        raise nx.NetworkXError("Graph has no nodes or edges")
762

    
763
    if len(nodelist) != len(set(nodelist)):
764
        msg = "Ambiguous ordering: `nodelist` contained duplicates."
765
        raise nx.NetworkXError(msg)
766

    
767
    index = dict(zip(nodelist, range(nlen)))
768
    coefficients = zip(*((index[u], index[v], d.get(weight, 1))
769
                         for u, v, d in G.edges(nodelist, data=True)
770
                         if u in index and v in index))
771
    try:
772
        row, col, data = coefficients
773
    except ValueError:
774
        # there is no edge in the subgraph
775
        row, col, data = [], [], []
776

    
777
    if G.is_directed():
778
        M = sparse.coo_matrix((data, (row, col)),
779
                              shape=(nlen, nlen), dtype=dtype)
780
    else:
781
        # symmetrize matrix
782
        d = data + data
783
        r = row + col
784
        c = col + row
785
        # selfloop entries get double counted when symmetrizing
786
        # so we subtract the data on the diagonal
787
        selfloops = list(nx.selfloop_edges(G, data=True))
788
        if selfloops:
789
            diag_index, diag_data = zip(*((index[u], -d.get(weight, 1))
790
                                          for u, v, d in selfloops
791
                                          if u in index and v in index))
792
            d += diag_data
793
            r += diag_index
794
            c += diag_index
795
        M = sparse.coo_matrix((d, (r, c)), shape=(nlen, nlen), dtype=dtype)
796
    try:
797
        return M.asformat(format)
798
    # From Scipy 1.1.0, asformat will throw a ValueError instead of an
799
    # AttributeError if the format if not recognized.
800
    except (AttributeError, ValueError):
801
        raise nx.NetworkXError("Unknown sparse matrix format: %s" % format)
802

    
803

    
804
def _csr_gen_triples(A):
805
    """Converts a SciPy sparse matrix in **Compressed Sparse Row** format to
806
    an iterable of weighted edge triples.
807

808
    """
809
    nrows = A.shape[0]
810
    data, indices, indptr = A.data, A.indices, A.indptr
811
    for i in range(nrows):
812
        for j in range(indptr[i], indptr[i + 1]):
813
            yield i, indices[j], data[j]
814

    
815

    
816
def _csc_gen_triples(A):
817
    """Converts a SciPy sparse matrix in **Compressed Sparse Column** format to
818
    an iterable of weighted edge triples.
819

820
    """
821
    ncols = A.shape[1]
822
    data, indices, indptr = A.data, A.indices, A.indptr
823
    for i in range(ncols):
824
        for j in range(indptr[i], indptr[i + 1]):
825
            yield indices[j], i, data[j]
826

    
827

    
828
def _coo_gen_triples(A):
829
    """Converts a SciPy sparse matrix in **Coordinate** format to an iterable
830
    of weighted edge triples.
831

832
    """
833
    row, col, data = A.row, A.col, A.data
834
    return zip(row, col, data)
835

    
836

    
837
def _dok_gen_triples(A):
838
    """Converts a SciPy sparse matrix in **Dictionary of Keys** format to an
839
    iterable of weighted edge triples.
840

841
    """
842
    for (r, c), v in A.items():
843
        yield r, c, v
844

    
845

    
846
def _generate_weighted_edges(A):
847
    """Returns an iterable over (u, v, w) triples, where u and v are adjacent
848
    vertices and w is the weight of the edge joining u and v.
849

850
    `A` is a SciPy sparse matrix (in any format).
851

852
    """
853
    if A.format == 'csr':
854
        return _csr_gen_triples(A)
855
    if A.format == 'csc':
856
        return _csc_gen_triples(A)
857
    if A.format == 'dok':
858
        return _dok_gen_triples(A)
859
    # If A is in any other format (including COO), convert it to COO format.
860
    return _coo_gen_triples(A.tocoo())
861

    
862

    
863
def from_scipy_sparse_matrix(A, parallel_edges=False, create_using=None,
864
                             edge_attribute='weight'):
865
    """Creates a new graph from an adjacency matrix given as a SciPy sparse
866
    matrix.
867

868
    Parameters
869
    ----------
870
    A: scipy sparse matrix
871
      An adjacency matrix representation of a graph
872

873
    parallel_edges : Boolean
874
      If this is True, `create_using` is a multigraph, and `A` is an
875
      integer matrix, then entry *(i, j)* in the matrix is interpreted as the
876
      number of parallel edges joining vertices *i* and *j* in the graph.
877
      If it is False, then the entries in the matrix are interpreted as
878
      the weight of a single edge joining the vertices.
879

880
    create_using : NetworkX graph constructor, optional (default=nx.Graph)
881
       Graph type to create. If graph instance, then cleared before populated.
882

883
    edge_attribute: string
884
       Name of edge attribute to store matrix numeric value. The data will
885
       have the same type as the matrix entry (int, float, (real,imag)).
886

887
    Notes
888
    -----
889

890
    If `create_using` is :class:`networkx.MultiGraph` or
891
    :class:`networkx.MultiDiGraph`, `parallel_edges` is True, and the
892
    entries of `A` are of type :class:`int`, then this function returns a
893
    multigraph (constructed from `create_using`) with parallel edges.
894
    In this case, `edge_attribute` will be ignored.
895

896
    If `create_using` indicates an undirected multigraph, then only the edges
897
    indicated by the upper triangle of the matrix `A` will be added to the
898
    graph.
899

900
    Examples
901
    --------
902
    >>> import scipy as sp
903
    >>> A = sp.sparse.eye(2, 2, 1)
904
    >>> G = nx.from_scipy_sparse_matrix(A)
905

906
    If `create_using` indicates a multigraph and the matrix has only integer
907
    entries and `parallel_edges` is False, then the entries will be treated
908
    as weights for edges joining the nodes (without creating parallel edges):
909

910
    >>> A = sp.sparse.csr_matrix([[1, 1], [1, 2]])
911
    >>> G = nx.from_scipy_sparse_matrix(A, create_using=nx.MultiGraph)
912
    >>> G[1][1]
913
    AtlasView({0: {'weight': 2}})
914

915
    If `create_using` indicates a multigraph and the matrix has only integer
916
    entries and `parallel_edges` is True, then the entries will be treated
917
    as the number of parallel edges joining those two vertices:
918

919
    >>> A = sp.sparse.csr_matrix([[1, 1], [1, 2]])
920
    >>> G = nx.from_scipy_sparse_matrix(A, parallel_edges=True,
921
    ...                                 create_using=nx.MultiGraph)
922
    >>> G[1][1]
923
    AtlasView({0: {'weight': 1}, 1: {'weight': 1}})
924

925
    """
926
    G = nx.empty_graph(0, create_using)
927
    n, m = A.shape
928
    if n != m:
929
        raise nx.NetworkXError(
930
            "Adjacency matrix is not square. nx,ny=%s" % (A.shape,))
931
    # Make sure we get even the isolated nodes of the graph.
932
    G.add_nodes_from(range(n))
933
    # Create an iterable over (u, v, w) triples and for each triple, add an
934
    # edge from u to v with weight w.
935
    triples = _generate_weighted_edges(A)
936
    # If the entries in the adjacency matrix are integers, the graph is a
937
    # multigraph, and parallel_edges is True, then create parallel edges, each
938
    # with weight 1, for each entry in the adjacency matrix. Otherwise, create
939
    # one edge for each positive entry in the adjacency matrix and set the
940
    # weight of that edge to be the entry in the matrix.
941
    if A.dtype.kind in ('i', 'u') and G.is_multigraph() and parallel_edges:
942
        chain = itertools.chain.from_iterable
943
        # The following line is equivalent to:
944
        #
945
        #     for (u, v) in edges:
946
        #         for d in range(A[u, v]):
947
        #             G.add_edge(u, v, weight=1)
948
        #
949
        triples = chain(((u, v, 1) for d in range(w)) for (u, v, w) in triples)
950
    # If we are creating an undirected multigraph, only add the edges from the
951
    # upper triangle of the matrix. Otherwise, add all the edges. This relies
952
    # on the fact that the vertices created in the
953
    # `_generated_weighted_edges()` function are actually the row/column
954
    # indices for the matrix `A`.
955
    #
956
    # Without this check, we run into a problem where each edge is added twice
957
    # when `G.add_weighted_edges_from()` is invoked below.
958
    if G.is_multigraph() and not G.is_directed():
959
        triples = ((u, v, d) for u, v, d in triples if u <= v)
960
    G.add_weighted_edges_from(triples, weight=edge_attribute)
961
    return G
962

    
963

    
964
def to_numpy_array(G, nodelist=None, dtype=None, order=None,
965
                   multigraph_weight=sum, weight='weight', nonedge=0.0):
966
    """Returns the graph adjacency matrix as a NumPy array.
967

968
    Parameters
969
    ----------
970
    G : graph
971
        The NetworkX graph used to construct the NumPy array.
972

973
    nodelist : list, optional
974
        The rows and columns are ordered according to the nodes in `nodelist`.
975
        If `nodelist` is None, then the ordering is produced by G.nodes().
976

977
    dtype : NumPy data type, optional
978
        A valid single NumPy data type used to initialize the array.
979
        This must be a simple type such as int or numpy.float64 and
980
        not a compound data type (see to_numpy_recarray)
981
        If None, then the NumPy default is used.
982

983
    order : {'C', 'F'}, optional
984
        Whether to store multidimensional data in C- or Fortran-contiguous
985
        (row- or column-wise) order in memory. If None, then the NumPy default
986
        is used.
987

988
    multigraph_weight : {sum, min, max}, optional
989
        An operator that determines how weights in multigraphs are handled.
990
        The default is to sum the weights of the multiple edges.
991

992
    weight : string or None optional (default = 'weight')
993
        The edge attribute that holds the numerical value used for
994
        the edge weight. If an edge does not have that attribute, then the
995
        value 1 is used instead.
996

997
    nonedge : float (default = 0.0)
998
        The array values corresponding to nonedges are typically set to zero.
999
        However, this could be undesirable if there are array values
1000
        corresponding to actual edges that also have the value zero. If so,
1001
        one might prefer nonedges to have some other value, such as nan.
1002

1003
    Returns
1004
    -------
1005
    A : NumPy ndarray
1006
        Graph adjacency matrix
1007

1008
    See Also
1009
    --------
1010
    from_numpy_array
1011

1012
    Notes
1013
    -----
1014
    Entries in the adjacency matrix are assigned to the weight edge attribute.
1015
    When an edge does not have a weight attribute, the value of the entry is
1016
    set to the number 1.  For multiple (parallel) edges, the values of the
1017
    entries are determined by the `multigraph_weight` parameter. The default is
1018
    to sum the weight attributes for each of the parallel edges.
1019

1020
    When `nodelist` does not contain every node in `G`, the adjacency matrix is
1021
    built from the subgraph of `G` that is induced by the nodes in `nodelist`.
1022

1023
    The convention used for self-loop edges in graphs is to assign the
1024
    diagonal array entry value to the weight attribute of the edge
1025
    (or the number 1 if the edge has no weight attribute). If the
1026
    alternate convention of doubling the edge weight is desired the
1027
    resulting NumPy array can be modified as follows:
1028

1029
    >>> import numpy as np
1030
    >>> G = nx.Graph([(1, 1)])
1031
    >>> A = nx.to_numpy_array(G)
1032
    >>> A
1033
    array([[1.]])
1034
    >>> A[np.diag_indices_from(A)] *= 2
1035
    >>> A
1036
    array([[2.]])
1037

1038
    Examples
1039
    --------
1040
    >>> G = nx.MultiDiGraph()
1041
    >>> G.add_edge(0, 1, weight=2)
1042
    0
1043
    >>> G.add_edge(1, 0)
1044
    0
1045
    >>> G.add_edge(2, 2, weight=3)
1046
    0
1047
    >>> G.add_edge(2, 2)
1048
    1
1049
    >>> nx.to_numpy_array(G, nodelist=[0, 1, 2])
1050
    array([[0., 2., 0.],
1051
           [1., 0., 0.],
1052
           [0., 0., 4.]])
1053

1054
    """
1055
    import numpy as np
1056

    
1057
    if nodelist is None:
1058
        nodelist = list(G)
1059
    nodeset = set(nodelist)
1060
    if len(nodelist) != len(nodeset):
1061
        msg = "Ambiguous ordering: `nodelist` contained duplicates."
1062
        raise nx.NetworkXError(msg)
1063

    
1064
    nlen = len(nodelist)
1065
    undirected = not G.is_directed()
1066
    index = dict(zip(nodelist, range(nlen)))
1067

    
1068
    # Initially, we start with an array of nans.  Then we populate the array
1069
    # using data from the graph.  Afterwards, any leftover nans will be
1070
    # converted to the value of `nonedge`.  Note, we use nans initially,
1071
    # instead of zero, for two reasons:
1072
    #
1073
    #   1) It can be important to distinguish a real edge with the value 0
1074
    #      from a nonedge with the value 0.
1075
    #
1076
    #   2) When working with multi(di)graphs, we must combine the values of all
1077
    #      edges between any two nodes in some manner.  This often takes the
1078
    #      form of a sum, min, or max.  Using the value 0 for a nonedge would
1079
    #      have undesirable effects with min and max, but using nanmin and
1080
    #      nanmax with initially nan values is not problematic at all.
1081
    #
1082
    # That said, there are still some drawbacks to this approach. Namely, if
1083
    # a real edge is nan, then that value is a) not distinguishable from
1084
    # nonedges and b) is ignored by the default combinator (nansum, nanmin,
1085
    # nanmax) functions used for multi(di)graphs. If this becomes an issue,
1086
    # an alternative approach is to use masked arrays.  Initially, every
1087
    # element is masked and set to some `initial` value. As we populate the
1088
    # graph, elements are unmasked (automatically) when we combine the initial
1089
    # value with the values given by real edges.  At the end, we convert all
1090
    # masked values to `nonedge`. Using masked arrays fully addresses reason 1,
1091
    # but for reason 2, we would still have the issue with min and max if the
1092
    # initial values were 0.0.  Note: an initial value of +inf is appropriate
1093
    # for min, while an initial value of -inf is appropriate for max. When
1094
    # working with sum, an initial value of zero is appropriate. Ideally then,
1095
    # we'd want to allow users to specify both a value for nonedges and also
1096
    # an initial value.  For multi(di)graphs, the choice of the initial value
1097
    # will, in general, depend on the combinator function---sensible defaults
1098
    # can be provided.
1099

    
1100
    if G.is_multigraph():
1101
        # Handle MultiGraphs and MultiDiGraphs
1102
        A = np.full((nlen, nlen), np.nan, order=order)
1103
        # use numpy nan-aware operations
1104
        operator = {sum: np.nansum, min: np.nanmin, max: np.nanmax}
1105
        try:
1106
            op = operator[multigraph_weight]
1107
        except:
1108
            raise ValueError('multigraph_weight must be sum, min, or max')
1109

    
1110
        for u, v, attrs in G.edges(data=True):
1111
            if (u in nodeset) and (v in nodeset):
1112
                i, j = index[u], index[v]
1113
                e_weight = attrs.get(weight, 1)
1114
                A[i, j] = op([e_weight, A[i, j]])
1115
                if undirected:
1116
                    A[j, i] = A[i, j]
1117
    else:
1118
        # Graph or DiGraph, this is much faster than above
1119
        A = np.full((nlen, nlen), np.nan, order=order)
1120
        for u, nbrdict in G.adjacency():
1121
            for v, d in nbrdict.items():
1122
                try:
1123
                    A[index[u], index[v]] = d.get(weight, 1)
1124
                except KeyError:
1125
                    # This occurs when there are fewer desired nodes than
1126
                    # there are nodes in the graph: len(nodelist) < len(G)
1127
                    pass
1128

    
1129
    A[np.isnan(A)] = nonedge
1130
    A = np.asarray(A, dtype=dtype)
1131
    return A
1132

    
1133

    
1134
def from_numpy_array(A, parallel_edges=False, create_using=None):
1135
    """Returns a graph from NumPy array.
1136

1137
    The NumPy array is interpreted as an adjacency matrix for the graph.
1138

1139
    Parameters
1140
    ----------
1141
    A : NumPy ndarray
1142
        An adjacency matrix representation of a graph
1143

1144
    parallel_edges : Boolean
1145
        If this is True, `create_using` is a multigraph, and `A` is an
1146
        integer array, then entry *(i, j)* in the array is interpreted as the
1147
        number of parallel edges joining vertices *i* and *j* in the graph.
1148
        If it is False, then the entries in the array are interpreted as
1149
        the weight of a single edge joining the vertices.
1150

1151
    create_using : NetworkX graph constructor, optional (default=nx.Graph)
1152
       Graph type to create. If graph instance, then cleared before populated.
1153

1154
    Notes
1155
    -----
1156
    If `create_using` is :class:`networkx.MultiGraph` or
1157
    :class:`networkx.MultiDiGraph`, `parallel_edges` is True, and the
1158
    entries of `A` are of type :class:`int`, then this function returns a
1159
    multigraph (of the same type as `create_using`) with parallel edges.
1160

1161
    If `create_using` indicates an undirected multigraph, then only the edges
1162
    indicated by the upper triangle of the array `A` will be added to the
1163
    graph.
1164

1165
    If the NumPy array has a single data type for each array entry it
1166
    will be converted to an appropriate Python data type.
1167

1168
    If the NumPy array has a user-specified compound data type the names
1169
    of the data fields will be used as attribute keys in the resulting
1170
    NetworkX graph.
1171

1172
    See Also
1173
    --------
1174
    to_numpy_array
1175

1176
    Examples
1177
    --------
1178
    Simple integer weights on edges:
1179

1180
    >>> import numpy as np
1181
    >>> A = np.array([[1, 1], [2, 1]])
1182
    >>> G = nx.from_numpy_array(A)
1183
    >>> G.edges(data=True)
1184
    EdgeDataView([(0, 0, {'weight': 1}), (0, 1, {'weight': 2}), (1, 1, {'weight': 1})])
1185

1186
    If `create_using` indicates a multigraph and the array has only integer
1187
    entries and `parallel_edges` is False, then the entries will be treated
1188
    as weights for edges joining the nodes (without creating parallel edges):
1189

1190
    >>> A = np.array([[1, 1], [1, 2]])
1191
    >>> G = nx.from_numpy_array(A, create_using=nx.MultiGraph)
1192
    >>> G[1][1]
1193
    AtlasView({0: {'weight': 2}})
1194

1195
    If `create_using` indicates a multigraph and the array has only integer
1196
    entries and `parallel_edges` is True, then the entries will be treated
1197
    as the number of parallel edges joining those two vertices:
1198

1199
    >>> A = np.array([[1, 1], [1, 2]])
1200
    >>> temp = nx.MultiGraph()
1201
    >>> G = nx.from_numpy_array(A, parallel_edges=True, create_using=temp)
1202
    >>> G[1][1]
1203
    AtlasView({0: {'weight': 1}, 1: {'weight': 1}})
1204

1205
    User defined compound data type on edges:
1206

1207
    >>> dt = [('weight', float), ('cost', int)]
1208
    >>> A = np.array([[(1.0, 2)]], dtype=dt)
1209
    >>> G = nx.from_numpy_array(A)
1210
    >>> G.edges()
1211
    EdgeView([(0, 0)])
1212
    >>> G[0][0]['cost']
1213
    2
1214
    >>> G[0][0]['weight']
1215
    1.0
1216

1217
    """
1218
    return from_numpy_matrix(A, parallel_edges=parallel_edges,
1219
                             create_using=create_using)
1220

    
1221

    
1222
# fixture for nose tests
1223
def setup_module(module):
1224
    from nose import SkipTest
1225
    try:
1226
        import numpy
1227
    except:
1228
        raise SkipTest("NumPy not available")
1229
    try:
1230
        import scipy
1231
    except:
1232
        raise SkipTest("SciPy not available")
1233
    try:
1234
        import pandas
1235
    except:
1236
        raise SkipTest("Pandas not available")