Statistics
| Branch: | Revision:

iof-tools / networkxMiCe / networkx-master / examples / graph / plot_words.py @ 5cef0f13

History | View | Annotate | Download (2.75 KB)

1
"""
2
=====
3
Words
4
=====
5

6
Words/Ladder Graph
7
------------------
8
Generate  an undirected graph over the 5757 5-letter words in the
9
datafile `words_dat.txt.gz`.  Two words are connected by an edge
10
if they differ in one letter, resulting in 14,135 edges. This example
11
is described in Section 1.1 in Knuth's book (see [1]_ and [2]_).
12

13
References
14
----------
15
.. [1] Donald E. Knuth,
16
   "The Stanford GraphBase: A Platform for Combinatorial Computing",
17
   ACM Press, New York, 1993.
18
.. [2] http://www-cs-faculty.stanford.edu/~knuth/sgb.html
19
"""
20
# Authors: Aric Hagberg (hagberg@lanl.gov),
21
#          Brendt Wohlberg,
22
#          hughdbrown@yahoo.com
23

    
24
#    Copyright (C) 2004-2019 by
25
#    Aric Hagberg <hagberg@lanl.gov>
26
#    Dan Schult <dschult@colgate.edu>
27
#    Pieter Swart <swart@lanl.gov>
28
#    All rights reserved.
29
#    BSD license.
30

    
31
import gzip
32
from string import ascii_lowercase as lowercase
33

    
34
import networkx as nx
35

    
36
#-------------------------------------------------------------------
37
#   The Words/Ladder graph of Section 1.1
38
#-------------------------------------------------------------------
39

    
40

    
41
def generate_graph(words):
42
    G = nx.Graph(name="words")
43
    lookup = dict((c, lowercase.index(c)) for c in lowercase)
44

    
45
    def edit_distance_one(word):
46
        for i in range(len(word)):
47
            left, c, right = word[0:i], word[i], word[i + 1:]
48
            j = lookup[c]  # lowercase.index(c)
49
            for cc in lowercase[j + 1:]:
50
                yield left + cc + right
51
    candgen = ((word, cand) for word in sorted(words)
52
               for cand in edit_distance_one(word) if cand in words)
53
    G.add_nodes_from(words)
54
    for word, cand in candgen:
55
        G.add_edge(word, cand)
56
    return G
57

    
58

    
59
def words_graph():
60
    """Return the words example graph from the Stanford GraphBase"""
61
    fh = gzip.open('words_dat.txt.gz', 'r')
62
    words = set()
63
    for line in fh.readlines():
64
        line = line.decode()
65
        if line.startswith('*'):
66
            continue
67
        w = str(line[0:5])
68
        words.add(w)
69
    return generate_graph(words)
70

    
71

    
72
if __name__ == '__main__':
73
    G = words_graph()
74
    print("Loaded words_dat.txt containing 5757 five-letter English words.")
75
    print("Two words are connected if they differ in one letter.")
76
    print("Graph has %d nodes with %d edges"
77
          % (nx.number_of_nodes(G), nx.number_of_edges(G)))
78
    print("%d connected components" % nx.number_connected_components(G))
79

    
80
    for (source, target) in [('chaos', 'order'),
81
                             ('nodes', 'graph'),
82
                             ('pound', 'marks')]:
83
        print("Shortest path between %s and %s is" % (source, target))
84
        try:
85
            sp = nx.shortest_path(G, source, target)
86
            for n in sp:
87
                print(n)
88
        except nx.NetworkXNoPath:
89
            print("None")