Statistics
| Branch: | Revision:

iof-tools / networkxMiCe / networkx-master / networkx / algorithms / community / label_propagation.py @ 5cef0f13

History | View | Annotate | Download (6.9 KB)

1
# -*- coding: utf-8 -*-
2
#    Copyright (C) 2015-2019 Aitor Almeida
3
#    All rights reserved.
4
#    BSD license.
5
#
6
# Author:   Aitor Almeida <aitoralmeida@gmail.com>
7
"""
8
Label propagation community detection algorithms.
9
"""
10
from collections import Counter
11

    
12
import networkx as nx
13
from networkx.utils import groups
14
from networkx.utils import not_implemented_for
15
from networkx.utils import py_random_state
16

    
17
__all__ = ['label_propagation_communities', 'asyn_lpa_communities']
18

    
19

    
20
@py_random_state(2)
21
def asyn_lpa_communities(G, weight=None, seed=None):
22
    """Returns communities in `G` as detected by asynchronous label
23
    propagation.
24

25
    The asynchronous label propagation algorithm is described in
26
    [1]_. The algorithm is probabilistic and the found communities may
27
    vary on different executions.
28

29
    The algorithm proceeds as follows. After initializing each node with
30
    a unique label, the algorithm repeatedly sets the label of a node to
31
    be the label that appears most frequently among that nodes
32
    neighbors. The algorithm halts when each node has the label that
33
    appears most frequently among its neighbors. The algorithm is
34
    asynchronous because each node is updated without waiting for
35
    updates on the remaining nodes.
36

37
    This generalized version of the algorithm in [1]_ accepts edge
38
    weights.
39

40
    Parameters
41
    ----------
42
    G : Graph
43

44
    weight : string
45
        The edge attribute representing the weight of an edge.
46
        If None, each edge is assumed to have weight one. In this
47
        algorithm, the weight of an edge is used in determining the
48
        frequency with which a label appears among the neighbors of a
49
        node: a higher weight means the label appears more often.
50

51
    seed : integer, random_state, or None (default)
52
        Indicator of random number generation state.
53
        See :ref:`Randomness<randomness>`.
54

55
    Returns
56
    -------
57
    communities : iterable
58
        Iterable of communities given as sets of nodes.
59

60
    Notes
61
    ------
62
    Edge weight attributes must be numerical.
63

64
    References
65
    ----------
66
    .. [1] Raghavan, Usha Nandini, RĂ©ka Albert, and Soundar Kumara. "Near
67
           linear time algorithm to detect community structures in large-scale
68
           networks." Physical Review E 76.3 (2007): 036106.
69
    """
70

    
71
    labels = {n: i for i, n in enumerate(G)}
72
    cont = True
73
    while cont:
74
        cont = False
75
        nodes = list(G)
76
        seed.shuffle(nodes)
77
        # Calculate the label for each node
78
        for node in nodes:
79
            if len(G[node]) < 1:
80
                continue
81

    
82
            # Get label frequencies. Depending on the order they are processed
83
            # in some nodes with be in t and others in t-1, making the
84
            # algorithm asynchronous.
85
            label_freq = Counter()
86
            for v in G[node]:
87
                label_freq.update({labels[v]: G.edges[v, node][weight]
88
                                   if weight else 1})
89
            # Choose the label with the highest frecuency. If more than 1 label
90
            # has the highest frecuency choose one randomly.
91
            max_freq = max(label_freq.values())
92
            best_labels = [label for label, freq in label_freq.items()
93
                           if freq == max_freq]
94
            new_label = seed.choice(best_labels)
95
            labels[node] = new_label
96
            # Continue until all nodes have a label that is better than other
97
            # neighbour labels (only one label has max_freq for each node).
98
            cont = cont or len(best_labels) > 1
99

    
100
    # TODO In Python 3.3 or later, this should be `yield from ...`.
101
    return iter(groups(labels).values())
102

    
103

    
104
@not_implemented_for('directed')
105
def label_propagation_communities(G):
106
    """Generates community sets determined by label propagation
107

108
    Finds communities in `G` using a semi-synchronous label propagation
109
    method[1]_. This method combines the advantages of both the synchronous
110
    and asynchronous models. Not implemented for directed graphs.
111

112
    Parameters
113
    ----------
114
    G : graph
115
        An undirected NetworkX graph.
116

117
    Yields
118
    ------
119
    communities : generator
120
        Yields sets of the nodes in each community.
121

122
    Raises
123
    ------
124
    NetworkXNotImplemented
125
       If the graph is directed
126

127
    References
128
    ----------
129
    .. [1] Cordasco, G., & Gargano, L. (2010, December). Community detection
130
       via semi-synchronous label propagation algorithms. In Business
131
       Applications of Social Network Analysis (BASNA), 2010 IEEE International
132
       Workshop on (pp. 1-8). IEEE.
133
    """
134
    coloring = _color_network(G)
135
    # Create a unique label for each node in the graph
136
    labeling = {v: k for k, v in enumerate(G)}
137
    while not _labeling_complete(labeling, G):
138
        # Update the labels of every node with the same color.
139
        for color, nodes in coloring.items():
140
            for n in nodes:
141
                _update_label(n, labeling, G)
142

    
143
    for label in set(labeling.values()):
144
        yield set((x for x in labeling if labeling[x] == label))
145

    
146

    
147
def _color_network(G):
148
    """Colors the network so that neighboring nodes all have distinct colors.
149

150
       Returns a dict keyed by color to a set of nodes with that color.
151
    """
152
    coloring = dict()  # color => set(node)
153
    colors = nx.coloring.greedy_color(G)
154
    for node, color in colors.items():
155
        if color in coloring:
156
            coloring[color].add(node)
157
        else:
158
            coloring[color] = set([node])
159
    return coloring
160

    
161

    
162
def _labeling_complete(labeling, G):
163
    """Determines whether or not LPA is done.
164

165
       Label propagation is complete when all nodes have a label that is
166
       in the set of highest frequency labels amongst its neighbors.
167

168
       Nodes with no neighbors are considered complete.
169
    """
170
    return all(labeling[v] in _most_frequent_labels(v, labeling, G)
171
               for v in G if len(G[v]) > 0)
172

    
173

    
174
def _most_frequent_labels(node, labeling, G):
175
    """Returns a set of all labels with maximum frequency in `labeling`.
176

177
       Input `labeling` should be a dict keyed by node to labels.
178
    """
179
    if not G[node]:
180
        # Nodes with no neighbors are themselves a community and are labeled
181
        # accordingly, hence the immediate if statement.
182
        return {labeling[node]}
183

    
184
    # Compute the frequencies of all neighbours of node
185
    freqs = Counter(labeling[q] for q in G[node])
186
    max_freq = max(freqs.values())
187
    return {label for label, freq in freqs.items() if freq == max_freq}
188

    
189

    
190
def _update_label(node, labeling, G):
191
    """Updates the label of a node using the Prec-Max tie breaking algorithm
192

193
       The algorithm is explained in: 'Community Detection via Semi-Synchronous
194
       Label Propagation Algorithms' Cordasco and Gargano, 2011
195
    """
196
    high_labels = _most_frequent_labels(node, labeling, G)
197
    if len(high_labels) == 1:
198
        labeling[node] = high_labels.pop()
199
    elif len(high_labels) > 1:
200
        # Prec-Max
201
        if labeling[node] not in high_labels:
202
            labeling[node] = max(high_labels)