Statistics
| Branch: | Revision:

## iof-tools / networkxMiCe / networkx-master / networkx / algorithms / community / modularity_max.py @ 5cef0f13

 1 ```# modularity_max.py - functions for finding communities based on modularity ``` ```# ``` ```# Copyright 2018 Edward L. Platt ``` ```# ``` ```# This file is part of NetworkX ``` ```# ``` ```# NetworkX is distributed under a BSD license; see LICENSE.txt for more ``` ```# information. ``` ```# ``` ```# Authors: ``` ```# Edward L. Platt ``` ```# ``` ```# TODO: ``` ```# - Alter equations for weighted case ``` ```# - Write tests for weighted case ``` ```"""Functions for detecting communities based on modularity. ``` ```""" ``` ```from __future__ import division ``` ```import networkx as nx ``` ```from networkx.algorithms.community.quality import modularity ``` ```from networkx.utils.mapped_queue import MappedQueue ``` ```__all__ = [ ``` ``` 'greedy_modularity_communities', ``` ``` '_naive_greedy_modularity_communities'] ``` ```def greedy_modularity_communities(G, weight=None): ``` ``` """Find communities in graph using Clauset-Newman-Moore greedy modularity ``` ``` maximization. This method currently supports the Graph class and does not ``` ``` consider edge weights. ``` ``` ``` ``` Greedy modularity maximization begins with each node in its own community ``` ``` and joins the pair of communities that most increases modularity until no ``` ``` such pair exists. ``` ``` ``` ``` Parameters ``` ``` ---------- ``` ``` G : NetworkX graph ``` ``` ``` ``` Returns ``` ``` ------- ``` ``` Yields sets of nodes, one for each community. ``` ``` ``` ``` Examples ``` ``` -------- ``` ``` >>> from networkx.algorithms.community import greedy_modularity_communities ``` ``` >>> G = nx.karate_club_graph() ``` ``` >>> c = list(greedy_modularity_communities(G)) ``` ``` >>> sorted(c) ``` ``` [8, 14, 15, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33] ``` ``` ``` ``` References ``` ``` ---------- ``` ``` ..  M. E. J Newman 'Networks: An Introduction', page 224 ``` ``` Oxford University Press 2011. ``` ``` ..  Clauset, A., Newman, M. E., & Moore, C. ``` ``` "Finding community structure in very large networks." ``` ``` Physical Review E 70(6), 2004. ``` ``` """ ``` ``` # Count nodes and edges ``` ``` N = len(G.nodes()) ``` ``` m = sum([d.get('weight', 1) for u, v, d in G.edges(data=True)]) ``` ``` q0 = 1.0 / (2.0*m) ``` ``` # Map node labels to contiguous integers ``` ``` label_for_node = dict((i, v) for i, v in enumerate(G.nodes())) ``` ``` node_for_label = dict((label_for_node[i], i) for i in range(N)) ``` ``` # Calculate degrees ``` ``` k_for_label = G.degree(G.nodes(), weight=weight) ``` ``` k = [k_for_label[label_for_node[i]] for i in range(N)] ``` ``` # Initialize community and merge lists ``` ``` communities = dict((i, frozenset([i])) for i in range(N)) ``` ``` merges = [] ``` ``` # Initial modularity ``` ``` partition = [[label_for_node[x] for x in c] for c in communities.values()] ``` ``` q_cnm = modularity(G, partition) ``` ``` # Initialize data structures ``` ``` # CNM Eq 8-9 (Eq 8 was missing a factor of 2 (from A_ij + A_ji) ``` ``` # a[i]: fraction of edges within community i ``` ``` # dq_dict[i][j]: dQ for merging community i, j ``` ``` # dq_heap[i][n] : (-dq, i, j) for communitiy i nth largest dQ ``` ``` # H[n]: (-dq, i, j) for community with nth largest max_j(dQ_ij) ``` ``` a = [k[i]*q0 for i in range(N)] ``` ``` dq_dict = dict( ``` ``` (i, dict( ``` ``` (j, 2*q0 - 2*k[i]*k[j]*q0*q0) ``` ``` for j in [ ``` ``` node_for_label[u] ``` ``` for u in G.neighbors(label_for_node[i])] ``` ``` if j != i)) ``` ``` for i in range(N)) ``` ``` dq_heap = [ ``` ``` MappedQueue([ ``` ``` (-dq, i, j) ``` ``` for j, dq in dq_dict[i].items()]) ``` ``` for i in range(N)] ``` ``` H = MappedQueue([ ``` ``` dq_heap[i].h ``` ``` for i in range(N) ``` ``` if len(dq_heap[i]) > 0]) ``` ``` # Merge communities until we can't improve modularity ``` ``` while len(H) > 1: ``` ``` # Find best merge ``` ``` # Remove from heap of row maxes ``` ``` # Ties will be broken by choosing the pair with lowest min community id ``` ``` try: ``` ``` dq, i, j = H.pop() ``` ``` except IndexError: ``` ``` break ``` ``` dq = -dq ``` ``` # Remove best merge from row i heap ``` ``` dq_heap[i].pop() ``` ``` # Push new row max onto H ``` ``` if len(dq_heap[i]) > 0: ``` ``` H.push(dq_heap[i].h) ``` ``` # If this element was also at the root of row j, we need to remove the ``` ``` # duplicate entry from H ``` ``` if dq_heap[j].h == (-dq, j, i): ``` ``` H.remove((-dq, j, i)) ``` ``` # Remove best merge from row j heap ``` ``` dq_heap[j].remove((-dq, j, i)) ``` ``` # Push new row max onto H ``` ``` if len(dq_heap[j]) > 0: ``` ``` H.push(dq_heap[j].h) ``` ``` else: ``` ``` # Duplicate wasn't in H, just remove from row j heap ``` ``` dq_heap[j].remove((-dq, j, i)) ``` ``` # Stop when change is non-positive ``` ``` if dq <= 0: ``` ``` break ``` ``` # Perform merge ``` ``` communities[j] = frozenset(communities[i] | communities[j]) ``` ``` del communities[i] ``` ``` merges.append((i, j, dq)) ``` ``` # New modularity ``` ``` q_cnm += dq ``` ``` # Get list of communities connected to merged communities ``` ``` i_set = set(dq_dict[i].keys()) ``` ``` j_set = set(dq_dict[j].keys()) ``` ``` all_set = (i_set | j_set) - set([i, j]) ``` ``` both_set = i_set & j_set ``` ``` # Merge i into j and update dQ ``` ``` for k in all_set: ``` ``` # Calculate new dq value ``` ``` if k in both_set: ``` ``` dq_jk = dq_dict[j][k] + dq_dict[i][k] ``` ``` elif k in j_set: ``` ``` dq_jk = dq_dict[j][k] - 2.0*a[i]*a[k] ``` ``` else: ``` ``` # k in i_set ``` ``` dq_jk = dq_dict[i][k] - 2.0*a[j]*a[k] ``` ``` # Update rows j and k ``` ``` for row, col in [(j, k), (k, j)]: ``` ``` # Save old value for finding heap index ``` ``` if k in j_set: ``` ``` d_old = (-dq_dict[row][col], row, col) ``` ``` else: ``` ``` d_old = None ``` ``` # Update dict for j,k only (i is removed below) ``` ``` dq_dict[row][col] = dq_jk ``` ``` # Save old max of per-row heap ``` ``` if len(dq_heap[row]) > 0: ``` ``` d_oldmax = dq_heap[row].h ``` ``` else: ``` ``` d_oldmax = None ``` ``` # Add/update heaps ``` ``` d = (-dq_jk, row, col) ``` ``` if d_old is None: ``` ``` # We're creating a new nonzero element, add to heap ``` ``` dq_heap[row].push(d) ``` ``` else: ``` ``` # Update existing element in per-row heap ``` ``` dq_heap[row].update(d_old, d) ``` ``` # Update heap of row maxes if necessary ``` ``` if d_oldmax is None: ``` ``` # No entries previously in this row, push new max ``` ``` H.push(d) ``` ``` else: ``` ``` # We've updated an entry in this row, has the max changed? ``` ``` if dq_heap[row].h != d_oldmax: ``` ``` H.update(d_oldmax, dq_heap[row].h) ``` ``` # Remove row/col i from matrix ``` ``` i_neighbors = dq_dict[i].keys() ``` ``` for k in i_neighbors: ``` ``` # Remove from dict ``` ``` dq_old = dq_dict[k][i] ``` ``` del dq_dict[k][i] ``` ``` # Remove from heaps if we haven't already ``` ``` if k != j: ``` ``` # Remove both row and column ``` ``` for row, col in [(k, i), (i, k)]: ``` ``` # Check if replaced dq is row max ``` ``` d_old = (-dq_old, row, col) ``` ``` if dq_heap[row].h == d_old: ``` ``` # Update per-row heap and heap of row maxes ``` ``` dq_heap[row].remove(d_old) ``` ``` H.remove(d_old) ``` ``` # Update row max ``` ``` if len(dq_heap[row]) > 0: ``` ``` H.push(dq_heap[row].h) ``` ``` else: ``` ``` # Only update per-row heap ``` ``` dq_heap[row].remove(d_old) ``` ``` del dq_dict[i] ``` ``` # Mark row i as deleted, but keep placeholder ``` ``` dq_heap[i] = MappedQueue() ``` ``` # Merge i into j and update a ``` ``` a[j] += a[i] ``` ``` a[i] = 0 ``` ``` communities = [ ``` ``` frozenset([label_for_node[i] for i in c]) ``` ``` for c in communities.values()] ``` ``` return sorted(communities, key=len, reverse=True) ``` ```def _naive_greedy_modularity_communities(G): ``` ``` """Find communities in graph using the greedy modularity maximization. ``` ``` This implementation is O(n^4), much slower than alternatives, but it is ``` ``` provided as an easy-to-understand reference implementation. ``` ``` """ ``` ``` # First create one community for each node ``` ``` communities = list([frozenset([u]) for u in G.nodes()]) ``` ``` # Track merges ``` ``` merges = [] ``` ``` # Greedily merge communities until no improvement is possible ``` ``` old_modularity = None ``` ``` new_modularity = modularity(G, communities) ``` ``` while old_modularity is None or new_modularity > old_modularity: ``` ``` # Save modularity for comparison ``` ``` old_modularity = new_modularity ``` ``` # Find best pair to merge ``` ``` trial_communities = list(communities) ``` ``` to_merge = None ``` ``` for i, u in enumerate(communities): ``` ``` for j, v in enumerate(communities): ``` ``` # Skip i=j and empty communities ``` ``` if j <= i or len(u) == 0 or len(v) == 0: ``` ``` continue ``` ``` # Merge communities u and v ``` ``` trial_communities[j] = u | v ``` ``` trial_communities[i] = frozenset([]) ``` ``` trial_modularity = modularity(G, trial_communities) ``` ``` if trial_modularity >= new_modularity: ``` ``` # Check if strictly better or tie ``` ``` if trial_modularity > new_modularity: ``` ``` # Found new best, save modularity and group indexes ``` ``` new_modularity = trial_modularity ``` ``` to_merge = (i, j, new_modularity - old_modularity) ``` ``` elif ( ``` ``` to_merge and ``` ``` min(i, j) < min(to_merge, to_merge) ``` ``` ): ``` ``` # Break ties by choosing pair with lowest min id ``` ``` new_modularity = trial_modularity ``` ``` to_merge = (i, j, new_modularity - old_modularity) ``` ``` # Un-merge ``` ``` trial_communities[i] = u ``` ``` trial_communities[j] = v ``` ``` if to_merge is not None: ``` ``` # If the best merge improves modularity, use it ``` ``` merges.append(to_merge) ``` ``` i, j, dq = to_merge ``` ``` u, v = communities[i], communities[j] ``` ``` communities[j] = u | v ``` ``` communities[i] = frozenset([]) ``` ``` # Remove empty communities and sort ``` ``` communities = [c for c in communities if len(c) > 0] ``` ``` for com in sorted(communities, key=lambda x: len(x), reverse=True): ``` ``` yield com ```