Statistics
| Branch: | Revision:

iof-tools / networkxMiCe / networkx-master / networkx / algorithms / link_prediction.py @ 5cef0f13

History | View | Annotate | Download (16.4 KB)

1
"""
2
Link prediction algorithms.
3
"""
4

    
5
from __future__ import division
6

    
7
from math import log
8

    
9
import networkx as nx
10
from networkx.utils import not_implemented_for
11

    
12
__all__ = ['resource_allocation_index',
13
           'jaccard_coefficient',
14
           'adamic_adar_index',
15
           'preferential_attachment',
16
           'cn_soundarajan_hopcroft',
17
           'ra_index_soundarajan_hopcroft',
18
           'within_inter_cluster']
19

    
20

    
21
def _apply_prediction(G, func, ebunch=None):
22
    """Applies the given function to each edge in the specified iterable
23
    of edges.
24

25
    `G` is an instance of :class:`networkx.Graph`.
26

27
    `func` is a function on two inputs, each of which is a node in the
28
    graph. The function can return anything, but it should return a
29
    value representing a prediction of the likelihood of a "link"
30
    joining the two nodes.
31

32
    `ebunch` is an iterable of pairs of nodes. If not specified, all
33
    non-edges in the graph `G` will be used.
34

35
    """
36
    if ebunch is None:
37
        ebunch = nx.non_edges(G)
38
    return ((u, v, func(u, v)) for u, v in ebunch)
39

    
40

    
41
@not_implemented_for('directed')
42
@not_implemented_for('multigraph')
43
def resource_allocation_index(G, ebunch=None):
44
    r"""Compute the resource allocation index of all node pairs in ebunch.
45

46
    Resource allocation index of `u` and `v` is defined as
47

48
    .. math::
49

50
        \sum_{w \in \Gamma(u) \cap \Gamma(v)} \frac{1}{|\Gamma(w)|}
51

52
    where $\Gamma(u)$ denotes the set of neighbors of $u$.
53

54
    Parameters
55
    ----------
56
    G : graph
57
        A NetworkX undirected graph.
58

59
    ebunch : iterable of node pairs, optional (default = None)
60
        Resource allocation index will be computed for each pair of
61
        nodes given in the iterable. The pairs must be given as
62
        2-tuples (u, v) where u and v are nodes in the graph. If ebunch
63
        is None then all non-existent edges in the graph will be used.
64
        Default value: None.
65

66
    Returns
67
    -------
68
    piter : iterator
69
        An iterator of 3-tuples in the form (u, v, p) where (u, v) is a
70
        pair of nodes and p is their resource allocation index.
71

72
    Examples
73
    --------
74
    >>> import networkx as nx
75
    >>> G = nx.complete_graph(5)
76
    >>> preds = nx.resource_allocation_index(G, [(0, 1), (2, 3)])
77
    >>> for u, v, p in preds:
78
    ...     '(%d, %d) -> %.8f' % (u, v, p)
79
    ...
80
    '(0, 1) -> 0.75000000'
81
    '(2, 3) -> 0.75000000'
82

83
    References
84
    ----------
85
    .. [1] T. Zhou, L. Lu, Y.-C. Zhang.
86
       Predicting missing links via local information.
87
       Eur. Phys. J. B 71 (2009) 623.
88
       https://arxiv.org/pdf/0901.0553.pdf
89
    """
90
    def predict(u, v):
91
        return sum(1 / G.degree(w) for w in nx.common_neighbors(G, u, v))
92
    return _apply_prediction(G, predict, ebunch)
93

    
94

    
95
@not_implemented_for('directed')
96
@not_implemented_for('multigraph')
97
def jaccard_coefficient(G, ebunch=None):
98
    r"""Compute the Jaccard coefficient of all node pairs in ebunch.
99

100
    Jaccard coefficient of nodes `u` and `v` is defined as
101

102
    .. math::
103

104
        \frac{|\Gamma(u) \cap \Gamma(v)|}{|\Gamma(u) \cup \Gamma(v)|}
105

106
    where $\Gamma(u)$ denotes the set of neighbors of $u$.
107

108
    Parameters
109
    ----------
110
    G : graph
111
        A NetworkX undirected graph.
112

113
    ebunch : iterable of node pairs, optional (default = None)
114
        Jaccard coefficient will be computed for each pair of nodes
115
        given in the iterable. The pairs must be given as 2-tuples
116
        (u, v) where u and v are nodes in the graph. If ebunch is None
117
        then all non-existent edges in the graph will be used.
118
        Default value: None.
119

120
    Returns
121
    -------
122
    piter : iterator
123
        An iterator of 3-tuples in the form (u, v, p) where (u, v) is a
124
        pair of nodes and p is their Jaccard coefficient.
125

126
    Examples
127
    --------
128
    >>> import networkx as nx
129
    >>> G = nx.complete_graph(5)
130
    >>> preds = nx.jaccard_coefficient(G, [(0, 1), (2, 3)])
131
    >>> for u, v, p in preds:
132
    ...     '(%d, %d) -> %.8f' % (u, v, p)
133
    ...
134
    '(0, 1) -> 0.60000000'
135
    '(2, 3) -> 0.60000000'
136

137
    References
138
    ----------
139
    .. [1] D. Liben-Nowell, J. Kleinberg.
140
           The Link Prediction Problem for Social Networks (2004).
141
           http://www.cs.cornell.edu/home/kleinber/link-pred.pdf
142
    """
143
    def predict(u, v):
144
        union_size = len(set(G[u]) | set(G[v]))
145
        if union_size == 0:
146
            return 0
147
        return len(list(nx.common_neighbors(G, u, v))) / union_size
148
    return _apply_prediction(G, predict, ebunch)
149

    
150

    
151
@not_implemented_for('directed')
152
@not_implemented_for('multigraph')
153
def adamic_adar_index(G, ebunch=None):
154
    r"""Compute the Adamic-Adar index of all node pairs in ebunch.
155

156
    Adamic-Adar index of `u` and `v` is defined as
157

158
    .. math::
159

160
        \sum_{w \in \Gamma(u) \cap \Gamma(v)} \frac{1}{\log |\Gamma(w)|}
161

162
    where $\Gamma(u)$ denotes the set of neighbors of $u$.
163
    This index leads to zero-division for nodes only connected via self-loops.
164
    It is intended to be used when no self-loops are present.
165

166
    Parameters
167
    ----------
168
    G : graph
169
        NetworkX undirected graph.
170

171
    ebunch : iterable of node pairs, optional (default = None)
172
        Adamic-Adar index will be computed for each pair of nodes given
173
        in the iterable. The pairs must be given as 2-tuples (u, v)
174
        where u and v are nodes in the graph. If ebunch is None then all
175
        non-existent edges in the graph will be used.
176
        Default value: None.
177

178
    Returns
179
    -------
180
    piter : iterator
181
        An iterator of 3-tuples in the form (u, v, p) where (u, v) is a
182
        pair of nodes and p is their Adamic-Adar index.
183

184
    Examples
185
    --------
186
    >>> import networkx as nx
187
    >>> G = nx.complete_graph(5)
188
    >>> preds = nx.adamic_adar_index(G, [(0, 1), (2, 3)])
189
    >>> for u, v, p in preds:
190
    ...     '(%d, %d) -> %.8f' % (u, v, p)
191
    ...
192
    '(0, 1) -> 2.16404256'
193
    '(2, 3) -> 2.16404256'
194

195
    References
196
    ----------
197
    .. [1] D. Liben-Nowell, J. Kleinberg.
198
           The Link Prediction Problem for Social Networks (2004).
199
           http://www.cs.cornell.edu/home/kleinber/link-pred.pdf
200
    """
201
    def predict(u, v):
202
        return sum(1 / log(G.degree(w)) for w in nx.common_neighbors(G, u, v))
203
    return _apply_prediction(G, predict, ebunch)
204

    
205

    
206
@not_implemented_for('directed')
207
@not_implemented_for('multigraph')
208
def preferential_attachment(G, ebunch=None):
209
    r"""Compute the preferential attachment score of all node pairs in ebunch.
210

211
    Preferential attachment score of `u` and `v` is defined as
212

213
    .. math::
214

215
        |\Gamma(u)| |\Gamma(v)|
216

217
    where $\Gamma(u)$ denotes the set of neighbors of $u$.
218

219
    Parameters
220
    ----------
221
    G : graph
222
        NetworkX undirected graph.
223

224
    ebunch : iterable of node pairs, optional (default = None)
225
        Preferential attachment score will be computed for each pair of
226
        nodes given in the iterable. The pairs must be given as
227
        2-tuples (u, v) where u and v are nodes in the graph. If ebunch
228
        is None then all non-existent edges in the graph will be used.
229
        Default value: None.
230

231
    Returns
232
    -------
233
    piter : iterator
234
        An iterator of 3-tuples in the form (u, v, p) where (u, v) is a
235
        pair of nodes and p is their preferential attachment score.
236

237
    Examples
238
    --------
239
    >>> import networkx as nx
240
    >>> G = nx.complete_graph(5)
241
    >>> preds = nx.preferential_attachment(G, [(0, 1), (2, 3)])
242
    >>> for u, v, p in preds:
243
    ...     '(%d, %d) -> %d' % (u, v, p)
244
    ...
245
    '(0, 1) -> 16'
246
    '(2, 3) -> 16'
247

248
    References
249
    ----------
250
    .. [1] D. Liben-Nowell, J. Kleinberg.
251
           The Link Prediction Problem for Social Networks (2004).
252
           http://www.cs.cornell.edu/home/kleinber/link-pred.pdf
253
    """
254
    def predict(u, v):
255
        return G.degree(u) * G.degree(v)
256
    return _apply_prediction(G, predict, ebunch)
257

    
258

    
259
@not_implemented_for('directed')
260
@not_implemented_for('multigraph')
261
def cn_soundarajan_hopcroft(G, ebunch=None, community='community'):
262
    r"""Count the number of common neighbors of all node pairs in ebunch
263
        using community information.
264

265
    For two nodes $u$ and $v$, this function computes the number of
266
    common neighbors and bonus one for each common neighbor belonging to
267
    the same community as $u$ and $v$. Mathematically,
268

269
    .. math::
270

271
        |\Gamma(u) \cap \Gamma(v)| + \sum_{w \in \Gamma(u) \cap \Gamma(v)} f(w)
272

273
    where $f(w)$ equals 1 if $w$ belongs to the same community as $u$
274
    and $v$ or 0 otherwise and $\Gamma(u)$ denotes the set of
275
    neighbors of $u$.
276

277
    Parameters
278
    ----------
279
    G : graph
280
        A NetworkX undirected graph.
281

282
    ebunch : iterable of node pairs, optional (default = None)
283
        The score will be computed for each pair of nodes given in the
284
        iterable. The pairs must be given as 2-tuples (u, v) where u
285
        and v are nodes in the graph. If ebunch is None then all
286
        non-existent edges in the graph will be used.
287
        Default value: None.
288

289
    community : string, optional (default = 'community')
290
        Nodes attribute name containing the community information.
291
        G[u][community] identifies which community u belongs to. Each
292
        node belongs to at most one community. Default value: 'community'.
293

294
    Returns
295
    -------
296
    piter : iterator
297
        An iterator of 3-tuples in the form (u, v, p) where (u, v) is a
298
        pair of nodes and p is their score.
299

300
    Examples
301
    --------
302
    >>> import networkx as nx
303
    >>> G = nx.path_graph(3)
304
    >>> G.nodes[0]['community'] = 0
305
    >>> G.nodes[1]['community'] = 0
306
    >>> G.nodes[2]['community'] = 0
307
    >>> preds = nx.cn_soundarajan_hopcroft(G, [(0, 2)])
308
    >>> for u, v, p in preds:
309
    ...     '(%d, %d) -> %d' % (u, v, p)
310
    '(0, 2) -> 2'
311

312
    References
313
    ----------
314
    .. [1] Sucheta Soundarajan and John Hopcroft.
315
       Using community information to improve the precision of link
316
       prediction methods.
317
       In Proceedings of the 21st international conference companion on
318
       World Wide Web (WWW '12 Companion). ACM, New York, NY, USA, 607-608.
319
       http://doi.acm.org/10.1145/2187980.2188150
320
    """
321
    def predict(u, v):
322
        Cu = _community(G, u, community)
323
        Cv = _community(G, v, community)
324
        cnbors = list(nx.common_neighbors(G, u, v))
325
        neighbors = (sum(_community(G, w, community) == Cu for w in cnbors)
326
                     if Cu == Cv else 0)
327
        return len(cnbors) + neighbors
328
    return _apply_prediction(G, predict, ebunch)
329

    
330

    
331
@not_implemented_for('directed')
332
@not_implemented_for('multigraph')
333
def ra_index_soundarajan_hopcroft(G, ebunch=None, community='community'):
334
    r"""Compute the resource allocation index of all node pairs in
335
    ebunch using community information.
336

337
    For two nodes $u$ and $v$, this function computes the resource
338
    allocation index considering only common neighbors belonging to the
339
    same community as $u$ and $v$. Mathematically,
340

341
    .. math::
342

343
        \sum_{w \in \Gamma(u) \cap \Gamma(v)} \frac{f(w)}{|\Gamma(w)|}
344

345
    where $f(w)$ equals 1 if $w$ belongs to the same community as $u$
346
    and $v$ or 0 otherwise and $\Gamma(u)$ denotes the set of
347
    neighbors of $u$.
348

349
    Parameters
350
    ----------
351
    G : graph
352
        A NetworkX undirected graph.
353

354
    ebunch : iterable of node pairs, optional (default = None)
355
        The score will be computed for each pair of nodes given in the
356
        iterable. The pairs must be given as 2-tuples (u, v) where u
357
        and v are nodes in the graph. If ebunch is None then all
358
        non-existent edges in the graph will be used.
359
        Default value: None.
360

361
    community : string, optional (default = 'community')
362
        Nodes attribute name containing the community information.
363
        G[u][community] identifies which community u belongs to. Each
364
        node belongs to at most one community. Default value: 'community'.
365

366
    Returns
367
    -------
368
    piter : iterator
369
        An iterator of 3-tuples in the form (u, v, p) where (u, v) is a
370
        pair of nodes and p is their score.
371

372
    Examples
373
    --------
374
    >>> import networkx as nx
375
    >>> G = nx.Graph()
376
    >>> G.add_edges_from([(0, 1), (0, 2), (1, 3), (2, 3)])
377
    >>> G.nodes[0]['community'] = 0
378
    >>> G.nodes[1]['community'] = 0
379
    >>> G.nodes[2]['community'] = 1
380
    >>> G.nodes[3]['community'] = 0
381
    >>> preds = nx.ra_index_soundarajan_hopcroft(G, [(0, 3)])
382
    >>> for u, v, p in preds:
383
    ...     '(%d, %d) -> %.8f' % (u, v, p)
384
    '(0, 3) -> 0.50000000'
385

386
    References
387
    ----------
388
    .. [1] Sucheta Soundarajan and John Hopcroft.
389
       Using community information to improve the precision of link
390
       prediction methods.
391
       In Proceedings of the 21st international conference companion on
392
       World Wide Web (WWW '12 Companion). ACM, New York, NY, USA, 607-608.
393
       http://doi.acm.org/10.1145/2187980.2188150
394
    """
395
    def predict(u, v):
396
        Cu = _community(G, u, community)
397
        Cv = _community(G, v, community)
398
        if Cu != Cv:
399
            return 0
400
        cnbors = nx.common_neighbors(G, u, v)
401
        return sum(1 / G.degree(w) for w in cnbors
402
                   if _community(G, w, community) == Cu)
403
    return _apply_prediction(G, predict, ebunch)
404

    
405

    
406
@not_implemented_for('directed')
407
@not_implemented_for('multigraph')
408
def within_inter_cluster(G, ebunch=None, delta=0.001, community='community'):
409
    """Compute the ratio of within- and inter-cluster common neighbors
410
    of all node pairs in ebunch.
411

412
    For two nodes `u` and `v`, if a common neighbor `w` belongs to the
413
    same community as them, `w` is considered as within-cluster common
414
    neighbor of `u` and `v`. Otherwise, it is considered as
415
    inter-cluster common neighbor of `u` and `v`. The ratio between the
416
    size of the set of within- and inter-cluster common neighbors is
417
    defined as the WIC measure. [1]_
418

419
    Parameters
420
    ----------
421
    G : graph
422
        A NetworkX undirected graph.
423

424
    ebunch : iterable of node pairs, optional (default = None)
425
        The WIC measure will be computed for each pair of nodes given in
426
        the iterable. The pairs must be given as 2-tuples (u, v) where
427
        u and v are nodes in the graph. If ebunch is None then all
428
        non-existent edges in the graph will be used.
429
        Default value: None.
430

431
    delta : float, optional (default = 0.001)
432
        Value to prevent division by zero in case there is no
433
        inter-cluster common neighbor between two nodes. See [1]_ for
434
        details. Default value: 0.001.
435

436
    community : string, optional (default = 'community')
437
        Nodes attribute name containing the community information.
438
        G[u][community] identifies which community u belongs to. Each
439
        node belongs to at most one community. Default value: 'community'.
440

441
    Returns
442
    -------
443
    piter : iterator
444
        An iterator of 3-tuples in the form (u, v, p) where (u, v) is a
445
        pair of nodes and p is their WIC measure.
446

447
    Examples
448
    --------
449
    >>> import networkx as nx
450
    >>> G = nx.Graph()
451
    >>> G.add_edges_from([(0, 1), (0, 2), (0, 3), (1, 4), (2, 4), (3, 4)])
452
    >>> G.nodes[0]['community'] = 0
453
    >>> G.nodes[1]['community'] = 1
454
    >>> G.nodes[2]['community'] = 0
455
    >>> G.nodes[3]['community'] = 0
456
    >>> G.nodes[4]['community'] = 0
457
    >>> preds = nx.within_inter_cluster(G, [(0, 4)])
458
    >>> for u, v, p in preds:
459
    ...     '(%d, %d) -> %.8f' % (u, v, p)
460
    ...
461
    '(0, 4) -> 1.99800200'
462
    >>> preds = nx.within_inter_cluster(G, [(0, 4)], delta=0.5)
463
    >>> for u, v, p in preds:
464
    ...     '(%d, %d) -> %.8f' % (u, v, p)
465
    ...
466
    '(0, 4) -> 1.33333333'
467

468
    References
469
    ----------
470
    .. [1] Jorge Carlos Valverde-Rebaza and Alneu de Andrade Lopes.
471
       Link prediction in complex networks based on cluster information.
472
       In Proceedings of the 21st Brazilian conference on Advances in
473
       Artificial Intelligence (SBIA'12)
474
       https://doi.org/10.1007/978-3-642-34459-6_10
475
    """
476
    if delta <= 0:
477
        raise nx.NetworkXAlgorithmError('Delta must be greater than zero')
478

    
479
    def predict(u, v):
480
        Cu = _community(G, u, community)
481
        Cv = _community(G, v, community)
482
        if Cu != Cv:
483
            return 0
484
        cnbors = set(nx.common_neighbors(G, u, v))
485
        within = set(w for w in cnbors
486
                     if _community(G, w, community) == Cu)
487
        inter = cnbors - within
488
        return len(within) / (len(inter) + delta)
489

    
490
    return _apply_prediction(G, predict, ebunch)
491

    
492

    
493
def _community(G, u, community):
494
    """Get the community of the given node."""
495
    node_u = G.nodes[u]
496
    try:
497
        return node_u[community]
498
    except KeyError:
499
        raise nx.NetworkXAlgorithmError('No community information')