Statistics
| Branch: | Revision:

root / globecomm / metrics / percentage_overlap.py @ fac6e5a4

History | View | Annotate | Download (733 Bytes)

1
from __future__ import division
2
from pdb import set_trace as debugger
3

    
4
__all__ = ['percentage_overlap']
5

    
6
def percentage_overlap(matrices, top_k=1):
7
    num_of_rows = len(matrices)
8
    # debugger()
9
    if num_of_rows < 2:
10
        return 100
11

    
12
    x_sorted_indices = _sort_with_index(matrices[0])
13

    
14
    intersection_set = set(x_sorted_indices[:top_k])
15
    for r in range(1, num_of_rows):
16
        y_sorted_indices = _sort_with_index(matrices[r])
17

    
18
        # get top k
19
        y_top_k = set(y_sorted_indices[:top_k])
20

    
21
        # debugger()
22
        intersection_set = intersection_set.intersection(y_top_k)
23

    
24
    return len(intersection_set) * 100. / top_k
25

    
26
def _sort_with_index(arr):
27
    return sorted(range(len(arr)), key=lambda k: arr[k])
28