root / globecomm / metrics / percentage_overlap.py @ fac6e5a4
History  View  Annotate  Download (733 Bytes)
1 
from __future__ import division 

2 
from pdb import set_trace as debugger 
3  
4 
# Names exported by a star-import of this module.
__all__ = ['percentage_overlap']

5  
6 
def percentage_overlap(matrices, top_k=1):
    """Return the percentage of leading indices shared by every row.

    Each row of *matrices* is ranked by ascending value (ties keep their
    original order) and the indices of its first *top_k* entries are
    collected.  The result is the size of the intersection of those index
    sets across all rows, expressed as a percentage of *top_k*.

    Args:
        matrices: Sequence of rows; each row is an indexable, sized
            sequence of mutually comparable values.
        top_k: Number of leading (smallest-valued) indices taken from each
            row.  Must be at least 1 whenever two or more rows are given.

    Returns:
        A float between 0.0 and 100.0.  With fewer than two rows there is
        nothing to compare, so the overlap is reported as 100.0.  Note that
        a row shorter than *top_k* contributes fewer than *top_k* indices,
        which caps the achievable percentage below 100.

    Raises:
        ValueError: If *top_k* is smaller than 1 and at least two rows are
            compared (a zero or negative value has no meaningful
            percentage).
    """
    num_of_rows = len(matrices)
    if num_of_rows < 2:
        return 100.0

    # Validate after the trivial case so inputs that used to return 100
    # keep doing so; only calls that previously crashed or produced a
    # negative percentage are rejected.
    if top_k < 1:
        raise ValueError(
            "top_k must be a positive integer, got %r" % (top_k,))

    def leading_indices(row):
        # Indices of the `top_k` smallest values; sorted() is stable, so
        # equal values are ranked by their position in the row.
        ranked = sorted(range(len(row)), key=lambda i: row[i])
        return set(ranked[:top_k])

    shared = leading_indices(matrices[0])
    for r in range(1, num_of_rows):
        if not shared:
            # An empty intersection can never grow again.
            break
        shared &= leading_indices(matrices[r])

    return len(shared) * 100. / top_k
25  
26 
def _sort_with_index(arr):
    """Return the positions of *arr* ordered by ascending element value.

    Positions holding equal values keep their original relative order.
    """
    positions = list(range(len(arr)))
    positions.sort(key=arr.__getitem__)
    return positions
28 