# root / globecomm / analyse_snapshot.py @ bd3d6dca
# History  View  Annotate  Download (2.72 KB)
1 
import os 

2 
import sys 
3 
from collections import defaultdict 
4 
import numpy as np 
5 
import scipy 
6 
from scipy import stats 
7 
import matplotlib.pyplot as plt 
8  
9 
from pdb import set_trace as debugger 
10  
11 
class Experiment(object):
    """Per-node scores collected over a sequence of snapshots.

    ``scores`` is a ``(num_of_snapshots, num_of_nodes)`` array of floats.
    Row ``k`` is filled by the ``k``-th call to ``add_new_result``; nodes
    that do not occur in a snapshot keep the initial value 0.
    """

    def __init__(self, num_of_nodes, num_of_snapshots):
        """Allocate a zero-filled score table for the given dimensions."""
        self.scores = np.zeros((num_of_snapshots, num_of_nodes))
        # Row of ``scores`` that the next add_new_result() call will fill.
        self.index_of_snapshot = 0
        self.num_of_nodes = num_of_nodes
        self.num_of_snapshots = num_of_snapshots

    def add_new_result(self, in_filepath):
        """Load one snapshot and store it in the next free row of ``scores``.

        ``in_filepath`` is handed to ``np.loadtxt`` unchanged.  Each record
        must have two comma-separated columns: an integer node id and a
        floating point score.

        Exits the process (``SystemExit``, message on stderr, non-zero
        status) when every snapshot row has already been filled.
        """
        if self.index_of_snapshot >= self.num_of_snapshots:
            # sys.exit(str) prints the message to stderr and exits with
            # status 1, so the failure is visible to calling scripts.
            sys.exit("ERROR: the number of snapshots provided is less "
                     "than the input files")

        data = np.loadtxt(in_filepath, delimiter=',', dtype={
            'names': ('node_id', 'score'),
            'formats': ('i4', 'f4'),
        })

        # A single-record input comes back as a 0-d array, which cannot be
        # iterated; atleast_1d turns it into a one-element sequence.
        for row in np.atleast_1d(data):
            self.update_score(row[0], row[1])

        self.index_of_snapshot += 1

    def update_score(self, node_id, score):
        """Store ``score`` for ``node_id`` in the snapshot being loaded."""
        self.scores[self.index_of_snapshot, node_id] = score

    def summarize(self):
        """Return the average score of every snapshot (one value per row)."""
        return np.average(self.scores, axis=1)

    def spearman_rank_correlation_coef(self, plot=True, title='FFGraz'):
        """Compare every pair of snapshots by Spearman rank correlation.

        For each pair ``(j, i)`` with ``j <= i`` the correlation between
        the two score rows is stored in ``self.time_diff[i - j]``, i.e. it
        is grouped by the distance between the snapshot indices.

        Parameters:
            plot: when true (the default) draw the min / mean / max curves
                with matplotlib and show the figure.
            title: title of the figure.

        Returns:
            A tuple ``(x_range, min_diff, mean_diff, max_diff)`` where
            ``x_range`` is the sorted list of index distances and the other
            three lists hold the matching minimum, mean and maximum
            coefficients.  All four lists are empty when the experiment has
            no snapshots; nothing is plotted in that case.
        """
        self.time_diff = defaultdict(list)
        for j in range(self.num_of_snapshots):
            for i in range(j, self.num_of_snapshots):
                diff = scipy.stats.spearmanr(self.scores[j], self.scores[i])
                # diff[0] is the correlation coefficient; the key is the
                # distance between the two snapshot indices.
                self.time_diff[i - j].append(diff[0])

        x_range = sorted(self.time_diff)
        min_diff = [np.min(self.time_diff[lag]) for lag in x_range]
        max_diff = [np.max(self.time_diff[lag]) for lag in x_range]
        mean_diff = [np.mean(self.time_diff[lag]) for lag in x_range]

        if plot and x_range:
            self._plot_time_diff(x_range, min_diff, mean_diff, max_diff, title)

        return x_range, min_diff, mean_diff, max_diff

    def _plot_time_diff(self, x_range, min_diff, mean_diff, max_diff, title):
        """Draw the min / mean / max correlation curves and show the figure."""
        plt.plot(x_range, min_diff, label='min')
        plt.plot(x_range, mean_diff, label='mean')
        plt.plot(x_range, max_diff, label='max')

        plt.ylabel('Spearman rank correlation coefficient')
        plt.xlabel('time diff (?)')
        plt.legend()
        plt.title(title)
        plt.show()
67  
68  
69 
def all_files_for_network(network_name, dir):
    """Return the paths of the entries in ``dir`` that belong to a network.

    An entry belongs to ``network_name`` when the part of its name before
    the first underscore equals ``network_name``.  Every returned path is
    ``dir`` joined with the entry name.

    The directory listing is sorted by name, because ``os.listdir`` gives
    no ordering guarantee and callers use the position in the returned
    list as the snapshot index.
    NOTE(review): sorting is lexicographic -- confirm that the snapshot
    file names sort chronologically (e.g. zero-padded counters).
    """
    return [
        os.path.join(dir, entry)
        for entry in sorted(os.listdir(dir))
        if entry.split('_')[0] == network_name
    ]

77  
78 
def main():
    """Load every 'FFGraz' snapshot found in 'output' and analyse it.

    Builds an Experiment for 200 nodes with one row per snapshot file,
    feeds it each file in turn, then runs the summary and the Spearman
    rank correlation analysis.
    """
    network = 'FFGraz'
    snapshot_paths = all_files_for_network(network, 'output')

    exp = Experiment(200, len(snapshot_paths))
    for snapshot_path in snapshot_paths:
        exp.add_new_result(snapshot_path)

    exp.summarize()
    exp.spearman_rank_correlation_coef()
90  
91 
# Run the analysis only when this file is executed as a script.
if __name__ == "__main__":
    main()