root / globecomm / analyse_snapshot.py @ bd3d6dca
History | View | Annotate | Download (2.72 KB)
1 |
import os |
---|---|
2 |
import sys |
3 |
from collections import defaultdict |
4 |
import numpy as np |
5 |
import scipy |
6 |
from scipy import stats |
7 |
import matplotlib.pyplot as plt |
8 |
|
9 |
from pdb import set_trace as debugger |
10 |
|
11 |
class Experiment(object):
    """Collect per-node scores over a series of snapshots and compare them.

    Scores live in a zero-initialised array of shape
    ``(num_of_snapshots, num_of_nodes)``.  Each call to ``add_new_result``
    fills the next row from one result file.  Nodes that do not appear in a
    file keep a score of 0 for that snapshot.
    """

    def __init__(self, num_of_nodes, num_of_snapshots):
        """Allocate the score matrix.

        Args:
            num_of_nodes: number of columns; node ids are used directly as
                column indices.
            num_of_snapshots: number of rows, i.e. how many result files can
                be added.
        """
        self.scores = np.zeros((num_of_snapshots, num_of_nodes))
        self.index_of_snapshot = 0
        self.num_of_nodes = num_of_nodes
        self.num_of_snapshots = num_of_snapshots
        # Filled by spearman_rank_correlation_coef(): lag -> list of rho.
        self.time_diff = defaultdict(list)

    def add_new_result(self, in_filepath):
        """Load one ``node_id,score`` CSV file into the next snapshot row.

        Args:
            in_filepath: path of a comma-separated file with an integer node
                id and a float score on every line.

        Raises:
            SystemExit: if every snapshot row has already been filled.
        """
        if self.index_of_snapshot >= self.num_of_snapshots:
            # Passing the message to sys.exit sends it to stderr and yields a
            # non-zero exit status instead of reporting success.
            sys.exit("ERROR: the number of snapshots provided is less than "
                     "the input files")

        data = np.loadtxt(in_filepath, delimiter=',', dtype={
            'names': ('node_id', 'score'),
            'formats': ('i4', 'f4')
        })

        # loadtxt squeezes a single-row file to a 0-d array, which cannot be
        # iterated; promote it back to one dimension.
        for row in np.atleast_1d(data):
            self.update_score(row['node_id'], row['score'])

        self.index_of_snapshot += 1

    def update_score(self, node_id, score):
        """Store ``score`` for ``node_id`` in the snapshot currently being filled."""
        self.scores[self.index_of_snapshot][node_id] = score

    def summarize(self):
        """Return the mean score of every snapshot.

        Returns:
            A 1-D array with one average (taken over all nodes) per snapshot.
        """
        return np.average(self.scores, axis=1)

    def _pairwise_rank_correlations(self):
        """Group Spearman's rho of every snapshot pair (j, i >= j) by lag i - j."""
        by_lag = defaultdict(list)
        for j in range(self.num_of_snapshots):
            for i in range(j, self.num_of_snapshots):
                result = scipy.stats.spearmanr(self.scores[j], self.scores[i])
                # Element 0 is the correlation coefficient, element 1 the p-value.
                by_lag[i - j].append(result[0])
        return by_lag

    def spearman_rank_correlation_coef(self, title='FFGraz'):
        """Plot min/mean/max Spearman rank correlation against snapshot lag.

        Every pair of snapshots (including each snapshot with itself) is
        compared; the coefficients are grouped by the index distance between
        the two snapshots and stored in ``self.time_diff``.

        Args:
            title: title of the figure.

        Raises:
            ValueError: if the experiment holds no snapshots.
        """
        if self.num_of_snapshots < 1:
            raise ValueError("cannot correlate an experiment without snapshots")

        self.time_diff = self._pairwise_rank_correlations()

        x_range = sorted(self.time_diff)
        min_diff = [np.min(self.time_diff[lag]) for lag in x_range]
        max_diff = [np.max(self.time_diff[lag]) for lag in x_range]
        mean_diff = [np.mean(self.time_diff[lag]) for lag in x_range]

        # Plot
        plt.plot(x_range, min_diff, label='min')
        plt.plot(x_range, mean_diff, label='mean')
        plt.plot(x_range, max_diff, label='max')

        plt.ylabel('Spearman rank correlation coefficient')
        plt.xlabel('time diff (?)')
        plt.legend()
        plt.title(title)
        plt.show()
67 |
|
68 |
|
69 |
def all_files_for_network(network_name, dir):
    """Return the paths of all entries in ``dir`` that belong to a network.

    An entry belongs to ``network_name`` when the part of its name before the
    first underscore equals ``network_name`` exactly.

    Args:
        network_name: prefix identifying the network, e.g. ``'FFGraz'``.
        dir: directory to scan (not recursive).

    Returns:
        The matching paths, each joined with ``dir``, in sorted order.
    """
    # os.listdir yields entries in arbitrary order, while callers treat the
    # position in this list as the snapshot index; sort to make it stable.
    # NOTE(review): lexicographic order is assumed to match snapshot order --
    # confirm against the naming scheme of the output files.
    return sorted(
        os.path.join(dir, entry)
        for entry in os.listdir(dir)
        if entry.split('_')[0] == network_name
    )
|
77 |
|
78 |
def main(input_dir='output', network='FFGraz', num_of_nodes=200):
    """Load every snapshot of one network and plot its rank-correlation drift.

    Args:
        input_dir: directory containing the per-snapshot result files.
        network: network name used as the file-name prefix.
        num_of_nodes: number of node slots to allocate per snapshot.

    Raises:
        SystemExit: if no result file for ``network`` exists in ``input_dir``.
    """
    files = all_files_for_network(network, input_dir)
    if not files:
        # Without snapshots the correlation step has nothing to compare and
        # would fail with an opaque error; stop with a clear message instead.
        sys.exit("ERROR: no snapshot files found for network '%s' in '%s'"
                 % (network, input_dir))

    exp = Experiment(num_of_nodes, len(files))
    for path in files:
        exp.add_new_result(path)

    exp.summarize()
    exp.spearman_rank_correlation_coef()
90 |
|
91 |
# Run the analysis only when executed as a script, not when imported.
if __name__ == "__main__":
    main()