Statistics
| Branch: | Revision:

root / globecomm / analyse_snapshot.py @ bd3d6dca

History | View | Annotate | Download (2.72 KB)

1
import os
2
import sys
3
from collections import defaultdict
4
import numpy as np
5
import scipy
6
from scipy import stats
7
import matplotlib.pyplot as plt
8

    
9
from pdb import set_trace as debugger
10

    
11
class Experiment(object):
    """Node scores for a series of snapshots, plus rank-correlation analysis.

    Scores are kept in ``self.scores``, a zero-initialised float array of
    shape ``(num_of_snapshots, num_of_nodes)``.  Row ``k`` is filled by the
    k-th call to :meth:`add_new_result`.
    """

    def __init__(self, num_of_nodes, num_of_snapshots):
        """Allocate the score table.

        Args:
            num_of_nodes: number of columns (node ids index the columns).
            num_of_snapshots: number of rows (one per input file).
        """
        self.scores = np.zeros((num_of_snapshots, num_of_nodes))
        # Row that the next call to add_new_result() will fill.
        self.index_of_snapshot = 0
        self.num_of_nodes = num_of_nodes
        self.num_of_snapshots = num_of_snapshots

    def add_new_result(self, in_filepath):
        """Load one ``node_id,score`` CSV file into the next free snapshot row.

        Args:
            in_filepath: path of a comma-separated file with an integer node
                id and a float score on every line.

        Raises:
            SystemExit: (non-zero status) when every snapshot row is already
                filled, i.e. more files were supplied than snapshots declared.
        """
        if self.index_of_snapshot == self.num_of_snapshots:
            # Passing the message to sys.exit() prints it on stderr and sets
            # a non-zero exit status; a bare sys.exit() would report success.
            sys.exit("ERROR: the number of snapshots provided is less than the input files")

        data = np.loadtxt(in_filepath, delimiter=',', dtype={
            'names': ('node_id', 'score'),
            'formats': ('i4', 'f4'),
        })

        # loadtxt squeezes a one-line file into a 0-d array, which cannot be
        # iterated; atleast_1d restores a sequence of rows.
        for row in np.atleast_1d(data):
            self.update_score(row['node_id'], row['score'])

        self.index_of_snapshot += 1

    def update_score(self, node_id, score):
        """Store ``score`` for ``node_id`` in the snapshot currently being filled."""
        self.scores[self.index_of_snapshot, node_id] = score

    def summarize(self):
        """Return the mean score of every snapshot as a 1-D array.

        The value used to be computed and thrown away; returning it is
        harmless for callers that ignore the result.
        """
        return np.average(self.scores, axis=1)

    def spearman_rank_correlation_coef(self, title='FFGraz'):
        """Plot min/mean/max Spearman correlation against snapshot distance.

        For every pair of snapshot rows ``j <= i`` the Spearman rank
        correlation coefficient of the two score rows is collected in
        ``self.time_diff[i - j]``.  The minimum, mean and maximum per distance
        are then plotted with matplotlib and the figure is shown.

        Args:
            title: plot title (defaults to the previously hard-coded value).
        """
        self.time_diff = defaultdict(list)
        for j in range(self.num_of_snapshots):
            for i in range(j, self.num_of_snapshots):
                rho = scipy.stats.spearmanr(self.scores[j], self.scores[i])[0]
                self.time_diff[i - j].append(rho)

        if not self.time_diff:
            # No snapshots at all: nothing to aggregate or plot.
            return

        # Keys are exactly 0 .. num_of_snapshots - 1, so the sorted keys line
        # up one-to-one with the aggregated lists below.
        x_range = sorted(self.time_diff)
        min_diff = [np.min(self.time_diff[lag]) for lag in x_range]
        max_diff = [np.max(self.time_diff[lag]) for lag in x_range]
        mean_diff = [np.mean(self.time_diff[lag]) for lag in x_range]

        # Plot
        plt.plot(x_range, min_diff, label='min')
        plt.plot(x_range, mean_diff, label='mean')
        plt.plot(x_range, max_diff, label='max')

        plt.ylabel('Spearman rank correlation coefficient')
        plt.xlabel('time diff (?)')
        plt.legend()
        plt.title(title)
        plt.show()
67

    
68

    
69
def all_files_for_network(network_name, dir):
    """Return the paths of the files in ``dir`` that belong to ``network_name``.

    A file belongs to the network when the part of its name before the first
    underscore equals ``network_name``.

    Args:
        network_name: prefix to match exactly (not a substring match).
        dir: directory to scan (not recursive).

    Returns:
        List of ``os.path.join(dir, name)`` paths in lexicographic order.
        ``os.listdir`` yields entries in arbitrary order, so sorting makes the
        snapshot sequence reproducible.
        NOTE(review): assumes file names sort in snapshot order -- confirm
        against the naming scheme of the input files.
    """
    matches = [
        os.path.join(dir, entry)
        for entry in os.listdir(dir)
        if entry.split('_')[0] == network_name
    ]
    matches.sort()
    return matches
77

    
78
def main():
    """Load every snapshot file of one network and plot its rank correlations.

    Scans the ``output`` directory for files of the ``FFGraz`` network, loads
    each one as a snapshot into an ``Experiment`` and runs the summary and the
    Spearman rank-correlation analysis.

    Raises:
        SystemExit: (non-zero status) when no input file is found.
    """
    input_dir = 'output'
    network = 'FFGraz'
    # NOTE(review): fixed node count; node ids in the input files must stay
    # below this value -- confirm against the data set.
    num_of_nodes = 200

    files = all_files_for_network(network, input_dir)
    if not files:
        # Without snapshots the analysis has nothing to aggregate; stop with
        # a clear message instead of failing later inside the computation.
        sys.exit("ERROR: no input files found for network '%s' in '%s'"
                 % (network, input_dir))

    exp = Experiment(num_of_nodes, len(files))
    for path in files:
        exp.add_new_result(path)

    exp.summarize()
    exp.spearman_rank_correlation_coef()
90

    
91
# Run the analysis only when executed as a script, not when imported.
if __name__ == "__main__":
    main()