Revision 5f27ee90

View differences:

globecomm/analyse_snapshot.py
6 6
from scipy import stats
7 7
import matplotlib.pyplot as plt
8 8

  
9
import metrics
10

  
9 11
from pdb import set_trace as debugger
10 12

  
11 13
class Experiment():
......
37 39
        np.average(self.scores, axis=1)
38 40

  
39 41
    def spearman_rank_correlation_coef(self):
40
        self.time_diff = defaultdict(list)
42
        time_diff = defaultdict(list)
41 43
        for j in range(self.num_of_snapshots):
42 44
            for i in range(j, self.num_of_snapshots):
43 45
                diff = scipy.stats.spearmanr(self.scores[j], self.scores[i])
44
                self.time_diff[i-j].append(diff[0])
46
                time_diff[i-j].append(diff[0])
47
        self._plot_time_diff(time_diff,
48
                        title='FFGraz',
49
                        xlabel='time diff (?)',
50
                        ylabel='Spearman rank correlation coefficient')
51

  
52
    def percentage_overlap(self, top_k=20):
53
        """
54
        """
55
        time_diff = defaultdict(list)
56
        for j in range(self.num_of_snapshots):
57
            for i in range(j, self.num_of_snapshots):
58
                diff = metrics.percentage_overlap(self.scores[j], self.scores[i], top_k)
59
                time_diff[i-j].append(diff)
60

  
61
        out_filepath = 'output/overlap_%s.png' % top_k
62
        fig = self._plot_time_diff(time_diff,
63
                        title='FFGraz',
64
                        xlabel='time_diff',
65
                        ylabel='Percentage overlap for top-k = %s' % top_k,
66
                        ylim=(40, 101),
67
                        out_filepath=out_filepath)
45 68

  
46
        max_key = max(self.time_diff.keys()) + 1
69

  
70
    def _plot_time_diff(self, time_diff, title='', xlabel='', ylabel='', ylim=None, out_filepath=''):
71
        max_key = max(time_diff.keys()) + 1
47 72
        min_diff = [0 for i in range(max_key)]
48 73
        max_diff = [0 for i in range(max_key)]
49 74
        mean_diff = [0 for i in range(max_key)]
50 75

  
51
        for i, value in self.time_diff.iteritems():
76
        for i, value in time_diff.iteritems():
52 77
            min_diff[i] = np.min(value)
53
            max_diff[i] = np.max(value)
54 78
            mean_diff[i] = np.mean(value)
79
            max_diff[i] = np.max(value)
55 80

  
81
        fig = plt.figure()
56 82
        # Plot
57
        x_range = sorted(self.time_diff.keys())
83
        x_range = sorted(time_diff.keys())
58 84
        plt.plot(x_range, min_diff, label='min')
59 85
        plt.plot(x_range, mean_diff, label='mean')
60 86
        plt.plot(x_range, max_diff, label='max')
61 87

  
62
        plt.ylabel('Spearman rank correlation coefficient')
63
        plt.xlabel('time diff (?)')
88
        plt.ylabel(ylabel)
89
        plt.xlabel(xlabel)
90
        if ylim:
91
            plt.ylim(ylim)
64 92
        plt.legend()
65
        plt.title('FFGraz')
66
        plt.show()
93
        plt.title(title)
67 94

  
95
        if out_filepath:
96
            plt.savefig(out_filepath)
97
        else:
98
            plt.show()
68 99

  
69 100
def all_files_for_network(network_name, dir):
70 101
    files = []
......
86 117
        exp.add_new_result(file)
87 118

  
88 119
    exp.summarize()
89
    exp.spearman_rank_correlation_coef()
120

  
121
    # Show the percentage overlap for multiple snapshots
122
    percentages = [i/10. for i in range(1, 6)]
123
    top_ks = [int(p*num_of_nodes) for p in percentages]
124
    for k in top_ks:
125
        exp.percentage_overlap(k)
90 126

  
91 127
if __name__ == '__main__':
92 128
    main()

Also available in: Unified diff