## root / globecomm / analyse_snapshot.py @ bd3d6dca

History | View | Annotate | Download (2.72 KB)

1 | bd3d6dca | Quynh PX Nguyen | import os |
---|---|---|---|

2 | import sys |
||

3 | from collections import defaultdict |
||

4 | import numpy as np |
||

5 | import scipy |
||

6 | from scipy import stats |
||

7 | import matplotlib.pyplot as plt |
||

8 | |||

9 | from pdb import set_trace as debugger |
||

10 | |||

class Experiment():
    """Collect per-node scores over a series of snapshots and analyse them.

    ``scores`` is a ``(num_of_snapshots, num_of_nodes)`` array that starts
    out as all zeros.  Each call to ``add_new_result`` fills the next row
    from one input file.
    """

    def __init__(self, num_of_nodes, num_of_snapshots):
        """Allocate the score matrix.

        Args:
            num_of_nodes: number of columns; node ids index into this axis.
            num_of_snapshots: number of rows, i.e. how many input files
                may be added with ``add_new_result``.
        """
        self.scores = np.zeros((num_of_snapshots, num_of_nodes))
        self.index_of_snapshot = 0
        self.num_of_nodes = num_of_nodes
        self.num_of_snapshots = num_of_snapshots

    def add_new_result(self, in_filepath):
        """Load one ``node_id,score`` CSV file into the next snapshot row.

        Args:
            in_filepath: path of a comma-separated file whose rows are
                ``<int node id>,<float score>``.

        Raises:
            SystemExit: if every allocated snapshot row is already filled.
                The message goes to stderr and the exit status is non-zero.
        """
        if self.index_of_snapshot >= self.num_of_snapshots:
            sys.exit("ERROR: the number of snapshots provided is less "
                     "than the input files")

        data = np.loadtxt(in_filepath, delimiter=',', dtype={
            'names': ('node_id', 'score'),
            'formats': ('i4', 'f4')
        })

        # A file with a single row comes back as a 0-d array, which cannot
        # be iterated; promote it to 1-d so one-line files work too.
        for row in np.atleast_1d(data):
            self.update_score(row[0], row[1])

        self.index_of_snapshot += 1

    def update_score(self, node_id, score):
        """Store ``score`` for ``node_id`` in the snapshot being filled.

        ``node_id`` is used directly as a column index, so an id that is
        ``>= num_of_nodes`` raises ``IndexError``.
        """
        self.scores[self.index_of_snapshot][node_id] = score

    def summarize(self):
        """Return the average score of each snapshot (one value per row)."""
        return np.average(self.scores, axis=1)

    def _correlation_by_time_diff(self):
        """Compute Spearman correlation statistics grouped by snapshot lag.

        Every pair of snapshots ``(j, i)`` with ``j <= i`` is compared and
        the coefficient is filed under the lag ``i - j`` in
        ``self.time_diff``.  Lag 0 therefore holds each snapshot compared
        with itself.

        Returns:
            A tuple ``(lags, min_diff, mean_diff, max_diff)`` of equally
            long lists, ordered by increasing lag.  All four are empty when
            there are no snapshots.
        """
        self.time_diff = defaultdict(list)
        for j in range(self.num_of_snapshots):
            for i in range(j, self.num_of_snapshots):
                diff = scipy.stats.spearmanr(self.scores[j], self.scores[i])
                self.time_diff[i - j].append(diff[0])

        lags = sorted(self.time_diff)
        min_diff = [np.min(self.time_diff[lag]) for lag in lags]
        mean_diff = [np.mean(self.time_diff[lag]) for lag in lags]
        max_diff = [np.max(self.time_diff[lag]) for lag in lags]
        return lags, min_diff, mean_diff, max_diff

    def spearman_rank_correlation_coef(self, title='FFGraz'):
        """Plot min/mean/max Spearman rank correlation against snapshot lag.

        Args:
            title: title of the figure.  Defaults to ``'FFGraz'``, the value
                that used to be hard-coded.

        Does nothing when there are no snapshots to compare.
        """
        x_range, min_diff, mean_diff, max_diff = \
            self._correlation_by_time_diff()
        if not x_range:
            # Nothing to plot; avoids max() of an empty key set.
            return

        # Plot
        plt.plot(x_range, min_diff, label='min')
        plt.plot(x_range, mean_diff, label='mean')
        plt.plot(x_range, max_diff, label='max')

        plt.ylabel('Spearman rank correlation coefficient')
        plt.xlabel('time diff (?)')
        plt.legend()
        plt.title(title)
        plt.show()

67 | |||

68 | |||

def all_files_for_network(network_name, dir):
    """Return the paths in ``dir`` that belong to ``network_name``.

    An entry belongs to the network when the part of its name before the
    first underscore equals ``network_name`` exactly.

    Args:
        network_name: prefix to match, e.g. ``'FFGraz'``.
        dir: directory to scan (not recursive).

    Returns:
        A list of ``os.path.join(dir, name)`` paths sorted by name.
        ``os.listdir`` gives no ordering guarantee, so sorting makes the
        snapshot order deterministic.  The sort is lexicographic, so
        unpadded numeric suffixes (``_10`` before ``_2``) will not come out
        in numeric order.
    """
    matching = [
        name for name in os.listdir(dir)
        if name.split('_')[0] == network_name
    ]
    return [os.path.join(dir, name) for name in sorted(matching)]

77 | |||

def main():
    """Load every 'FFGraz' snapshot from ``output`` and plot the analysis.

    Exits with an error message when no matching snapshot file is found,
    rather than failing later inside the correlation analysis.
    """
    out_dir = 'output'
    network = 'FFGraz'
    files = all_files_for_network(network, out_dir)
    if not files:
        sys.exit("ERROR: no snapshot files for network '%s' found in '%s'"
                 % (network, out_dir))

    num_of_snapshots = len(files)
    # Width of the score matrix; node ids in the input files are used as
    # column indices, so they must be smaller than this.
    num_of_nodes = 200
    exp = Experiment(num_of_nodes, num_of_snapshots)
    for path in files:
        exp.add_new_result(path)

    exp.summarize()
    exp.spearman_rank_correlation_coef()

87 | |||

88 | exp.summarize() |
||

89 | exp.spearman_rank_correlation_coef() |
||

90 | |||

# Run the analysis only when this file is executed as a script.
if __name__ == "__main__":
    main()