Revision 4b37c1d9
fiddle/count_num_bcc/analyze_count_bcc.py | ||
---|---|---|
1 |
#!/usr/bin/env python |
|
2 |
import numpy as np |
|
3 |
import matplotlib.pyplot as plt |
|
4 |
from pdb import set_trace as debugger |
|
5 |
|
|
6 |
|
|
7 |
|
|
8 |
def read_countbcc(input_filepath):
    """Load the comma-separated BCC count report into a structured array.

    Every record carries four fields: ``filename``, ``topotype``,
    ``num_of_bcc`` and ``num_of_nodes_in_bcc``.  The text fields are stored
    as fixed-width byte strings and ``num_of_bcc`` as a 32-bit integer.

    Columns 1, 2 and 3 have surrounding whitespace stripped before they are
    converted to the declared field type; column 0 is taken as-is.
    """
    def strip_field(raw_value):
        return raw_value.strip()

    record_dtype = {
        'names': ('filename', 'topotype', 'num_of_bcc', 'num_of_nodes_in_bcc'),
        'formats': ('S40', 'S6', 'i4', 'S2500'),
    }
    column_converters = dict.fromkeys((1, 2, 3), strip_field)

    return np.loadtxt(input_filepath,
                      dtype=record_dtype,
                      converters=column_converters,
                      delimiter=',')
|
22 |
|
|
23 |
def get_statistical_for_column(col):
    """Summarise a numeric column.

    Returns a five-element list:
    ``[min, max, average, std, std / average]``.
    """
    lowest = np.amin(col)
    highest = np.amax(col)
    mean = np.average(col)
    spread = np.std(col)
    # The last entry is the standard deviation relative to the average.
    return [lowest, highest, mean, spread, spread / mean]
|
30 |
|
|
31 |
def average_num_of_bcc(input_data, index_column_name, value_column_name, output_filepath):
    """Compute per-category statistics of one column.

    Rows of ``input_data`` are grouped by the distinct values found in the
    ``index_column_name`` field; for each group the ``value_column_name``
    field is summarised with ``get_statistical_for_column``.

    Returns a dict mapping each category value to its statistics list.
    ``output_filepath`` is accepted but not used by this function.
    """
    index_column = input_data[index_column_name]

    summary = {}
    for category in np.unique(index_column):
        matching_rows = input_data[index_column == category]
        summary[category] = get_statistical_for_column(matching_rows[value_column_name])
    return summary
|
41 |
|
|
42 |
def plot_num_of_bcc(result_dict, prefixes):
    """Plot the average number of bi-connected components per graph size.

    ``result_dict`` maps a category key to a statistics sequence whose
    element at index 2 is the average.  A key consists of a two-character
    topology prefix followed by the number of nodes (e.g. ``CN100``); keys
    may be text or byte strings.  The node count selects one of ten slots
    (100, 200, ..., 1000).

    One PNG per entry of ``prefixes`` is written to
    ``./output/num_of_bcc_<prefix>.png``.

    Raises ``KeyError`` if a key's prefix is not listed in ``prefixes``.
    """
    num_of_slots = 10
    avg_result_dict = dict((prefix, [0] * num_of_slots) for prefix in prefixes)

    # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
    for key, value in result_dict.items():
        # Keys taken from a byte-string field are bytes on Python 3 and would
        # never match the text prefixes, so normalise them to text first.
        if isinstance(key, bytes):
            key = key.decode('ascii')
        topotype = key[0:2]
        num_of_nodes = int(key[2:])
        idx = num_of_nodes // 100 - 1
        avg_result_dict[topotype][idx] = value[2]

    # Plot
    x_label = list(range(100, 1001, 100))
    for prefix in prefixes:
        output_plot_filepath = './output/num_of_bcc_%s.png' % prefix

        plt.plot(x_label, avg_result_dict[prefix], 'ro')
        plt.xlabel('Number of nodes in a %s graph' % prefix)
        plt.ylabel('Average number of bi-connected component')
        plt.savefig(output_plot_filepath)
        plt.close()
|
72 |
|
|
73 |
def analyze_nodes_in_bcc(input_data, index_column_name, value_column_name, output_filepath):
    """Save one histogram per category of the integers listed in a column.

    Rows of ``input_data`` are grouped by the distinct values of the
    ``index_column_name`` field.  Each row's ``value_column_name`` field is
    a whitespace-separated list of integers (text or byte string); all
    integers of a group are pooled and plotted with ``plt.hist``.

    Each histogram is written to
    ``./output/histogram_num_of_nodes_<category>.png``.
    ``output_filepath`` is accepted but not used by this function.
    """
    index_column = input_data[index_column_name]

    for cat in np.unique(index_column):
        rows = input_data[index_column == cat][value_column_name]

        # Collect into a plain list and convert once; growing an array with
        # np.append copies the whole array on every row.
        pooled = []
        for row in rows:
            # Byte-string fields are bytes on Python 3; splitting them with a
            # text separator would raise TypeError.
            if isinstance(row, bytes):
                row = row.decode('ascii')
            # split() without an argument tolerates repeated or surrounding
            # whitespace instead of producing empty tokens.
            pooled.extend(int(token) for token in row.split())
        values = np.asarray(pooled, dtype='i4')

        # Decode so the filename does not contain a b'...' repr on Python 3.
        label = cat.decode('ascii') if isinstance(cat, bytes) else cat
        output_hist_filepath = './output/histogram_num_of_nodes_%s.png' % label
        plt.hist(values)
        plt.savefig(output_hist_filepath)
        plt.close()
|
87 |
|
|
88 |
|
|
89 |
if __name__ == '__main__':
    # Inputs and output locations used by the analyses below.
    input_filepath = 'countbcc_all_graphs.out'
    avg_bcc_output_filepath = './output/avg_num_of_bcc.out2'
    nodes_in_bcc_output_filepath = './output/nodes_in_bcc.out2'
    prefixes = ['CN', 'PL']

    # Parse the report once; both analyses share the same records.
    input_data = read_countbcc(input_filepath)

    # Per-topotype statistics of the BCC count, then one plot per prefix.
    result_dict = average_num_of_bcc(
        input_data,
        index_column_name='topotype',
        value_column_name='num_of_bcc',
        output_filepath=avg_bcc_output_filepath,
    )
    plot_num_of_bcc(result_dict, prefixes)

    # Histograms of the node counts listed for each topotype.
    analyze_nodes_in_bcc(
        input_data,
        index_column_name='topotype',
        value_column_name='num_of_nodes_in_bcc',
        output_filepath=nodes_in_bcc_output_filepath,
    )
|
100 |
|
Also available in: Unified diff