root / fiddle / count_num_bcc / analyze_count_bcc.py @ 4b37c1d9
History | View | Annotate | Download (3.32 KB)
1 |
#!/usr/bin/env python
|
---|---|
2 |
import numpy as np |
3 |
import matplotlib.pyplot as plt |
4 |
from pdb import set_trace as debugger |
5 |
|
6 |
|
7 |
|
8 |
def read_countbcc(input_filepath):
    """Load the comma-separated BCC count file into a structured array.

    Every record has four fields: ``filename`` (S40), ``topotype`` (S6),
    ``num_of_bcc`` (i4) and ``num_of_nodes_in_bcc`` (S2500).  Surrounding
    whitespace is stripped from columns 1-3 before conversion; column 0
    is passed through untouched.
    """
    def _trim(text):
        return text.strip()

    record_dtype = {
        'names': ('filename', 'topotype', 'num_of_bcc', 'num_of_nodes_in_bcc'),
        'formats': ('S40', 'S6', 'i4', 'S2500'),
    }
    strip_converters = {column: _trim for column in (1, 2, 3)}

    return np.loadtxt(
        input_filepath,
        dtype=record_dtype,
        converters=strip_converters,
        delimiter=',',
    )
|
22 |
|
23 |
def get_statistical_for_column(col):
    """Return summary statistics for a numeric column.

    The result is a five-element list ``[min, max, mean, std, std / mean]``.
    The last entry is the ratio of the standard deviation to the mean;
    it is not guarded against a zero mean.
    """
    # Compute mean and std once and reuse them for the ratio.
    mean = np.average(col)
    spread = np.std(col)
    return [np.amin(col), np.amax(col), mean, spread, spread / mean]
|
30 |
|
31 |
def average_num_of_bcc(input_data, index_column_name, value_column_name, output_filepath):
    """Compute per-category statistics for one column of a structured array.

    Rows of ``input_data`` are grouped by the distinct values found in
    ``index_column_name``; for each group the values of
    ``value_column_name`` are summarised with
    ``get_statistical_for_column``.  ``output_filepath`` is accepted but
    not used by this function.

    Returns a dict mapping each category to its statistics list.
    """
    index_column = input_data[index_column_name]
    return {
        category: get_statistical_for_column(
            input_data[index_column == category][value_column_name]
        )
        for category in np.unique(index_column)
    }
|
41 |
|
42 |
def plot_num_of_bcc(result_dict, prefixes):
    """Plot the average number of bi-connected components per graph size.

    ``result_dict`` maps keys made of a two-character topology prefix
    followed by a node count (e.g. ``CN100``) to a sequence whose index 2
    holds the average.  For every entry of ``prefixes`` one scatter plot
    is saved to ``./output/num_of_bcc_<prefix>.png`` with the node counts
    100, 200, ..., 1000 on the x axis.  Sizes without an entry in
    ``result_dict`` are plotted as 0.

    Raises ``KeyError`` when a key's prefix is not listed in ``prefixes``.
    """
    node_counts = list(range(100, 1001, 100))

    # One slot per node count for every requested topology prefix.
    averages = {prefix: [0] * len(node_counts) for prefix in prefixes}

    # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
    for key, stats in result_dict.items():
        if isinstance(key, bytes):
            # Keys coming from 'S'-typed NumPy fields are bytes on Python 3
            # and would never match the str prefixes without decoding.
            key = key.decode('ascii')
        topotype = key[0:2]
        # Node counts 100, 200, ... map to slots 0, 1, ...
        slot = int(key[2:]) // 100 - 1
        averages[topotype][slot] = stats[2]

    for prefix in prefixes:
        output_plot_filepath = './output/num_of_bcc_%s.png' % prefix

        plt.plot(node_counts, averages[prefix], 'ro')
        plt.xlabel('Number of nodes in a %s graph' % prefix)
        plt.ylabel('Average number of bi-connected component')
        plt.savefig(output_plot_filepath)
        # Close the figure so the next prefix starts from a clean canvas.
        plt.close()
72 |
|
73 |
def analyze_nodes_in_bcc(input_data, index_column_name, value_column_name, output_filepath):
    """Save one histogram of the values in ``value_column_name`` per category.

    Rows of ``input_data`` are grouped by the distinct values of
    ``index_column_name``.  Each row's ``value_column_name`` field is a
    whitespace-separated list of integers; all integers of a group are
    pooled and their histogram is written to
    ``./output/histogram_num_of_nodes_<category>.png``.
    ``output_filepath`` is accepted but not used by this function.
    """
    index_column = input_data[index_column_name]

    for cat in np.unique(index_column):
        rows = input_data[index_column == cat][value_column_name]

        # Collect every token first and convert once: growing an array with
        # np.append inside the loop copies it on each iteration.
        # split() without an argument tolerates repeated or trailing
        # whitespace and works for both str and bytes rows.
        tokens = [token for row in rows for token in row.split()]
        values = np.asarray(tokens, dtype='i4')

        # Categories read from 'S'-typed fields are bytes on Python 3;
        # decode so the file name does not contain a b'...' repr.
        label = cat.decode('ascii') if isinstance(cat, bytes) else cat
        output_hist_filepath = './output/histogram_num_of_nodes_%s.png' % label

        plt.hist(values)
        plt.savefig(output_hist_filepath)
        # Close the figure so histograms of different categories do not overlap.
        plt.close()
87 |
|
88 |
|
89 |
if __name__ == '__main__':
    # Parse the raw BCC counts once and reuse the table for both analyses.
    bcc_table = read_countbcc('countbcc_all_graphs.out')

    # Statistics of the BCC count per topotype, then one plot per prefix.
    stats_by_topotype = average_num_of_bcc(
        bcc_table, 'topotype', 'num_of_bcc', './output/avg_num_of_bcc.out2')
    plot_num_of_bcc(stats_by_topotype, ['CN', 'PL'])

    # Histogram of the per-BCC node counts for every topotype.
    analyze_nodes_in_bcc(
        bcc_table, 'topotype', 'num_of_nodes_in_bcc',
        './output/nodes_in_bcc.out2')
100 |
|