# fiddle/count_num_bcc/analyze_count_bcc.py

1 
#!/usr/bin/env python 

2 
import numpy as np 

3 
import matplotlib.pyplot as plt 

4 
from pdb import set_trace as debugger 

5  
6  
7  
8 
def read_countbcc(input_filepath):
    """Load the comma-separated biconnected-component report.

    Every row is parsed into four named fields:

    * ``filename``            -- byte string, at most 40 bytes (``S40``)
    * ``topotype``            -- byte string, at most 6 bytes (``S6``)
    * ``num_of_bcc``          -- 32-bit integer (``i4``)
    * ``num_of_nodes_in_bcc`` -- byte string, at most 2500 bytes (``S2500``)

    Args:
        input_filepath: Passed unchanged to ``np.loadtxt`` as the file to read.

    Returns:
        The structured numpy array built by ``np.loadtxt``.
    """
    record_dtype = {
        'names': ('filename', 'topotype', 'num_of_bcc', 'num_of_nodes_in_bcc'),
        'formats': ('S40', 'S6', 'i4', 'S2500'),
    }

    # Columns 1-3 are stripped of surrounding whitespace; column 0 (filename)
    # has no converter and is stored as numpy reads it.  The integer column is
    # converted explicitly instead of handing numpy a string to cast.
    # NOTE(review): a node list longer than 2500 bytes would presumably be
    # truncated by the fixed-width field -- confirm against the real data.
    column_converters = {
        1: lambda s: s.strip(),
        2: lambda s: int(s.strip()),
        3: lambda s: s.strip(),
    }

    input_data = np.loadtxt(input_filepath,
                            dtype=record_dtype,
                            converters=column_converters,
                            delimiter=',')
    return input_data

22  
23 
def get_statistical_for_column(col):
    """Summarise a numeric column.

    Args:
        col: Array-like of numbers.

    Returns:
        A five-element list ``[min, max, mean, std, std / mean]``.  ``std`` is
        ``np.std`` with its default settings (population standard deviation),
        and the last entry is the standard deviation relative to the mean.
    """
    # Compute mean and std once; both are reused for the relative spread.
    mean = np.average(col)
    std = np.std(col)
    return [np.amin(col), np.amax(col), mean, std, std / mean]

30  
31 
def average_num_of_bcc(input_data, index_column_name, value_column_name, output_filepath):
    """Group one column by another and summarise each group.

    Args:
        input_data: Structured array indexable by column name.
        index_column_name: Column whose distinct values define the groups.
        value_column_name: Column whose values are summarised per group.
        output_filepath: Accepted for interface compatibility; not used here.

    Returns:
        A dict mapping each distinct value of the index column to the list
        returned by ``get_statistical_for_column`` for that group's values.
    """
    group_labels = input_data[index_column_name]
    return {
        label: get_statistical_for_column(
            input_data[group_labels == label][value_column_name])
        for label in np.unique(group_labels)
    }

41  
42 
def plot_num_of_bcc(result_dict, prefixes):
    """Plot the average number of biconnected components against graph size.

    One PNG per prefix is saved to ``./output/num_of_bcc_<prefix>.png``; the
    ``./output`` directory is expected to exist already.

    Args:
        result_dict: Mapping from a category key to a stats list whose element
            at index 2 is the average.  A key is a two-character topology
            prefix followed by the node count, e.g. ``'CN100'``.  Byte-string
            keys are decoded before being parsed.
        prefixes: Topology prefixes to plot, e.g. ``['CN', 'PL']``.

    Raises:
        KeyError: If a key's two-character prefix is not in ``prefixes``.
    """
    # X axis: graph sizes 100, 200, ..., 1000 (ten slots per prefix).
    node_counts = list(range(100, 1001, 100))
    avg_by_prefix = {prefix: [0] * len(node_counts) for prefix in prefixes}

    for key, stats in result_dict.items():
        if isinstance(key, bytes):
            # Keys taken from an 'S' dtype column are bytes on Python 3 and
            # would otherwise never match the str prefixes.
            key = key.decode()
        topotype = key[0:2]
        num_of_nodes = int(key[2:])
        # 100 nodes -> slot 0, ..., 1000 nodes -> slot 9.
        # NOTE(review): assumes node counts are multiples of 100 within
        # 100..1000; other values land in an unintended slot or raise.
        idx = num_of_nodes // 100 - 1
        avg_by_prefix[topotype][idx] = stats[2]

    # Plot
    for prefix in prefixes:
        output_plot_filepath = './output/num_of_bcc_%s.png' % prefix

        plt.plot(node_counts, avg_by_prefix[prefix], 'ro')
        plt.xlabel('Number of nodes in a %s graph' % prefix)
        plt.ylabel('Average number of biconnected component')
        plt.savefig(output_plot_filepath)
        # Start the next prefix on a clean figure.
        plt.close()

72  
73 
def analyze_nodes_in_bcc(input_data, index_column_name, value_column_name, output_filepath):
    """Save one histogram per category of the integers listed in a column.

    For every distinct value of the index column, the whitespace-separated
    integers found in the value column of the matching rows are pooled and
    drawn with ``plt.hist``.  Each figure is saved to
    ``./output/histogram_num_of_nodes_<category>.png``; the ``./output``
    directory is expected to exist already.

    Args:
        input_data: Structured array indexable by column name.
        index_column_name: Column whose distinct values define the categories.
        value_column_name: Column holding whitespace-separated integers.
        output_filepath: Accepted for interface compatibility; not used here.
    """
    categories = np.unique(input_data[index_column_name])

    for cat in categories:
        rows = input_data[input_data[index_column_name] == cat][value_column_name]

        # Gather into a plain list and convert once; growing an array with
        # np.append copies it on every row.  split() without an argument
        # works for both str and bytes rows and ignores repeated blanks.
        pooled = []
        for row in rows:
            pooled.extend(int(token) for token in row.split())
        values = np.array(pooled, dtype='i4')

        # Byte-string categories would otherwise be formatted as "b'...'"
        # in the file name on Python 3.
        label = cat.decode() if isinstance(cat, bytes) else cat
        output_hist_filepath = './output/histogram_num_of_nodes_%s.png' % label
        plt.hist(values)
        plt.savefig(output_hist_filepath)
        # Start the next category on a clean figure.
        plt.close()

87  
88  
89 
if __name__ == '__main__':
    import os

    # All plots are saved under ./output; create it up front so the first
    # savefig call does not fail when the directory is missing.
    output_dir = './output'
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    input_filepath = 'countbcc_all_graphs.out'
    input_data = read_countbcc(input_filepath)

    # Per-topotype statistics of the number of biconnected components,
    # then one plot of the averages per topology prefix.
    avg_bcc_output_filepath = './output/avg_num_of_bcc.out2'
    result_dict = average_num_of_bcc(input_data, 'topotype', 'num_of_bcc', avg_bcc_output_filepath)
    prefixes = ['CN', 'PL']
    plot_num_of_bcc(result_dict, prefixes)

    # Per-topotype histograms of the values in 'num_of_nodes_in_bcc'.
    nodes_in_bcc_output_filepath = './output/nodes_in_bcc.out2'
    analyze_nodes_in_bcc(input_data, 'topotype', 'num_of_nodes_in_bcc', nodes_in_bcc_output_filepath)

100 
