Revision 4b37c1d9

View differences:

fiddle/count_num_bcc/analyze_count_bcc.py
1
#!/usr/bin/env python
2
import numpy as np
3
import matplotlib.pyplot as plt
4
from pdb import set_trace as debugger
5

  
6

  
7

  
8
def read_countbcc(input_filepath):
    """Load the comma-separated BCC count table as a structured array.

    Each record carries four fields: ``filename`` and ``topotype`` (byte
    strings), ``num_of_bcc`` (32-bit integer) and ``num_of_nodes_in_bcc``
    (the raw text of the field, kept as a byte string).  Surrounding
    whitespace is stripped from every field except the first one.
    """
    def _trim(text):
        return text.strip()

    field_names = ('filename', 'topotype', 'num_of_bcc', 'num_of_nodes_in_bcc')
    field_formats = ('S40', 'S6', 'i4', 'S2500')
    record_dtype = {'names': field_names, 'formats': field_formats}

    # Columns 1..3 are trimmed; column 0 is passed through untouched.
    column_converters = {column: _trim for column in (1, 2, 3)}

    return np.loadtxt(
        input_filepath,
        dtype=record_dtype,
        converters=column_converters,
        delimiter=',')
22

  
23
def get_statistical_for_column(col):
    """Summarise a numeric column.

    Returns a five-element list: minimum, maximum, average, standard
    deviation, and the ratio of standard deviation to average.
    """
    mean_value = np.average(col)
    spread = np.std(col)
    return [np.amin(col), np.amax(col), mean_value, spread, spread / mean_value]
30

  
31
def average_num_of_bcc(input_data, index_column_name, value_column_name, output_filepath):
    """Compute per-category statistics of one column.

    Records are grouped by the distinct values found in
    ``index_column_name``; for every group the ``value_column_name``
    entries are summarised with ``get_statistical_for_column``.

    Returns a dict mapping each category to its statistics list.
    ``output_filepath`` is accepted but not used by this function.
    """
    group_keys = input_data[index_column_name]
    return {
        category: get_statistical_for_column(
            input_data[group_keys == category][value_column_name])
        for category in np.unique(group_keys)
    }
41

  
42
def plot_num_of_bcc(result_dict, prefixes):
    """Plot the average number of bi-connected components per graph size.

    One PNG per prefix is written to ``./output/num_of_bcc_<prefix>.png``
    with the graph sizes 100, 200, ..., 1000 on the x axis.  Sizes that
    have no entry in ``result_dict`` are plotted as 0.

    Args:
        result_dict: Mapping from a category key such as ``'CN100'`` (a
            two-character topology prefix followed by the node count) to
            a sequence whose item at index 2 is the average to plot.
            Keys may be text or byte strings.
        prefixes: Topology prefixes to plot.

    Raises:
        KeyError: If a key's prefix is not listed in ``prefixes``.
        IndexError: If a key's node count does not fall into one of the
            ten 100-node buckets.
    """
    bucket_count = 10
    bucket_width = 100
    averages = {prefix: [0] * bucket_count for prefix in prefixes}

    # .items() exists on both Python 2 and 3; .iteritems() is Python 2 only.
    for key, stats in result_dict.items():
        # Keys coming from an 'S'-typed numpy column are byte strings on
        # Python 3 and would never match the text prefixes.
        if isinstance(key, bytes):
            key = key.decode('ascii')
        series = averages[key[:2]]
        num_of_nodes = int(key[2:])
        idx = num_of_nodes // bucket_width - 1
        # A negative index would silently wrap around and overwrite the
        # last bucket, so reject anything outside the bucket range.
        if not 0 <= idx < bucket_count:
            raise IndexError(
                'node count %d of %r is outside the plotted range'
                % (num_of_nodes, key))
        series[idx] = stats[2]

    x_values = list(range(bucket_width,
                          bucket_width * bucket_count + 1,
                          bucket_width))
    for prefix in prefixes:
        output_plot_filepath = './output/num_of_bcc_%s.png' % prefix

        plt.plot(x_values, averages[prefix], 'ro')
        plt.xlabel('Number of nodes in a %s graph' % prefix)
        plt.ylabel('Average number of bi-connected component')
        plt.savefig(output_plot_filepath)
        # Close the figure so the next prefix starts from a clean canvas.
        plt.close()
72

  
73
def analyze_nodes_in_bcc(input_data, index_column_name, value_column_name, output_filepath):
    """Write one histogram of BCC sizes per category.

    Records are grouped by the distinct values of ``index_column_name``.
    For each group, every ``value_column_name`` entry is read as a
    whitespace-separated list of integers; all of the group's integers
    are pooled and plotted as a histogram saved to
    ``./output/histogram_num_of_nodes_<category>.png``.

    Args:
        input_data: Structured array holding both columns.
        index_column_name: Name of the grouping column.
        value_column_name: Name of the column with the integer lists
            (text or byte strings).
        output_filepath: Accepted but not used by this function.

    Raises:
        ValueError: If a token in a value entry is not an integer.
    """
    category_column = input_data[index_column_name]

    for cat in np.unique(category_column):
        rows = input_data[category_column == cat][value_column_name]

        # Build the pooled array in one pass; growing it with np.append
        # would copy the whole array on every row.  split() without an
        # argument works for both text and byte strings and ignores
        # repeated or trailing whitespace.
        values = np.array(
            [int(token) for row in rows for token in row.split()],
            dtype='i4')

        # Byte-string categories would otherwise show up as "b'...'" in
        # the file name on Python 3.
        label = cat.decode('ascii') if isinstance(cat, bytes) else cat
        output_hist_filepath = './output/histogram_num_of_nodes_%s.png' % label
        plt.hist(values)
        plt.savefig(output_hist_filepath)
        plt.close()
87

  
88

  
89
if __name__ == '__main__':
    # Load the per-graph BCC counts.
    input_data = read_countbcc('countbcc_all_graphs.out')

    # Statistics of the BCC count per topology type, then one plot per prefix.
    result_dict = average_num_of_bcc(
        input_data, 'topotype', 'num_of_bcc', './output/avg_num_of_bcc.out2')
    plot_num_of_bcc(result_dict, ['CN', 'PL'])

    # Histogram of the BCC sizes per topology type.
    analyze_nodes_in_bcc(
        input_data, 'topotype', 'num_of_nodes_in_bcc',
        './output/nodes_in_bcc.out2')
100

  

Also available in: Unified diff