Statistics
| Branch: | Revision:

peerstreamer-logs-analyzer / py_logs_correlated_visualizer.py @ c0eac366

History | View | Annotate | Download (9.28 KB)

1
#!/usr/bin/python
2
from __future__ import division
3
import getopt,os,sys
4
import numpy as nm
5
from pandas import *
6
from py_logs_analizer import *
7
import matplotlib as mpl
8
mpl.use( "agg" )
9
import matplotlib.pyplot as plt
10
import networkx as nx
11
import pygraphviz  as pgz
12
import numpy as np
13

    
14
sys.path.insert(0,'lib')
15
import process_manager as pmgr
16
from plot_utilities import *
17
from utilities import *
18
from peerstreamer_logs import *
19

    
20
# File extension (leading dot included) that save_figure appends to every
# figure name; it is passed to plt.savefig as part of the output path.
ImageExtension=".pdf"
21

    
22
def save_figure(folder, filename):
    """Save the current pyplot figure as ``<folder>/img/<filename><ImageExtension>``.

    The ``img`` sub-directory of ``folder`` is created when it is missing.

    Args:
        folder: directory that holds (or will hold) the ``img`` sub-directory.
        filename: figure name without extension.
    """
    target_dir = folder + '/img/'
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)
    destination = target_dir + filename + ImageExtension
    plt.savefig(destination, bbox_inches='tight')
27

    
28

    
29
def get_params(argv):
    """Parse the command-line arguments of the visualizer.

    Recognised options:
        -h, --help          exit immediately
        -s, --save          set the ``save`` flag
        -f, --folder NAME   folder to work on (required)

    Args:
        argv: list of argument strings, without the program name.

    Returns:
        A two-element list ``[folder, save]``.

    Raises:
        SystemExit: with status 2 on an unknown/malformed option, and with
            the default status on ``--help`` or when no folder was given.
    """
    try:
        # "save" must be registered and "folder=" must take a value, otherwise
        # the long spellings handled below can never be produced by getopt.
        opts, _ = getopt.getopt(argv, "shf:", ["help", "save", "folder="])
    except getopt.GetoptError:
        sys.exit(2)

    folder = None
    save = False
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            sys.exit()
        elif opt in ("-s", "--save"):
            save = True
        elif opt in ("-f", "--folder"):
            folder = arg

    if folder is None:
        print("[Error] folder name parameter required.")
        sys.exit()
    return [folder, save]
48

    
49
def get_folders_names(folder):
    """Return the names of the entries of ``folder`` that are directories.

    Names are returned in the order given by ``os.listdir``.
    """
    return [entry for entry in os.listdir(folder)
            if os.path.isdir(folder + '/' + entry)]
55

    
56
def purify_groups_names(groups):
    """Drop the first ``'img'`` entry (if any) from ``groups`` and sort it.

    The list is modified in place and the same list object is returned.
    """
    try:
        groups.remove('img')
    except ValueError:
        # no 'img' entry: nothing to strip
        pass
    groups.sort()
    return groups
61

    
62
def set_plot_labels(plot, title=None, xlabel=None, ylabel=None):
    """Apply the given title and axis labels through pyplot.

    Args:
        plot: not used by this function; the texts are set with
            ``plt.title`` / ``plt.xlabel`` / ``plt.ylabel``.
        title, xlabel, ylabel: texts to apply; falsy values are skipped.
    """
    pending = (
        (plt.title, title),
        (plt.xlabel, xlabel),
        (plt.ylabel, ylabel),
    )
    for apply_text, text in pending:
        if text:
            apply_text(text)
69

    
70
def simple_bar_graph(x, y, xlabel=None, ylabel=None, ylims=None, title=None):
    """Draw a grouped bar chart of ``y`` against the labels ``x`` on a new figure.

    A label of the form ``"<group>-<tag>"`` puts its bar in the series
    ``<group>`` at the x slot of ``<tag>`` (only the first two ``-`` separated
    parts are considered); a label without ``-`` goes to the series named
    ``''``.  Tags receive consecutive slots in order of first appearance and
    series are drawn side by side, each shifted by one bar width.

    Args:
        x: sequence of label strings.
        y: bar heights, parallel to ``x``.
        xlabel, ylabel, title: optional texts forwarded to ``set_plot_labels``.
        ylims: optional y-axis limits passed to ``ax.set_ylim``; the axis is
            left on automatic scaling when ``None``.
    """
    fig, ax = plt.subplots()
    couples = list(zip(x, y))
    print(couples)
    # Only valid matplotlib colour specs: the former 'o' and 'p' entries are
    # not colour codes and made the fifth and sixth series fail.
    available_colors = ['#002EB8', '#FFFF99', 'r', 'g', 'orange', 'purple']
    width = 0.35

    tag2num = {}      # tag -> x slot, numbered by first appearance
    values = {}       # series name -> list of (tag, height)
    group_order = []  # series names in order of first appearance
    for label, height in couples:
        parts = label.split('-')
        if len(parts) > 1:
            groupkey, tag = parts[0], parts[1]
        else:
            groupkey, tag = '', label
        if tag not in tag2num:
            tag2num[tag] = len(tag2num)
        if groupkey not in values:
            values[groupkey] = []
            group_order.append(groupkey)
        values[groupkey].append((tag, height))

    bars = []
    for i, groupkey in enumerate(group_order):
        tags, heights = zip(*values[groupkey])
        positions = [tag2num[t] + width * i for t in tags]
        # cycle through the palette so extra series do not raise IndexError
        color = available_colors[i % len(available_colors)]
        bars.append(ax.bar(positions, heights, width, color=color, linewidth=2.5))

    # One tick per tag, in slot order, placed at the position of the last
    # series' bars.  Computed from the slot numbers so that ticks and labels
    # always match, even when the last series lacks some tags.
    ordered_tags = sorted(tag2num, key=tag2num.get)
    tick_offset = width * (len(group_order) - 1)
    ax.set_xticks([tag2num[t] + tick_offset for t in ordered_tags])
    ax.set_xticklabels([t.replace('_', '\n') for t in ordered_tags])

    if ylims is not None:
        ax.set_ylim(ylims)
    set_plot_labels(ax, title, xlabel, ylabel)
    ax.yaxis.grid(True, linestyle='-', which='major', color='grey', alpha=0.5)
    ax.legend(bars, group_order, loc='lower right')
118

    
119
# ind = range(0,len(x))
120
#        fig = plt.figure()
121
#        ax = fig.add_subplot(111)
122
#        ax.yaxis.grid(True, linestyle='-', which='major', color='grey', alpha=0.5)
123
#        rects = ax.bar(ind, y,  align='center', color='blue')
124
#        ax.set_xticks(ind)
125
#        set_plot_labels(ax,title,xlabel,ylabel)
126
#        ax.set_xticklabels(x,rotation=90)
127
#        #function to auto-rotate the x axis labels
128
#        #fig.autofmt_xdate()
129

    
130
#def get_groups_dataframe(folder,groups,expfile):
131
#        data = []
132
#        for g in groups:
133
#                filo = folder+'/'+g+'/'+expfile
134
#                if os.path.isfile(filo):
135
#                        data.append(read_csv(filo))
136
#                else
137
#                        raise 'file not exists! ('+filo+')'
138
#        return data
139

    
140
def corr_visual_hops(folder, groups):
    """Plot the mean of the ``hops`` column of each group as a bar chart.

    For every group, all files of ``<folder>/<group>`` whose name ends with
    ``_session_delay.exp`` are read and concatenated; a group without such
    files contributes 0.  The chart is saved as ``average_hops``.
    """
    suffix = '_session_delay.exp'
    avg_hops = []
    for group in groups:
        group_dir = folder + '/' + group
        frames = []
        for name in os.listdir(group_dir):
            if not name.endswith(suffix):
                continue
            frame = read_csv(group_dir + '/' + name)
            if frame is not None:
                frames.append(frame)
        if frames:
            avg_hops.append(concat(frames)['hops'].mean())
        else:
            avg_hops.append(0)
    simple_bar_graph(groups, avg_hops, ylabel='# hops')
    save_figure(folder, 'average_hops')
156

    
157
def corr_visual_delay(folder, groups):
    """Plot, per group, the mean of ``avg_delay`` divided by 1000 as a bar chart.

    Values are read from ``<folder>/<group>/packets_delay.exp``; the y axis is
    labelled ``ms`` and the chart is saved as ``overall_delays``.
    """
    paths = [folder + '/' + group + '/packets_delay.exp' for group in groups]
    delays = [read_csv(path)['avg_delay'].mean() / 1000 for path in paths]
    simple_bar_graph(groups, delays, ylabel='ms')
    save_figure(folder, 'overall_delays')
167

    
168
def delayVisualize2(folder, groups):
    """Plot each group's delay distribution and cumulative curve, then save it.

    For every group, all ``*_session_delay.exp`` files found in
    ``<folder>/<group>`` are read and merged into one frame.  The ``delay``
    column is divided by 1000, negative values are clipped to 0, and a
    500-bin histogram is drawn as a percentage of samples on the left axis
    (dashed line) together with its normalised running sum on the right axis
    (solid line, labelled with the group's tag).  Both axes use a logarithmic
    x scale labelled ``ms``.  The figure is saved as ``delayVisual2``.

    Args:
        folder: experiment root directory.
        groups: names of the group sub-directories to plot.
    """
    plt.figure()
    markers = ['.', '.']
    colors = ['blue', 'black']

    # Merge all session files of a group.  DataFrame.append returns a new
    # frame, so collecting the frames and concatenating them once is what
    # actually keeps every file's samples.
    data = {}
    for g in groups:
        group_dir = folder + '/' + g
        frames = [read_csv(group_dir + '/' + elm)
                  for elm in os.listdir(group_dir)
                  if elm.endswith("_session_delay.exp")]
        if frames:
            data[g] = concat(frames, ignore_index=True)

    sb = plt.subplot(1, 1, 1)

    plt.xlabel('ms')
    sb.yaxis.tick_left()
    sb.yaxis.set_label_position("left")
    sb.set_ylim([0, 12])
    sb.set_xscale('log')
    plt.ylabel('% of samples')
    sbSum = sb.twinx()
    sbSum.yaxis.tick_right()
    sbSum.yaxis.set_label_position("right")
    sbSum.set_ylim([0, 1])
    sbSum.set_xscale('log')
    plt.ylabel('probability')
    sbSum.yaxis.grid(color='gray', linestyle='dashed')
    sbSum.set_axisbelow(True)

    # Groups that have data, drawn in reverse listing order.
    plotted = [g for g in groups if g in data]
    for i, g in enumerate(reversed(plotted)):
        # cycle the style tables so more than two groups do not raise IndexError
        color = colors[i % len(colors)]
        marker = markers[i % len(markers)]
        # legend label: the tag after the first '-', or the whole name
        name_parts = g.split('-')
        label = name_parts[1] if len(name_parts) > 1 else g

        delays = (data[g]['delay'] / 1000).clip(lower=0)  # dirty trick!!
        h, b = np.histogram(delays, bins=500)
        h = 100.0 * h / h.sum()

        sb.plot(b[:-1], h, linewidth=2, color=color, linestyle='--',
                marker=marker)
        # running sum of the percentages, normalised to [0, 1]
        sbSum.plot(b[:-1], np.cumsum(h) / h.sum(), color=color, linewidth=2,
                   linestyle='-', label=label, marker=marker)

    plt.legend(loc='lower right')
    save_figure(folder, "delayVisual2")
227

    
228
def sort_list(index, elements):
    """Sort ``elements`` by the parallel, sortable list ``index``.

    Returns a generator that yields two lists: first the sorted ``index``
    values, then ``elements`` reordered accordingly.  Pairs are compared as
    ``(index, element)`` tuples, so equal indexes are ordered by element.
    For empty input the generator yields nothing.
    """
    ordered_pairs = sorted(zip(index, elements))
    columns = zip(*ordered_pairs)
    return (list(column) for column in columns)
232

    
233
def corr_visual_loss(folder, groups):
    """Plot the overall chunk receiving ratio of each group as a bar chart.

    For every group ``<folder>/<group>/packets_loss.exp`` is read and the
    ratio ``1 - sum(losts) / sum(chunks)`` is computed.  A group whose file
    reports no chunks is left out of the chart together with its label, so
    the remaining bars stay paired with their own group names.  The chart is
    saved as ``overall_loss``.

    Args:
        folder: experiment root directory.
        groups: names of the group sub-directories to evaluate.
    """
    plotted_groups = []
    accuracy = []
    for g in groups:
        data = read_csv(folder + '/' + g + '/packets_loss.exp')
        chunks = data['chunks'].sum()
        losts = data['losts'].sum()
        if chunks > 0:
            # keep label and value in lock-step
            plotted_groups.append(g)
            accuracy.append(1 - losts / float(chunks))
    simple_bar_graph(plotted_groups, accuracy, ylabel='receiving ratio')
    save_figure(folder, 'overall_loss')
247

    
248
def corr_neigh_impact_evolution(folder, groups):
    """Plot the network impact over time of every group on one figure.

    For each group that has a ``<folder>/<group>/network_impact.exp`` file,
    the ``info_type`` and ``session_id`` columns are dropped, the rows are
    grouped by ``time`` and the median ``netimpact`` per time value is drawn
    as a line labelled with the group name.  The figure is saved as
    ``neighbourhood_evolution``.

    Args:
        folder: experiment root directory.
        groups: names of the group sub-directories to plot.
    """
    fig = plt.figure()
    for g in groups:
        filename = folder + '/' + g + '/network_impact.exp'
        if not os.path.exists(filename):
            continue
        # axis given by keyword: the positional form is rejected by recent pandas
        data = read_csv(filename).drop(['info_type', 'session_id'], axis=1)
        # WARNING: with multiple sessions this per-time median no longer makes sense
        data = data.groupby('time').median()
        plt.plot(data.index, data['netimpact'], label=g)

    plt.legend(loc='best')
    set_plot_labels(fig, 'neighbourhood_evolution', 'time (s)', 'network impact')
    save_figure(folder, 'neighbourhood_evolution')
260

    
261
def corr_neigh_impact_over_tag(folder, groups):
    """Plot the mean network impact against the numeric tag of each group.

    Only group names of the form ``"<key>-<tag>"`` are considered.  The digits
    contained in ``<tag>`` form the x value; the y value is the mean of the
    ``netimpact`` column of ``<folder>/<group>/network_impact.exp``.  One line
    per ``<key>`` is drawn, with points ordered by tag.  When at least one
    ``<key>`` was found the figure is saved as ``corr_netimpact``.

    Groups whose tag holds no digit cannot be placed on the numeric axis and
    are skipped with a warning.

    Args:
        folder: experiment root directory.
        groups: names of the group sub-directories to evaluate.
    """
    fig = plt.figure()
    plot_data = {}
    for g in groups:
        parts = g.split('-')
        if len(parts) < 2:
            continue
        groupkey, tag = parts[0], parts[1]
        series = plot_data.setdefault(groupkey, {})

        filename = folder + '/' + g + '/network_impact.exp'
        if not os.path.exists(filename):
            continue
        digits = ''.join(c for c in tag if c.isdigit())
        if not digits:
            # int('') would raise ValueError and abort the whole run
            print("[Warning] no numeric tag in group name: " + g)
            continue
        # axis given by keyword: the positional form is rejected by recent pandas
        data = read_csv(filename).drop(['info_type', 'session_id'], axis=1)
        series[int(digits)] = data['netimpact'].mean()

    if plot_data:
        for groupkey in sorted(plot_data):
            # sort by tag so the line runs left to right whatever the dict order
            s = Series(plot_data[groupkey]).sort_index()
            plt.plot(s.index, s, label=groupkey)

        plt.legend(loc='best')
        set_plot_labels(fig, '', 'tag', 'network impact')
        save_figure(folder, 'corr_netimpact')
285

    
286
def corr_visual_loss_active_peers(folder, groups):
    """Plot the chunk receiving ratio of each group, restricted to active peers.

    For every group ``<folder>/<group>/packets_loss.exp`` is read and only the
    rows with ``losts / chunks < 0.5`` are kept; the ratio
    ``1 - sum(losts) / sum(chunks)`` is computed on those rows.  A group left
    with no chunks is omitted from the chart together with its label, so the
    remaining bars stay paired with their own group names.  The chart is
    saved as ``overall_loss_active_peers``.

    Args:
        folder: experiment root directory.
        groups: names of the group sub-directories to evaluate.
    """
    plotted_groups = []
    accuracy = []
    for g in groups:
        data = read_csv(folder + '/' + g + '/packets_loss.exp')
        data = data[data['losts'] / data['chunks'] < 0.5]
        chunks = data['chunks'].sum()
        losts = data['losts'].sum()
        if chunks > 0:
            # keep label and value in lock-step
            plotted_groups.append(g)
            accuracy.append(1 - losts / float(chunks))
    simple_bar_graph(plotted_groups, accuracy,
                     title='Chunks received ratio of active peers')
    save_figure(folder, 'overall_loss_active_peers')
301

    
302
def corr_visual_main(argv):
    """Run the enabled correlated plots for the folder given on the command line.

    Parses ``argv`` with ``get_params``, lists the group sub-directories of
    the folder (``img`` excluded, sorted), sets the matplotlib font size to 16
    and draws the loss and network-impact figures.  ``plt.show()`` is called
    unless the save flag was given.
    """
    folder, save = get_params(argv)
    print("folder is " + folder)
    groups = purify_groups_names(get_folders_names(folder))
    print("groups are: " + str(groups))

    mpl.rcParams.update({'font.size': 16})
    # Currently disabled: corr_visual_hops, corr_visual_delay, delayVisualize2
    enabled_plots = (
        corr_visual_loss,
        corr_visual_loss_active_peers,
        corr_neigh_impact_evolution,
        corr_neigh_impact_over_tag,
    )
    for draw in enabled_plots:
        draw(folder, groups)

    if not save:
        plt.show()
320

    
321

    
322
if __name__ == "__main__":
    # Script entry point: forward the arguments that follow the program name.
    cli_args = sys.argv[1:]
    corr_visual_main(cli_args)