Statistics
| Branch: | Revision:

peerstreamer-logs-analyzer / py_logs_correlated_visualizer.py @ c39a768e

History | View | Annotate | Download (11.4 KB)

1
#!/usr/bin/python
2
from __future__ import division
3
import getopt,os,sys
4
import numpy as nm
5
from pandas import *
6
from py_logs_analizer import *
7
import matplotlib as mpl
8
mpl.use( "agg" )
9
import matplotlib.pyplot as plt
10
import networkx as nx
11
import pygraphviz  as pgz
12
import numpy as np
13
import collections
14

    
15
sys.path.insert(0,'lib')
16
import process_manager as pmgr
17
from plot_utilities import *
18
from utilities import *
19
from peerstreamer_logs import *
20

    
21
# File extension appended to every figure file name written by save_figure().
ImageExtension=".pdf"
22

    
23
def save_figure(folder,filename):
        """Save the current pyplot figure under the img sub-folder of folder.

        The file is written to <folder>/img/<filename><ImageExtension>; the
        img sub-folder is created first when it does not exist yet.
        """
        destination = folder + '/img/'
        folder_missing = not os.path.exists(destination)
        if folder_missing:
                os.makedirs(destination)
        target = destination + filename + ImageExtension
        plt.savefig(target,bbox_inches='tight')
28

    
29

    
30
def get_params(argv):
        """Parse the command line options and return [folder, save].

        Recognised options:
          -h, --help            exit immediately
          -s, --save            set the save flag (False by default)
          -f, --folder <path>   folder to process (required)

        The process exits with status 2 on an unknown option or when no
        folder is given.
        """
        folder = None
        save = False
        try:
                # "folder=" needs the trailing '=' to accept a value, and "save"
                # has to be declared for --save to be recognised at all.
                opts,args = getopt.getopt(argv,"shf:",["help","save","folder="])
        except getopt.GetoptError:
                sys.exit(2)
        for opt,arg in opts:
                if opt in ("-h","--help"):
                        sys.exit()
                elif opt in ("-s","--save"):
                        save = True
                elif opt in ("-f","--folder"):
                        folder = arg

        if folder is None:
                print("[Error] folder name parameter required.")
                sys.exit(2)
        return [folder, save]
49

    
50
def get_folders_names(folder):
        """Return the names of the entries of folder that are directories."""
        return [entry for entry in os.listdir(folder)
                if os.path.isdir(folder+'/'+entry)]
56

    
57
def purify_groups_names(groups):
        """Drop the first 'img' entry (if any) from groups and sort it.

        The list is modified in place and the same list object is returned.
        """
        try:
                groups.remove('img')
        except ValueError:
                # no 'img' entry: nothing to drop
                pass
        groups.sort()
        return groups
62

    
63
def set_plot_labels(plot,title=None,xlabel=None,ylabel=None):
        """Set title and axis labels through pyplot.

        Each of title, xlabel and ylabel is applied only when it is truthy.
        The plot argument is accepted but not used: the labels are set via
        the pyplot functions of the same names.
        """
        requested = (('title',title),('xlabel',xlabel),('ylabel',ylabel))
        for setter_name,text in requested:
                if text:
                        getattr(plt,setter_name)(text)
70

    
71
def simple_bar_graph(x,y,xlabel=None,ylabel=None,ylims=None,title=None):
        """Draw a grouped bar chart of the values y labelled by the names x.

        A name of the form '<group>-<tag>' is split on '-': the first part
        selects the group (one colour and one legend entry per group), the
        second part the tag (one x position per tag). A name without '-'
        is a tag of the unnamed group ''. Underscores in tags become line
        breaks in the tick labels.

        x      -- list of names, one per bar
        y      -- list of bar heights, paired with x by position
        xlabel, ylabel, title -- optional texts passed to set_plot_labels
        ylims  -- optional y axis limits passed to ax.set_ylim
        """
        fig,ax = plt.subplots()
        # 'o' and 'p' are not colours matplotlib understands; spelled out here.
        available_colors = ['#002EB8','#FFFF99','r','g','orange','purple']
        width = 0.35

        tag2num = {}      # tag -> integer x position, in order of first appearance
        group_order = []  # group keys in order of first appearance
        values = {}       # group key -> list of (tag, value)
        for label,value in zip(x,y):
                parts = label.split('-')
                if len(parts) > 1:
                        groupkey,tag = parts[0],parts[1]
                else:
                        groupkey,tag = '',label
                if tag not in tag2num:
                        tag2num[tag] = len(tag2num)
                if groupkey not in values:
                        values[groupkey] = []
                        group_order.append(groupkey)
                values[groupkey].append((tag,value))

        bars = []
        for i,groupkey in enumerate(group_order):
                tags,heights = zip(*values[groupkey])
                # shift every group by one bar width so the groups sit side by side
                positions = [tag2num[t]+width*i for t in tags]
                # cycle through the palette instead of running off its end
                color = available_colors[i % len(available_colors)]
                bars.append(ax.bar(positions,heights,width,color=color,linewidth=2.5))

        # Ticks are derived from the tag numbering itself (not from the last
        # group drawn), so every tag gets a tick and labels match positions.
        ordered_tags = sorted(tag2num,key=tag2num.get)
        tick_offset = width*max(len(group_order)-1,0)
        ax.set_xticks([tag2num[t]+tick_offset for t in ordered_tags])
        ax.set_xticklabels([t.replace('_','\n') for t in ordered_tags]) #,rotation=90)
        if ylims is not None:
                ax.set_ylim(ylims)
        set_plot_labels(ax,title,xlabel,ylabel)
        ax.yaxis.grid(True, linestyle='-', which='major', color='grey', alpha=0.5)
        if bars:
                ax.legend(bars,group_order,loc='lower right')
119

    
120
# ind = range(0,len(x))
121
#        fig = plt.figure()
122
#        ax = fig.add_subplot(111)
123
#        ax.yaxis.grid(True, linestyle='-', which='major', color='grey', alpha=0.5)
124
#        rects = ax.bar(ind, y,  align='center', color='blue')
125
#        ax.set_xticks(ind)
126
#        set_plot_labels(ax,title,xlabel,ylabel)
127
#        ax.set_xticklabels(x,rotation=90)
128
#        #function to auto-rotate the x axis labels
129
#        #fig.autofmt_xdate()
130

    
131
#def get_groups_dataframe(folder,groups,expfile):
132
#        data = []
133
#        for g in groups:
134
#                filo = folder+'/'+g+'/'+expfile
135
#                if os.path.isfile(filo):
136
#                        data.append(read_csv(filo))
137
#                else
138
#                        raise 'file not exists! ('+filo+')'
139
#        return data
140

    
141
def corr_visual_hops(folder,groups):
        """Plot the mean of the 'hops' column of every group as a bar chart.

        For each group all files in folder/<group> whose name ends with
        '_session_delay.exp' are read and concatenated; a group without
        such files contributes 0. The figure is saved as 'average_hops'.
        """
        avg_hops = []
        for g in groups:
                group_dir = folder+'/'+g
                frames = []
                for name in os.listdir(group_dir):
                        if not name.endswith('_session_delay.exp'):
                                continue
                        frame = read_csv(group_dir+'/'+name)
                        if frame is not None:
                                frames.append(frame)
                if len(frames) > 0:
                        avg_hops.append(concat(frames)['hops'].mean())
                else:
                        avg_hops.append(0)
        simple_bar_graph(groups,avg_hops,ylabel='# hops')#title='Chunks average hops')
        save_figure(folder,'average_hops')
157

    
158
def corr_visual_delay(folder,groups):
        """Plot the mean 'avg_delay' of every group, divided by 1000, as bars.

        The values are read from folder/<group>/packets_delay.exp, the y axis
        is labelled 'ms' and the figure is saved as 'overall_delays'.
        """
        delays = []
        for g in groups:
                table = read_csv(folder+'/'+g+'/packets_delay.exp')
                delays.append(table['avg_delay'].mean()/1000)
        simple_bar_graph(groups,delays,ylabel='ms')#title='Chunks average delays (ms)')
        save_figure(folder,'overall_delays')
168

    
169
def delayVisualize2(folder,groups):
        """Plot, per group, the delay distribution and its cumulative curve.

        For every group all files in folder/<group> whose name ends with
        '_session_delay.exp' are read and merged. The 'delay' column is
        divided by 1000 and drawn on a logarithmic x axis labelled 'ms':
        the percentage of samples per histogram bin on the left y axis
        (dashed line) and the cumulative fraction on the right y axis
        (solid line, used for the legend). The figure is saved as
        'delayVisual2'.
        """
        plt.figure()
        markers = ['.','.']
        colors = ['blue','black']

        data = {}
        for g in groups:
                group_dir = folder+'/'+g
                frames = [read_csv(group_dir+'/'+elm)
                          for elm in os.listdir(group_dir)
                          if elm.endswith("_session_delay.exp")]
                if frames:
                        # DataFrame.append returns a new frame instead of growing
                        # the existing one, so the files are merged with concat.
                        data[g] = concat(frames,ignore_index=True)

        sb = plt.subplot(1,1,1)

        plt.xlabel('ms')
        sb.yaxis.tick_left()
        sb.yaxis.set_label_position("left")
        sb.set_ylim([0,12])
#        sb.set_xlim([0,2000])
        sb.set_xscale('log')
        plt.ylabel('% of samples')
        sbSum=sb.twinx()
        sbSum.yaxis.tick_right()
        sbSum.yaxis.set_label_position("right")
        sbSum.set_ylim([0,1])
#        sbSum.set_xlim([0,2000])
        sbSum.set_xscale('log')
        plt.ylabel('probability')
        sbSum.yaxis.grid(color='gray', linestyle='dashed')
        sbSum.set_axisbelow(True)

        i=0
        with_data = [name for name in groups if name in data]
        for g in reversed(with_data):
                delays = (data[g]['delay']/1000).clip(lower=0) # dirty trick!!
                if len(delays) == 0:
                        # nothing to bin; avoids a 0/0 in the percentages below
                        continue
                counts,edges = np.histogram(delays,bins=500)
                percent = 100.0*counts/counts.sum()

                # wrap around instead of failing with more than two groups
                color = colors[i % len(colors)]
                marker = markers[i % len(markers)]
                # legend label: the part after the first '-', or the whole
                # name when there is no '-'
                name_parts = g.split('-')
                label = name_parts[1] if len(name_parts) > 1 else g

                sb.plot(edges[:-1],percent,linewidth=2,color=color,linestyle='--',marker=marker)
                sbSum.plot(edges[:-1],np.cumsum(percent)/percent.sum(),color=color,linewidth=2,linestyle='-',label=label,marker=marker)
                i+=1

        plt.legend(loc='lower right')
        save_figure(folder,"delayVisual2")
228

    
229
def sort_list(index,elements):
        """Sort elements by the matching entries of index.

        index is a sortable list, elements a generic list of the same
        length. Returns an iterator over two lists, meant to be unpacked as
        `sorted_index, sorted_elements = sort_list(index, elements)`.
        """
        ordered = sorted(zip(index, elements))
        if not ordered:
                # zip(*[]) would yield nothing and break the two-value
                # unpacking at the call site
                return iter(([], []))
        return (list(column) for column in zip(*ordered))
233

    
234
def corr_visual_loss(folder,groups):
        """Plot the chunk receiving ratio of every group as a bar chart.

        For each group folder/<group>/packets_loss.exp is read and the ratio
        is 1 - sum(losts)/sum(chunks). A group with no chunks contributes 0.
        The figure is saved as 'overall_loss'.
        """
        accuracy = []
        for g in groups:
                data = read_csv(folder+'/'+g+'/packets_loss.exp')
                chunks = sum(data['chunks'])
                losts = sum(data['losts'])
                # Exactly one value per group: skipping a group here would pair
                # the remaining values with the wrong group names in the chart.
                if chunks > 0:
                        accuracy.append(1-(float(losts)/chunks))
                else:
                        accuracy.append(0)
#        accuracy, groups = sort_list(accuracy,groups)
        simple_bar_graph(groups,accuracy,ylabel='receiving ratio')#,title='Chunks received ratio')
        save_figure(folder,'overall_loss')
248

    
249
def corr_neigh_fairness(folder,groups):
  """Plot, per group, the final fairness against the base fairness.

  For each group folder/<group>/network_fairness.exp is read when it exists.
  Every session contributes one point: x is 'complete_overlay_fairness' in
  the session's first row, y is 'fairness' in its last row. Sessions of one
  group that share the same x value overwrite each other. The figure is
  saved as 'neighbourhood_fairness'.
  """
  fig = plt.figure()
  res_mat = {}
  for g in groups:
    filename = folder+'/'+g+'/network_fairness.exp'
    res_mat[g] = {}
    if not os.path.exists(filename):
      continue
    data = read_csv(filename)
    for session in set(data['session_id']):
      sdata = data[data['session_id'] == session]
      # .loc is the label-based lookup that replaces the removed .ix indexer
      base_fairness = sdata.loc[sdata.first_valid_index(),'complete_overlay_fairness']
      stable_fairness = sdata.loc[sdata.last_valid_index(),'fairness']
      res_mat[g][base_fairness] = stable_fairness

  for g in sorted(res_mat):
    # order the points by base fairness so the line runs left to right
    points = sorted(res_mat[g].items(), key=lambda point: point[0])
    x = [point[0] for point in points]
    y = [point[1] for point in points]
    plt.plot(x,y,label=g,linestyle="dashed",marker='o')
    #plotOrderedErrorBar(res_mat[g].keys(),res_mat[g].values(),label=g,on_x=True)
  plt.legend(loc='best')
  set_plot_labels(fig,'neighbourhood_fairness','base fairness','fairness')
  save_figure(folder,'neighbourhood_fairness')
277

    
278
def corr_convergence_time(folder,groups):
  """Plot the per-group distribution of session convergence times.

  For each group folder/<group>/network_impact.exp is read when it exists.
  The convergence time of a session is the smallest 'time' at which
  'netimpact' equals the value found in the session's last row. The times
  of all groups, ordered by group name, are handed to plotDictAsBox and the
  figure is saved as 'neighbourhood_convergence'.
  """
  fig = plt.figure()
  times = {}
  for g in groups:
    filename = folder+'/'+g+'/network_impact.exp'
    if not os.path.exists(filename):
      continue
    data = read_csv(filename)
    min_times = []
    for session in set(data['session_id']):
      ses_data = data[(data['session_id'] == session)]
      # .loc is the label-based lookup that replaces the removed .ix indexer
      min_imp = ses_data.loc[ses_data.last_valid_index(),'netimpact']
      reached = ses_data[ses_data['netimpact'] == min_imp]
      min_times.append(reached['time'].min())
    times[g] = min_times
  s = collections.OrderedDict(sorted(times.items()))
  plotDictAsBox(fig,s)
  set_plot_labels(fig,'','','Convergence time (s)')
  save_figure(folder,'neighbourhood_convergence')
299

    
300
def corr_neigh_impact_evolution(folder,groups):
  """Plot the 'netimpact' value over time, one line per group.

  For each group folder/<group>/network_impact.exp is read when it exists;
  the 'info_type' and 'session_id' columns are dropped and the remaining
  values are reduced to their median per 'time'. The figure is saved as
  'neighbourhood_evolution'.
  """
  fig = plt.figure()
  for g in groups:
    filename = folder+'/'+g+'/network_impact.exp'
    if not os.path.exists(filename):
      continue
    # axis is passed by keyword: recent pandas no longer accepts it positionally
    data = read_csv(filename).drop(['info_type','session_id'],axis=1)
    data = data.groupby('time').median() # WARNING : with multiple sessions this value does no longer make sense
    plt.plot(data.index,data['netimpact'],label=g)

  plt.legend(loc='best')
  set_plot_labels(fig,'neighbourhood_evolution','time (s)','network impact')
  save_figure(folder,'neighbourhood_evolution')
312

    
313
def corr_neigh_impact_over_tag(folder,groups):
  """Plot the mean final fairness against the numeric tag of the groups.

  Only group names of the form '<key>-<tag>' are used: all names sharing a
  key form one line, and the digits contained in the tag give the x value.
  The y value is the mean, over the sessions found in
  folder/<group>/network_fairness.exp, of the 'fairness' in each session's
  last row. Groups whose file is missing or whose tag has no digits are
  skipped. The figure is saved as 'corr_fairness' when at least one
  '<key>-<tag>' group exists.
  """
  fig = plt.figure()
  plot_data = {}
  for g in groups:
    parts = g.split('-')
    if len(parts) < 2:
      continue
    groupkey,tag = parts[0],parts[1]
    if groupkey not in plot_data:
      plot_data[groupkey] = {}

    filename = folder+'/'+g+'/network_fairness.exp'
    if not os.path.exists(filename):
      continue
    digits = ''.join([c for c in tag if c.isdigit()])
    if not digits:
      # int('') would raise; a tag without digits has no x position
      continue

    data = read_csv(filename) #.drop(['info_type','session_id'],1)
    values = []
    for sess in set(data['session_id']):
      sdata = data[data['session_id'] == sess]
      # .loc is the label-based lookup that replaces the removed .ix indexer
      values.append(sdata.loc[sdata.last_valid_index(),'fairness'])

    plot_data[groupkey][int(digits)] = np.mean(values)

  if len(plot_data) > 0:
    for g in sorted(plot_data):
      # sort by numeric tag so the line always runs left to right
      s = Series(plot_data[g]).sort_index()
      plt.plot(s.index,s,label=g)

    plt.legend(loc='best')
    set_plot_labels(fig,'','','fairness')
    save_figure(folder,'corr_fairness')
344

    
345
def corr_visual_loss_active_peers(folder,groups):
        """Plot the chunk receiving ratio of every group, active peers only.

        For each group folder/<group>/packets_loss.exp is read and only the
        rows with losts/chunks below 0.5 are kept; the ratio is then
        1 - sum(losts)/sum(chunks). A group left with no chunks contributes 0.
        The figure is saved as 'overall_loss_active_peers'.
        """
        accuracy = []
        for g in groups:
                data = read_csv(folder+'/'+g+'/packets_loss.exp')
                data = data[data['losts']/data['chunks'] < 0.5]
                chunks = sum(data['chunks'])
                losts = sum(data['losts'])
                # Exactly one value per group: skipping a group here would pair
                # the remaining values with the wrong group names in the chart.
                if chunks > 0:
                        accuracy.append(1-(float(losts)/chunks))
                else:
                        accuracy.append(0)
#        accuracy, groups = sort_list(accuracy,groups)
        simple_bar_graph(groups,accuracy,title='Chunks received ratio of active peers')
        save_figure(folder,'overall_loss_active_peers')
360

    
361
def corr_visual_main(argv):
        """Entry point: parse argv, list the group folders and draw the plots.

        Only the plots listed in enabled_plots are produced. The figures are
        shown with plt.show() unless the save option was given.
        """
        folder,save = get_params(argv)
        print("folder is " + folder)
        groups = purify_groups_names(get_folders_names(folder))
        print("groups are: "+str(groups))

        mpl.rcParams.update({'font.size': 16})
        # Currently disabled: corr_visual_loss, corr_visual_loss_active_peers,
        # corr_visual_hops, corr_visual_delay, delayVisualize2
        enabled_plots = (
                corr_neigh_impact_evolution,
                corr_neigh_impact_over_tag,
                corr_convergence_time,
                corr_neigh_fairness,
        )
        for draw in enabled_plots:
                draw(folder,groups)

        if save:
                return
        plt.show()
381

    
382

    
383
if __name__ == "__main__":
        # Script entry point: forward the command line arguments, without the
        # program name, to corr_visual_main.
        corr_visual_main(sys.argv[1:])