Statistics
| Branch: | Revision:

iof-tools / log_parser.py @ ea1321e9

History | View | Annotate | Download (16 KB)

1
#!/usr/bin/env python3
2
import argparse
3
import datetime
4
import re
5
from collections import defaultdict, Counter
6
from os import walk
7
import numpy as np
8
import pprint
9

    
10

    
11
# Log-line categories this parser cares about; every other category is skipped.
log_types = ['FATAL', 'RECONF']
12

    
13

    
14
class WrongLogLine(Exception):
    """Raised when a log line does not match the expected format."""
    pass
16

    
17

    
18
class WrongFileName(Exception):
    """Raised when a log file name does not match the expected pattern."""
    pass
20

    
21

    
22
args = None
23
resolution = 1
24

    
25
def print_in_columns(data, width=15, separator=','):
    """Print the items of *data* on one line, each left-justified to
    *width* characters and joined by *separator*."""
    cells = []
    for item in data:
        cells.append(str(item).ljust(width))
    print(separator.join(cells))
27

    
28

    
29
def to_unixtime(t):
    """Convert a naive datetime to unix time, scaled by the module-level
    ``resolution`` (ticks per second).

    Note: this does not work if the timestamps are not all from the same
    time zone.
    """
    epoch = datetime.datetime(1970, 1, 1)
    seconds_since_epoch = (t - epoch).total_seconds()
    return int(seconds_since_epoch * resolution)
32

    
33

    
34
def parse_args():
    """Parse the command line into the module-level ``args`` and set the
    timestamp ``resolution`` multiplier accordingly.

    Exactly one of -f (explicit log files) or -ff (folder of run folders)
    must be provided; otherwise ``parser.error`` exits the program.
    """
    global args
    global resolution
    parser = argparse.ArgumentParser()
    parser.add_argument('-f', help='the log file', required=False, nargs='*')
    parser.add_argument('-ff', help='Folder with log Folders', required=False)
    parser.add_argument('-v', help='be more verbose', default=False,
                        action='store_true')
    parser.add_argument('-c', help='Compute convergence delay', default=False,
                        action='store_true')
    parser.add_argument('-t', help='compute the number of updates generated',
                        default=False, action='store_true')
    # BUG FIX: the default used to be 'SEC', which is not one of the declared
    # choices; 'SECS' is the intended one-tick-per-second default.
    parser.add_argument('-T', help='time resolution (s/decimal/cent/milli)',
                        default='SECS',
                        choices=['SECS', 'DSEC', 'CSEC', 'MSEC'])
    parser.add_argument('-d', required=False, default=0, action='store',
                        help="Use this option to see a negative delta")
    args = parser.parse_args()

    # Exactly one of -f / -ff is accepted.
    if not (args.f or args.ff):
        parser.error('No folders provided, you have to choose one type of folder to pass')
    if args.f and args.ff:
        parser.error('Too many folders provided, you have to choose only ONE type of folder to pass')

    # Map the chosen unit to ticks per second.  BUG FIX: explicitly reset to 1
    # on the SECS path so repeated calls cannot leave a stale resolution.
    if args.T == 'DSEC':
        resolution = 10
    elif args.T == 'CSEC':
        resolution = 100
    elif args.T == 'MSEC':
        resolution = 1000
    else:
        resolution = 1
63

    
64

    
65
def parse_line(line, verb):
    """Parse one log line into ``(unix_time, fields_dict, is_reconf)``.

    Expected shape: ``<date> <time> [TYPE] ... {key: value, key: value}``.
    Lines whose timestamp, bracketed type, or braced body cannot be parsed
    raise WrongLogLine (printing the line first when *verb* is true).
    ``is_reconf`` is True when the body's ``type`` field equals 'RECONF'.
    """
    split_line = line.split()
    try:
        date_time = split_line[0] + " " + split_line[1]
        log_time = to_unixtime(datetime.datetime.strptime(date_time, '%Y-%m-%d %H:%M:%S.%f'))
    except (ValueError, IndexError):
        if verb:
            print("Ignoring this log line:", line)
        raise WrongLogLine
    # The log type is wrapped in brackets, e.g. '[FATAL]'.
    # FIX: the original wrapped this slice in a pointless ''.join(...) —
    # the slice of a string is already a string.
    log_type = split_line[2][1:-1]
    if log_type not in log_types:
        if verb:
            print("Ignoring this log line:", line)
        raise WrongLogLine
    try:
        # The structured payload lives between the first '{' and '}'.
        log_body = line.split('{')[1].split('}')[0]
    except IndexError:
        if verb:
            print("Ignoring this log line:", line)
        raise WrongLogLine
    log_dict = {}
    for field in log_body.split(','):
        # NOTE: a field with zero or multiple ':' still raises ValueError,
        # exactly as the original unpacking did.
        k, v = field.split(':')
        log_dict[k.strip()] = v.strip()
    return log_time, log_dict, log_dict['type'] == 'RECONF'
92

    
93

    
94
def parse_file(fname):
    """Parse one per-AS log file.

    The file name must end in '_X.log' where X is a 0-based AS index; the
    returned AS number is X + 1.  Returns
    ``(AS_number, data, last_message_before_reconf, reconf_time)`` where
    *data* is a list of ``[unix_time, fields_dict]`` pairs in file order and
    the last two items are None when the file contains no RECONF event.
    """
    try:
        AS_number = int(fname.split('_')[-1][:-4]) + 1
    except ValueError:
        print('Invalid file name:', fname)
        print('I expect a file name of the form log_h_X.log '
              'where X is an AS number')
        exit()
    data = []
    reconf_time = None
    last_message_before_reconf = None
    # BUG FIX: last_one was previously unbound until the first valid line was
    # processed, so a file whose first parseable line is the RECONF event
    # crashed with UnboundLocalError.
    last_one = None
    with open(fname, 'r') as f:
        for line in f:
            try:
                t, d, reconf = parse_line(line, args.v)
            except WrongLogLine:
                continue
            if reconf:
                reconf_time = t
                # Remember the timestamp of the message right before RECONF.
                last_message_before_reconf = last_one
            last_one = t
            data.append([t, d])
    return AS_number, data, last_message_before_reconf, reconf_time
117

    
118

    
119
def compute_convergence_time(data):
    """Return ``(convergence_time, first_log_time)`` for one AS.

    *convergence_time* is the timestamp of the last change of the best path
    (None if the AS never settled on a best path, or lost it).  *first_log*
    is the timestamp of the first entry.  Returns ``(None, None)`` when the
    AS did not converge.
    """
    convergence_time = None
    best_path = ''
    first_log = None
    for (t, d) in data:
        # BUG FIX: use 'is None' — a legitimate timestamp of 0 is falsy and
        # was previously mistaken for "not yet set".
        if first_log is None:
            first_log = t
        processing = d.get('processing')
        if processing == 'NEW_BEST_PATH':
            convergence_time = t
            best_path = d['actual_best_path']
        elif processing in ('REMOVED_REPLACED_PATH', 'NEW_PATH'):
            if d['actual_best_path'] == 'NONE':
                # The AS lost all paths: it is no longer converged.
                convergence_time = None
                best_path = ''
            elif d['actual_best_path'] != best_path:
                best_path = d['actual_best_path']
                convergence_time = t
        if args.v and processing is not None:
            print(t, d)
    # BUG FIX: compare against None instead of truthiness, so convergence at
    # timestamp 0 is reported instead of being dropped.
    if convergence_time is not None:
        return convergence_time, first_log
    return None, None
142

    
143

    
144
def compute_updates_number(data):
    """Count UPDATE_TX messages per timestamp.

    Returns ``(updates, min_secs, max_secs)`` where *updates* maps each
    timestamp to the number of UPDATE_TX entries at that time, and
    *min_secs* / *max_secs* are the first and last timestamps in *data*.
    """
    first_ts = data[0][0]
    last_ts = data[-1][0]
    tx_per_tick = Counter(t for (t, entry) in data
                          if entry['type'] == 'UPDATE_TX')
    return tx_per_tick, first_ts, last_ts
152

    
153

    
154
def main():
    """Entry point: parse CLI options, aggregate per-AS log files and print
    convergence-time (-c) and/or update-count (-t) tables.

    Two modes: -f gives a flat list of log files (one run); -ff gives a
    folder of run folders, in which case results are averaged over runs.
    """
    parse_args()
    # Aggregated per-AS results: AS_number -> {'convergence_time',
    # 'first_log', 'updates', 'min_secs', 'max_secs'}.
    AS_data = defaultdict(dict)
    reconf_time = None
    last_reconf = None
    last_message_before_reconf = None
    reconf_ASes = list()  # ASes whose log contains the RECONF event
    max_key = 0  # largest reconfiguration-relative update timestamp seen
    key_counter = Counter()  # runs contributing to each (AS, time) cell
    AS_data_all = defaultdict(dict)  # -ff mode: run folder -> per-AS results
    if args.f and not args.ff:
        # --- Single-run mode: one log file per AS ---
        for fname in args.f:
            AS_number, data, last_message_before_tmp, reconf_temp = parse_file(fname)
            if reconf_temp:
                # Only one file should contain the RECONF event; the last
                # one seen wins.
                reconf_time = reconf_temp
                last_message_before_reconf = last_message_before_tmp
                reconf_ASes.append(AS_number)
            if args.c:
                convergence_time, first_log = compute_convergence_time(data)
                AS_data[AS_number]['convergence_time'] = convergence_time
                AS_data[AS_number]['first_log'] = first_log
            if args.t:
                updates, min_secs, max_secs = compute_updates_number(data)
                AS_data[AS_number]['updates'] = updates
                AS_data[AS_number]['min_secs'] = min_secs
                AS_data[AS_number]['max_secs'] = max_secs

        # Rebase convergence times on the reconfiguration instant (or on the
        # AS's own first log line when no RECONF was found).
        # NOTE(review): this loop assumes -c was given — it reads
        # 'convergence_time' unconditionally and KeyErrors otherwise.
        for AS_number in AS_data:
            if AS_data[AS_number]['convergence_time']:
                if reconf_time:
                    AS_data[AS_number]['convergence_time'] -= reconf_time
                else:
                    AS_data[AS_number]['convergence_time'] -= AS_data[AS_number]['first_log']
            else:
                if AS_number in reconf_ASes:
                    # The reconfigured AS itself converges at t=0.
                    AS_data[AS_number]['convergence_time'] = 0
                else:
                    # Sentinel for "never converged".
                    AS_data[AS_number]['convergence_time'] = 1000000

            # Rebase the update counters on the reconfiguration instant too.
            # NOTE(review): assumes reconf_time was found — 'key - reconf_time'
            # is a TypeError when reconf_time is still None.
            if 'updates' in AS_data[AS_number]:
                new_counter = Counter()
                for key in AS_data[AS_number]['updates']:
                    new_key = key - reconf_time
                    max_key = max(max_key, new_key)
                    key_counter[str(AS_number) + str(new_key)] += 1
                    value = AS_data[AS_number]['updates'][key]
                    new_counter[new_key] = value
                AS_data[AS_number]['updates'] = new_counter
    else:
        # --- Multi-run mode: args.ff is a folder of run folders ---
        dirNames = list()
        # Only the first level of walk() is wanted: the run folder names.
        for (dir_path, dir_names, filenames) in walk(args.ff):
            dirNames.extend(dir_names)
            break
        # NOTE(review): 'dir' shadows the builtin of the same name.
        for dir in dirNames:
            fileList = list()
            for (dir_path, dir_names, filenames) in walk(args.ff + "/" + dir):
                fileList.extend(filenames)
                break
            if dir not in AS_data_all:
                AS_data_all[dir] = defaultdict(dict)
            # Same per-file processing as single-run mode, keyed by run folder.
            for fname in fileList:
                AS_number, data, last_message_before_tmp, reconf_temp = parse_file(args.ff + "/" + dir + "/" + fname)
                if reconf_temp:
                    reconf_time = reconf_temp
                    last_message_before_reconf = last_message_before_tmp
                    reconf_ASes.append(AS_number)
                if args.c:
                    convergence_time, first_log = compute_convergence_time(data)
                    AS_data_all[dir][AS_number]['convergence_time'] = convergence_time
                    AS_data_all[dir][AS_number]['first_log'] = first_log
                if args.t:
                    updates, min_secs, max_secs = compute_updates_number(data)
                    AS_data_all[dir][AS_number]['updates'] = updates
                    AS_data_all[dir][AS_number]['min_secs'] = min_secs
                    AS_data_all[dir][AS_number]['max_secs'] = max_secs
            # Rebase this run's results and accumulate them into AS_data
            # (summed here, averaged over runs after the loop).
            # NOTE(review): like the single-run branch, this assumes -c was
            # requested; without it the 'convergence_time' read KeyErrors.
            for AS_number in AS_data_all[dir]:
                if AS_data_all[dir][AS_number]['convergence_time']:
                    if reconf_time:
                        AS_data_all[dir][AS_number]['convergence_time'] -= reconf_time
                    else:
                        AS_data_all[dir][AS_number]['convergence_time'] -= AS_data_all[dir][AS_number]['first_log']
                else:
                    if AS_number in reconf_ASes:
                        AS_data_all[dir][AS_number]['convergence_time'] = 0
                    else:
                        AS_data_all[dir][AS_number]['convergence_time'] = 1000000

                if 'convergence_time' not in AS_data[AS_number]:
                    AS_data[AS_number]['convergence_time'] = AS_data_all[dir][AS_number]['convergence_time']
                else:
                    AS_data[AS_number]['convergence_time'] += AS_data_all[dir][AS_number]['convergence_time']

                if 'updates' in AS_data_all[dir][AS_number]:
                    new_counter = Counter()
                    for key in AS_data_all[dir][AS_number]['updates']:
                        new_key = key - reconf_time
                        max_key = max(max_key, new_key)
                        key_counter[str(AS_number) + str(new_key)] += 1
                        value = AS_data_all[dir][AS_number]['updates'][key]
                        new_counter[new_key] = value
                    AS_data_all[dir][AS_number]['updates'] = new_counter

                if 'updates' not in AS_data[AS_number]:
                    AS_data[AS_number]['updates'] = AS_data_all[dir][AS_number]['updates']
                else:
                    AS_data[AS_number]['updates'] += AS_data_all[dir][AS_number]['updates']

        # Average the per-run sums over the number of runs.
        for AS_number in AS_data:
            AS_data[AS_number]['convergence_time'] /= len(dirNames)
            for key in AS_data[AS_number]['updates']:
                AS_data[AS_number]['updates'][key] /= key_counter[str(AS_number) + str(key)]

    # Gap between the RECONF event and the message right before it.
    # NOTE(review): raises TypeError when no RECONF line was found anywhere
    # (both operands are still None).
    delta = reconf_time - last_message_before_reconf

    if args.c:
        # --- Table 1: per-AS convergence time ---
        print_in_columns(['AS', 'convergence_time'])
        for AS_number, c_data in sorted(AS_data.items()):
            print_line = [AS_number, c_data['convergence_time']]
            print_in_columns(print_line)
        print('\n\n')

        # --- Table 2: converged/non-converged ASes per time tick ---
        print_in_columns(['time', 'converged_ASes', 'non_converged_ASes', 'total_nodes'])
        # Optionally prepend -d negative-time rows (all ASes converged).
        if int(args.d) > 0:
            if delta > int(args.d):
                i = int(args.d)
                while i > 0:
                    print_in_columns(['-' + str(i), str(len(AS_data)), '0', str(len(AS_data))])
                    i -= 1
        convergence_time = []
        never_converged_ASes = 0
        non_reconfigured_ASes = 0
        last_reconf = 0
        for AS_number, c_data in sorted(AS_data.items()):
            if 'convergence_time' in c_data:
                conv_time = c_data['convergence_time']

                if conv_time >= 0:
                    convergence_time.append((AS_number, conv_time))
                    # Track the latest convergence seen.
                    if c_data['convergence_time'] > last_reconf:
                        last_reconf = c_data['convergence_time']
                else:
                    # Negative time: converged before the reconfiguration.
                    non_reconfigured_ASes += 1
            else:
                if AS_number not in reconf_ASes:
                    never_converged_ASes += 1
                else:
                    convergence_time.append((AS_number, 0))
        tot_nodes = len(AS_data)
        max_time = max([x[1] for x in convergence_time]) if max_key == 0 else max_key
        for i in range(max_time + 1):
            conv_ASes = 0
            for (AS, t) in convergence_time:
                if i >= t:
                    conv_ASes += 1
            print_in_columns([i, conv_ASes + non_reconfigured_ASes,
                              tot_nodes - conv_ASes - non_reconfigured_ASes,
                              tot_nodes])
        print('\n\n')

    # From here on, timestamps are already reconfiguration-relative, so the
    # origin is forced to 0.
    # NOTE(review): this makes the 'reconf_time is not None' check below
    # always true, so the min_secs fallback branch is dead code.
    reconf_time = 0

    if args.t:
        # here seconds are in unix time
        if reconf_time is not None:
            reconf_secs = reconf_time
        else:
            reconf_secs = min([AS_data[x]['min_secs'] for x in AS_data])

        # NOTE(review): last_reconf is only recomputed when -c also ran;
        # with -t alone it is still None and the max_secs fallback is used.
        if last_reconf is not None:
            end_secs = max_key if max_key != 0 else last_reconf
        else:
            end_secs = max([AS_data[x]['max_secs'] for x in AS_data])

        # Per-tick update totals, averaged over runs in -ff mode.
        integral_on_time = dict()
        if args.ff and not args.f:
            integral_list = list()
            for dir in AS_data_all:
                integral_on_time_dir = dict()
                for i in range(reconf_time, end_secs + 1):
                    if i not in integral_on_time:
                        integral_on_time_dir[i] = 0
                    for as_number in AS_data_all[dir]:
                        integral_on_time_dir[i] += AS_data_all[dir][as_number]['updates'][i]
                print(dir, integral_on_time_dir)
                # Turn the per-tick counts into a running (cumulative) sum.
                for i in range(reconf_time + 1, end_secs + 1):
                    integral_on_time_dir[i] += integral_on_time_dir[i - 1]
                integral_list.append(integral_on_time_dir)
            for integral in integral_list:
                print(integral)
            # Average the cumulative sums across runs.
            for i in range(reconf_time, end_secs + 1):
                for integral in integral_list:
                    if i not in integral_on_time:
                        integral_on_time[i] = 0
                    integral_on_time[i] += integral[i]
                integral_on_time[i] /= len(integral_list)

        # --- Table 3: per-AS updates per tick ---
        print_in_columns(['time'] + ['sum'] + sorted(AS_data.keys()), width=4)
        if int(args.d) > 0:
            if delta > int(args.d):
                i = int(args.d)
                while i > 0:
                    print_in_columns(['-' + str(i)] + ['0'] + ['0' for x in AS_data.keys()], width=4)
                    i -= 1
        # just a check that we are not leving any number behind
        control_total = 0
        for i in range(reconf_time, end_secs+1):
            print_list = []
            tot_udp = 0
            for (AS_number, c_data) in sorted(AS_data.items()):
                upd = c_data['updates'].get(i, 0)
                tot_udp += upd
                print_list.append(str(upd))
            print_list.insert(0, tot_udp)
            print_list.insert(0, str(i-reconf_secs))
            print_in_columns(print_list, width=4)
            control_total += tot_udp
        tot_updates = 0
        for (AS_number, c_data) in sorted(AS_data.items()):
            for k,v in c_data['updates'].items():
                if k >= reconf_time and k < end_secs + 1:
                    tot_updates += v
        if (tot_updates != control_total):
            print("Error in counting updates")
        print('\n\n')

        # --- Table 4: cumulative update totals per tick ---
        print_in_columns(['tim','sum'])
        #for i in range(reconf_time-int(args.d), end_secs+1):
        #   print_in_columns([str(i), str(tot_updates)])
        if int(args.d) > 0:
            if delta > int(args.d):
                i = int(args.d)
                while i > 0:
                    print_in_columns(['-' + str(i)] + ['0'], width=4)
                    i -= 1
        total_upd = 0
        if args.f and not args.ff:
            for i in range(reconf_time, end_secs+1):
                for (AS_number, c_data) in sorted(AS_data.items()):
                    upd = c_data['updates'][i]
                    total_upd += upd
                print_in_columns([str(i), str(total_upd)], width=4)
        else:
            for i in range(reconf_time, end_secs+1):
                """counter = 0
                delta = 0
                for (AS_number, c_data) in sorted(AS_data.items()):
                    upd = 0
                    print(AS_number, c_data['updates'])
                    if i in c_data['updates']:
                        counter += 1
                        upd = c_data['updates'][i]
                        delta += upd
                    if upd > 0:
                        total_upd += upd/counter"""
                print_in_columns([str(i), str(integral_on_time[i])], width=4)
409
# Guard the entry point so importing this module does not run the parser.
if __name__ == '__main__':
    main()