nepatest_popbabel / fasttimerouter.py @ 60ba786f
History | View | Annotate | Download (7.86 KB)
1 |
import glob |
---|---|
2 |
import json |
3 |
from pprint import pprint |
4 |
import sys |
5 |
import os |
6 |
import numpy as np |
7 |
import matplotlib |
8 |
matplotlib.use('Agg')
|
9 |
from matplotlib import pyplot as plt |
10 |
from pprint import pprint |
11 |
import code |
12 |
sys.path.append('mylib')
|
13 |
from mytime import MyTime |
14 |
from RoutingException import Retracted, DestNotFound |
15 |
|
16 |
|
17 |
#NOTE: route keys look like "a.b.c.d/32". The previous implementation matched
#with d.startswith(dest), which wrongly let e.g. "10.0.0.10/32" answer a
#lookup for "10.0.0.1" (string prefix, not address match). We now compare the
#address part exactly, which is the correct behaviour while ALL routes are
#/32 host routes, as the original comment stated.
def longestPrefixMatchNextHop(rt,dest):
    """Return the next hop for *dest* out of routing table *rt*.

    rt   -- dict mapping "addr/prefixlen" -> (next_hop, cost)
    dest -- bare destination IP address (no prefix length)

    Raises Retracted when the matching route is poisoned (cost >= 255,
    i.e. a blackholed/retracted route) and DestNotFound when no route
    matches the destination at all.
    """
    for route, (nh, cost) in rt.items():
        #exact match on the address part; handles keys with or without
        #an explicit "/32" suffix
        if route.split('/', 1)[0] == dest:
            if cost < 255:
                return nh
            #route exists but was retracted -> traffic is blackholed
            raise Retracted("DEST "+str(dest)+" RETRACTED, blackhole!")
    raise DestNotFound("DEST 404: "+str(dest))
28 |
|
29 |
#time2N2RT: <second, N2RT> -- routing tables of every node, indexed by dump time
time2N2RT={}
#N2RT: <node_id, RT> -- one routing table per node for a given second
#N2RT = {}
#RT: <dest, next_hop> -- NOTE(review): values stored below are (next_hop, cost) tuples
#ip2node: <ip, node_id> -- reverse map from an interface IP to its owning node
|
35 |
|
36 |
def navigateRoutingTable(node2rt,ip2node,node2ips,sec,failedNodesId):
    """Simulate forwarding between every pair of active nodes at time *sec*.

    node2rt       -- <node_id, RT>: routing table of each node for this second
    ip2node       -- <ip, node_id>: owner node of every interface IP
    node2ips      -- <node_id, [ip,...]>: interface IPs announced by each node
    sec           -- time key, used only in diagnostic prints
    failedNodesId -- id of the dead node; its IPs/routes are skipped

    Returns the tuple (black_holes, loops) counting the broken routes found.
    """
    ips=ip2node.keys()
    nodes=node2rt.keys()

    #remove ips of failed_nodes, given we want to check how many routes among ips of active nodes pass trough the dead node
    remIps=node2ips[failedNodesId]
    ips = list(set(ips) - set(remIps))

    black_holes=0
    loops=0
    routes=[]  # NOTE(review): never read after this point

    for source_node in nodes:
        if source_node==failedNodesId: continue
        source_ips=node2ips[source_node]
        s=source_ips[0] #we choose first IP of src as representative for src_ips
        for d in ips:
            if (d in source_ips): continue
            #print "\t\tlooking for route from "+str(s)+"..->.."+str(d)
            #r=route->list of IPs crossed; nr=the same route as node ids
            r = []
            nr = []
            r.append(s)
            current_node=ip2node[s]
            nr.append(current_node)
            target_node=ip2node[d]

            #hop-by-hop walk until we reach the target or hit a problem
            while(current_node!=target_node):
                try:
                    nh=longestPrefixMatchNextHop(node2rt[current_node],d)
                except DestNotFound as dnf:
                    #current_node does not know the destination...
                    #print dnf, "flushed? disconnected ntw?"
                    break
                except KeyError as e:
                    #print e, "is dead"
                    #current node does not own a RT, is he dead?
                    if(current_node==failedNodesId):
                        #route traverses the dead node: every src IP is blackholed
                        black_holes+=len(source_ips)
                        break
                    else:
                        print e,sec, "Bad logging frequency!"
                        break
                except Retracted as rte:
                    #print rte
                    #route is poisoned (cost >= 255): counts as a blackhole
                    black_holes+=len(source_ips)
                    break
                if (nh not in r):
                    r.append(nh)
                    current_node=ip2node[nh]
                    nr.append(current_node)
                else:
                    #next hop was already visited: forwarding loop detected
                    loops+=len(source_ips)
                    #print "s,d,nh",s,d,nh
                    #print r
                    break
    print "\t#loops: "+str(loops)+", blackholes: "+str(black_holes)
    return (black_holes,loops,)
|
94 |
|
95 |
|
96 |
#the folder where to look for dumps and logs
folder=sys.argv[1]
granulartiy=None  # NOTE(review): misspelling of "granularity", kept for consistency with uses below
if len(sys.argv) > 2:
    granulartiy=sys.argv[2] #0.5=every half second, 1=every second
    print "Check will be done loading only 1 RT for each second"
print folder

ip2node={}   #<ip, node_id>
node2ips={}  #<node_id, [ip,...]>

print "###########\n\n\n\n"
#Rebuild the RTs indexed by time
#we also need to record the time at which the failed node stopped
minDumpsLength=sys.maxint
failedNodesId=""
fail_time=MyTime()
stopTimes=[]
stopTimes2n={}
115 |
|
116 |
|
117 |
#Load every per-node topology dump and rebuild time2N2RT / ip2node / node2ips
for fname in glob.glob(folder+'topo*'):
    print "Loading json from " + fname
    f=open(fname, "r")
    jtimedroutes=json.load(f)
    del(jtimedroutes[0]) #the first dummy entry always put in Babel dumps

    #identifying failed node as the one with minimum number of dumps
    '''if (len(jtimedroutes)<minDumpsLength):
        minDumpsLength=len(jtimedroutes)
        lastDump=jtimedroutes[-1]
        failedNodesId=lastDump['router_id']
        fail_time=MyTime(lastDump['topology_id'])
        print "\tFAILURE NEWS: "+failedNodesId+" stopped working at "+fail_time.toString()'''

    #put stopTime in a vector to see which is the first correct node that stopped logging
    lastDump=jtimedroutes[-1]
    router_id=lastDump['router_id']
    stopTime=MyTime(lastDump['topology_id'])
    stopTimes.append(stopTime)
    stopTimes2n[stopTime.asHMSUVector()]=router_id

    #pprint(time2N2RT)
    #Retrieving RTs dumped by this node
    for jtimedRT in jtimedroutes:
        router_id=jtimedRT['router_id']
        stime=jtimedRT['topology_id']
        routes=jtimedRT['routes']
        #print "\tAt time: "+stime+" we have an RT for node: "+router_id
        time=MyTime(stime)  # NOTE(review): shadows the stdlib name "time"
        #with granularity set, collapse dumps to one key per second
        if granulartiy:
            tk=time.asHMSVector()
        else:
            tk=time.asHMSUVector()
        #tk=tk[0]*60*60+tk[1]*60+tk[2]

        if (not time2N2RT or tk not in time2N2RT):
            time2N2RT[tk]={}
            #print "New second",router_id,tk
        if (not time2N2RT[tk] or router_id not in time2N2RT[tk]):
            time2N2RT[tk][router_id]={}
            #print "\tNew router for ",router_id,tk

        thisnodeIPs=[]
        for r in routes:
            #RT=time2N2RT[tk][router_id]
            dest=r['destination']
            nexthop=r['next']
            cost=float(r['cost'])
            if(cost==0):
                #cost 0 -> local route: this destination IP belongs to the router itself
                nexthop=dest.split('/')[0]
                thisnodeIPs.append(nexthop)
                ip2node[nexthop]=router_id

            time2N2RT[tk][router_id][dest]=(nexthop,cost)

        node2ips[router_id]=thisnodeIPs
        #N2RT[router_id]=RT
    del(jtimedroutes)
    f.close()
176 |
|
177 |
|
178 |
#the node whose log ends first is assumed to be the failed one;
#the second-earliest stop marks when healthy nodes stopped logging
stopTimes=sorted(stopTimes)
fail_time=stopTimes[0]
failedNodesId=stopTimes2n[fail_time.asHMSUVector()]
stopLogTime=stopTimes[1] #[0] -> fail node stop; [1] -> first healthy node that stopped

print "According to data read from json it seems that"
print "Failure of " + failedNodesId + " at: "+fail_time.toString()
print "Stop log at: "+stopLogTime.toString()

#code.interact(local=dict(globals(), **locals()))
|
188 |
|
189 |
print "#################\n\n\n\n"
#sorted list of the time keys we collected dumps for
srtsec=sorted(time2N2RT.keys())
#print "Times"
#pprint(srtsec)
#print "\n\n"
'''for sec in srtsec:
    print "Al secondo: "+ MyTime.vecToString(sec)
    for node in time2N2RT[sec]:
        print "\t\tRT("+str(node)+")={"
        for dest in time2N2RT[sec][node]:
            print "\t\t\t\t"+str(dest)+" via "+str(time2N2RT[sec][node][dest])
        print "\t\t}"
    print "\n"'''

print "#################\n\n\n\n"
#Navigation and damage counting:
results={} #<time, result tuple>: damage per second
|
206 |
|
207 |
def checkIfItsEnough(results,sec,fail_time):
    """Return True once the last 12 recorded periods show no damage.

    A period is "quiet" when its result tuple equals (0, 0): no
    blackholes and no loops.  Strictly more than 12 entries must exist
    in results before this can ever report True.  The sec and
    fail_time parameters are accepted for interface compatibility but
    are not consulted.
    """
    #not enough history yet to judge the network as reorganized
    if len(results) <= 12:
        return False
    recent = sorted(results)[-12:]
    return all(results[key] == (0, 0) for key in recent)
216 |
|
217 |
|
218 |
print "Looking for problems in network for each period"
done=0.0
over=len(srtsec)
for sec in srtsec:
    done+=1.0
    print "\nProblems at time: "+MyTime.vecToString(sec)+"? Progress: %.2f" % (done / over * 100.0)
    #we must take the failed node and fail_time into account:
    #logs recorded before fail_time are not analyzed

    if (fail_time.vecCompare(sec)>0):
        print "\t"+MyTime.vecToString(sec) + " prior than fail_time: "+fail_time.toString()
        #to save memory
        del(time2N2RT[sec])
        continue

    if (stopLogTime.vecCompare(sec)<0):
        print "\t"+MyTime.vecToString(sec) + " after than stopLogTime: "+stopLogTime.toString()
        #to save memory
        del(time2N2RT[sec])
        break

    #"else" failTime < sec < StopLogTime ==> it is worth to navigate RTs
    #handle holes in the dumps: recover the last valid RT when one is missing
    secNode2rt=time2N2RT[sec]
    missingKeys=set(node2ips.keys())-set(secNode2rt.keys())
    missingKeys.discard(failedNodesId)
    if (len(missingKeys)>0):
        #code.interact(local=dict(globals(), **locals()))
        try:
            secBef=MyTime.nextHMSUVOne(sec)
        except:  # NOTE(review): bare except hides real errors; narrow it if possible
            continue
        for m in missingKeys:
            if (m in time2N2RT[secBef]):
                print "Dumping hole closed...for node", m
                secNode2rt[m]=time2N2RT[secBef][m]
            else:
                print "Dumping hole hard to close...skip analysis XD"
    res=navigateRoutingTable(secNode2rt,ip2node,node2ips,sec,failedNodesId)
    results[sec]=res
    #to save memory
    del(time2N2RT[sec])
    #stop early once the network shows 12 consecutive clean periods
    enough=checkIfItsEnough(results,sec,fail_time)
    if (enough):
        print "OK, network seems already reorganized, no need to analyze more logs"
        break
|
264 |
|
265 |
|
266 |
print "#######\n\n\n"

#Visualize data in a pretty table
from prettytable import PrettyTable
t = PrettyTable(['Time', '#loops','#blackholes'])

#save data in csv file (one row per analyzed second)
prefix=folder.replace("/","-")
f = open(folder+prefix+"results.csv", "w")
f.write("Time,loops,blackholes,total\n")
for sec in sorted(results.keys()):
    #print "At sec: "+MyTime.vecToString(sec)+" #loops: "+str(results[sec][1]) + " #blackholes: "+str(results[sec][0])
    #results[sec] is (black_holes, loops): column order below swaps them on purpose
    t.add_row([ MyTime.vecToString(sec), str(results[sec][1]), str(results[sec][0])])
    f.write(MyTime.vecToString(sec) +","+ str(results[sec][1])+","+ str(results[sec][0])+","+str(sum(results[sec]))+"\n")
f.close()
print t
|