iof-bird-daemon / proto / bgp / packets.c @ 1389f369
History | View | Annotate | Download (22.8 KB)
1 |
/*
|
---|---|
2 |
* BIRD -- BGP Packet Processing
|
3 |
*
|
4 |
* (c) 2000 Martin Mares <mj@ucw.cz>
|
5 |
*
|
6 |
* Can be freely distributed and used under the terms of the GNU GPL.
|
7 |
*/
|
8 |
|
9 |
#undef LOCAL_DEBUG
|
10 |
|
11 |
#include "nest/bird.h" |
12 |
#include "nest/iface.h" |
13 |
#include "nest/protocol.h" |
14 |
#include "nest/route.h" |
15 |
#include "nest/attrs.h" |
16 |
#include "conf/conf.h" |
17 |
#include "lib/unaligned.h" |
18 |
#include "lib/socket.h" |
19 |
|
20 |
#include "bgp.h" |
21 |
|
22 |
/*
 * Write the body of a NOTIFICATION message to @buf: error code, error
 * subcode and the notify_size bytes of notify_data stored in @conn.
 * Returns a pointer just past the last byte written.
 */
static byte *
bgp_create_notification(struct bgp_conn *conn, byte *buf)
{
  /* Not referenced directly below; presumably needed by BGP_TRACE -- confirm */
  struct bgp_proto *p = conn->bgp;
  byte *data = buf + 2;

  BGP_TRACE(D_PACKETS, "Sending NOTIFICATION(code=%d.%d)", conn->notify_code, conn->notify_subcode);
  buf[0] = conn->notify_code;
  buf[1] = conn->notify_subcode;
  memcpy(data, conn->notify_data, conn->notify_size);
  return data + conn->notify_size;
}
33 |
|
34 |
#ifdef IPV6
|
35 |
static byte *
|
36 |
bgp_put_cap_ipv6(struct bgp_conn *conn UNUSED, byte *buf)
|
37 |
{ |
38 |
*buf++ = 1; /* Capability 1: Multiprotocol extensions */ |
39 |
*buf++ = 4; /* Capability data length */ |
40 |
*buf++ = 0; /* We support AF IPv6 */ |
41 |
*buf++ = BGP_AF_IPV6; |
42 |
*buf++ = 0; /* RFU */ |
43 |
*buf++ = 1; /* and SAFI 1 */ |
44 |
return buf;
|
45 |
} |
46 |
#endif
|
47 |
|
48 |
/*
 * Append the 4-octet AS number capability (code 65) carrying our
 * local AS to @buf. Always emits 6 bytes; returns the position
 * following them.
 */
static byte *
bgp_put_cap_as4(struct bgp_conn *conn, byte *buf)
{
  buf[0] = 65;                  /* Capability 65: Support for 4-octet AS number */
  buf[1] = 4;                   /* Capability data length */
  put_u32(buf + 2, conn->bgp->local_as);
  return buf + 6;
}
56 |
|
57 |
/*
 * Write the body of an OPEN message to @buf.
 *
 * Layout produced: version (1 B), AS (2 B, AS_TRANS when the local AS
 * is not below 0xFFFF), hold time (2 B), router ID (4 B), optional
 * parameters length (1 B) and, if any capability was emitted, a single
 * optional parameter of type 2 holding the capability list.
 *
 * Returns a pointer just past the last byte written.
 */
static byte *
bgp_create_open(struct bgp_conn *conn, byte *buf)
{
  struct bgp_proto *p = conn->bgp;
  /* 9 B of fixed fields + 1 B params length + 2 B capability parameter header */
  byte *caps_start = buf + 12;
  byte *caps_end = caps_start;
  int caps_size;

  BGP_TRACE(D_PACKETS, "Sending OPEN(ver=%d,as=%d,hold=%d,id=%08x)",
            BGP_VERSION, p->local_as, p->cf->hold_time, p->local_id);
  buf[0] = BGP_VERSION;
  put_u16(buf+1, (p->local_as < 0xFFFF) ? p->local_as : AS_TRANS);
  put_u16(buf+3, p->cf->hold_time);
  put_u32(buf+5, p->local_id);

#ifdef IPV6
  caps_end = bgp_put_cap_ipv6(conn, caps_end);
#endif
  if (p->cf->enable_as4)
    caps_end = bgp_put_cap_as4(conn, caps_end);

  caps_size = caps_end - caps_start;
  if (caps_size <= 0)
    {
      buf[9] = 0;               /* No optional parameters */
      return buf + 10;
    }

  buf[9] = caps_size + 2;       /* Optional params len */
  buf[10] = 2;                  /* Option: Capability list */
  buf[11] = caps_size;          /* Option length */
  return caps_end;
}
93 |
|
94 |
static unsigned int |
95 |
bgp_encode_prefixes(struct bgp_proto *p, byte *w, struct bgp_bucket *buck, unsigned int remains) |
96 |
{ |
97 |
byte *start = w; |
98 |
ip_addr a; |
99 |
int bytes;
|
100 |
|
101 |
while (!EMPTY_LIST(buck->prefixes) && remains >= 5) |
102 |
{ |
103 |
struct bgp_prefix *px = SKIP_BACK(struct bgp_prefix, bucket_node, HEAD(buck->prefixes)); |
104 |
DBG("\tDequeued route %I/%d\n", px->n.prefix, px->n.pxlen);
|
105 |
*w++ = px->n.pxlen; |
106 |
bytes = (px->n.pxlen + 7) / 8; |
107 |
a = px->n.prefix; |
108 |
ipa_hton(a); |
109 |
memcpy(w, &a, bytes); |
110 |
w += bytes; |
111 |
remains -= bytes + 1;
|
112 |
rem_node(&px->bucket_node); |
113 |
fib_delete(&p->prefix_fib, px); |
114 |
} |
115 |
return w - start;
|
116 |
} |
117 |
|
118 |
#ifndef IPV6 /* IPv4 version */ |
119 |
|
120 |
/*
 * Build the body of an IPv4 UPDATE message at @buf.
 *
 * Layout produced: withdrawn routes length (2 B), withdrawn routes,
 * total path attribute length (2 B), path attributes, NLRI.
 * Withdrawals come from p->withdraw_bucket. If at least 2048 bytes
 * are still free afterwards, the first non-empty bucket of
 * p->bucket_queue contributes its attributes and as many of its
 * prefixes as fit; empty buckets met on the way are freed.
 *
 * Returns a pointer past the end of the body, or NULL when there was
 * nothing to withdraw or announce.
 */
static byte *
bgp_create_update(struct bgp_conn *conn, byte *buf)
{
  struct bgp_proto *p = conn->bgp;
  struct bgp_bucket *buck = p->withdraw_bucket;
  /* The 4 bytes cover the two 16-bit length fields */
  int remains = BGP_MAX_PACKET_LENGTH - BGP_HEADER_LENGTH - 4;
  byte *w = buf + 2;
  int wd_size = 0;
  int r_size = 0;
  int a_size = 0;

  if (buck && !EMPTY_LIST(buck->prefixes))
    {
      DBG("Withdrawn routes:\n");
      wd_size = bgp_encode_prefixes(p, w, buck, remains);
      w += wd_size;
      remains -= wd_size;
    }
  put_u16(buf, wd_size);

  if (remains >= 2048)
    for (;;)
      {
        buck = (struct bgp_bucket *) HEAD(p->bucket_queue);
        if (!buck->send_node.next)      /* Reached the end of the queue */
          break;
        if (EMPTY_LIST(buck->prefixes))
          {
            DBG("Deleting empty bucket %p\n", buck);
            rem_node(&buck->send_node);
            bgp_free_bucket(p, buck);
            continue;
          }
        DBG("Processing bucket %p\n", buck);
        a_size = bgp_encode_attrs(p, w+2, buck->eattrs, 1024);
        put_u16(w, a_size);
        w += a_size + 2;
        r_size = bgp_encode_prefixes(p, w, buck, remains - a_size);
        w += r_size;
        break;                          /* One bucket per UPDATE */
      }

  if (!a_size)                          /* Attributes not already encoded */
    {
      put_u16(w, 0);
      w += 2;
    }

  if (!wd_size && !r_size)
    return NULL;

  BGP_TRACE(D_PACKETS, "Sending UPDATE");
  return w;
}
174 |
|
175 |
#else /* IPv6 version */ |
176 |
|
177 |
/*
 * Build the body of an IPv6 UPDATE message at @buf.
 *
 * The withdrawn routes length is always written as zero. Withdrawals
 * and announcements travel inside MP_UNREACH_NLRI / MP_REACH_NLRI
 * attributes which are assembled in bgp_linpool and then encoded
 * together with the bucket's ordinary attributes. The total path
 * attribute length is filled in at buf+2 once everything is encoded.
 *
 * Returns a pointer past the end of the body, or NULL when no
 * attribute bytes were produced.
 */
static byte *
bgp_create_update(struct bgp_conn *conn, byte *buf)
{
  struct bgp_proto *p = conn->bgp;
  struct bgp_bucket *buck;
  int size, is_ll;
  int remains = BGP_MAX_PACKET_LENGTH - BGP_HEADER_LENGTH - 4;
  byte *w, *tmp, *tstart;
  ip_addr ip, ip_ll;
  ea_list *ea;
  eattr *nh;
  neighbor *n;

  put_u16(buf, 0);                      /* No classic withdrawn routes */
  w = buf+4;

  buck = p->withdraw_bucket;
  if (buck && !EMPTY_LIST(buck->prefixes))
    {
      DBG("Withdrawn routes:\n");
      tmp = bgp_attach_attr_wa(&ea, bgp_linpool, BA_MP_UNREACH_NLRI, remains-8);
      *tmp++ = 0;                       /* AFI (high byte) */
      *tmp++ = BGP_AF_IPV6;
      *tmp++ = 1;                       /* SAFI */
      /*
       * The attribute data consists of the 3-byte AFI/SAFI header written
       * above plus the encoded prefixes, so both have to be counted.
       */
      ea->attrs[0].u.ptr->length = 3 + bgp_encode_prefixes(p, tmp, buck, remains-11);
      size = bgp_encode_attrs(p, w, ea, remains);
      w += size;
      remains -= size;
    }

  if (remains >= 2048)
    {
      while ((buck = (struct bgp_bucket *) HEAD(p->bucket_queue))->send_node.next)
        {
          if (EMPTY_LIST(buck->prefixes))
            {
              DBG("Deleting empty bucket %p\n", buck);
              rem_node(&buck->send_node);
              bgp_free_bucket(p, buck);
              continue;
            }
          DBG("Processing bucket %p\n", buck);
          size = bgp_encode_attrs(p, w, buck->eattrs, 1024);
          w += size;
          remains -= size;

          /* Assemble MP_REACH_NLRI: AFI, SAFI, next hop(s), SNPA count, prefixes */
          tstart = tmp = bgp_attach_attr_wa(&ea, bgp_linpool, BA_MP_REACH_NLRI, remains-8);
          *tmp++ = 0;
          *tmp++ = BGP_AF_IPV6;
          *tmp++ = 1;
          nh = ea_find(buck->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP));
          ASSERT(nh);
          ip = *(ip_addr *) nh->u.ptr->data;

          /* Decide whether a link-local next hop accompanies the global one */
          is_ll = 0;
          if (ipa_equal(ip, p->local_addr))
            {
              is_ll = 1;
              ip_ll = p->local_link;
            }
          else
            {
              n = neigh_find(&p->p, &ip, 0);
              if (n && n->iface == p->neigh->iface)
                {
                  /* FIXME: We are assuming the global scope addresses use the lower 64 bits
                   * as an interface identifier which hasn't necessarily to be true.
                   */
                  is_ll = 1;
                  ip_ll = ipa_or(ipa_build(0xfe800000,0,0,0), ipa_and(ip, ipa_build(0,0,~0,~0)));
                }
            }

          /* Next hop length byte, global address, optional link-local address */
          *tmp++ = is_ll ? 32 : 16;
          ipa_hton(ip);
          memcpy(tmp, &ip, 16);
          tmp += 16;
          if (is_ll)
            {
              ipa_hton(ip_ll);
              memcpy(tmp, &ip_ll, 16);
              tmp += 16;
            }

          *tmp++ = 0;                   /* No SNPA information */
          tmp += bgp_encode_prefixes(p, tmp, buck, remains - (8+3+32+1));
          ea->attrs[0].u.ptr->length = tmp - tstart;
          w += bgp_encode_attrs(p, w, ea, remains);
          break;                        /* One bucket per UPDATE */
        }
    }

  size = w - (buf+4);
  put_u16(buf+2, size);                 /* Total path attribute length */
  lp_flush(bgp_linpool);
  if (!size)
    return NULL;

  BGP_TRACE(D_PACKETS, "Sending UPDATE");
  return w;
}
281 |
|
282 |
#endif
|
283 |
|
284 |
static void |
285 |
bgp_create_header(byte *buf, unsigned int len, unsigned int type) |
286 |
{ |
287 |
memset(buf, 0xff, 16); /* Marker */ |
288 |
put_u16(buf+16, len);
|
289 |
buf[18] = type;
|
290 |
} |
291 |
|
292 |
/**
|
293 |
* bgp_fire_tx - transmit packets
|
294 |
* @conn: connection
|
295 |
*
|
296 |
* Whenever the transmit buffers of the underlying TCP connection
|
297 |
* are free and we have any packets queued for sending, the socket functions
|
298 |
* call bgp_fire_tx() which takes care of selecting the highest priority packet
|
299 |
* queued (Notification > Keepalive > Open > Update), assembling its header
|
300 |
* and body and sending it to the connection.
|
301 |
*/
|
302 |
static int |
303 |
bgp_fire_tx(struct bgp_conn *conn)
|
304 |
{ |
305 |
struct bgp_proto *p = conn->bgp;
|
306 |
unsigned int s = conn->packets_to_send; |
307 |
sock *sk = conn->sk; |
308 |
byte *buf, *pkt, *end; |
309 |
int type;
|
310 |
|
311 |
if (!sk)
|
312 |
{ |
313 |
conn->packets_to_send = 0;
|
314 |
return 0; |
315 |
} |
316 |
buf = sk->tbuf; |
317 |
pkt = buf + BGP_HEADER_LENGTH; |
318 |
|
319 |
if (s & (1 << PKT_SCHEDULE_CLOSE)) |
320 |
{ |
321 |
bgp_close_conn(conn); |
322 |
return 0; |
323 |
} |
324 |
if (s & (1 << PKT_NOTIFICATION)) |
325 |
{ |
326 |
s = 1 << PKT_SCHEDULE_CLOSE;
|
327 |
type = PKT_NOTIFICATION; |
328 |
end = bgp_create_notification(conn, pkt); |
329 |
} |
330 |
else if (s & (1 << PKT_KEEPALIVE)) |
331 |
{ |
332 |
s &= ~(1 << PKT_KEEPALIVE);
|
333 |
type = PKT_KEEPALIVE; |
334 |
end = pkt; /* Keepalives carry no data */
|
335 |
BGP_TRACE(D_PACKETS, "Sending KEEPALIVE");
|
336 |
bgp_start_timer(conn->keepalive_timer, conn->keepalive_time); |
337 |
} |
338 |
else if (s & (1 << PKT_OPEN)) |
339 |
{ |
340 |
s &= ~(1 << PKT_OPEN);
|
341 |
type = PKT_OPEN; |
342 |
end = bgp_create_open(conn, pkt); |
343 |
} |
344 |
else if (s & (1 << PKT_UPDATE)) |
345 |
{ |
346 |
end = bgp_create_update(conn, pkt); |
347 |
type = PKT_UPDATE; |
348 |
if (!end)
|
349 |
{ |
350 |
conn->packets_to_send = 0;
|
351 |
return 0; |
352 |
} |
353 |
} |
354 |
else
|
355 |
return 0; |
356 |
conn->packets_to_send = s; |
357 |
bgp_create_header(buf, end - buf, type); |
358 |
return sk_send(sk, end - buf);
|
359 |
} |
360 |
|
361 |
/**
|
362 |
* bgp_schedule_packet - schedule a packet for transmission
|
363 |
* @conn: connection
|
364 |
* @type: packet type
|
365 |
*
|
366 |
* Schedule a packet of type @type to be sent as soon as possible.
|
367 |
*/
|
368 |
void
|
369 |
bgp_schedule_packet(struct bgp_conn *conn, int type) |
370 |
{ |
371 |
DBG("BGP: Scheduling packet type %d\n", type);
|
372 |
conn->packets_to_send |= 1 << type;
|
373 |
if (conn->sk && conn->sk->tpos == conn->sk->tbuf)
|
374 |
while (bgp_fire_tx(conn))
|
375 |
; |
376 |
} |
377 |
|
378 |
void
|
379 |
bgp_tx(sock *sk) |
380 |
{ |
381 |
struct bgp_conn *conn = sk->data;
|
382 |
|
383 |
DBG("BGP: TX hook\n");
|
384 |
while (bgp_fire_tx(conn))
|
385 |
; |
386 |
} |
387 |
|
388 |
/* Capability negotiation as per RFC 2842 */
|
389 |
|
390 |
void
|
391 |
bgp_parse_capabilities(struct bgp_conn *conn, byte *opt, int len) |
392 |
{ |
393 |
struct bgp_proto *p = conn->bgp;
|
394 |
int cl;
|
395 |
u32 as; |
396 |
|
397 |
while (len > 0) |
398 |
{ |
399 |
if (len < 2 || len < 2 + opt[1]) |
400 |
goto err;
|
401 |
|
402 |
cl = opt[1];
|
403 |
|
404 |
switch (opt[0]) |
405 |
{ |
406 |
case 65: |
407 |
if (cl != 4) |
408 |
goto err;
|
409 |
p->as4_support = 1;
|
410 |
p->as4_session = p->cf->enable_as4; |
411 |
if (p->as4_session)
|
412 |
conn->advertised_as = get_u32(opt + 2);
|
413 |
break;
|
414 |
|
415 |
/* We can safely ignore all other capabilities */
|
416 |
} |
417 |
len -= 2 + cl;
|
418 |
opt += 2 + cl;
|
419 |
} |
420 |
return;
|
421 |
|
422 |
err:
|
423 |
bgp_error(conn, 2, 0, NULL, 0); |
424 |
return;
|
425 |
} |
426 |
|
427 |
static int |
428 |
bgp_parse_options(struct bgp_conn *conn, byte *opt, int len) |
429 |
{ |
430 |
int ol;
|
431 |
|
432 |
while (len > 0) |
433 |
{ |
434 |
if (len < 2 || len < 2 + opt[1]) |
435 |
{ bgp_error(conn, 2, 0, NULL, 0); return 0; } |
436 |
#ifdef LOCAL_DEBUG
|
437 |
{ |
438 |
int i;
|
439 |
DBG("\tOption %02x:", opt[0]); |
440 |
for(i=0; i<opt[1]; i++) |
441 |
DBG(" %02x", opt[2+i]); |
442 |
DBG("\n");
|
443 |
} |
444 |
#endif
|
445 |
|
446 |
ol = opt[1];
|
447 |
switch (opt[0]) |
448 |
{ |
449 |
case 2: |
450 |
bgp_parse_capabilities(conn, opt + 2, ol);
|
451 |
break;
|
452 |
|
453 |
default:
|
454 |
/*
|
455 |
* BGP specs don't tell us to send which option
|
456 |
* we didn't recognize, but it's common practice
|
457 |
* to do so. Also, capability negotiation with
|
458 |
* Cisco routers doesn't work without that.
|
459 |
*/
|
460 |
bgp_error(conn, 2, 4, opt, ol); |
461 |
return 0; |
462 |
} |
463 |
len -= 2 + ol;
|
464 |
opt += 2 + ol;
|
465 |
} |
466 |
return 0; |
467 |
} |
468 |
|
469 |
static void |
470 |
bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len) |
471 |
{ |
472 |
struct bgp_conn *other;
|
473 |
struct bgp_proto *p = conn->bgp;
|
474 |
struct bgp_config *cf = p->cf;
|
475 |
unsigned hold;
|
476 |
u32 id; |
477 |
|
478 |
/* Check state */
|
479 |
if (conn->state != BS_OPENSENT)
|
480 |
{ bgp_error(conn, 5, 0, NULL, 0); } |
481 |
|
482 |
/* Check message contents */
|
483 |
if (len < 29 || len != 29 + pkt[28]) |
484 |
{ bgp_error(conn, 1, 2, pkt+16, 2); return; } |
485 |
if (pkt[19] != BGP_VERSION) |
486 |
{ bgp_error(conn, 2, 1, pkt+19, 1); return; } /* RFC 1771 says 16 bits, draft-09 tells to use 8 */ |
487 |
conn->advertised_as = get_u16(pkt+20);
|
488 |
hold = get_u16(pkt+22);
|
489 |
id = get_u32(pkt+24);
|
490 |
BGP_TRACE(D_PACKETS, "Got OPEN(as=%d,hold=%d,id=%08x)", conn->advertised_as, hold, id);
|
491 |
|
492 |
p->remote_id = id; // ???
|
493 |
if (bgp_parse_options(conn, pkt+29, pkt[28])) |
494 |
return;
|
495 |
|
496 |
if (hold > 0 && hold < 3) |
497 |
{ bgp_error(conn, 2, 6, pkt+22, 2); return; } |
498 |
|
499 |
if (!id || id == 0xffffffff || id == p->local_id) |
500 |
{ bgp_error(conn, 2, 3, pkt+24, -4); return; } |
501 |
|
502 |
|
503 |
if (conn->advertised_as != p->remote_as)
|
504 |
{ |
505 |
bgp_error(conn, 2, 2, (byte *) &(conn->advertised_as), -4); return; |
506 |
} |
507 |
|
508 |
/* Check the other connection */
|
509 |
other = (conn == &p->outgoing_conn) ? &p->incoming_conn : &p->outgoing_conn; |
510 |
switch (other->state)
|
511 |
{ |
512 |
case BS_IDLE:
|
513 |
case BS_CONNECT:
|
514 |
case BS_ACTIVE:
|
515 |
case BS_OPENSENT:
|
516 |
break;
|
517 |
case BS_OPENCONFIRM:
|
518 |
if ((p->local_id < id) == (conn == &p->incoming_conn))
|
519 |
{ |
520 |
/* Should close the other connection */
|
521 |
BGP_TRACE(D_EVENTS, "Connection collision, giving up the other connection");
|
522 |
bgp_error(other, 6, 0, NULL, 0); |
523 |
break;
|
524 |
} |
525 |
/* Fall thru */
|
526 |
case BS_ESTABLISHED:
|
527 |
/* Should close this connection */
|
528 |
BGP_TRACE(D_EVENTS, "Connection collision, giving up this connection");
|
529 |
bgp_error(conn, 6, 0, NULL, 0); |
530 |
return;
|
531 |
default:
|
532 |
bug("bgp_rx_open: Unknown state");
|
533 |
} |
534 |
|
535 |
/* Make this connection primary */
|
536 |
conn->primary = 1;
|
537 |
p->conn = conn; |
538 |
|
539 |
/* Update our local variables */
|
540 |
if (hold < p->cf->hold_time)
|
541 |
conn->hold_time = hold; |
542 |
else
|
543 |
conn->hold_time = p->cf->hold_time; |
544 |
conn->keepalive_time = p->cf->keepalive_time ? : conn->hold_time / 3;
|
545 |
// p->remote_as = conn->advertised_as;
|
546 |
p->remote_id = id; |
547 |
DBG("BGP: Hold timer set to %d, keepalive to %d, AS to %d, ID to %x\n", conn->hold_time, conn->keepalive_time, p->remote_as, p->remote_id);
|
548 |
|
549 |
bgp_schedule_packet(conn, PKT_KEEPALIVE); |
550 |
bgp_start_timer(conn->hold_timer, conn->hold_time); |
551 |
conn->state = BS_OPENCONFIRM; |
552 |
} |
553 |
|
554 |
/*
 * DECODE_PREFIX(pp, ll) - pull one prefix out of an NLRI byte stream.
 *
 * @pp is advanced past the prefix and @ll (bytes left) is decreased
 * accordingly. The caller has to make sure @ll is at least 1 and must
 * provide the variables `prefix', `pxlen' and `err' as well as a label
 * `bad'. On success `prefix' holds the masked address in host order and
 * `pxlen' its length in bits. On failure `err' is set to 10 (length
 * beyond BITS_PER_IP_ADDRESS) or 1 (stream too short) and control
 * jumps to `bad'.
 */
#define DECODE_PREFIX(pp, ll) do {                                      \
  int pxbits_ = *pp++;                                                  \
  int pxbytes_;                                                         \
  ll--;                                                                 \
  if (pxbits_ > BITS_PER_IP_ADDRESS) { err=10; goto bad; }              \
  pxbytes_ = (pxbits_+7) / 8;                                           \
  if (ll < pxbytes_) { err=1; goto bad; }                               \
  memcpy(&prefix, pp, pxbytes_);                                        \
  pp += pxbytes_;                                                       \
  ll -= pxbytes_;                                                       \
  ipa_ntoh(prefix);                                                     \
  prefix = ipa_and(prefix, ipa_mkmask(pxbits_));                        \
  pxlen = pxbits_;                                                      \
} while (0)
568 |
|
569 |
static inline int |
570 |
bgp_get_nexthop(struct bgp_proto *bgp, rta *a)
|
571 |
{ |
572 |
neighbor *neigh; |
573 |
ip_addr nexthop; |
574 |
struct eattr *nh = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP));
|
575 |
ASSERT(nh); |
576 |
nexthop = *(ip_addr *) nh->u.ptr->data; |
577 |
neigh = neigh_find(&bgp->p, &nexthop, 0);
|
578 |
if (neigh)
|
579 |
{ |
580 |
if (neigh->scope == SCOPE_HOST)
|
581 |
{ |
582 |
DBG("BGP: Loop!\n");
|
583 |
return 0; |
584 |
} |
585 |
} |
586 |
else
|
587 |
neigh = bgp->neigh; |
588 |
a->gw = neigh->addr; |
589 |
a->iface = neigh->iface; |
590 |
return 1; |
591 |
} |
592 |
|
593 |
#ifndef IPV6 /* IPv4 version */ |
594 |
|
595 |
static void |
596 |
bgp_do_rx_update(struct bgp_conn *conn,
|
597 |
byte *withdrawn, int withdrawn_len,
|
598 |
byte *nlri, int nlri_len,
|
599 |
byte *attrs, int attr_len)
|
600 |
{ |
601 |
struct bgp_proto *p = conn->bgp;
|
602 |
rta *a0; |
603 |
rta *a = NULL;
|
604 |
ip_addr prefix; |
605 |
net *n; |
606 |
int err = 0, pxlen; |
607 |
|
608 |
/* Withdraw routes */
|
609 |
while (withdrawn_len)
|
610 |
{ |
611 |
DECODE_PREFIX(withdrawn, withdrawn_len); |
612 |
DBG("Withdraw %I/%d\n", prefix, pxlen);
|
613 |
if (n = net_find(p->p.table, prefix, pxlen))
|
614 |
rte_update(p->p.table, n, &p->p, NULL);
|
615 |
} |
616 |
|
617 |
if (!attr_len && !nlri_len) /* shortcut */ |
618 |
return;
|
619 |
|
620 |
a0 = bgp_decode_attrs(conn, attrs, attr_len, bgp_linpool, nlri_len); |
621 |
if (a0 && nlri_len && bgp_get_nexthop(p, a0))
|
622 |
{ |
623 |
a = rta_lookup(a0); |
624 |
while (nlri_len)
|
625 |
{ |
626 |
rte *e; |
627 |
DECODE_PREFIX(nlri, nlri_len); |
628 |
DBG("Add %I/%d\n", prefix, pxlen);
|
629 |
e = rte_get_temp(rta_clone(a)); |
630 |
n = net_get(p->p.table, prefix, pxlen); |
631 |
e->net = n; |
632 |
e->pflags = 0;
|
633 |
rte_update(p->p.table, n, &p->p, e); |
634 |
} |
635 |
} |
636 |
bad:
|
637 |
if (a)
|
638 |
rta_free(a); |
639 |
if (err)
|
640 |
bgp_error(conn, 3, err, NULL, 0); |
641 |
return;
|
642 |
} |
643 |
|
644 |
#else /* IPv6 version */ |
645 |
|
646 |
/*
 * DO_NLRI(name) - open processing of the multiprotocol NLRI block
 * p->name##_start / p->name##_len.
 *
 * Sets `start' and `x' to the beginning of the block and `len' and
 * `len0' to its size. For a non-empty block the 3-byte AFI/SAFI header
 * is consumed into `af' and `sub' (jumping to `bad' if it is shorter
 * than that); for an empty block `af' is zeroed. The macro ends with
 * an unterminated `if (af == BGP_AF_IPV6)', so the statement written
 * right after it only runs for IPv6 blocks.
 */
#define DO_NLRI(name)                                   \
  start = p->name##_start;                              \
  x = start;                                            \
  len0 = p->name##_len;                                 \
  len = len0;                                           \
  if (!len)                                             \
    af = 0;                                             \
  else                                                  \
    {                                                   \
      if (len < 3) goto bad;                            \
      af = get_u16(x);                                  \
      sub = x[2];                                       \
      x += 3;                                           \
      len -= 3;                                         \
      DBG("\tNLRI AF=%d sub=%d len=%d\n", af, sub, len);\
    }                                                   \
  if (af == BGP_AF_IPV6)
|
661 |
|
662 |
static void |
663 |
bgp_do_rx_update(struct bgp_conn *conn,
|
664 |
byte *withdrawn, int withdrawn_len,
|
665 |
byte *nlri, int nlri_len,
|
666 |
byte *attrs, int attr_len)
|
667 |
{ |
668 |
struct bgp_proto *p = conn->bgp;
|
669 |
byte *start, *x; |
670 |
int len, len0;
|
671 |
unsigned af, sub;
|
672 |
rta *a0; |
673 |
rta *a = NULL;
|
674 |
ip_addr prefix; |
675 |
net *n; |
676 |
rte e; |
677 |
int err = 0, pxlen; |
678 |
|
679 |
p->mp_reach_len = 0;
|
680 |
p->mp_unreach_len = 0;
|
681 |
a0 = bgp_decode_attrs(conn, attrs, attr_len, bgp_linpool, 0);
|
682 |
if (!a0)
|
683 |
return;
|
684 |
|
685 |
DO_NLRI(mp_unreach) |
686 |
{ |
687 |
while (len)
|
688 |
{ |
689 |
DECODE_PREFIX(x, len); |
690 |
DBG("Withdraw %I/%d\n", prefix, pxlen);
|
691 |
if (n = net_find(p->p.table, prefix, pxlen))
|
692 |
rte_update(p->p.table, n, &p->p, NULL);
|
693 |
} |
694 |
} |
695 |
|
696 |
DO_NLRI(mp_reach) |
697 |
{ |
698 |
int i;
|
699 |
|
700 |
/* Create fake NEXT_HOP attribute */
|
701 |
if (len < 1 || (*x != 16 && *x != 32) || len < *x + 2) |
702 |
goto bad;
|
703 |
memcpy(bgp_attach_attr_wa(&a0->eattrs, bgp_linpool, BA_NEXT_HOP, 16), x+1, 16); |
704 |
len -= *x + 2;
|
705 |
x += *x + 1;
|
706 |
|
707 |
/* Ignore SNPA info */
|
708 |
i = *x++; |
709 |
while (i--)
|
710 |
{ |
711 |
if (len < 1 || len < 1 + *x) |
712 |
goto bad;
|
713 |
len -= *x + 1;
|
714 |
x += *x + 1;
|
715 |
} |
716 |
|
717 |
if (bgp_get_nexthop(p, a0))
|
718 |
{ |
719 |
a = rta_lookup(a0); |
720 |
while (len)
|
721 |
{ |
722 |
rte *e; |
723 |
DECODE_PREFIX(x, len); |
724 |
DBG("Add %I/%d\n", prefix, pxlen);
|
725 |
e = rte_get_temp(rta_clone(a)); |
726 |
n = net_get(p->p.table, prefix, pxlen); |
727 |
e->net = n; |
728 |
e->pflags = 0;
|
729 |
rte_update(p->p.table, n, &p->p, e); |
730 |
} |
731 |
rta_free(a); |
732 |
} |
733 |
} |
734 |
|
735 |
return;
|
736 |
|
737 |
bad:
|
738 |
bgp_error(conn, 3, 9, start, len0); |
739 |
if (a)
|
740 |
rta_free(a); |
741 |
return;
|
742 |
} |
743 |
|
744 |
#endif
|
745 |
|
746 |
static void |
747 |
bgp_rx_update(struct bgp_conn *conn, byte *pkt, int len) |
748 |
{ |
749 |
struct bgp_proto *p = conn->bgp;
|
750 |
byte *withdrawn, *attrs, *nlri; |
751 |
int withdrawn_len, attr_len, nlri_len;
|
752 |
|
753 |
BGP_TRACE(D_PACKETS, "Got UPDATE");
|
754 |
if (conn->state != BS_ESTABLISHED)
|
755 |
{ bgp_error(conn, 5, 0, NULL, 0); return; } |
756 |
bgp_start_timer(conn->hold_timer, conn->hold_time); |
757 |
|
758 |
/* Find parts of the packet and check sizes */
|
759 |
if (len < 23) |
760 |
{ |
761 |
bgp_error(conn, 1, 2, pkt+16, 2); |
762 |
return;
|
763 |
} |
764 |
withdrawn = pkt + 21;
|
765 |
withdrawn_len = get_u16(pkt + 19);
|
766 |
if (withdrawn_len + 23 > len) |
767 |
goto malformed;
|
768 |
attrs = withdrawn + withdrawn_len + 2;
|
769 |
attr_len = get_u16(attrs - 2);
|
770 |
if (withdrawn_len + attr_len + 23 > len) |
771 |
goto malformed;
|
772 |
nlri = attrs + attr_len; |
773 |
nlri_len = len - withdrawn_len - attr_len - 23;
|
774 |
if (!attr_len && nlri_len)
|
775 |
goto malformed;
|
776 |
DBG("Sizes: withdrawn=%d, attrs=%d, NLRI=%d\n", withdrawn_len, attr_len, nlri_len);
|
777 |
|
778 |
lp_flush(bgp_linpool); |
779 |
|
780 |
bgp_do_rx_update(conn, withdrawn, withdrawn_len, nlri, nlri_len, attrs, attr_len); |
781 |
return;
|
782 |
|
783 |
malformed:
|
784 |
bgp_error(conn, 3, 1, NULL, 0); |
785 |
} |
786 |
|
787 |
static struct { |
788 |
byte major, minor; |
789 |
byte *msg; |
790 |
} bgp_msg_table[] = { |
791 |
{ 1, 0, "Invalid message header" }, |
792 |
{ 1, 1, "Connection not synchronized" }, |
793 |
{ 1, 2, "Bad message length" }, |
794 |
{ 1, 3, "Bad message type" }, |
795 |
{ 2, 0, "Invalid OPEN message" }, |
796 |
{ 2, 1, "Unsupported version number" }, |
797 |
{ 2, 2, "Bad peer AS" }, |
798 |
{ 2, 3, "Bad BGP identifier" }, |
799 |
{ 2, 4, "Unsupported optional parameter" }, |
800 |
{ 2, 5, "Authentication failure" }, |
801 |
{ 2, 6, "Unacceptable hold time" }, |
802 |
{ 2, 7, "Required capability missing" }, /* [RFC3392] */ |
803 |
{ 3, 0, "Invalid UPDATE message" }, |
804 |
{ 3, 1, "Malformed attribute list" }, |
805 |
{ 3, 2, "Unrecognized well-known attribute" }, |
806 |
{ 3, 3, "Missing mandatory attribute" }, |
807 |
{ 3, 4, "Invalid attribute flags" }, |
808 |
{ 3, 5, "Invalid attribute length" }, |
809 |
{ 3, 6, "Invalid ORIGIN attribute" }, |
810 |
{ 3, 7, "AS routing loop" }, /* Deprecated */ |
811 |
{ 3, 8, "Invalid NEXT_HOP attribute" }, |
812 |
{ 3, 9, "Optional attribute error" }, |
813 |
{ 3, 10, "Invalid network field" }, |
814 |
{ 3, 11, "Malformed AS_PATH" }, |
815 |
{ 4, 0, "Hold timer expired" }, |
816 |
{ 5, 0, "Finite state machine error" }, |
817 |
{ 6, 0, "Cease" } |
818 |
}; |
819 |
|
820 |
void
|
821 |
bgp_log_error(struct bgp_proto *p, char *msg, unsigned code, unsigned subcode, byte *data, unsigned len) |
822 |
{ |
823 |
byte *name, namebuf[16];
|
824 |
byte *t, argbuf[36];
|
825 |
unsigned i;
|
826 |
|
827 |
if (code == 6 && !subcode) /* Don't report Cease messages */ |
828 |
return;
|
829 |
|
830 |
bsprintf(namebuf, "%d.%d", code, subcode);
|
831 |
name = namebuf; |
832 |
for (i=0; i < ARRAY_SIZE(bgp_msg_table); i++) |
833 |
if (bgp_msg_table[i].major == code && bgp_msg_table[i].minor == subcode)
|
834 |
{ |
835 |
name = bgp_msg_table[i].msg; |
836 |
break;
|
837 |
} |
838 |
t = argbuf; |
839 |
if (len)
|
840 |
{ |
841 |
*t++ = ':';
|
842 |
*t++ = ' ';
|
843 |
if (len > 16) |
844 |
len = 16;
|
845 |
for (i=0; i<len; i++) |
846 |
t += bsprintf(t, "%02x", data[i]);
|
847 |
} |
848 |
*t = 0;
|
849 |
log(L_REMOTE "%s: %s: %s%s", p->p.name, msg, name, argbuf);
|
850 |
} |
851 |
|
852 |
static void |
853 |
bgp_rx_notification(struct bgp_conn *conn, byte *pkt, int len) |
854 |
{ |
855 |
if (len < 21) |
856 |
{ |
857 |
bgp_error(conn, 1, 2, pkt+16, 2); |
858 |
return;
|
859 |
} |
860 |
bgp_log_error(conn->bgp, "Received error notification", pkt[19], pkt[20], pkt+21, len-21); |
861 |
conn->error_flag = 1;
|
862 |
if (conn->primary)
|
863 |
proto_notify_state(&conn->bgp->p, PS_STOP); |
864 |
bgp_schedule_packet(conn, PKT_SCHEDULE_CLOSE); |
865 |
} |
866 |
|
867 |
static void |
868 |
bgp_rx_keepalive(struct bgp_conn *conn)
|
869 |
{ |
870 |
struct bgp_proto *p = conn->bgp;
|
871 |
|
872 |
BGP_TRACE(D_PACKETS, "Got KEEPALIVE");
|
873 |
bgp_start_timer(conn->hold_timer, conn->hold_time); |
874 |
switch (conn->state)
|
875 |
{ |
876 |
case BS_OPENCONFIRM:
|
877 |
DBG("BGP: UP!!!\n");
|
878 |
conn->state = BS_ESTABLISHED; |
879 |
bgp_attr_init(conn->bgp); |
880 |
proto_notify_state(&conn->bgp->p, PS_UP); |
881 |
break;
|
882 |
case BS_ESTABLISHED:
|
883 |
break;
|
884 |
default:
|
885 |
bgp_error(conn, 5, 0, NULL, 0); |
886 |
} |
887 |
} |
888 |
|
889 |
/**
|
890 |
* bgp_rx_packet - handle a received packet
|
891 |
* @conn: BGP connection
|
892 |
* @pkt: start of the packet
|
893 |
* @len: packet size
|
894 |
*
|
895 |
* bgp_rx_packet() takes a newly received packet and calls the corresponding
|
896 |
* packet handler according to the packet type.
|
897 |
*/
|
898 |
static void |
899 |
bgp_rx_packet(struct bgp_conn *conn, byte *pkt, unsigned len) |
900 |
{ |
901 |
DBG("BGP: Got packet %02x (%d bytes)\n", pkt[18], len); |
902 |
switch (pkt[18]) |
903 |
{ |
904 |
case PKT_OPEN: return bgp_rx_open(conn, pkt, len); |
905 |
case PKT_UPDATE: return bgp_rx_update(conn, pkt, len); |
906 |
case PKT_NOTIFICATION: return bgp_rx_notification(conn, pkt, len); |
907 |
case PKT_KEEPALIVE: return bgp_rx_keepalive(conn); |
908 |
default: bgp_error(conn, 1, 3, pkt+18, 1); |
909 |
} |
910 |
} |
911 |
|
912 |
/**
|
913 |
* bgp_rx - handle received data
|
914 |
* @sk: socket
|
915 |
* @size: amount of data received
|
916 |
*
|
917 |
* bgp_rx() is called by the socket layer whenever new data arrive from
|
918 |
* the underlying TCP connection. It assembles the data fragments to packets,
|
919 |
* checks their headers and framing and passes complete packets to
|
920 |
* bgp_rx_packet().
|
921 |
*/
|
922 |
int
|
923 |
bgp_rx(sock *sk, int size)
|
924 |
{ |
925 |
struct bgp_conn *conn = sk->data;
|
926 |
byte *pkt_start = sk->rbuf; |
927 |
byte *end = pkt_start + size; |
928 |
unsigned i, len;
|
929 |
|
930 |
DBG("BGP: RX hook: Got %d bytes\n", size);
|
931 |
while (end >= pkt_start + BGP_HEADER_LENGTH)
|
932 |
{ |
933 |
if (conn->error_flag)
|
934 |
{ |
935 |
/*
|
936 |
* We still need to remember the erroneous packet, so that
|
937 |
* we can generate error notifications properly. To avoid
|
938 |
* subsequent reads rewriting the buffer, we just reset the
|
939 |
* rx_hook.
|
940 |
*/
|
941 |
DBG("BGP: Error, dropping input\n");
|
942 |
sk->rx_hook = NULL;
|
943 |
return 0; |
944 |
} |
945 |
for(i=0; i<16; i++) |
946 |
if (pkt_start[i] != 0xff) |
947 |
{ |
948 |
bgp_error(conn, 1, 1, NULL, 0); |
949 |
break;
|
950 |
} |
951 |
len = get_u16(pkt_start+16);
|
952 |
if (len < BGP_HEADER_LENGTH || len > BGP_MAX_PACKET_LENGTH)
|
953 |
{ |
954 |
bgp_error(conn, 1, 2, pkt_start+16, 2); |
955 |
break;
|
956 |
} |
957 |
if (end < pkt_start + len)
|
958 |
break;
|
959 |
bgp_rx_packet(conn, pkt_start, len); |
960 |
pkt_start += len; |
961 |
} |
962 |
if (pkt_start != sk->rbuf)
|
963 |
{ |
964 |
memmove(sk->rbuf, pkt_start, end - pkt_start); |
965 |
sk->rpos = sk->rbuf + (end - pkt_start); |
966 |
} |
967 |
return 0; |
968 |
} |