iof-bird-daemon / proto / bgp / attrs.c @ 62e64905
History | View | Annotate | Download (52.2 KB)
1 |
/*
|
---|---|
2 |
* BIRD -- BGP Attributes
|
3 |
*
|
4 |
* (c) 2000 Martin Mares <mj@ucw.cz>
|
5 |
* (c) 2008--2016 Ondrej Zajicek <santiago@crfreenet.org>
|
6 |
* (c) 2008--2016 CZ.NIC z.s.p.o.
|
7 |
*
|
8 |
* Can be freely distributed and used under the terms of the GNU GPL.
|
9 |
*/
|
10 |
|
11 |
#undef LOCAL_DEBUG
|
12 |
|
13 |
#include <stdlib.h> |
14 |
|
15 |
#include "nest/bird.h" |
16 |
#include "nest/iface.h" |
17 |
#include "nest/protocol.h" |
18 |
#include "nest/route.h" |
19 |
#include "nest/attrs.h" |
20 |
#include "conf/conf.h" |
21 |
#include "lib/resource.h" |
22 |
#include "lib/string.h" |
23 |
#include "lib/unaligned.h" |
24 |
|
25 |
#include "bgp.h" |
26 |
|
27 |
/*
|
28 |
* UPDATE message error handling
|
29 |
*
|
30 |
* All checks from RFC 4271 6.3 are done as specified with these exceptions:
|
31 |
* - The semantic check of an IP address from NEXT_HOP attribute is missing.
|
32 |
* - Checks of some optional attribute values are missing.
|
33 |
* - Syntactic and semantic checks of NLRIs (done in DECODE_PREFIX())
|
34 |
* are probably inadequate.
|
35 |
*
|
36 |
* Loop detection based on AS_PATH causes updates to be withdrawn. RFC
|
37 |
* 4271 does not explicitly specify the behavior in that case.
|
38 |
*
|
39 |
* Loop detection related to route reflection (based on ORIGINATOR_ID
|
40 |
* and CLUSTER_LIST) causes updates to be withdrawn. RFC 4456 8
|
41 |
* specifies that such updates should be ignored, but that is generally
|
42 |
* a bad idea.
|
43 |
*
|
44 |
* Error checking of optional transitive attributes is done according to
|
45 |
* draft-ietf-idr-optional-transitive-03, but errors are handled always
|
46 |
* as withdraws.
|
47 |
*
|
48 |
* Unexpected AS_CONFED_* segments in AS_PATH are logged and removed,
|
49 |
* but unknown segments cause a session drop with Malformed AS_PATH
|
50 |
* error (see validate_path()). The behavior in such case is not
|
51 |
* explicitly specified by RFC 4271. RFC 5065 specifies that
|
52 |
* inconsistent AS_CONFED_* segments should cause a session drop, but
|
53 |
* implementations that pass invalid AS_CONFED_* segments are
|
54 |
* widespread.
|
55 |
*
|
56 |
* Error handling of AS4_* attributes is done as specified by
|
57 |
* draft-ietf-idr-rfc4893bis-03. There are several possible
|
58 |
* inconsistencies between AGGREGATOR and AS4_AGGREGATOR that are not
|
59 |
* handled by that draft, these are logged and ignored (see
|
60 |
* bgp_reconstruct_4b_attrs()).
|
61 |
*
|
62 |
* BGP attribute table has several hooks:
|
63 |
*
|
64 |
* export - Hook that validates and normalizes attribute during export phase.
|
65 |
* Receives eattr, may modify it (e.g., sort community lists for canonical
|
66 |
* representation), UNSET() it (e.g., skip empty lists), or WITHDRAW() it if
|
67 |
* necessary. May assume that eattr has value valid w.r.t. its type, but may be
|
68 |
* invalid w.r.t. BGP constraints. Optional.
|
69 |
*
|
70 |
* encode - Hook that converts internal representation to external one during
|
71 |
* packet writing. Receives eattr and puts it in the buffer (including attribute
|
72 |
* header). Returns number of bytes, or -1 if not enough space. May assume that
|
73 |
* eattr has value valid w.r.t. its type and validated by export hook. Mandatory
|
74 |
* for all known attributes that exist internally after export phase (i.e., all
|
75 |
* except pseudoattributes MP_(UN)REACH_NLRI).
|
76 |
*
|
77 |
* decode - Hook that converts external representation to internal one during
|
78 |
* packet parsing. Receives attribute data in buffer, validates it and adds
|
79 |
* attribute to ea_list. If data are invalid, steps DISCARD(), WITHDRAW() or
|
80 |
* bgp_parse_error() may be used to escape. Mandatory for all known attributes.
|
81 |
*
|
82 |
* format - Optional hook that converts eattr to textual representation.
|
83 |
*/
|
84 |
|
85 |
// XXXX review pool usage : c->c.proto->pool
|
86 |
|
87 |
|
88 |
struct bgp_attr_desc {
|
89 |
const char *name; |
90 |
uint type; |
91 |
uint flags; |
92 |
void (*export)(struct bgp_export_state *s, eattr *a); |
93 |
int (*encode)(struct bgp_write_state *s, eattr *a, byte *buf, uint size); |
94 |
void (*decode)(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to); |
95 |
void (*format)(eattr *ea, byte *buf, uint size);
|
96 |
}; |
97 |
|
98 |
static const struct bgp_attr_desc bgp_attr_table[]; |
99 |
|
100 |
static inline int bgp_attr_known(uint code); |
101 |
|
102 |
eattr * |
103 |
bgp_set_attr(ea_list **attrs, struct linpool *pool, uint code, uint flags, uintptr_t val)
|
104 |
{ |
105 |
ASSERT(bgp_attr_known(code)); |
106 |
|
107 |
ea_list *a = lp_alloc(pool, sizeof(ea_list) + sizeof(eattr)); |
108 |
eattr *e = &a->attrs[0];
|
109 |
|
110 |
a->flags = EALF_SORTED; |
111 |
a->count = 1;
|
112 |
a->next = *attrs; |
113 |
*attrs = a; |
114 |
|
115 |
e->id = EA_CODE(EAP_BGP, code); |
116 |
e->type = bgp_attr_table[code].type; |
117 |
e->flags = flags; |
118 |
|
119 |
if (e->type & EAF_EMBEDDED)
|
120 |
e->u.data = (u32) val; |
121 |
else
|
122 |
e->u.ptr = (struct adata *) val;
|
123 |
|
124 |
return e;
|
125 |
} |
126 |
|
127 |
|
128 |
|
129 |
/*
 * Helper macros for attribute hooks. All of them expect a variable `s`
 * (export or parse state) in the calling scope, and DISCARD(), WITHDRAW() and
 * UNSET() return from the calling (void) function.
 */

/* Log a remote-caused problem, prefixed with the protocol instance name */
#define REPORT(msg, args...) \
  ({ log(L_REMOTE "%s: " msg, s->proto->p.name, ## args); })

/* Log the problem and leave the hook without storing the attribute */
#define DISCARD(msg, args...) \
  ({ REPORT(msg, ## args); return; })

/* Log the problem, flag the update via s->err_withdraw and leave the hook */
#define WITHDRAW(msg, args...) \
  ({ REPORT(msg, ## args); s->err_withdraw = 1; return; })

/* Mark the attribute as undefined and leave the hook */
#define UNSET(a) \
  ({ a->type = EAF_TYPE_UNDEF; return; })

/* Message templates shared by the hooks */
#define NEW_BGP		"Discarding %s attribute received from AS4-aware neighbor"
#define BAD_EBGP	"Discarding %s attribute received from EBGP neighbor"
#define BAD_LENGTH	"Malformed %s attribute - invalid length (%u)"
#define BAD_VALUE	"Malformed %s attribute - invalid value (%u)"
#define NO_MANDATORY	"Missing mandatory %s attribute"
146 |
|
147 |
|
148 |
static inline int |
149 |
bgp_put_attr_hdr3(byte *buf, uint code, uint flags, uint len) |
150 |
{ |
151 |
*buf++ = flags; |
152 |
*buf++ = code; |
153 |
*buf++ = len; |
154 |
return 3; |
155 |
} |
156 |
|
157 |
static inline int |
158 |
bgp_put_attr_hdr4(byte *buf, uint code, uint flags, uint len) |
159 |
{ |
160 |
*buf++ = flags | BAF_EXT_LEN; |
161 |
*buf++ = code; |
162 |
put_u16(buf, len); |
163 |
return 4; |
164 |
} |
165 |
|
166 |
static inline int |
167 |
bgp_put_attr_hdr(byte *buf, uint code, uint flags, uint len) |
168 |
{ |
169 |
if (len < 256) |
170 |
return bgp_put_attr_hdr3(buf, code, flags, len);
|
171 |
else
|
172 |
return bgp_put_attr_hdr4(buf, code, flags, len);
|
173 |
} |
174 |
|
175 |
static int |
176 |
bgp_encode_u8(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
|
177 |
{ |
178 |
if (size < (3+1)) |
179 |
return -1; |
180 |
|
181 |
bgp_put_attr_hdr3(buf, EA_ID(a->id), a->flags, 1);
|
182 |
buf[3] = a->u.data;
|
183 |
|
184 |
return 3+1; |
185 |
} |
186 |
|
187 |
static int |
188 |
bgp_encode_u32(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
|
189 |
{ |
190 |
if (size < (3+4)) |
191 |
return -1; |
192 |
|
193 |
bgp_put_attr_hdr3(buf, EA_ID(a->id), a->flags, 4);
|
194 |
put_u32(buf+3, a->u.data);
|
195 |
|
196 |
return 3+4; |
197 |
} |
198 |
|
199 |
static int |
200 |
bgp_encode_u32s(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
|
201 |
{ |
202 |
uint len = a->u.ptr->length; |
203 |
|
204 |
if (size < (4+len)) |
205 |
return -1; |
206 |
|
207 |
uint hdr = bgp_put_attr_hdr(buf, EA_ID(a->id), a->flags, len); |
208 |
put_u32s(buf + hdr, (u32 *) a->u.ptr->data, len / 4);
|
209 |
|
210 |
return hdr + len;
|
211 |
} |
212 |
|
213 |
static int |
214 |
bgp_put_attr(byte *buf, uint size, uint code, uint flags, byte *data, uint len) |
215 |
{ |
216 |
if (size < (4+len)) |
217 |
return -1; |
218 |
|
219 |
uint hdr = bgp_put_attr_hdr(buf, code, flags, len); |
220 |
memcpy(buf + hdr, data, len); |
221 |
|
222 |
return hdr + len;
|
223 |
} |
224 |
|
225 |
static int |
226 |
bgp_encode_raw(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
|
227 |
{ |
228 |
return bgp_put_attr(buf, size, EA_ID(a->id), a->flags, a->u.ptr->data, a->u.ptr->length);
|
229 |
} |
230 |
|
231 |
|
232 |
/*
|
233 |
* Attribute hooks
|
234 |
*/
|
235 |
|
236 |
static void |
237 |
bgp_export_origin(struct bgp_export_state *s, eattr *a)
|
238 |
{ |
239 |
if (a->u.data > 2) |
240 |
WITHDRAW(BAD_VALUE, "ORIGIN", a->u.data);
|
241 |
} |
242 |
|
243 |
static void |
244 |
bgp_decode_origin(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
|
245 |
{ |
246 |
if (len != 1) |
247 |
WITHDRAW(BAD_LENGTH, "ORIGIN", len);
|
248 |
|
249 |
if (data[0] > 2) |
250 |
WITHDRAW(BAD_VALUE, "ORIGIN", data[0]); |
251 |
|
252 |
bgp_set_attr_u32(to, s->pool, BA_ORIGIN, flags, data[0]);
|
253 |
} |
254 |
|
255 |
static void |
256 |
bgp_format_origin(eattr *a, byte *buf, uint size UNUSED) |
257 |
{ |
258 |
static const char *bgp_origin_names[] = { "IGP", "EGP", "Incomplete" }; |
259 |
|
260 |
bsprintf(buf, (a->u.data <= 2) ? bgp_origin_names[a->u.data] : "?"); |
261 |
} |
262 |
|
263 |
|
264 |
static int |
265 |
bgp_encode_as_path(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
|
266 |
{ |
267 |
byte *data = a->u.ptr->data; |
268 |
uint len = a->u.ptr->length; |
269 |
|
270 |
if (!s->as4_session)
|
271 |
{ |
272 |
/* Prepare 16-bit AS_PATH (from 32-bit one) in a temporary buffer */
|
273 |
byte *src = data; |
274 |
data = alloca(len); |
275 |
len = as_path_32to16(data, src, len); |
276 |
} |
277 |
|
278 |
return bgp_put_attr(buf, size, BA_AS_PATH, a->flags, data, len);
|
279 |
} |
280 |
|
281 |
static void |
282 |
bgp_decode_as_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
|
283 |
{ |
284 |
char err[128]; |
285 |
|
286 |
if (!as_path_valid(data, len, (s->as4_session ? 4 : 2), err, sizeof(err))) |
287 |
WITHDRAW("Malformed AS_PATH attribute - %s", err);
|
288 |
|
289 |
if (!s->as4_session)
|
290 |
{ |
291 |
/* Prepare 32-bit AS_PATH (from 16-bit one) in a temporary buffer */
|
292 |
byte *src = data; |
293 |
data = alloca(2*len);
|
294 |
len = as_path_16to32(data, src, len); |
295 |
} |
296 |
|
297 |
bgp_set_attr_data(to, s->pool, BA_AS_PATH, flags, data, len); |
298 |
} |
299 |
|
300 |
|
301 |
static int |
302 |
bgp_encode_next_hop(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
|
303 |
{ |
304 |
/*
|
305 |
* The NEXT_HOP attribute is used only in traditional (IPv4) BGP. In MP-BGP,
|
306 |
* the next hop is encoded as a part of the MP_REACH_NLRI attribute, so we
|
307 |
* store it and encode it later by AFI-specific hooks.
|
308 |
*/
|
309 |
|
310 |
if (s->channel->afi == BGP_AF_IPV4)
|
311 |
{ |
312 |
ASSERT(a->u.ptr->length == sizeof(ip_addr));
|
313 |
|
314 |
if (size < (3+4)) |
315 |
return -1; |
316 |
|
317 |
bgp_put_attr_hdr3(buf, BA_NEXT_HOP, a->flags, 4);
|
318 |
put_ip4(buf+3, ipa_to_ip4( *(ip_addr *) a->u.ptr->data ));
|
319 |
|
320 |
return 3+4; |
321 |
} |
322 |
else
|
323 |
{ |
324 |
s->mp_next_hop = a; |
325 |
return 0; |
326 |
} |
327 |
} |
328 |
|
329 |
static void |
330 |
bgp_decode_next_hop(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data, uint len, ea_list **to UNUSED)
|
331 |
{ |
332 |
if (len != 4) |
333 |
WITHDRAW(BAD_LENGTH, "NEXT_HOP", len);
|
334 |
|
335 |
/* Semantic checks are done later */
|
336 |
s->ip_next_hop_len = len; |
337 |
s->ip_next_hop_data = data; |
338 |
} |
339 |
|
340 |
/* TODO: This function should use AF-specific hook */
|
341 |
static void |
342 |
bgp_format_next_hop(eattr *a, byte *buf, uint size UNUSED) |
343 |
{ |
344 |
ip_addr *nh = (void *) a->u.ptr->data;
|
345 |
uint len = a->u.ptr->length; |
346 |
|
347 |
ASSERT((len == 16) || (len == 32)); |
348 |
|
349 |
/* in IPv6, we may have two addresses in NEXT HOP */
|
350 |
if ((len == 16) || ipa_zero(nh[1])) |
351 |
bsprintf(buf, "%I", nh[0]); |
352 |
else
|
353 |
bsprintf(buf, "%I %I", nh[0], nh[1]); |
354 |
} |
355 |
|
356 |
|
357 |
static void |
358 |
bgp_decode_med(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
|
359 |
{ |
360 |
if (len != 4) |
361 |
WITHDRAW(BAD_LENGTH, "MULTI_EXIT_DISC", len);
|
362 |
|
363 |
u32 val = get_u32(data); |
364 |
bgp_set_attr_u32(to, s->pool, BA_MULTI_EXIT_DISC, flags, val); |
365 |
} |
366 |
|
367 |
|
368 |
static void |
369 |
bgp_export_local_pref(struct bgp_export_state *s, eattr *a)
|
370 |
{ |
371 |
if (!s->proto->is_interior)
|
372 |
UNSET(a); |
373 |
} |
374 |
|
375 |
static void |
376 |
bgp_decode_local_pref(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
|
377 |
{ |
378 |
if (!s->proto->is_interior)
|
379 |
DISCARD(BAD_EBGP, "LOCAL_PREF");
|
380 |
|
381 |
if (len != 4) |
382 |
WITHDRAW(BAD_LENGTH, "LOCAL_PREF", len);
|
383 |
|
384 |
u32 val = get_u32(data); |
385 |
bgp_set_attr_u32(to, s->pool, BA_LOCAL_PREF, flags, val); |
386 |
} |
387 |
|
388 |
|
389 |
static void |
390 |
bgp_decode_atomic_aggr(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data UNUSED, uint len, ea_list **to)
|
391 |
{ |
392 |
if (len != 0) |
393 |
DISCARD(BAD_LENGTH, "ATOMIC_AGGR", len);
|
394 |
|
395 |
bgp_set_attr_data(to, s->pool, BA_ATOMIC_AGGR, flags, NULL, 0); |
396 |
} |
397 |
|
398 |
static int |
399 |
bgp_encode_aggregator(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
|
400 |
{ |
401 |
byte *data = a->u.ptr->data; |
402 |
uint len = a->u.ptr->length; |
403 |
|
404 |
if (!s->as4_session)
|
405 |
{ |
406 |
/* Prepare 16-bit AGGREGATOR (from 32-bit one) in a temporary buffer */
|
407 |
byte *src = data; |
408 |
data = alloca(6);
|
409 |
len = aggregator_32to16(data, src); |
410 |
} |
411 |
|
412 |
return bgp_put_attr(buf, size, BA_AGGREGATOR, a->flags, data, len);
|
413 |
} |
414 |
|
415 |
static void |
416 |
bgp_decode_aggregator(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
|
417 |
{ |
418 |
if (len != (s->as4_session ? 8 : 6)) |
419 |
DISCARD(BAD_LENGTH, "AGGREGATOR", len);
|
420 |
|
421 |
if (!s->as4_session)
|
422 |
{ |
423 |
/* Prepare 32-bit AGGREGATOR (from 16-bit one) in a temporary buffer */
|
424 |
byte *src = data; |
425 |
data = alloca(8);
|
426 |
len = aggregator_16to32(data, src); |
427 |
} |
428 |
|
429 |
bgp_set_attr_data(to, s->pool, BA_AGGREGATOR, flags, data, len); |
430 |
} |
431 |
|
432 |
static void |
433 |
bgp_format_aggregator(eattr *a, byte *buf, uint size UNUSED) |
434 |
{ |
435 |
byte *data = a->u.ptr->data; |
436 |
|
437 |
bsprintf(buf, "%I4 AS%u", get_ip4(data+4), get_u32(data+0)); |
438 |
} |
439 |
|
440 |
|
441 |
static void |
442 |
bgp_export_community(struct bgp_export_state *s, eattr *a)
|
443 |
{ |
444 |
if (a->u.ptr->length == 0) |
445 |
UNSET(a); |
446 |
|
447 |
a->u.ptr = int_set_sort(s->pool, a->u.ptr); |
448 |
} |
449 |
|
450 |
static void |
451 |
bgp_decode_community(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
|
452 |
{ |
453 |
if (!len || (len % 4)) |
454 |
WITHDRAW(BAD_LENGTH, "COMMUNITY", len);
|
455 |
|
456 |
struct adata *ad = lp_alloc_adata(s->pool, len);
|
457 |
get_u32s(data, (u32 *) ad->data, len / 4);
|
458 |
bgp_set_attr_ptr(to, s->pool, BA_COMMUNITY, flags, ad); |
459 |
} |
460 |
|
461 |
|
462 |
static void |
463 |
bgp_export_originator_id(struct bgp_export_state *s, eattr *a)
|
464 |
{ |
465 |
if (!s->proto->is_internal)
|
466 |
UNSET(a); |
467 |
} |
468 |
|
469 |
static void |
470 |
bgp_decode_originator_id(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
|
471 |
{ |
472 |
if (!s->proto->is_internal)
|
473 |
DISCARD(BAD_EBGP, "ORIGINATOR_ID");
|
474 |
|
475 |
if (len != 4) |
476 |
WITHDRAW(BAD_LENGTH, "ORIGINATOR_ID", len);
|
477 |
|
478 |
u32 val = get_u32(data); |
479 |
bgp_set_attr_u32(to, s->pool, BA_ORIGINATOR_ID, flags, val); |
480 |
} |
481 |
|
482 |
|
483 |
static void |
484 |
bgp_export_cluster_list(struct bgp_export_state *s UNUSED, eattr *a)
|
485 |
{ |
486 |
if (!s->proto->is_internal)
|
487 |
UNSET(a); |
488 |
|
489 |
if (a->u.ptr->length == 0) |
490 |
UNSET(a); |
491 |
} |
492 |
|
493 |
static void |
494 |
bgp_decode_cluster_list(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
|
495 |
{ |
496 |
if (!s->proto->is_internal)
|
497 |
DISCARD(BAD_EBGP, "CLUSTER_LIST");
|
498 |
|
499 |
if (!len || (len % 4)) |
500 |
WITHDRAW(BAD_LENGTH, "CLUSTER_LIST", len);
|
501 |
|
502 |
struct adata *ad = lp_alloc_adata(s->pool, len);
|
503 |
get_u32s(data, (u32 *) ad->data, len / 4);
|
504 |
bgp_set_attr_ptr(to, s->pool, BA_CLUSTER_LIST, flags, ad); |
505 |
} |
506 |
|
507 |
static void |
508 |
bgp_format_cluster_list(eattr *a, byte *buf, uint size) |
509 |
{ |
510 |
/* Truncates cluster lists larger than buflen, probably not a problem */
|
511 |
int_set_format(a->u.ptr, 0, -1, buf, size); |
512 |
} |
513 |
|
514 |
|
515 |
static inline u32 |
516 |
get_af3(byte *buf) |
517 |
{ |
518 |
return (get_u16(buf) << 16) | buf[2]; |
519 |
} |
520 |
|
521 |
static void |
522 |
bgp_decode_mp_reach_nlri(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data, uint len, ea_list **to UNUSED)
|
523 |
{ |
524 |
/*
|
525 |
* 2 B MP_REACH_NLRI data - Address Family Identifier
|
526 |
* 1 B MP_REACH_NLRI data - Subsequent Address Family Identifier
|
527 |
* 1 B MP_REACH_NLRI data - Length of Next Hop Network Address
|
528 |
* var MP_REACH_NLRI data - Network Address of Next Hop
|
529 |
* 1 B MP_REACH_NLRI data - Reserved (zero)
|
530 |
* var MP_REACH_NLRI data - Network Layer Reachability Information
|
531 |
*/
|
532 |
|
533 |
if ((len < 5) || (len < (5 + (uint) data[3]))) |
534 |
bgp_parse_error(s, 9);
|
535 |
|
536 |
s->mp_reach_af = get_af3(data); |
537 |
s->mp_next_hop_len = data[3];
|
538 |
s->mp_next_hop_data = data + 4;
|
539 |
s->mp_reach_len = len - 5 - s->mp_next_hop_len;
|
540 |
s->mp_reach_nlri = data + 5 + s->mp_next_hop_len;
|
541 |
} |
542 |
|
543 |
|
544 |
static void |
545 |
bgp_decode_mp_unreach_nlri(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data, uint len, ea_list **to UNUSED)
|
546 |
{ |
547 |
/*
|
548 |
* 2 B MP_UNREACH_NLRI data - Address Family Identifier
|
549 |
* 1 B MP_UNREACH_NLRI data - Subsequent Address Family Identifier
|
550 |
* var MP_UNREACH_NLRI data - Network Layer Reachability Information
|
551 |
*/
|
552 |
|
553 |
if (len < 3) |
554 |
bgp_parse_error(s, 9);
|
555 |
|
556 |
s->mp_unreach_af = get_af3(data); |
557 |
s->mp_unreach_len = len - 3;
|
558 |
s->mp_unreach_nlri = data + 3;
|
559 |
} |
560 |
|
561 |
|
562 |
static void |
563 |
bgp_export_ext_community(struct bgp_export_state *s, eattr *a)
|
564 |
{ |
565 |
if (a->u.ptr->length == 0) |
566 |
UNSET(a); |
567 |
|
568 |
a->u.ptr = ec_set_sort(s->pool, a->u.ptr); |
569 |
} |
570 |
|
571 |
static void |
572 |
bgp_decode_ext_community(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
|
573 |
{ |
574 |
if (!len || (len % 8)) |
575 |
WITHDRAW(BAD_LENGTH, "EXT_COMMUNITY", len);
|
576 |
|
577 |
struct adata *ad = lp_alloc_adata(s->pool, len);
|
578 |
get_u32s(data, (u32 *) ad->data, len / 4);
|
579 |
bgp_set_attr_ptr(to, s->pool, BA_EXT_COMMUNITY, flags, ad); |
580 |
} |
581 |
|
582 |
|
583 |
static void |
584 |
bgp_decode_as4_aggregator(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
|
585 |
{ |
586 |
if (s->as4_session)
|
587 |
DISCARD(NEW_BGP, "AS4_AGGREGATOR");
|
588 |
|
589 |
if (len != 8) |
590 |
DISCARD(BAD_LENGTH, "AS4_AGGREGATOR", len);
|
591 |
|
592 |
bgp_set_attr_data(to, s->pool, BA_AS4_AGGREGATOR, flags, data, len); |
593 |
} |
594 |
|
595 |
static void |
596 |
bgp_decode_as4_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
|
597 |
{ |
598 |
char err[128]; |
599 |
|
600 |
if (s->as4_session)
|
601 |
DISCARD(NEW_BGP, "AS4_PATH");
|
602 |
|
603 |
if (len < 6) |
604 |
DISCARD(BAD_LENGTH, "AS4_PATH", len);
|
605 |
|
606 |
if (!as_path_valid(data, len, 4, err, sizeof(err))) |
607 |
DISCARD("Malformed AS4_PATH attribute - %s", err);
|
608 |
|
609 |
/* XXXX remove CONFED segments */
|
610 |
bgp_set_attr_data(to, s->pool, BA_AS4_PATH, flags, data, len); |
611 |
} |
612 |
|
613 |
static void |
614 |
bgp_export_large_community(struct bgp_export_state *s, eattr *a)
|
615 |
{ |
616 |
if (a->u.ptr->length == 0) |
617 |
UNSET(a); |
618 |
|
619 |
a->u.ptr = lc_set_sort(s->pool, a->u.ptr); |
620 |
} |
621 |
|
622 |
static void |
623 |
bgp_decode_large_community(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
|
624 |
{ |
625 |
if (!len || (len % 12)) |
626 |
WITHDRAW(BAD_LENGTH, "LARGE_COMMUNITY", len);
|
627 |
|
628 |
struct adata *ad = lp_alloc_adata(s->pool, len);
|
629 |
get_u32s(data, (u32 *) ad->data, len / 4);
|
630 |
bgp_set_attr_ptr(to, s->pool, BA_LARGE_COMMUNITY, flags, ad); |
631 |
} |
632 |
|
633 |
static inline void |
634 |
bgp_decode_unknown(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to)
|
635 |
{ |
636 |
bgp_set_attr_data(to, s->pool, code, flags, data, len); |
637 |
} |
638 |
|
639 |
|
640 |
/*
|
641 |
* Attribute table
|
642 |
*/
|
643 |
|
644 |
static const struct bgp_attr_desc bgp_attr_table[] = { |
645 |
[BA_ORIGIN] = { |
646 |
.name = "origin",
|
647 |
.type = EAF_TYPE_INT, |
648 |
.flags = BAF_TRANSITIVE, |
649 |
.export = bgp_export_origin, |
650 |
.encode = bgp_encode_u8, |
651 |
.decode = bgp_decode_origin, |
652 |
.format = bgp_format_origin, |
653 |
}, |
654 |
[BA_AS_PATH] = { |
655 |
.name = "as_path",
|
656 |
.type = EAF_TYPE_AS_PATH, |
657 |
.flags = BAF_TRANSITIVE, |
658 |
.encode = bgp_encode_as_path, |
659 |
.decode = bgp_decode_as_path, |
660 |
}, |
661 |
[BA_NEXT_HOP] = { |
662 |
.name = "next_hop",
|
663 |
.type = EAF_TYPE_IP_ADDRESS, |
664 |
.flags = BAF_TRANSITIVE, |
665 |
.encode = bgp_encode_next_hop, |
666 |
.decode = bgp_decode_next_hop, |
667 |
.format = bgp_format_next_hop, |
668 |
}, |
669 |
[BA_MULTI_EXIT_DISC] = { |
670 |
.name = "med",
|
671 |
.type = EAF_TYPE_INT, |
672 |
.flags = BAF_OPTIONAL, |
673 |
.encode = bgp_encode_u32, |
674 |
.decode = bgp_decode_med, |
675 |
}, |
676 |
[BA_LOCAL_PREF] = { |
677 |
.name = "local_pref",
|
678 |
.type = EAF_TYPE_INT, |
679 |
.flags = BAF_TRANSITIVE, |
680 |
.export = bgp_export_local_pref, |
681 |
.encode = bgp_encode_u32, |
682 |
.decode = bgp_decode_local_pref, |
683 |
}, |
684 |
[BA_ATOMIC_AGGR] = { |
685 |
.name = "atomic_aggr",
|
686 |
.type = EAF_TYPE_OPAQUE, |
687 |
.flags = BAF_TRANSITIVE, |
688 |
.encode = bgp_encode_raw, |
689 |
.decode = bgp_decode_atomic_aggr, |
690 |
}, |
691 |
[BA_AGGREGATOR] = { |
692 |
.name = "aggregator",
|
693 |
.type = EAF_TYPE_OPAQUE, |
694 |
.flags = BAF_OPTIONAL | BAF_TRANSITIVE, |
695 |
.encode = bgp_encode_aggregator, |
696 |
.decode = bgp_decode_aggregator, |
697 |
.format = bgp_format_aggregator, |
698 |
}, |
699 |
[BA_COMMUNITY] = { |
700 |
.name = "community",
|
701 |
.type = EAF_TYPE_INT_SET, |
702 |
.flags = BAF_OPTIONAL | BAF_TRANSITIVE, |
703 |
.export = bgp_export_community, |
704 |
.encode = bgp_encode_u32s, |
705 |
.decode = bgp_decode_community, |
706 |
}, |
707 |
[BA_ORIGINATOR_ID] = { |
708 |
.name = "originator_id",
|
709 |
.type = EAF_TYPE_ROUTER_ID, |
710 |
.flags = BAF_OPTIONAL, |
711 |
.export = bgp_export_originator_id, |
712 |
.encode = bgp_encode_u32, |
713 |
.decode = bgp_decode_originator_id, |
714 |
}, |
715 |
[BA_CLUSTER_LIST] = { |
716 |
.name = "cluster_list",
|
717 |
.type = EAF_TYPE_INT_SET, |
718 |
.flags = BAF_OPTIONAL, |
719 |
.export = bgp_export_cluster_list, |
720 |
.encode = bgp_encode_u32s, |
721 |
.decode = bgp_decode_cluster_list, |
722 |
.format = bgp_format_cluster_list, |
723 |
}, |
724 |
[BA_MP_REACH_NLRI] = { |
725 |
.name = "mp_reach_nlri",
|
726 |
.type = EAF_TYPE_OPAQUE, |
727 |
.flags = BAF_OPTIONAL, |
728 |
.decode = bgp_decode_mp_reach_nlri, |
729 |
}, |
730 |
[BA_MP_UNREACH_NLRI] = { |
731 |
.name = "mp_unreach_nlri",
|
732 |
.type = EAF_TYPE_OPAQUE, |
733 |
.flags = BAF_OPTIONAL, |
734 |
.decode = bgp_decode_mp_unreach_nlri, |
735 |
}, |
736 |
[BA_EXT_COMMUNITY] = { |
737 |
.name = "ext_community",
|
738 |
.type = EAF_TYPE_EC_SET, |
739 |
.flags = BAF_OPTIONAL | BAF_TRANSITIVE, |
740 |
.export = bgp_export_ext_community, |
741 |
.encode = bgp_encode_u32s, |
742 |
.decode = bgp_decode_ext_community, |
743 |
}, |
744 |
[BA_AS4_PATH] = { |
745 |
.name = "as4_path",
|
746 |
.type = EAF_TYPE_AS_PATH, |
747 |
.flags = BAF_OPTIONAL | BAF_TRANSITIVE, |
748 |
.encode = bgp_encode_raw, |
749 |
.decode = bgp_decode_as4_path, |
750 |
}, |
751 |
[BA_AS4_AGGREGATOR] = { |
752 |
.name = "as4_aggregator",
|
753 |
.type = EAF_TYPE_OPAQUE, |
754 |
.flags = BAF_OPTIONAL | BAF_TRANSITIVE, |
755 |
.encode = bgp_encode_raw, |
756 |
.decode = bgp_decode_as4_aggregator, |
757 |
.format = bgp_format_aggregator, |
758 |
}, |
759 |
[BA_LARGE_COMMUNITY] = { |
760 |
.name = "large_community",
|
761 |
.type = EAF_TYPE_LC_SET, |
762 |
.flags = BAF_OPTIONAL | BAF_TRANSITIVE, |
763 |
.export = bgp_export_large_community, |
764 |
.encode = bgp_encode_u32s, |
765 |
.decode = bgp_decode_large_community, |
766 |
}, |
767 |
}; |
768 |
|
769 |
static inline int |
770 |
bgp_attr_known(uint code) |
771 |
{ |
772 |
return (code < ARRAY_SIZE(bgp_attr_table)) && bgp_attr_table[code].name;
|
773 |
} |
774 |
|
775 |
|
776 |
/*
|
777 |
* Attribute export
|
778 |
*/
|
779 |
|
780 |
static inline void |
781 |
bgp_export_attr(struct bgp_export_state *s, eattr *a, ea_list *to)
|
782 |
{ |
783 |
if (EA_PROTO(a->id) != EAP_BGP)
|
784 |
return;
|
785 |
|
786 |
uint code = EA_ID(a->id); |
787 |
|
788 |
if (bgp_attr_known(code))
|
789 |
{ |
790 |
const struct bgp_attr_desc *desc = &bgp_attr_table[code]; |
791 |
|
792 |
/* The flags might have been zero if the attr was added by filters */
|
793 |
a->flags = (a->flags & BAF_PARTIAL) | desc->flags; |
794 |
|
795 |
/* Set partial bit if new opt-trans attribute is attached to non-local route */
|
796 |
if ((s->src != NULL) && (a->type & EAF_ORIGINATED) && |
797 |
(a->flags & BAF_OPTIONAL) && (a->flags & BAF_TRANSITIVE)) |
798 |
a->flags |= BAF_PARTIAL; |
799 |
|
800 |
/* Call specific hook */
|
801 |
CALL(desc->export, s, a); |
802 |
|
803 |
/* Attribute might become undefined in hook */
|
804 |
if ((a->type & EAF_TYPE_MASK) == EAF_TYPE_UNDEF)
|
805 |
return;
|
806 |
} |
807 |
else
|
808 |
{ |
809 |
/* Don't re-export unknown non-transitive attributes */
|
810 |
if (!(a->flags & BAF_TRANSITIVE))
|
811 |
return;
|
812 |
|
813 |
a->flags |= BAF_PARTIAL; |
814 |
} |
815 |
|
816 |
/* Append updated attribute */
|
817 |
to->attrs[to->count++] = *a; |
818 |
} |
819 |
|
820 |
/**
|
821 |
* bgp_export_attrs - export BGP attributes
|
822 |
* @s: BGP export state
|
823 |
* @attrs: a list of extended attributes
|
824 |
*
|
825 |
* The bgp_export_attrs() function takes a list of attributes and merges it to
|
826 |
* one newly allocated and sorted segment. Attributes are validated and
|
827 |
* normalized by type-specific export hooks and attribute flags are updated.
|
828 |
* Some attributes may be eliminated (e.g. unknown non-tranitive attributes, or
|
829 |
* empty community sets).
|
830 |
*
|
831 |
* Result: one sorted attribute list segment, or NULL if attributes are unsuitable.
|
832 |
*/
|
833 |
static inline ea_list * |
834 |
bgp_export_attrs(struct bgp_export_state *s, ea_list *attrs)
|
835 |
{ |
836 |
/* Merge the attribute list */
|
837 |
ea_list *new = lp_alloc(s->pool, ea_scan(attrs)); |
838 |
ea_merge(attrs, new); |
839 |
ea_sort(new); |
840 |
|
841 |
uint i, count; |
842 |
count = new->count; |
843 |
new->count = 0;
|
844 |
|
845 |
/* Export each attribute */
|
846 |
for (i = 0; i < count; i++) |
847 |
bgp_export_attr(s, &new->attrs[i], new); |
848 |
|
849 |
if (s->err_withdraw)
|
850 |
return NULL; |
851 |
|
852 |
return new;
|
853 |
|
854 |
} |
855 |
|
856 |
|
857 |
/*
|
858 |
* Attribute encoding
|
859 |
*/
|
860 |
|
861 |
static inline int |
862 |
bgp_encode_attr(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
|
863 |
{ |
864 |
ASSERT(EA_PROTO(a->id) == EAP_BGP); |
865 |
|
866 |
uint code = EA_ID(a->id); |
867 |
|
868 |
if (bgp_attr_known(code))
|
869 |
return bgp_attr_table[code].encode(s, a, buf, size);
|
870 |
else
|
871 |
return bgp_encode_raw(s, a, buf, size);
|
872 |
} |
873 |
|
874 |
/**
|
875 |
* bgp_encode_attrs - encode BGP attributes
|
876 |
* @s: BGP write state
|
877 |
* @attrs: a list of extended attributes
|
878 |
* @buf: buffer
|
879 |
* @end: buffer end
|
880 |
*
|
881 |
* The bgp_encode_attrs() function takes a list of extended attributes
|
882 |
* and converts it to its BGP representation (a part of an Update message).
|
883 |
*
|
884 |
* Result: Length of the attribute block generated or -1 if not enough space.
|
885 |
*/
|
886 |
int
|
887 |
bgp_encode_attrs(struct bgp_write_state *s, ea_list *attrs, byte *buf, byte *end)
|
888 |
{ |
889 |
byte *pos = buf; |
890 |
int i, len;
|
891 |
|
892 |
for (i = 0; i < attrs->count; i++) |
893 |
{ |
894 |
len = bgp_encode_attr(s, &attrs->attrs[i], pos, end - pos); |
895 |
|
896 |
if (len < 0) |
897 |
return -1; |
898 |
|
899 |
pos += len; |
900 |
} |
901 |
|
902 |
return pos - buf;
|
903 |
} |
904 |
|
905 |
|
906 |
/*
|
907 |
* Attribute decoding
|
908 |
*/
|
909 |
|
910 |
static void bgp_process_as4_attrs(ea_list **attrs, struct linpool *pool); |
911 |
|
912 |
static inline int |
913 |
bgp_as_path_loopy(struct bgp_proto *p, ea_list *attrs, u32 asn)
|
914 |
{ |
915 |
eattr *e = bgp_find_attr(attrs, BA_AS_PATH); |
916 |
int num = p->cf->allow_local_as + 1; |
917 |
return (e && (num > 0) && as_path_contains(e->u.ptr, asn, num)); |
918 |
} |
919 |
|
920 |
static inline int |
921 |
bgp_originator_id_loopy(struct bgp_proto *p, ea_list *attrs)
|
922 |
{ |
923 |
eattr *e = bgp_find_attr(attrs, BA_ORIGINATOR_ID); |
924 |
return (e && (e->u.data == p->local_id));
|
925 |
} |
926 |
|
927 |
static inline int |
928 |
bgp_cluster_list_loopy(struct bgp_proto *p, ea_list *attrs)
|
929 |
{ |
930 |
eattr *e = bgp_find_attr(attrs, BA_CLUSTER_LIST); |
931 |
return (e && int_set_contains(e->u.ptr, p->rr_cluster_id));
|
932 |
} |
933 |
|
934 |
static inline void |
935 |
bgp_decode_attr(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to)
|
936 |
{ |
937 |
/* Handle duplicate attributes; RFC 7606 3 (g) */
|
938 |
if (BIT32_TEST(s->attrs_seen, code))
|
939 |
{ |
940 |
if ((code == BA_MP_REACH_NLRI) || (code == BA_MP_UNREACH_NLRI))
|
941 |
bgp_parse_error(s, 1);
|
942 |
else
|
943 |
DISCARD("Discarding duplicate attribute (code %u)", code);
|
944 |
} |
945 |
BIT32_SET(s->attrs_seen, code); |
946 |
|
947 |
if (bgp_attr_known(code))
|
948 |
{ |
949 |
const struct bgp_attr_desc *desc = &bgp_attr_table[code]; |
950 |
|
951 |
/* Handle conflicting flags; RFC 7606 3 (c) */
|
952 |
if ((flags ^ desc->flags) & (BAF_OPTIONAL | BAF_TRANSITIVE))
|
953 |
WITHDRAW("Malformed %s attribute - conflicting flags (%02x)", desc->name, flags);
|
954 |
|
955 |
desc->decode(s, code, flags, data, len, to); |
956 |
} |
957 |
else /* Unknown attribute */ |
958 |
{ |
959 |
if (!(flags & BAF_OPTIONAL))
|
960 |
WITHDRAW("Unknown attribute (code %u) - conflicting flags (%02x)", code, flags);
|
961 |
|
962 |
bgp_decode_unknown(s, code, flags, data, len, to); |
963 |
} |
964 |
} |
965 |
|
966 |
/**
|
967 |
* bgp_decode_attrs - check and decode BGP attributes
|
968 |
* @s: BGP parse state
|
969 |
* @data: start of attribute block
|
970 |
* @len: length of attribute block
|
971 |
*
|
972 |
* This function takes a BGP attribute block (a part of an Update message), checks
|
973 |
* its consistency and converts it to a list of BIRD route attributes represented
|
974 |
* by an (uncached) &rta.
|
975 |
*/
|
976 |
ea_list * |
977 |
bgp_decode_attrs(struct bgp_parse_state *s, byte *data, uint len)
|
978 |
{ |
979 |
struct bgp_proto *p = s->proto;
|
980 |
ea_list *attrs = NULL;
|
981 |
uint code, flags, alen; |
982 |
byte *pos = data; |
983 |
|
984 |
/* Parse the attributes */
|
985 |
while (len)
|
986 |
{ |
987 |
alen = 0;
|
988 |
|
989 |
/* Read attribute type */
|
990 |
if (len < 2) |
991 |
goto framing_error;
|
992 |
flags = pos[0];
|
993 |
code = pos[1];
|
994 |
ADVANCE(pos, len, 2);
|
995 |
|
996 |
/* Read attribute length */
|
997 |
if (flags & BAF_EXT_LEN)
|
998 |
{ |
999 |
if (len < 2) |
1000 |
goto framing_error;
|
1001 |
alen = get_u16(pos); |
1002 |
ADVANCE(pos, len, 2);
|
1003 |
} |
1004 |
else
|
1005 |
{ |
1006 |
if (len < 1) |
1007 |
goto framing_error;
|
1008 |
alen = *pos; |
1009 |
ADVANCE(pos, len, 1);
|
1010 |
} |
1011 |
|
1012 |
if (alen > len)
|
1013 |
goto framing_error;
|
1014 |
|
1015 |
DBG("Attr %02x %02x %u\n", code, flags, alen);
|
1016 |
|
1017 |
bgp_decode_attr(s, code, flags, pos, alen, &attrs); |
1018 |
ADVANCE(pos, len, alen); |
1019 |
} |
1020 |
|
1021 |
if (s->err_withdraw)
|
1022 |
goto withdraw;
|
1023 |
|
1024 |
/* If there is no reachability NLRI, we are finished */
|
1025 |
if (!s->ip_reach_len && !s->mp_reach_len)
|
1026 |
return NULL; |
1027 |
|
1028 |
|
1029 |
/* Handle missing mandatory attributes; RFC 7606 3 (d) */
|
1030 |
if (!BIT32_TEST(s->attrs_seen, BA_ORIGIN))
|
1031 |
{ REPORT(NO_MANDATORY, "ORIGIN"); goto withdraw; } |
1032 |
|
1033 |
if (!BIT32_TEST(s->attrs_seen, BA_AS_PATH))
|
1034 |
{ REPORT(NO_MANDATORY, "AS_PATH"); goto withdraw; } |
1035 |
|
1036 |
/* When receiving attributes from non-AS4-aware BGP speaker, we have to
|
1037 |
reconstruct AS_PATH and AGGREGATOR attributes; RFC 6793 4.2.3 */
|
1038 |
if (!p->as4_session)
|
1039 |
bgp_process_as4_attrs(&attrs, s->pool); |
1040 |
|
1041 |
/* Reject routes with our ASN in AS_PATH attribute */
|
1042 |
if (bgp_as_path_loopy(p, attrs, p->local_as))
|
1043 |
goto withdraw;
|
1044 |
|
1045 |
/* Reject routes with our Confederation ID in AS_PATH attribute; RFC 5065 4 */
|
1046 |
if ((p->public_as != p->local_as) && bgp_as_path_loopy(p, attrs, p->public_as))
|
1047 |
goto withdraw;
|
1048 |
|
1049 |
/* Reject routes with our Router ID in ORIGINATOR_ID attribute; RFC 4456 8 */
|
1050 |
if (p->is_internal && bgp_originator_id_loopy(p, attrs))
|
1051 |
goto withdraw;
|
1052 |
|
1053 |
/* Reject routes with our Cluster ID in CLUSTER_LIST attribute; RFC 4456 8 */
|
1054 |
if (p->rr_client && bgp_cluster_list_loopy(p, attrs))
|
1055 |
goto withdraw;
|
1056 |
|
1057 |
/* If there is no local preference, define one */
|
1058 |
if (!BIT32_TEST(s->attrs_seen, BA_LOCAL_PREF))
|
1059 |
bgp_set_attr_u32(&attrs, s->pool, BA_LOCAL_PREF, 0, p->cf->default_local_pref);
|
1060 |
|
1061 |
return attrs;
|
1062 |
|
1063 |
|
1064 |
framing_error:
|
1065 |
/* RFC 7606 4 - handle attribute framing errors */
|
1066 |
REPORT("Malformed attribute list - framing error (%u/%u) at %d",
|
1067 |
alen, len, (int) (pos - s->attrs));
|
1068 |
|
1069 |
withdraw:
|
1070 |
/* RFC 7606 5.2 - handle missing NLRI during errors */
|
1071 |
if (!s->ip_reach_len && !s->mp_reach_len)
|
1072 |
bgp_parse_error(s, 1);
|
1073 |
|
1074 |
s->err_withdraw = 1;
|
1075 |
return NULL; |
1076 |
} |
1077 |
|
1078 |
|
1079 |
/*
|
1080 |
* Route bucket hash table
|
1081 |
*/
|
1082 |
|
1083 |
#define RBH_KEY(b) b->eattrs, b->hash
|
1084 |
#define RBH_NEXT(b) b->next
|
1085 |
#define RBH_EQ(a1,h1,a2,h2) h1 == h2 && ea_same(a1, a2)
|
1086 |
#define RBH_FN(a,h) h
|
1087 |
|
1088 |
#define RBH_REHASH bgp_rbh_rehash
|
1089 |
#define RBH_PARAMS /8, *2, 2, 2, 8, 20 |
1090 |
|
1091 |
|
1092 |
HASH_DEFINE_REHASH_FN(RBH, struct bgp_bucket)
|
1093 |
|
1094 |
void
|
1095 |
bgp_init_bucket_table(struct bgp_channel *c)
|
1096 |
{ |
1097 |
HASH_INIT(c->bucket_hash, c->pool, 8);
|
1098 |
|
1099 |
init_list(&c->bucket_queue); |
1100 |
c->withdraw_bucket = NULL;
|
1101 |
} |
1102 |
|
1103 |
static struct bgp_bucket * |
1104 |
bgp_get_bucket(struct bgp_channel *c, ea_list *new)
|
1105 |
{ |
1106 |
/* Hash and lookup */
|
1107 |
u32 hash = ea_hash(new); |
1108 |
struct bgp_bucket *b = HASH_FIND(c->bucket_hash, RBH, new, hash);
|
1109 |
|
1110 |
if (b)
|
1111 |
return b;
|
1112 |
|
1113 |
uint ea_size = sizeof(ea_list) + new->count * sizeof(eattr); |
1114 |
uint ea_size_aligned = BIRD_ALIGN(ea_size, CPU_STRUCT_ALIGN); |
1115 |
uint size = sizeof(struct bgp_bucket) + ea_size_aligned; |
1116 |
uint i; |
1117 |
byte *dest; |
1118 |
|
1119 |
/* Gather total size of non-inline attributes */
|
1120 |
for (i = 0; i < new->count; i++) |
1121 |
{ |
1122 |
eattr *a = &new->attrs[i]; |
1123 |
|
1124 |
if (!(a->type & EAF_EMBEDDED))
|
1125 |
size += BIRD_ALIGN(sizeof(struct adata) + a->u.ptr->length, CPU_STRUCT_ALIGN); |
1126 |
} |
1127 |
|
1128 |
/* Create the bucket */
|
1129 |
b = mb_alloc(c->pool, size); |
1130 |
init_list(&b->prefixes); |
1131 |
b->hash = hash; |
1132 |
|
1133 |
/* Copy list of extended attributes */
|
1134 |
memcpy(b->eattrs, new, ea_size); |
1135 |
dest = ((byte *) b->eattrs) + ea_size_aligned; |
1136 |
|
1137 |
/* Copy values of non-inline attributes */
|
1138 |
for (i = 0; i < new->count; i++) |
1139 |
{ |
1140 |
eattr *a = &b->eattrs->attrs[i]; |
1141 |
|
1142 |
if (!(a->type & EAF_EMBEDDED))
|
1143 |
{ |
1144 |
struct adata *oa = a->u.ptr;
|
1145 |
struct adata *na = (struct adata *) dest; |
1146 |
memcpy(na, oa, sizeof(struct adata) + oa->length); |
1147 |
a->u.ptr = na; |
1148 |
dest += BIRD_ALIGN(sizeof(struct adata) + na->length, CPU_STRUCT_ALIGN); |
1149 |
} |
1150 |
} |
1151 |
|
1152 |
/* Insert the bucket to send queue and bucket hash */
|
1153 |
add_tail(&c->bucket_queue, &b->send_node); |
1154 |
HASH_INSERT2(c->bucket_hash, RBH, c->pool, b); |
1155 |
|
1156 |
return b;
|
1157 |
} |
1158 |
|
1159 |
static struct bgp_bucket * |
1160 |
bgp_get_withdraw_bucket(struct bgp_channel *c)
|
1161 |
{ |
1162 |
if (!c->withdraw_bucket)
|
1163 |
{ |
1164 |
c->withdraw_bucket = mb_allocz(c->pool, sizeof(struct bgp_bucket)); |
1165 |
init_list(&c->withdraw_bucket->prefixes); |
1166 |
} |
1167 |
|
1168 |
return c->withdraw_bucket;
|
1169 |
} |
1170 |
|
1171 |
void
|
1172 |
bgp_free_bucket(struct bgp_channel *c, struct bgp_bucket *b) |
1173 |
{ |
1174 |
rem_node(&b->send_node); |
1175 |
HASH_REMOVE2(c->bucket_hash, RBH, c->pool, b); |
1176 |
mb_free(b); |
1177 |
} |
1178 |
|
1179 |
void
|
1180 |
bgp_defer_bucket(struct bgp_channel *c, struct bgp_bucket *b) |
1181 |
{ |
1182 |
rem_node(&b->send_node); |
1183 |
add_tail(&c->bucket_queue, &b->send_node); |
1184 |
} |
1185 |
|
1186 |
void
|
1187 |
bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b) |
1188 |
{ |
1189 |
struct bgp_proto *p = (void *) c->c.proto; |
1190 |
struct bgp_bucket *wb = bgp_get_withdraw_bucket(c);
|
1191 |
|
1192 |
log(L_ERR "%s: Attribute list too long", p->p.name);
|
1193 |
while (!EMPTY_LIST(b->prefixes))
|
1194 |
{ |
1195 |
struct bgp_prefix *px = HEAD(b->prefixes);
|
1196 |
|
1197 |
log(L_ERR "%s: - withdrawing %N", p->p.name, &px->net);
|
1198 |
rem_node(&px->buck_node); |
1199 |
add_tail(&wb->prefixes, &px->buck_node); |
1200 |
} |
1201 |
} |
1202 |
|
1203 |
|
1204 |
/*
|
1205 |
* Prefix hash table
|
1206 |
*/
|
1207 |
|
1208 |
#define PXH_KEY(px) px->net, px->path_id, px->hash
|
1209 |
#define PXH_NEXT(px) px->next
|
1210 |
#define PXH_EQ(n1,i1,h1,n2,i2,h2) h1 == h2 && i1 == i2 && net_equal(n1, n2)
|
1211 |
#define PXH_FN(n,i,h) h
|
1212 |
|
1213 |
#define PXH_REHASH bgp_pxh_rehash
|
1214 |
#define PXH_PARAMS /8, *2, 2, 2, 8, 20 |
1215 |
|
1216 |
|
1217 |
HASH_DEFINE_REHASH_FN(PXH, struct bgp_prefix)
|
1218 |
|
1219 |
void
|
1220 |
bgp_init_prefix_table(struct bgp_channel *c)
|
1221 |
{ |
1222 |
HASH_INIT(c->prefix_hash, c->pool, 8);
|
1223 |
|
1224 |
c->prefix_slab = sl_new(c->pool, sizeof(struct bgp_prefix) + |
1225 |
net_addr_length[c->c.net_type]); |
1226 |
} |
1227 |
|
1228 |
static struct bgp_prefix * |
1229 |
bgp_get_prefix(struct bgp_channel *c, net_addr *net, u32 path_id)
|
1230 |
{ |
1231 |
u32 hash = net_hash(net) ^ u32_hash(path_id); |
1232 |
struct bgp_prefix *px = HASH_FIND(c->prefix_hash, PXH, net, path_id, hash);
|
1233 |
|
1234 |
if (px)
|
1235 |
{ |
1236 |
rem_node(&px->buck_node); |
1237 |
return px;
|
1238 |
} |
1239 |
|
1240 |
px = sl_alloc(c->prefix_slab); |
1241 |
px->buck_node.next = NULL;
|
1242 |
px->buck_node.prev = NULL;
|
1243 |
px->hash = hash; |
1244 |
px->path_id = path_id; |
1245 |
net_copy(px->net, net); |
1246 |
|
1247 |
HASH_INSERT2(c->prefix_hash, PXH, c->pool, px); |
1248 |
|
1249 |
return px;
|
1250 |
} |
1251 |
|
1252 |
void
|
1253 |
bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px) |
1254 |
{ |
1255 |
rem_node(&px->buck_node); |
1256 |
HASH_REMOVE2(c->prefix_hash, PXH, c->pool, px); |
1257 |
sl_free(c->prefix_slab, px); |
1258 |
} |
1259 |
|
1260 |
|
1261 |
/*
|
1262 |
* BGP protocol glue
|
1263 |
*/
|
1264 |
|
1265 |
int
|
1266 |
bgp_import_control(struct proto *P, rte **new, ea_list **attrs UNUSED, struct linpool *pool UNUSED) |
1267 |
{ |
1268 |
rte *e = *new; |
1269 |
struct proto *SRC = e->attrs->src->proto;
|
1270 |
struct bgp_proto *p = (struct bgp_proto *) P; |
1271 |
struct bgp_proto *src = (SRC->proto == &proto_bgp) ? (struct bgp_proto *) SRC : NULL; |
1272 |
|
1273 |
/* Reject our routes */
|
1274 |
if (src == p)
|
1275 |
return -1; |
1276 |
|
1277 |
/* Accept non-BGP routes */
|
1278 |
if (src == NULL) |
1279 |
return 0; |
1280 |
|
1281 |
/* IBGP route reflection, RFC 4456 */
|
1282 |
if (p->is_internal && src->is_internal && (p->local_as == src->local_as))
|
1283 |
{ |
1284 |
/* Rejected unless configured as route reflector */
|
1285 |
if (!p->rr_client && !src->rr_client)
|
1286 |
return -1; |
1287 |
|
1288 |
/* Generally, this should be handled when path is received, but we check it
|
1289 |
also here as rr_cluster_id may be undefined or different in src. */
|
1290 |
if (p->rr_cluster_id && bgp_cluster_list_loopy(p, e->attrs->eattrs))
|
1291 |
return -1; |
1292 |
} |
1293 |
|
1294 |
/* Handle well-known communities, RFC 1997 */
|
1295 |
struct eattr *c;
|
1296 |
if (p->cf->interpret_communities &&
|
1297 |
(c = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_COMMUNITY)))) |
1298 |
{ |
1299 |
struct adata *d = c->u.ptr;
|
1300 |
|
1301 |
/* Do not export anywhere */
|
1302 |
if (int_set_contains(d, BGP_COMM_NO_ADVERTISE))
|
1303 |
return -1; |
1304 |
|
1305 |
/* Do not export outside of AS (or member-AS) */
|
1306 |
if (!p->is_internal && int_set_contains(d, BGP_COMM_NO_EXPORT_SUBCONFED))
|
1307 |
return -1; |
1308 |
|
1309 |
/* Do not export outside of AS (or confederation) */
|
1310 |
if (!p->is_interior && int_set_contains(d, BGP_COMM_NO_EXPORT))
|
1311 |
return -1; |
1312 |
} |
1313 |
|
1314 |
return 0; |
1315 |
} |
1316 |
|
1317 |
static const adata null_adata; /* adata of length 0 */ |
1318 |
|
1319 |
static inline void |
1320 |
bgp_path_prepend(ea_list **attrs, struct linpool *pool, int seg, u32 as, int strip) |
1321 |
{ |
1322 |
eattr *a = bgp_find_attr(*attrs, BA_AS_PATH); |
1323 |
adata *d = as_path_prepend2(pool, a ? a->u.ptr : &null_adata, seg, as, strip); |
1324 |
bgp_set_attr_ptr(attrs, pool, BA_AS_PATH, 0, d);
|
1325 |
} |
1326 |
|
1327 |
static inline void |
1328 |
bgp_cluster_list_prepend(ea_list **attrs, struct linpool *pool, u32 id)
|
1329 |
{ |
1330 |
eattr *a = bgp_find_attr(*attrs, BA_CLUSTER_LIST); |
1331 |
adata *d = int_set_add(pool, a ? a->u.ptr : NULL, id);
|
1332 |
bgp_set_attr_ptr(attrs, pool, BA_CLUSTER_LIST, 0, d);
|
1333 |
} |
1334 |
|
1335 |
static ea_list *
|
1336 |
bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *attrs, struct linpool *pool) |
1337 |
{ |
1338 |
struct proto *SRC = e->attrs->src->proto;
|
1339 |
struct bgp_proto *src = (SRC->proto == &proto_bgp) ? (void *) SRC : NULL; |
1340 |
struct bgp_export_state s = { .proto = p, .channel =c, .pool = pool, .src = src, .route = e };
|
1341 |
eattr *a; |
1342 |
|
1343 |
/* ORIGIN attribute - mandatory, attach if missing */
|
1344 |
if (! bgp_find_attr(attrs, BA_ORIGIN))
|
1345 |
bgp_set_attr_u32(&attrs, pool, BA_ORIGIN, 0, src ? ORIGIN_INCOMPLETE : ORIGIN_IGP);
|
1346 |
|
1347 |
/* AS_PATH attribute - keep or prepend ASN */
|
1348 |
if (p->is_internal ||
|
1349 |
(p->rs_client && src && src->rs_client)) |
1350 |
{ |
1351 |
/* IBGP or route server -> just ensure there is one */
|
1352 |
if (! bgp_find_attr(attrs, BA_AS_PATH))
|
1353 |
bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, lp_alloc_adata(pool, 0)); |
1354 |
} |
1355 |
else if (p->is_interior) |
1356 |
{ |
1357 |
/* Confederation -> prepend ASN as CONFED_SEQUENCE, keep CONFED_* segments */
|
1358 |
bgp_path_prepend(&attrs, pool, AS_PATH_CONFED_SEQUENCE, p->public_as, 0);
|
1359 |
} |
1360 |
else /* Regular EBGP (no RS, no confederation) */ |
1361 |
{ |
1362 |
/* Regular EBGP -> prepend ASN as regular segment, strip CONFED_* segments */
|
1363 |
bgp_path_prepend(&attrs, pool, AS_PATH_SEQUENCE, p->public_as, 1);
|
1364 |
|
1365 |
/* MULTI_EXIT_DESC attribute - accept only if set in export filter */
|
1366 |
a = bgp_find_attr(attrs, BA_MULTI_EXIT_DISC); |
1367 |
if (a && !(a->type & EAF_FRESH))
|
1368 |
bgp_unset_attr(&attrs, pool, BA_MULTI_EXIT_DISC); |
1369 |
} |
1370 |
|
1371 |
/* NEXT_HOP attribute - delegated to AF-specific hook */
|
1372 |
a = bgp_find_attr(attrs, BA_NEXT_HOP); |
1373 |
bgp_update_next_hop(&s, a, &attrs); |
1374 |
|
1375 |
/* LOCAL_PREF attribute - required for IBGP, attach if missing */
|
1376 |
if (p->is_interior && ! bgp_find_attr(attrs, BA_LOCAL_PREF))
|
1377 |
bgp_set_attr_u32(&attrs, pool, BA_LOCAL_PREF, 0, p->cf->default_local_pref);
|
1378 |
|
1379 |
/* IBGP route reflection, RFC 4456 */
|
1380 |
if (src && src->is_internal && p->is_internal && (src->local_as == p->local_as))
|
1381 |
{ |
1382 |
/* ORIGINATOR_ID attribute - attach if not already set */
|
1383 |
if (! bgp_find_attr(attrs, BA_ORIGINATOR_ID))
|
1384 |
bgp_set_attr_u32(&attrs, pool, BA_ORIGINATOR_ID, 0, src->remote_id);
|
1385 |
|
1386 |
/* CLUSTER_LIST attribute - prepend cluster ID */
|
1387 |
if (src->rr_cluster_id)
|
1388 |
bgp_cluster_list_prepend(&attrs, pool, src->rr_cluster_id); |
1389 |
|
1390 |
/* Handle different src and dst cluster ID - prepend both ones */
|
1391 |
if (p->rr_cluster_id && (src->rr_cluster_id != p->rr_cluster_id))
|
1392 |
bgp_cluster_list_prepend(&attrs, pool, p->rr_cluster_id); |
1393 |
} |
1394 |
|
1395 |
/* AS4_* transition attributes, RFC 6793 4.2.2 */
|
1396 |
if (! p->as4_session)
|
1397 |
{ |
1398 |
a = bgp_find_attr(attrs, BA_AS_PATH); |
1399 |
if (a && as_path_contains_as4(a->u.ptr))
|
1400 |
{ |
1401 |
bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, as_path_to_old(pool, a->u.ptr));
|
1402 |
bgp_set_attr_ptr(&attrs, pool, BA_AS4_PATH, 0, as_path_strip_confed(pool, a->u.ptr));
|
1403 |
} |
1404 |
|
1405 |
a = bgp_find_attr(attrs, BA_AGGREGATOR); |
1406 |
if (a && aggregator_contains_as4(a->u.ptr))
|
1407 |
{ |
1408 |
bgp_set_attr_ptr(&attrs, pool, BA_AGGREGATOR, 0, aggregator_to_old(pool, a->u.ptr));
|
1409 |
bgp_set_attr_ptr(&attrs, pool, BA_AS4_AGGREGATOR, 0, a->u.ptr);
|
1410 |
} |
1411 |
} |
1412 |
|
1413 |
/* Apply per-attribute export hooks for validatation and normalization */
|
1414 |
return bgp_export_attrs(&s, attrs);
|
1415 |
} |
1416 |
|
1417 |
void
|
1418 |
bgp_rt_notify(struct proto *P, struct channel *C, net *n, rte *new, rte *old, ea_list *attrs) |
1419 |
{ |
1420 |
struct bgp_proto *p = (void *) P; |
1421 |
struct bgp_channel *c = (void *) C; |
1422 |
struct bgp_bucket *buck;
|
1423 |
struct bgp_prefix *px;
|
1424 |
u32 path; |
1425 |
|
1426 |
if (new)
|
1427 |
{ |
1428 |
attrs = bgp_update_attrs(p, c, new, attrs, bgp_linpool); |
1429 |
|
1430 |
/* If attributes are invalid, we fail back to withdraw */
|
1431 |
buck = attrs ? bgp_get_bucket(c, attrs) : bgp_get_withdraw_bucket(c); |
1432 |
path = new->attrs->src->global_id; |
1433 |
|
1434 |
lp_flush(bgp_linpool); |
1435 |
} |
1436 |
else
|
1437 |
{ |
1438 |
buck = bgp_get_withdraw_bucket(c); |
1439 |
path = old->attrs->src->global_id; |
1440 |
} |
1441 |
|
1442 |
px = bgp_get_prefix(c, n->n.addr, c->add_path_tx ? path : 0);
|
1443 |
add_tail(&buck->prefixes, &px->buck_node); |
1444 |
|
1445 |
bgp_schedule_packet(p->conn, c, PKT_UPDATE); |
1446 |
} |
1447 |
|
1448 |
|
1449 |
static inline u32 |
1450 |
bgp_get_neighbor(rte *r) |
1451 |
{ |
1452 |
eattr *e = ea_find(r->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); |
1453 |
u32 as; |
1454 |
|
1455 |
if (e && as_path_get_first(e->u.ptr, &as))
|
1456 |
return as;
|
1457 |
else
|
1458 |
return ((struct bgp_proto *) r->attrs->src->proto)->remote_as; |
1459 |
} |
1460 |
|
1461 |
static inline int |
1462 |
rte_resolvable(rte *rt) |
1463 |
{ |
1464 |
return rt->attrs->dest == RTD_UNICAST;
|
1465 |
} |
1466 |
|
1467 |
int
|
1468 |
bgp_rte_better(rte *new, rte *old) |
1469 |
{ |
1470 |
struct bgp_proto *new_bgp = (struct bgp_proto *) new->attrs->src->proto; |
1471 |
struct bgp_proto *old_bgp = (struct bgp_proto *) old->attrs->src->proto; |
1472 |
eattr *x, *y; |
1473 |
u32 n, o; |
1474 |
|
1475 |
/* Skip suppressed routes (see bgp_rte_recalculate()) */
|
1476 |
n = new->u.bgp.suppressed; |
1477 |
o = old->u.bgp.suppressed; |
1478 |
if (n > o)
|
1479 |
return 0; |
1480 |
if (n < o)
|
1481 |
return 1; |
1482 |
|
1483 |
/* RFC 4271 9.1.2.1. Route resolvability test */
|
1484 |
n = rte_resolvable(new); |
1485 |
o = rte_resolvable(old); |
1486 |
if (n > o)
|
1487 |
return 1; |
1488 |
if (n < o)
|
1489 |
return 0; |
1490 |
|
1491 |
/* Start with local preferences */
|
1492 |
x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_LOCAL_PREF)); |
1493 |
y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_LOCAL_PREF)); |
1494 |
n = x ? x->u.data : new_bgp->cf->default_local_pref; |
1495 |
o = y ? y->u.data : old_bgp->cf->default_local_pref; |
1496 |
if (n > o)
|
1497 |
return 1; |
1498 |
if (n < o)
|
1499 |
return 0; |
1500 |
|
1501 |
/* RFC 4271 9.1.2.2. a) Use AS path lengths */
|
1502 |
if (new_bgp->cf->compare_path_lengths || old_bgp->cf->compare_path_lengths)
|
1503 |
{ |
1504 |
x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); |
1505 |
y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); |
1506 |
n = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN; |
1507 |
o = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN; |
1508 |
if (n < o)
|
1509 |
return 1; |
1510 |
if (n > o)
|
1511 |
return 0; |
1512 |
} |
1513 |
|
1514 |
/* RFC 4271 9.1.2.2. b) Use origins */
|
1515 |
x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN)); |
1516 |
y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN)); |
1517 |
n = x ? x->u.data : ORIGIN_INCOMPLETE; |
1518 |
o = y ? y->u.data : ORIGIN_INCOMPLETE; |
1519 |
if (n < o)
|
1520 |
return 1; |
1521 |
if (n > o)
|
1522 |
return 0; |
1523 |
|
1524 |
/* RFC 4271 9.1.2.2. c) Compare MED's */
|
1525 |
/* Proper RFC 4271 path selection cannot be interpreted as finding
|
1526 |
* the best path in some ordering. It is implemented partially in
|
1527 |
* bgp_rte_recalculate() when deterministic_med option is
|
1528 |
* active. Without that option, the behavior is just an
|
1529 |
* approximation, which in specific situations may lead to
|
1530 |
* persistent routing loops, because it is nondeterministic - it
|
1531 |
* depends on the order in which routes appeared. But it is also the
|
1532 |
* same behavior as used by default in Cisco routers, so it is
|
1533 |
* probably not a big issue.
|
1534 |
*/
|
1535 |
if (new_bgp->cf->med_metric || old_bgp->cf->med_metric ||
|
1536 |
(bgp_get_neighbor(new) == bgp_get_neighbor(old))) |
1537 |
{ |
1538 |
x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC)); |
1539 |
y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC)); |
1540 |
n = x ? x->u.data : new_bgp->cf->default_med; |
1541 |
o = y ? y->u.data : old_bgp->cf->default_med; |
1542 |
if (n < o)
|
1543 |
return 1; |
1544 |
if (n > o)
|
1545 |
return 0; |
1546 |
} |
1547 |
|
1548 |
/* RFC 4271 9.1.2.2. d) Prefer external peers */
|
1549 |
if (new_bgp->is_interior > old_bgp->is_interior)
|
1550 |
return 0; |
1551 |
if (new_bgp->is_interior < old_bgp->is_interior)
|
1552 |
return 1; |
1553 |
|
1554 |
/* RFC 4271 9.1.2.2. e) Compare IGP metrics */
|
1555 |
n = new_bgp->cf->igp_metric ? new->attrs->igp_metric : 0;
|
1556 |
o = old_bgp->cf->igp_metric ? old->attrs->igp_metric : 0;
|
1557 |
if (n < o)
|
1558 |
return 1; |
1559 |
if (n > o)
|
1560 |
return 0; |
1561 |
|
1562 |
/* RFC 4271 9.1.2.2. f) Compare BGP identifiers */
|
1563 |
/* RFC 4456 9. a) Use ORIGINATOR_ID instead of local neighbor ID */
|
1564 |
x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID)); |
1565 |
y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID)); |
1566 |
n = x ? x->u.data : new_bgp->remote_id; |
1567 |
o = y ? y->u.data : old_bgp->remote_id; |
1568 |
|
1569 |
/* RFC 5004 - prefer older routes */
|
1570 |
/* (if both are external and from different peer) */
|
1571 |
if ((new_bgp->cf->prefer_older || old_bgp->cf->prefer_older) &&
|
1572 |
!new_bgp->is_internal && n != o) |
1573 |
return 0; |
1574 |
|
1575 |
/* rest of RFC 4271 9.1.2.2. f) */
|
1576 |
if (n < o)
|
1577 |
return 1; |
1578 |
if (n > o)
|
1579 |
return 0; |
1580 |
|
1581 |
/* RFC 4456 9. b) Compare cluster list lengths */
|
1582 |
x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST)); |
1583 |
y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST)); |
1584 |
n = x ? int_set_get_size(x->u.ptr) : 0;
|
1585 |
o = y ? int_set_get_size(y->u.ptr) : 0;
|
1586 |
if (n < o)
|
1587 |
return 1; |
1588 |
if (n > o)
|
1589 |
return 0; |
1590 |
|
1591 |
/* RFC 4271 9.1.2.2. g) Compare peer IP adresses */
|
1592 |
return (ipa_compare(new_bgp->cf->remote_ip, old_bgp->cf->remote_ip) < 0); |
1593 |
} |
1594 |
|
1595 |
|
1596 |
int
|
1597 |
bgp_rte_mergable(rte *pri, rte *sec) |
1598 |
{ |
1599 |
struct bgp_proto *pri_bgp = (struct bgp_proto *) pri->attrs->src->proto; |
1600 |
struct bgp_proto *sec_bgp = (struct bgp_proto *) sec->attrs->src->proto; |
1601 |
eattr *x, *y; |
1602 |
u32 p, s; |
1603 |
|
1604 |
/* Skip suppressed routes (see bgp_rte_recalculate()) */
|
1605 |
if (pri->u.bgp.suppressed != sec->u.bgp.suppressed)
|
1606 |
return 0; |
1607 |
|
1608 |
/* RFC 4271 9.1.2.1. Route resolvability test */
|
1609 |
if (!rte_resolvable(sec))
|
1610 |
return 0; |
1611 |
|
1612 |
/* Start with local preferences */
|
1613 |
x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_LOCAL_PREF)); |
1614 |
y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_LOCAL_PREF)); |
1615 |
p = x ? x->u.data : pri_bgp->cf->default_local_pref; |
1616 |
s = y ? y->u.data : sec_bgp->cf->default_local_pref; |
1617 |
if (p != s)
|
1618 |
return 0; |
1619 |
|
1620 |
/* RFC 4271 9.1.2.2. a) Use AS path lengths */
|
1621 |
if (pri_bgp->cf->compare_path_lengths || sec_bgp->cf->compare_path_lengths)
|
1622 |
{ |
1623 |
x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); |
1624 |
y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); |
1625 |
p = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN; |
1626 |
s = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN; |
1627 |
|
1628 |
if (p != s)
|
1629 |
return 0; |
1630 |
|
1631 |
// if (DELTA(p, s) > pri_bgp->cf->relax_multipath)
|
1632 |
// return 0;
|
1633 |
} |
1634 |
|
1635 |
/* RFC 4271 9.1.2.2. b) Use origins */
|
1636 |
x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN)); |
1637 |
y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN)); |
1638 |
p = x ? x->u.data : ORIGIN_INCOMPLETE; |
1639 |
s = y ? y->u.data : ORIGIN_INCOMPLETE; |
1640 |
if (p != s)
|
1641 |
return 0; |
1642 |
|
1643 |
/* RFC 4271 9.1.2.2. c) Compare MED's */
|
1644 |
if (pri_bgp->cf->med_metric || sec_bgp->cf->med_metric ||
|
1645 |
(bgp_get_neighbor(pri) == bgp_get_neighbor(sec))) |
1646 |
{ |
1647 |
x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC)); |
1648 |
y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC)); |
1649 |
p = x ? x->u.data : pri_bgp->cf->default_med; |
1650 |
s = y ? y->u.data : sec_bgp->cf->default_med; |
1651 |
if (p != s)
|
1652 |
return 0; |
1653 |
} |
1654 |
|
1655 |
/* RFC 4271 9.1.2.2. d) Prefer external peers */
|
1656 |
if (pri_bgp->is_internal != sec_bgp->is_internal)
|
1657 |
return 0; |
1658 |
|
1659 |
/* RFC 4271 9.1.2.2. e) Compare IGP metrics */
|
1660 |
p = pri_bgp->cf->igp_metric ? pri->attrs->igp_metric : 0;
|
1661 |
s = sec_bgp->cf->igp_metric ? sec->attrs->igp_metric : 0;
|
1662 |
if (p != s)
|
1663 |
return 0; |
1664 |
|
1665 |
/* Remaining criteria are ignored */
|
1666 |
|
1667 |
return 1; |
1668 |
} |
1669 |
|
1670 |
|
1671 |
static inline int |
1672 |
same_group(rte *r, u32 lpref, u32 lasn) |
1673 |
{ |
1674 |
return (r->pref == lpref) && (bgp_get_neighbor(r) == lasn);
|
1675 |
} |
1676 |
|
1677 |
static inline int |
1678 |
use_deterministic_med(rte *r) |
1679 |
{ |
1680 |
struct proto *P = r->attrs->src->proto;
|
1681 |
return (P->proto == &proto_bgp) && ((struct bgp_proto *) P)->cf->deterministic_med; |
1682 |
} |
1683 |
|
1684 |
int
|
1685 |
bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best) |
1686 |
{ |
1687 |
rte *r, *s; |
1688 |
rte *key = new ? new : old; |
1689 |
u32 lpref = key->pref; |
1690 |
u32 lasn = bgp_get_neighbor(key); |
1691 |
int old_is_group_best = 0; |
1692 |
|
1693 |
/*
|
1694 |
* Proper RFC 4271 path selection is a bit complicated, it cannot be
|
1695 |
* implemented just by rte_better(), because it is not a linear
|
1696 |
* ordering. But it can be splitted to two levels, where the lower
|
1697 |
* level chooses the best routes in each group of routes from the
|
1698 |
* same neighboring AS and higher level chooses the best route (with
|
1699 |
* a slightly different ordering) between the best-in-group routes.
|
1700 |
*
|
1701 |
* When deterministic_med is disabled, we just ignore this issue and
|
1702 |
* choose the best route by bgp_rte_better() alone. If enabled, the
|
1703 |
* lower level of the route selection is done here (for the group
|
1704 |
* to which the changed route belongs), all routes in group are
|
1705 |
* marked as suppressed, just chosen best-in-group is not.
|
1706 |
*
|
1707 |
* Global best route selection then implements higher level by
|
1708 |
* choosing between non-suppressed routes (as they are always
|
1709 |
* preferred over suppressed routes). Routes from BGP protocols
|
1710 |
* that do not set deterministic_med are just never suppressed. As
|
1711 |
* they do not participate in the lower level selection, it is OK
|
1712 |
* that this fn is not called for them.
|
1713 |
*
|
1714 |
* The idea is simple, the implementation is more problematic,
|
1715 |
* mostly because of optimizations in rte_recalculate() that
|
1716 |
* avoids full recalculation in most cases.
|
1717 |
*
|
1718 |
* We can assume that at least one of new, old is non-NULL and both
|
1719 |
* are from the same protocol with enabled deterministic_med. We
|
1720 |
* group routes by both neighbor AS (lasn) and preference (lpref),
|
1721 |
* because bgp_rte_better() does not handle preference itself.
|
1722 |
*/
|
1723 |
|
1724 |
/* If new and old are from different groups, we just process that
|
1725 |
as two independent events */
|
1726 |
if (new && old && !same_group(old, lpref, lasn))
|
1727 |
{ |
1728 |
int i1, i2;
|
1729 |
i1 = bgp_rte_recalculate(table, net, NULL, old, old_best);
|
1730 |
i2 = bgp_rte_recalculate(table, net, new, NULL, old_best);
|
1731 |
return i1 || i2;
|
1732 |
} |
1733 |
|
1734 |
/*
|
1735 |
* We could find the best-in-group and then make some shortcuts like
|
1736 |
* in rte_recalculate, but as we would have to walk through all
|
1737 |
* net->routes just to find it, it is probably not worth. So we
|
1738 |
* just have two simpler fast cases that use just the old route.
|
1739 |
* We also set suppressed flag to avoid using it in bgp_rte_better().
|
1740 |
*/
|
1741 |
|
1742 |
if (new)
|
1743 |
new->u.bgp.suppressed = 1;
|
1744 |
|
1745 |
if (old)
|
1746 |
{ |
1747 |
old_is_group_best = !old->u.bgp.suppressed; |
1748 |
old->u.bgp.suppressed = 1;
|
1749 |
int new_is_better = new && bgp_rte_better(new, old);
|
1750 |
|
1751 |
/* The first case - replace not best with worse (or remove not best) */
|
1752 |
if (!old_is_group_best && !new_is_better)
|
1753 |
return 0; |
1754 |
|
1755 |
/* The second case - replace the best with better */
|
1756 |
if (old_is_group_best && new_is_better)
|
1757 |
{ |
1758 |
/* new is best-in-group, the see discussion below - this is
|
1759 |
a special variant of NBG && OBG. From OBG we can deduce
|
1760 |
that same_group(old_best) iff (old == old_best) */
|
1761 |
new->u.bgp.suppressed = 0;
|
1762 |
return (old == old_best);
|
1763 |
} |
1764 |
} |
1765 |
|
1766 |
/* The default case - find a new best-in-group route */
|
1767 |
r = new; /* new may not be in the list */
|
1768 |
for (s=net->routes; rte_is_valid(s); s=s->next)
|
1769 |
if (use_deterministic_med(s) && same_group(s, lpref, lasn))
|
1770 |
{ |
1771 |
s->u.bgp.suppressed = 1;
|
1772 |
if (!r || bgp_rte_better(s, r))
|
1773 |
r = s; |
1774 |
} |
1775 |
|
1776 |
/* Simple case - the last route in group disappears */
|
1777 |
if (!r)
|
1778 |
return 0; |
1779 |
|
1780 |
/* Found best-in-group */
|
1781 |
r->u.bgp.suppressed = 0;
|
1782 |
|
1783 |
/*
|
1784 |
* There are generally two reasons why we have to force
|
1785 |
* recalculation (return 1): First, the new route may be wrongfully
|
1786 |
* chosen to be the best in the first case check in
|
1787 |
* rte_recalculate(), this may happen only if old_best is from the
|
1788 |
* same group. Second, another (different than new route)
|
1789 |
* best-in-group is chosen and that may be the proper best (although
|
1790 |
* rte_recalculate() without ignore that possibility).
|
1791 |
*
|
1792 |
* There are three possible cases according to whether the old route
|
1793 |
* was the best in group (OBG, stored in old_is_group_best) and
|
1794 |
* whether the new route is the best in group (NBG, tested by r == new).
|
1795 |
* These cases work even if old or new is NULL.
|
1796 |
*
|
1797 |
* NBG -> new is a possible candidate for the best route, so we just
|
1798 |
* check for the first reason using same_group().
|
1799 |
*
|
1800 |
* !NBG && OBG -> Second reason applies, return 1
|
1801 |
*
|
1802 |
* !NBG && !OBG -> Best in group does not change, old != old_best,
|
1803 |
* rte_better(new, old_best) is false and therefore
|
1804 |
* the first reason does not apply, return 0
|
1805 |
*/
|
1806 |
|
1807 |
if (r == new)
|
1808 |
return old_best && same_group(old_best, lpref, lasn);
|
1809 |
else
|
1810 |
return old_is_group_best;
|
1811 |
} |
1812 |
|
1813 |
|
1814 |
/*
|
1815 |
* Reconstruct AS_PATH and AGGREGATOR according to RFC 6793 4.2.3
|
1816 |
*/
|
1817 |
static void |
1818 |
bgp_process_as4_attrs(ea_list **attrs, struct linpool *pool)
|
1819 |
{ |
1820 |
eattr *p2 = bgp_find_attr(*attrs, BA_AS_PATH); |
1821 |
eattr *p4 = bgp_find_attr(*attrs, BA_AS4_PATH); |
1822 |
eattr *a2 = bgp_find_attr(*attrs, BA_AGGREGATOR); |
1823 |
eattr *a4 = bgp_find_attr(*attrs, BA_AS4_AGGREGATOR); |
1824 |
|
1825 |
/* First, unset AS4_* attributes */
|
1826 |
if (p4) bgp_unset_attr(attrs, pool, BA_AS4_PATH);
|
1827 |
if (a4) bgp_unset_attr(attrs, pool, BA_AS4_AGGREGATOR);
|
1828 |
|
1829 |
/* Handle AGGREGATOR attribute */
|
1830 |
if (a2 && a4)
|
1831 |
{ |
1832 |
u32 a2_asn = get_u32(a2->u.ptr->data); |
1833 |
|
1834 |
/* If routes were aggregated by an old router, then AS4_PATH and
|
1835 |
AS4_AGGREGATOR are invalid. In that case we give up. */
|
1836 |
if (a2_asn != AS_TRANS)
|
1837 |
return;
|
1838 |
|
1839 |
/* Use AS4_AGGREGATOR instead of AGGREGATOR */
|
1840 |
a2->u.ptr = a4->u.ptr; |
1841 |
} |
1842 |
|
1843 |
/* Handle AS_PATH attribute */
|
1844 |
if (p2 && p4)
|
1845 |
{ |
1846 |
int p2_len = as_path_getlen(p2->u.ptr);
|
1847 |
int p4_len = as_path_getlen(p4->u.ptr);
|
1848 |
|
1849 |
/* AS_PATH is too short, give up */
|
1850 |
if (p2_len < p4_len)
|
1851 |
return;
|
1852 |
|
1853 |
/* Merge AS_PATH and AS4_PATH */
|
1854 |
as_path_cut(p2->u.ptr, p2_len - p4_len); |
1855 |
p2->u.ptr = as_path_merge(pool, p2->u.ptr, p4->u.ptr); |
1856 |
} |
1857 |
} |
1858 |
|
1859 |
int
|
1860 |
bgp_get_attr(eattr *a, byte *buf, int buflen)
|
1861 |
{ |
1862 |
uint i = EA_ID(a->id); |
1863 |
const struct bgp_attr_desc *d; |
1864 |
int len;
|
1865 |
|
1866 |
if (bgp_attr_known(i))
|
1867 |
{ |
1868 |
d = &bgp_attr_table[i]; |
1869 |
len = bsprintf(buf, "%s", d->name);
|
1870 |
buf += len; |
1871 |
if (d->format)
|
1872 |
{ |
1873 |
*buf++ = ':';
|
1874 |
*buf++ = ' ';
|
1875 |
d->format(a, buf, buflen - len - 2);
|
1876 |
return GA_FULL;
|
1877 |
} |
1878 |
return GA_NAME;
|
1879 |
} |
1880 |
|
1881 |
bsprintf(buf, "%02x%s", i, (a->flags & BAF_TRANSITIVE) ? " [t]" : ""); |
1882 |
return GA_NAME;
|
1883 |
} |
1884 |
|
1885 |
void
|
1886 |
bgp_get_route_info(rte *e, byte *buf, ea_list *attrs) |
1887 |
{ |
1888 |
eattr *p = ea_find(attrs, EA_CODE(EAP_BGP, BA_AS_PATH)); |
1889 |
eattr *o = ea_find(attrs, EA_CODE(EAP_BGP, BA_ORIGIN)); |
1890 |
u32 origas; |
1891 |
|
1892 |
buf += bsprintf(buf, " (%d", e->pref);
|
1893 |
|
1894 |
if (e->u.bgp.suppressed)
|
1895 |
buf += bsprintf(buf, "-");
|
1896 |
|
1897 |
if (e->attrs->hostentry)
|
1898 |
{ |
1899 |
if (!rte_resolvable(e))
|
1900 |
buf += bsprintf(buf, "/-");
|
1901 |
else if (e->attrs->igp_metric >= IGP_METRIC_UNKNOWN) |
1902 |
buf += bsprintf(buf, "/?");
|
1903 |
else
|
1904 |
buf += bsprintf(buf, "/%d", e->attrs->igp_metric);
|
1905 |
} |
1906 |
buf += bsprintf(buf, ") [");
|
1907 |
|
1908 |
if (p && as_path_get_last(p->u.ptr, &origas))
|
1909 |
buf += bsprintf(buf, "AS%u", origas);
|
1910 |
if (o)
|
1911 |
buf += bsprintf(buf, "%c", "ie?"[o->u.data]); |
1912 |
strcpy(buf, "]");
|
1913 |
} |