Statistics
| Branch: | Revision:

iof-bird-daemon / proto / bgp / attrs.c @ 42a0c054

History | View | Annotate | Download (42.2 KB)

1
/*
2
 *        BIRD -- BGP Attributes
3
 *
4
 *        (c) 2000 Martin Mares <mj@ucw.cz>
5
 *
6
 *        Can be freely distributed and used under the terms of the GNU GPL.
7
 */
8

    
9
#undef LOCAL_DEBUG
10

    
11
#include <stdlib.h>
12

    
13
#include "nest/bird.h"
14
#include "nest/iface.h"
15
#include "nest/protocol.h"
16
#include "nest/route.h"
17
#include "nest/attrs.h"
18
#include "conf/conf.h"
19
#include "lib/resource.h"
20
#include "lib/string.h"
21
#include "lib/unaligned.h"
22

    
23
#include "bgp.h"
24

    
25
/*
26
 *   UPDATE message error handling
27
 *
28
 * All checks from RFC 4271 6.3 are done as specified with these exceptions:
29
 *  - The semantic check of an IP address from NEXT_HOP attribute is missing.
30
 *  - Checks of some optional attribute values are missing.
31
 *  - Syntactic and semantic checks of NLRIs (done in DECODE_PREFIX())
32
 *    are probably inadequate.
33
 *
34
 * Loop detection based on AS_PATH causes updates to be withdrawn. RFC
35
 * 4271 does not explicitly specify the behavior in that case.
36
 *
37
 * Loop detection related to route reflection (based on ORIGINATOR_ID
38
 * and CLUSTER_LIST) causes updates to be withdrawn. RFC 4456 8
39
 * specifies that such updates should be ignored, but that is generally
40
 * a bad idea.
41
 *
42
 * Error checking of optional transitive attributes is done according to
43
 * draft-ietf-idr-optional-transitive-03, but errors are handled always
44
 * as withdraws.
45
 *
46
 * Unexpected AS_CONFED_* segments in AS_PATH are logged and removed,
47
 * but unknown segments cause a session drop with Malformed AS_PATH
48
 * error (see validate_path()). The behavior in such case is not
49
 * explicitly specified by RFC 4271. RFC 5065 specifies that
50
 * inconsistent AS_CONFED_* segments should cause a session drop, but
51
 * implementations that pass invalid AS_CONFED_* segments are
52
 * widespread.
53
 *
54
 * Error handling of AS4_* attributes is done as specified by
55
 * draft-ietf-idr-rfc4893bis-03. There are several possible
56
 * inconsistencies between AGGREGATOR and AS4_AGGREGATOR that are not
57
 * handled by that draft, these are logged and ignored (see
58
 * bgp_reconstruct_4b_attrs()).
59
 */
60

    
61
static byte bgp_mandatory_attrs[] = { BA_ORIGIN, BA_AS_PATH
62
#ifndef IPV6
63
,BA_NEXT_HOP
64
#endif
65
};
66

    
67
struct attr_desc {
68
  char *name;
69
  int expected_length;
70
  int expected_flags;
71
  int type;
72
  int allow_in_ebgp;
73
  int (*validate)(struct bgp_proto *p, byte *attr, int len);
74
  void (*format)(eattr *ea, byte *buf, int buflen);
75
};
76

    
77
/*
 * Special (negative) results of the attr_desc validate hooks. Their handling
 * is outside this part of the file; presumably IGNORE drops just the
 * attribute and WITHDRAW treats the whole update as a withdraw -- TODO confirm.
 */
#define IGNORE (-1)
#define WITHDRAW (-2)
79

    
80
static int
81
bgp_check_origin(struct bgp_proto *p UNUSED, byte *a, int len UNUSED)
82
{
83
  if (*a > 2)
84
    return 6;
85
  return 0;
86
}
87

    
88
static void
89
bgp_format_origin(eattr *a, byte *buf, int buflen UNUSED)
90
{
91
  static char *bgp_origin_names[] = { "IGP", "EGP", "Incomplete" };
92

    
93
  bsprintf(buf, bgp_origin_names[a->u.data]);
94
}
95

    
96
static int
97
path_segment_contains(byte *p, int bs, u32 asn)
98
{
99
  int i;
100
  int len = p[1];
101
  p += 2;
102

    
103
  for(i=0; i<len; i++)
104
    {
105
      u32 asn2 = (bs == 4) ? get_u32(p) : get_u16(p);
106
      if (asn2 == asn)
107
        return 1;
108
      p += bs;
109
    }
110

    
111
  return 0;
112
}
113

    
114
/* Validates path attribute, removes AS_CONFED_* segments, and also returns path length */
115
static int
116
validate_path(struct bgp_proto *p, int as_path, int bs, byte *idata, unsigned int *ilength)
117
{
118
  int res = 0;
119
  u8 *a, *dst;
120
  int len, plen, copy;
121

    
122
  dst = a = idata;
123
  len = *ilength;
124

    
125
  while (len)
126
    {
127
      if (len < 2)
128
        return -1;
129

    
130
      plen = 2 + bs * a[1];
131
      if (len < plen)
132
        return -1;
133

    
134
      switch (a[0])
135
        {
136
        case AS_PATH_SET:
137
          copy = 1;
138
          res++;
139
          break;
140

    
141
        case AS_PATH_SEQUENCE:
142
          copy = 1;
143
          res += a[1];
144
          break;
145

    
146
        case AS_PATH_CONFED_SEQUENCE:
147
        case AS_PATH_CONFED_SET:
148
          if (as_path && path_segment_contains(a, bs, p->remote_as))
149
            {
150
              log(L_WARN "%s: AS_CONFED_* segment with peer ASN found, misconfigured confederation?", p->p.name);
151
              return -1;
152
            }
153

    
154
          log(L_WARN "%s: %s_PATH attribute contains AS_CONFED_* segment, skipping segment",
155
              p->p.name, as_path ? "AS" : "AS4");
156
          copy = 0;
157
          break;
158

    
159
        default:
160
          return -1;
161
        }
162

    
163
      if (copy)
164
        {
165
          if (dst != a)
166
            memmove(dst, a, plen);
167
          dst += plen;
168
        }
169

    
170
      len -= plen;
171
      a += plen;
172
    }
173

    
174
  *ilength = dst - idata;
175
  return res;
176
}
177

    
178
static inline int
179
validate_as_path(struct bgp_proto *p, byte *a, int *len)
180
{
181
  return validate_path(p, 1, p->as4_session ? 4 : 2, a, len);
182
}
183

    
184
static inline int
185
validate_as4_path(struct bgp_proto *p, struct adata *path)
186
{
187
  return validate_path(p, 0, 4, path->data, &path->length);
188
}
189

    
190
static int
191
bgp_check_next_hop(struct bgp_proto *p UNUSED, byte *a, int len)
192
{
193
#ifdef IPV6
194
  return IGNORE;
195
#else
196
  ip_addr addr;
197

    
198
  memcpy(&addr, a, len);
199
  ipa_ntoh(addr);
200
  if (ipa_classify(addr) & IADDR_HOST)
201
    return 0;
202
  else
203
    return 8;
204
#endif
205
}
206

    
207
static void
208
bgp_format_next_hop(eattr *a, byte *buf, int buflen UNUSED)
209
{
210
  ip_addr *ipp = (ip_addr *) a->u.ptr->data;
211
#ifdef IPV6
212
  /* in IPv6, we might have two addresses in NEXT HOP */
213
  if ((a->u.ptr->length == NEXT_HOP_LENGTH) && ipa_nonzero(ipp[1]))
214
    {
215
      bsprintf(buf, "%I %I", ipp[0], ipp[1]);
216
      return;
217
    }
218
#endif
219

    
220
  bsprintf(buf, "%I", ipp[0]);
221
}
222

    
223
static int
224
bgp_check_aggregator(struct bgp_proto *p, byte *a UNUSED, int len)
225
{
226
  int exp_len = p->as4_session ? 8 : 6;
227
  
228
  return (len == exp_len) ? 0 : WITHDRAW;
229
}
230

    
231
static void
232
bgp_format_aggregator(eattr *a, byte *buf, int buflen UNUSED)
233
{
234
  struct adata *ad =  a->u.ptr;
235
  byte *data = ad->data;
236
  u32 as;
237

    
238
  as = get_u32(data);
239
  data += 4;
240

    
241
  bsprintf(buf, "%d.%d.%d.%d AS%d", data[0], data[1], data[2], data[3], as);
242
}
243

    
244
static int
245
bgp_check_community(struct bgp_proto *p UNUSED, byte *a UNUSED, int len)
246
{
247
  return ((len % 4) == 0) ? 0 : WITHDRAW;
248
}
249

    
250
static int
251
bgp_check_cluster_list(struct bgp_proto *p UNUSED, byte *a UNUSED, int len)
252
{
253
  return ((len % 4) == 0) ? 0 : 5;
254
}
255

    
256
static void
257
bgp_format_cluster_list(eattr *a, byte *buf, int buflen)
258
{
259
  /* Truncates cluster lists larger than buflen, probably not a problem */
260
  int_set_format(a->u.ptr, 0, -1, buf, buflen);
261
}
262

    
263
static int
264
bgp_check_reach_nlri(struct bgp_proto *p UNUSED, byte *a UNUSED, int len UNUSED)
265
{
266
#ifdef IPV6
267
  p->mp_reach_start = a;
268
  p->mp_reach_len = len;
269
#endif
270
  return IGNORE;
271
}
272

    
273
static int
274
bgp_check_unreach_nlri(struct bgp_proto *p UNUSED, byte *a UNUSED, int len UNUSED)
275
{
276
#ifdef IPV6
277
  p->mp_unreach_start = a;
278
  p->mp_unreach_len = len;
279
#endif
280
  return IGNORE;
281
}
282

    
283
static int
284
bgp_check_ext_community(struct bgp_proto *p UNUSED, byte *a UNUSED, int len)
285
{
286
  return ((len % 8) == 0) ? 0 : WITHDRAW;
287
}
288

    
289

    
290
static struct attr_desc bgp_attr_table[] = {
291
  { NULL, -1, 0, 0, 0,                                                                /* Undefined */
292
    NULL, NULL },
293
  { "origin", 1, BAF_TRANSITIVE, EAF_TYPE_INT, 1,                                /* BA_ORIGIN */
294
    bgp_check_origin, bgp_format_origin },
295
  { "as_path", -1, BAF_TRANSITIVE, EAF_TYPE_AS_PATH, 1,                                /* BA_AS_PATH */
296
    NULL, NULL }, /* is checked by validate_as_path() as a special case */
297
  { "next_hop", 4, BAF_TRANSITIVE, EAF_TYPE_IP_ADDRESS, 1,                        /* BA_NEXT_HOP */
298
    bgp_check_next_hop, bgp_format_next_hop },
299
  { "med", 4, BAF_OPTIONAL, EAF_TYPE_INT, 1,                                        /* BA_MULTI_EXIT_DISC */
300
    NULL, NULL },
301
  { "local_pref", 4, BAF_TRANSITIVE, EAF_TYPE_INT, 0,                                /* BA_LOCAL_PREF */
302
    NULL, NULL },
303
  { "atomic_aggr", 0, BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1,                        /* BA_ATOMIC_AGGR */
304
    NULL, NULL },
305
  { "aggregator", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1,        /* BA_AGGREGATOR */
306
    bgp_check_aggregator, bgp_format_aggregator },
307
  { "community", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_INT_SET, 1,        /* BA_COMMUNITY */
308
    bgp_check_community, NULL },
309
  { "originator_id", 4, BAF_OPTIONAL, EAF_TYPE_ROUTER_ID, 0,                        /* BA_ORIGINATOR_ID */
310
    NULL, NULL },
311
  { "cluster_list", -1, BAF_OPTIONAL, EAF_TYPE_INT_SET, 0,                        /* BA_CLUSTER_LIST */
312
    bgp_check_cluster_list, bgp_format_cluster_list }, 
313
  { .name = NULL },                                                                /* BA_DPA */
314
  { .name = NULL },                                                                /* BA_ADVERTISER */
315
  { .name = NULL },                                                                /* BA_RCID_PATH */
316
  { "mp_reach_nlri", -1, BAF_OPTIONAL, EAF_TYPE_OPAQUE, 1,                        /* BA_MP_REACH_NLRI */
317
    bgp_check_reach_nlri, NULL },
318
  { "mp_unreach_nlri", -1, BAF_OPTIONAL, EAF_TYPE_OPAQUE, 1,                        /* BA_MP_UNREACH_NLRI */
319
    bgp_check_unreach_nlri, NULL },
320
  { "ext_community", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_EC_SET, 1,        /* BA_EXT_COMMUNITY */
321
    bgp_check_ext_community, NULL },
322
  { "as4_path", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1,                /* BA_AS4_PATH */
323
    NULL, NULL },
324
  { "as4_aggregator", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1,        /* BA_AS4_PATH */
325
    NULL, NULL }
326
};
327

    
328
/* BA_AS4_PATH is type EAF_TYPE_OPAQUE and not type EAF_TYPE_AS_PATH.
329
 * It does not matter as this attribute does not appear on routes in the routing table.
330
 */
331

    
332
/* Nonzero if code lies within bgp_attr_table[] and its entry has a name */
#define ATTR_KNOWN(code) \
  (((code) < ARRAY_SIZE(bgp_attr_table)) && (bgp_attr_table[code].name != NULL))
333

    
334
static inline struct adata *
335
bgp_alloc_adata(struct linpool *pool, unsigned len)
336
{
337
  struct adata *ad = lp_alloc(pool, sizeof(struct adata) + len);
338
  ad->length = len;
339
  return ad;
340
}
341

    
342
static void
343
bgp_set_attr(eattr *e, unsigned attr, uintptr_t val)
344
{
345
  ASSERT(ATTR_KNOWN(attr));
346
  e->id = EA_CODE(EAP_BGP, attr);
347
  e->type = bgp_attr_table[attr].type;
348
  e->flags = bgp_attr_table[attr].expected_flags;
349
  if (e->type & EAF_EMBEDDED)
350
    e->u.data = val;
351
  else
352
    e->u.ptr = (struct adata *) val;
353
}
354

    
355
static byte *
356
bgp_set_attr_wa(eattr *e, struct linpool *pool, unsigned attr, unsigned len)
357
{
358
  struct adata *ad = bgp_alloc_adata(pool, len);
359
  bgp_set_attr(e, attr, (uintptr_t) ad);
360
  return ad->data;
361
}
362

    
363
void
364
bgp_attach_attr(ea_list **to, struct linpool *pool, unsigned attr, uintptr_t val)
365
{
366
  ea_list *a = lp_alloc(pool, sizeof(ea_list) + sizeof(eattr));
367
  a->next = *to;
368
  *to = a;
369
  a->flags = EALF_SORTED;
370
  a->count = 1;
371
  bgp_set_attr(a->attrs, attr, val);
372
}
373

    
374
byte *
375
bgp_attach_attr_wa(ea_list **to, struct linpool *pool, unsigned attr, unsigned len)
376
{
377
  struct adata *ad = bgp_alloc_adata(pool, len);
378
  bgp_attach_attr(to, pool, attr, (uintptr_t) ad);
379
  return ad->data;
380
}
381

    
382
static int
383
bgp_encode_attr_hdr(byte *dst, unsigned int flags, unsigned code, int len)
384
{
385
  int wlen;
386

    
387
  DBG("\tAttribute %02x (%d bytes, flags %02x)\n", code, len, flags);
388

    
389
  if (len < 256)
390
    {
391
      *dst++ = flags;
392
      *dst++ = code;
393
      *dst++ = len;
394
      wlen = 3;
395
    }
396
  else
397
    {
398
      *dst++ = flags | BAF_EXT_LEN;
399
      *dst++ = code;
400
      put_u16(dst, len);
401
      wlen = 4;
402
    }
403

    
404
  return wlen;
405
}
406

    
407
static void
408
aggregator_convert_to_old(struct adata *aggr, byte *dst, int *new_used)
409
{
410
  byte *src = aggr->data;
411
  *new_used = 0;
412

    
413
  u32 as = get_u32(src);
414
  if (as > 0xFFFF) 
415
    {
416
      as = AS_TRANS;
417
      *new_used = 1;
418
    }
419
  put_u16(dst, as);
420

    
421
  /* Copy IPv4 address */
422
  memcpy(dst + 2, src + 4, 4);
423
}
424

    
425
static void
426
aggregator_convert_to_new(struct adata *aggr, byte *dst)
427
{
428
  byte *src = aggr->data;
429

    
430
  u32 as   = get_u16(src);
431
  put_u32(dst, as);
432

    
433
  /* Copy IPv4 address */
434
  memcpy(dst + 4, src + 2, 4);
435
}
436

    
437
static int
438
bgp_get_attr_len(eattr *a)
439
{
440
  int len;
441
  if (ATTR_KNOWN(EA_ID(a->id)))
442
    {
443
      int code = EA_ID(a->id);
444
      struct attr_desc *desc = &bgp_attr_table[code];
445
      len = desc->expected_length;
446
      if (len < 0)
447
        {
448
          ASSERT(!(a->type & EAF_EMBEDDED));
449
          len = a->u.ptr->length;
450
        }
451
    }
452
  else
453
    {
454
      ASSERT((a->type & EAF_TYPE_MASK) == EAF_TYPE_OPAQUE);
455
      len = a->u.ptr->length;
456
    }
457
  
458
  return len;
459
}
460

    
461
/* Moves write pointer w forward by l bytes and lowers remaining space r by l */
#define ADVANCE(w, r, l) do { (r) -= (l); (w) += (l); } while (0)
462

    
463
/**
464
 * bgp_encode_attrs - encode BGP attributes
465
 * @p: BGP instance
466
 * @w: buffer
467
 * @attrs: a list of extended attributes
468
 * @remains: remaining space in the buffer
469
 *
470
 * The bgp_encode_attrs() function takes a list of extended attributes
471
 * and converts it to its BGP representation (a part of an Update message).
472
 *
473
 * Result: Length of the attribute block generated or -1 if not enough space.
474
 */
475
unsigned int
476
bgp_encode_attrs(struct bgp_proto *p, byte *w, ea_list *attrs, int remains)
477
{
478
  unsigned int i, code, type, flags;
479
  byte *start = w;
480
  int len, rv;
481

    
482
  for(i=0; i<attrs->count; i++)
483
    {
484
      eattr *a = &attrs->attrs[i];
485
      ASSERT(EA_PROTO(a->id) == EAP_BGP);
486
      code = EA_ID(a->id);
487

    
488
#ifdef IPV6
489
      /* When talking multiprotocol BGP, the NEXT_HOP attributes are used only temporarily. */
490
      if (code == BA_NEXT_HOP)
491
        continue;
492
#endif
493

    
494
      /* When AS4-aware BGP speaker is talking to non-AS4-aware BGP speaker,
495
       * we have to convert our 4B AS_PATH to 2B AS_PATH and send our AS_PATH 
496
       * as optional AS4_PATH attribute.
497
       */
498
      if ((code == BA_AS_PATH) && (! p->as4_session))
499
        {
500
          len = a->u.ptr->length;
501

    
502
          if (remains < (len + 4))
503
            goto err_no_buffer;
504

    
505
          /* Using temporary buffer because don't know a length of created attr
506
           * and therefore a length of a header. Perhaps i should better always
507
           * use BAF_EXT_LEN. */
508
          
509
          byte buf[len];
510
          int new_used;
511
          int nl = as_path_convert_to_old(a->u.ptr, buf, &new_used);
512

    
513
          DBG("BGP: Encoding old AS_PATH\n");
514
          rv = bgp_encode_attr_hdr(w, BAF_TRANSITIVE, BA_AS_PATH, nl);
515
          ADVANCE(w, remains, rv);
516
          memcpy(w, buf, nl);
517
          ADVANCE(w, remains, nl);
518

    
519
          if (! new_used)
520
            continue;
521

    
522
          if (remains < (len + 4))
523
            goto err_no_buffer;
524

    
525
          /* We should discard AS_CONFED_SEQUENCE or AS_CONFED_SET path segments 
526
           * here but we don't support confederations and such paths we already
527
           * discarded in bgp_check_as_path().
528
           */
529

    
530
          DBG("BGP: Encoding AS4_PATH\n");
531
          rv = bgp_encode_attr_hdr(w, BAF_OPTIONAL | BAF_TRANSITIVE, BA_AS4_PATH, len);
532
          ADVANCE(w, remains, rv);
533
          memcpy(w, a->u.ptr->data, len);
534
          ADVANCE(w, remains, len);
535

    
536
          continue;
537
        }
538

    
539
      /* The same issue with AGGREGATOR attribute */
540
      if ((code == BA_AGGREGATOR) && (! p->as4_session))
541
        {
542
          int new_used;
543

    
544
          len = 6;
545
          if (remains < (len + 3))
546
            goto err_no_buffer;
547

    
548
          rv = bgp_encode_attr_hdr(w, BAF_OPTIONAL | BAF_TRANSITIVE, BA_AGGREGATOR, len);
549
          ADVANCE(w, remains, rv);
550
          aggregator_convert_to_old(a->u.ptr, w, &new_used);
551
          ADVANCE(w, remains, len);
552

    
553
          if (! new_used)
554
            continue;
555

    
556
          len = 8;
557
          if (remains < (len + 3))
558
            goto err_no_buffer;
559

    
560
          rv = bgp_encode_attr_hdr(w, BAF_OPTIONAL | BAF_TRANSITIVE, BA_AS4_AGGREGATOR, len);
561
          ADVANCE(w, remains, rv);
562
          memcpy(w, a->u.ptr->data, len);
563
          ADVANCE(w, remains, len);
564

    
565
          continue;
566
        }
567

    
568
      /* Standard path continues here ... */
569

    
570
      type = a->type & EAF_TYPE_MASK;
571
      flags = a->flags & (BAF_OPTIONAL | BAF_TRANSITIVE | BAF_PARTIAL);
572
      len = bgp_get_attr_len(a);
573

    
574
      /* Skip empty sets */ 
575
      if (((type == EAF_TYPE_INT_SET) || (type == EAF_TYPE_EC_SET)) && (len == 0))
576
        continue; 
577

    
578
      if (remains < len + 4)
579
        goto err_no_buffer;
580

    
581
      rv = bgp_encode_attr_hdr(w, flags, code, len);
582
      ADVANCE(w, remains, rv);
583

    
584
      switch (type)
585
        {
586
        case EAF_TYPE_INT:
587
        case EAF_TYPE_ROUTER_ID:
588
          if (len == 4)
589
            put_u32(w, a->u.data);
590
          else
591
            *w = a->u.data;
592
          break;
593
        case EAF_TYPE_IP_ADDRESS:
594
          {
595
            ip_addr ip = *(ip_addr *)a->u.ptr->data;
596
            ipa_hton(ip);
597
            memcpy(w, &ip, len);
598
            break;
599
          }
600
        case EAF_TYPE_INT_SET:
601
        case EAF_TYPE_EC_SET:
602
          {
603
            u32 *z = int_set_get_data(a->u.ptr);
604
            int i;
605
            for(i=0; i<len; i+=4)
606
              put_u32(w+i, *z++);
607
            break;
608
          }
609
        case EAF_TYPE_OPAQUE:
610
        case EAF_TYPE_AS_PATH:
611
          memcpy(w, a->u.ptr->data, len);
612
          break;
613
        default:
614
          bug("bgp_encode_attrs: unknown attribute type %02x", a->type);
615
        }
616
      ADVANCE(w, remains, len);
617
    }
618
  return w - start;
619

    
620
 err_no_buffer:
621
  return -1;
622
}
623

    
624
static void
625
bgp_init_prefix(struct fib_node *N)
626
{
627
  struct bgp_prefix *p = (struct bgp_prefix *) N;
628
  p->bucket_node.next = NULL;
629
}
630

    
631
static int
632
bgp_compare_u32(const u32 *x, const u32 *y)
633
{
634
  return (*x < *y) ? -1 : (*x > *y) ? 1 : 0;
635
}
636

    
637
static inline void
638
bgp_normalize_int_set(u32 *dest, u32 *src, unsigned cnt)
639
{
640
  memcpy(dest, src, sizeof(u32) * cnt);
641
  qsort(dest, cnt, sizeof(u32), (int(*)(const void *, const void *)) bgp_compare_u32);
642
}
643

    
644
static int
645
bgp_compare_ec(const u32 *xp, const u32 *yp)
646
{
647
  u64 x = ec_get(xp, 0);
648
  u64 y = ec_get(yp, 0);
649
  return (x < y) ? -1 : (x > y) ? 1 : 0;
650
}
651

    
652
static inline void
653
bgp_normalize_ec_set(struct adata *ad, u32 *src, int internal)
654
{
655
  u32 *dst = int_set_get_data(ad);
656

    
657
  /* Remove non-transitive communities (EC_TBIT active) on external sessions */
658
  if (! internal)
659
    {
660
      int len = int_set_get_size(ad);
661
      u32 *t = dst;
662
      int i;
663

    
664
      for (i=0; i < len; i += 2)
665
        {
666
          if (src[i] & EC_TBIT)
667
            continue;
668
          
669
          *t++ = src[i];
670
          *t++ = src[i+1];
671
        }
672

    
673
      ad->length = (t - dst) * 4;
674
    }
675
  else
676
    memcpy(dst, src, ad->length);
677

    
678
  qsort(dst, ad->length / 8, 8, (int(*)(const void *, const void *)) bgp_compare_ec);
679
}
680

    
681
static void
682
bgp_rehash_buckets(struct bgp_proto *p)
683
{
684
  struct bgp_bucket **old = p->bucket_hash;
685
  struct bgp_bucket **new;
686
  unsigned oldn = p->hash_size;
687
  unsigned i, e, mask;
688
  struct bgp_bucket *b;
689

    
690
  p->hash_size = p->hash_limit;
691
  DBG("BGP: Rehashing bucket table from %d to %d\n", oldn, p->hash_size);
692
  p->hash_limit *= 4;
693
  if (p->hash_limit >= 65536)
694
    p->hash_limit = ~0;
695
  new = p->bucket_hash = mb_allocz(p->p.pool, p->hash_size * sizeof(struct bgp_bucket *));
696
  mask = p->hash_size - 1;
697
  for (i=0; i<oldn; i++)
698
    while (b = old[i])
699
      {
700
        old[i] = b->hash_next;
701
        e = b->hash & mask;
702
        b->hash_next = new[e];
703
        if (b->hash_next)
704
          b->hash_next->hash_prev = b;
705
        b->hash_prev = NULL;
706
        new[e] = b;
707
      }
708
  mb_free(old);
709
}
710

    
711
static struct bgp_bucket *
712
bgp_new_bucket(struct bgp_proto *p, ea_list *new, unsigned hash)
713
{
714
  struct bgp_bucket *b;
715
  unsigned ea_size = sizeof(ea_list) + new->count * sizeof(eattr);
716
  unsigned ea_size_aligned = BIRD_ALIGN(ea_size, CPU_STRUCT_ALIGN);
717
  unsigned size = sizeof(struct bgp_bucket) + ea_size;
718
  unsigned i;
719
  byte *dest;
720
  unsigned index = hash & (p->hash_size - 1);
721

    
722
  /* Gather total size of non-inline attributes */
723
  for (i=0; i<new->count; i++)
724
    {
725
      eattr *a = &new->attrs[i];
726
      if (!(a->type & EAF_EMBEDDED))
727
        size += BIRD_ALIGN(sizeof(struct adata) + a->u.ptr->length, CPU_STRUCT_ALIGN);
728
    }
729

    
730
  /* Create the bucket and hash it */
731
  b = mb_alloc(p->p.pool, size);
732
  b->hash_next = p->bucket_hash[index];
733
  if (b->hash_next)
734
    b->hash_next->hash_prev = b;
735
  p->bucket_hash[index] = b;
736
  b->hash_prev = NULL;
737
  b->hash = hash;
738
  add_tail(&p->bucket_queue, &b->send_node);
739
  init_list(&b->prefixes);
740
  memcpy(b->eattrs, new, ea_size);
741
  dest = ((byte *)b->eattrs) + ea_size_aligned;
742

    
743
  /* Copy values of non-inline attributes */
744
  for (i=0; i<new->count; i++)
745
    {
746
      eattr *a = &b->eattrs->attrs[i];
747
      if (!(a->type & EAF_EMBEDDED))
748
        {
749
          struct adata *oa = a->u.ptr;
750
          struct adata *na = (struct adata *) dest;
751
          memcpy(na, oa, sizeof(struct adata) + oa->length);
752
          a->u.ptr = na;
753
          dest += BIRD_ALIGN(sizeof(struct adata) + na->length, CPU_STRUCT_ALIGN);
754
        }
755
    }
756

    
757
  /* If needed, rehash */
758
  p->hash_count++;
759
  if (p->hash_count > p->hash_limit)
760
    bgp_rehash_buckets(p);
761

    
762
  return b;
763
}
764

    
765
static struct bgp_bucket *
766
bgp_get_bucket(struct bgp_proto *p, net *n, ea_list *attrs, int originate)
767
{
768
  ea_list *new;
769
  unsigned i, cnt, hash, code;
770
  eattr *a, *d;
771
  u32 seen = 0;
772
  struct bgp_bucket *b;
773

    
774
  /* Merge the attribute list */
775
  new = alloca(ea_scan(attrs));
776
  ea_merge(attrs, new);
777
  ea_sort(new);
778

    
779
  /* Normalize attributes */
780
  d = new->attrs;
781
  cnt = new->count;
782
  new->count = 0;
783
  for(i=0; i<cnt; i++)
784
    {
785
      a = &new->attrs[i];
786
      if (EA_PROTO(a->id) != EAP_BGP)
787
        continue;
788
      code = EA_ID(a->id);
789
      if (ATTR_KNOWN(code))
790
        {
791
          if (!bgp_attr_table[code].allow_in_ebgp && !p->is_internal)
792
            continue;
793
          /* The flags might have been zero if the attr was added by filters */
794
          a->flags = (a->flags & BAF_PARTIAL) | bgp_attr_table[code].expected_flags;
795
          if (code < 32)
796
            seen |= 1 << code;
797
        }
798
      else
799
        {
800
          /* Don't re-export unknown non-transitive attributes */
801
          if (!(a->flags & BAF_TRANSITIVE))
802
            continue;
803
        }
804
      *d = *a;
805
      if ((d->type & EAF_ORIGINATED) && !originate && (d->flags & BAF_TRANSITIVE) && (d->flags & BAF_OPTIONAL))
806
        d->flags |= BAF_PARTIAL;
807
      switch (d->type & EAF_TYPE_MASK)
808
        {
809
        case EAF_TYPE_INT_SET:
810
          {
811
            struct adata *z = alloca(sizeof(struct adata) + d->u.ptr->length);
812
            z->length = d->u.ptr->length;
813
            bgp_normalize_int_set((u32 *) z->data, (u32 *) d->u.ptr->data, z->length / 4);
814
            d->u.ptr = z;
815
            break;
816
          }
817
        case EAF_TYPE_EC_SET:
818
          {
819
            struct adata *z = alloca(sizeof(struct adata) + d->u.ptr->length);
820
            z->length = d->u.ptr->length;
821
            bgp_normalize_ec_set(z, (u32 *) d->u.ptr->data, p->is_internal);
822
            d->u.ptr = z;
823
            break;
824
          }
825
        default: ;
826
        }
827
      d++;
828
      new->count++;
829
    }
830

    
831
  /* Hash */
832
  hash = ea_hash(new);
833
  for(b=p->bucket_hash[hash & (p->hash_size - 1)]; b; b=b->hash_next)
834
    if (b->hash == hash && ea_same(b->eattrs, new))
835
      {
836
        DBG("Found bucket.\n");
837
        return b;
838
      }
839

    
840
  /* Ensure that there are all mandatory attributes */
841
  for(i=0; i<ARRAY_SIZE(bgp_mandatory_attrs); i++)
842
    if (!(seen & (1 << bgp_mandatory_attrs[i])))
843
      {
844
        log(L_ERR "%s: Mandatory attribute %s missing in route %I/%d", p->p.name, bgp_attr_table[bgp_mandatory_attrs[i]].name, n->n.prefix, n->n.pxlen);
845
        return NULL;
846
      }
847

    
848
  /* Check if next hop is valid */
849
  a = ea_find(new, EA_CODE(EAP_BGP, BA_NEXT_HOP));
850
  if (!a || ipa_equal(p->cf->remote_ip, *(ip_addr *)a->u.ptr->data))
851
    {
852
      log(L_ERR "%s: Invalid NEXT_HOP attribute in route %I/%d", p->p.name, n->n.prefix, n->n.pxlen);
853
      return NULL;
854
    }
855

    
856
  /* Create new bucket */
857
  DBG("Creating bucket.\n");
858
  return bgp_new_bucket(p, new, hash);
859
}
860

    
861
void
862
bgp_free_bucket(struct bgp_proto *p, struct bgp_bucket *buck)
863
{
864
  if (buck->hash_next)
865
    buck->hash_next->hash_prev = buck->hash_prev;
866
  if (buck->hash_prev)
867
    buck->hash_prev->hash_next = buck->hash_next;
868
  else
869
    p->bucket_hash[buck->hash & (p->hash_size-1)] = buck->hash_next;
870
  mb_free(buck);
871
}
872

    
873
void
874
bgp_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old UNUSED, ea_list *attrs)
875
{
876
  struct bgp_proto *p = (struct bgp_proto *) P;
877
  struct bgp_bucket *buck;
878
  struct bgp_prefix *px;
879

    
880
  DBG("BGP: Got route %I/%d %s\n", n->n.prefix, n->n.pxlen, new ? "up" : "down");
881

    
882
  if (new)
883
    {
884
      buck = bgp_get_bucket(p, n, attrs, new->attrs->source != RTS_BGP);
885
      if (!buck)                        /* Inconsistent attribute list */
886
        return;
887
    }
888
  else
889
    {
890
      if (!(buck = p->withdraw_bucket))
891
        {
892
          buck = p->withdraw_bucket = mb_alloc(P->pool, sizeof(struct bgp_bucket));
893
          init_list(&buck->prefixes);
894
        }
895
    }
896
  px = fib_get(&p->prefix_fib, &n->n.prefix, n->n.pxlen);
897
  if (px->bucket_node.next)
898
    {
899
      DBG("\tRemoving old entry.\n");
900
      rem_node(&px->bucket_node);
901
    }
902
  add_tail(&buck->prefixes, &px->bucket_node);
903
  bgp_schedule_packet(p->conn, PKT_UPDATE);
904
}
905

    
906
static int
907
bgp_create_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *pool)
908
{
909
  ea_list *ea = lp_alloc(pool, sizeof(ea_list) + 4*sizeof(eattr));
910
  rta *rta = e->attrs;
911
  byte *z;
912

    
913
  ea->next = *attrs;
914
  *attrs = ea;
915
  ea->flags = EALF_SORTED;
916
  ea->count = 4;
917

    
918
  bgp_set_attr(ea->attrs, BA_ORIGIN,
919
       ((rta->source == RTS_OSPF_EXT1) || (rta->source == RTS_OSPF_EXT2)) ? ORIGIN_INCOMPLETE : ORIGIN_IGP);
920

    
921
  if (p->is_internal)
922
    bgp_set_attr_wa(ea->attrs+1, pool, BA_AS_PATH, 0);
923
  else
924
    {
925
      z = bgp_set_attr_wa(ea->attrs+1, pool, BA_AS_PATH, 6);
926
      z[0] = AS_PATH_SEQUENCE;
927
      z[1] = 1;                                /* 1 AS */
928
      put_u32(z+2, p->local_as);
929
    }
930

    
931
  /* iBGP -> use gw, eBGP multi-hop -> use source_addr,
932
     eBGP single-hop -> use gw if on the same iface */
933
  z = bgp_set_attr_wa(ea->attrs+2, pool, BA_NEXT_HOP, NEXT_HOP_LENGTH);
934
  if (p->cf->next_hop_self ||
935
      rta->dest != RTD_ROUTER ||
936
      ipa_equal(rta->gw, IPA_NONE) ||
937
      ipa_has_link_scope(rta->gw) ||
938
      (!p->is_internal && (!p->neigh || (rta->iface != p->neigh->iface))))
939
    set_next_hop(z, p->source_addr);
940
  else
941
    set_next_hop(z, rta->gw);
942

    
943
  bgp_set_attr(ea->attrs+3, BA_LOCAL_PREF, p->cf->default_local_pref);
944

    
945
  return 0;                                /* Leave decision to the filters */
946
}
947

    
948

    
949
static inline int
950
bgp_as_path_loopy(struct bgp_proto *p, rta *a)
951
{
952
  eattr *e = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
953
  return (e && as_path_is_member(e->u.ptr, p->local_as));
954
}
955

    
956
static inline int
957
bgp_originator_id_loopy(struct bgp_proto *p, rta *a)
958
{
959
  eattr *e = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID));
960
  return (e && (e->u.data == p->local_id));
961
}
962

    
963
static inline int
964
bgp_cluster_list_loopy(struct bgp_proto *p, rta *a)
965
{
966
  eattr *e = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST));
967
  return (e && p->rr_client && int_set_contains(e->u.ptr, p->rr_cluster_id));
968
}
969

    
970

    
971
static inline void
972
bgp_path_prepend(rte *e, ea_list **attrs, struct linpool *pool, u32 as)
973
{
974
  eattr *a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
975
  bgp_attach_attr(attrs, pool, BA_AS_PATH, (uintptr_t) as_path_prepend(pool, a->u.ptr, as));
976
}
977

    
978
static inline void
979
bgp_cluster_list_prepend(rte *e, ea_list **attrs, struct linpool *pool, u32 cid)
980
{
981
  eattr *a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST));
982
  bgp_attach_attr(attrs, pool, BA_CLUSTER_LIST, (uintptr_t) int_set_add(pool, a ? a->u.ptr : NULL, cid));
983
}
984

    
985
static int
986
bgp_update_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *pool, int rr)
987
{
988
  eattr *a;
989

    
990
  if (!p->is_internal && !p->rs_client)
991
    {
992
      bgp_path_prepend(e, attrs, pool, p->local_as);
993

    
994
      /* The MULTI_EXIT_DISC attribute received from a neighboring AS MUST NOT be
995
       * propagated to other neighboring ASes.
996
       * Perhaps it would be better to undefine it.
997
       */
998
      a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
999
      if (a)
1000
        bgp_attach_attr(attrs, pool, BA_MULTI_EXIT_DISC, 0);
1001
    }
1002

    
1003
  /* iBGP -> keep next_hop, eBGP multi-hop -> use source_addr,
1004
     eBGP single-hop -> keep next_hop if on the same iface */
1005
  a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP));
1006
  if (a && !p->cf->next_hop_self && 
1007
      (p->is_internal || (p->neigh && (e->attrs->iface == p->neigh->iface))))
1008
    {
1009
      /* Leave the original next hop attribute, will check later where does it point */
1010
    }
1011
  else
1012
    {
1013
      /* Need to create new one */
1014
      byte *b = bgp_attach_attr_wa(attrs, pool, BA_NEXT_HOP, NEXT_HOP_LENGTH);
1015
      set_next_hop(b, p->source_addr);
1016
    }
1017

    
1018
  if (rr)
1019
    {
1020
      /* Handling route reflection, RFC 4456 */
1021
      struct bgp_proto *src = (struct bgp_proto *) e->attrs->proto;
1022

    
1023
      a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID));
1024
      if (!a)
1025
        bgp_attach_attr(attrs, pool, BA_ORIGINATOR_ID, src->remote_id);
1026

    
1027
      /* We attach proper cluster ID according to whether the route is entering or leaving the cluster */
1028
      bgp_cluster_list_prepend(e, attrs, pool, src->rr_client ? src->rr_cluster_id : p->rr_cluster_id);
1029

    
1030
      /* Two RR clients with different cluster ID, hmmm */
1031
      if (src->rr_client && p->rr_client && (src->rr_cluster_id != p->rr_cluster_id))
1032
        bgp_cluster_list_prepend(e, attrs, pool, p->rr_cluster_id);
1033
    }
1034

    
1035
  return 0;                                /* Leave decision to the filters */
1036
}
1037

    
1038
static int
1039
bgp_community_filter(struct bgp_proto *p, rte *e)
1040
{
1041
  eattr *a;
1042
  struct adata *d;
1043

    
1044
  /* Check if we aren't forbidden to export the route by communities */
1045
  a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_COMMUNITY));
1046
  if (a)
1047
    {
1048
      d = a->u.ptr;
1049
      if (int_set_contains(d, BGP_COMM_NO_ADVERTISE))
1050
        {
1051
          DBG("\tNO_ADVERTISE\n");
1052
          return 1;
1053
        }
1054
      if (!p->is_internal &&
1055
          (int_set_contains(d, BGP_COMM_NO_EXPORT) ||
1056
           int_set_contains(d, BGP_COMM_NO_EXPORT_SUBCONFED)))
1057
        {
1058
          DBG("\tNO_EXPORT\n");
1059
          return 1;
1060
        }
1061
    }
1062

    
1063
  return 0;
1064
}
1065

    
1066
int
1067
bgp_import_control(struct proto *P, rte **new, ea_list **attrs, struct linpool *pool)
1068
{
1069
  rte *e = *new;
1070
  struct bgp_proto *p = (struct bgp_proto *) P;
1071
  struct bgp_proto *new_bgp = (e->attrs->proto->proto == &proto_bgp) ? (struct bgp_proto *) e->attrs->proto : NULL;
1072

    
1073
  if (p == new_bgp)                        /* Poison reverse updates */
1074
    return -1;
1075
  if (new_bgp)
1076
    {
1077
      /* We should check here for cluster list loop, because the receiving BGP instance
1078
         might have different cluster ID  */
1079
      if (bgp_cluster_list_loopy(p, e->attrs))
1080
        return -1;
1081

    
1082
      if (p->cf->interpret_communities && bgp_community_filter(p, e))
1083
        return -1;
1084

    
1085
      if (p->local_as == new_bgp->local_as && p->is_internal && new_bgp->is_internal)
1086
        {
1087
          /* Redistribution of internal routes with IBGP */
1088
          if (p->rr_client || new_bgp->rr_client)
1089
            /* Route reflection, RFC 4456 */
1090
            return bgp_update_attrs(p, e, attrs, pool, 1);
1091
          else
1092
            return -1;
1093
        }
1094
      else
1095
        return bgp_update_attrs(p, e, attrs, pool, 0);
1096
    }
1097
  else
1098
    return bgp_create_attrs(p, e, attrs, pool);
1099
}
1100

    
1101
static inline u32
1102
bgp_get_neighbor(rte *r)
1103
{
1104
  eattr *e = ea_find(r->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
1105
  u32 as;
1106

    
1107
  if (e && as_path_get_first(e->u.ptr, &as))
1108
    return as;
1109
  else
1110
    return ((struct bgp_proto *) r->attrs->proto)->remote_as;
1111
}
1112

    
1113
static inline int
1114
rte_resolvable(rte *rt)
1115
{
1116
  int rd = rt->attrs->dest;  
1117
  return (rd == RTD_ROUTER) || (rd == RTD_DEVICE) || (rd == RTD_MULTIPATH);
1118
}
1119

    
1120
int
1121
bgp_rte_better(rte *new, rte *old)
1122
{
1123
  struct bgp_proto *new_bgp = (struct bgp_proto *) new->attrs->proto;
1124
  struct bgp_proto *old_bgp = (struct bgp_proto *) old->attrs->proto;
1125
  eattr *x, *y;
1126
  u32 n, o;
1127

    
1128
  /* RFC 4271 9.1.2.1. Route resolvability test */
1129
  n = rte_resolvable(new);
1130
  o = rte_resolvable(old);
1131
  if (n > o)
1132
    return 1;
1133
  if (n < o)
1134
    return 0;
1135

    
1136
  /* Start with local preferences */
1137
  x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_LOCAL_PREF));
1138
  y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_LOCAL_PREF));
1139
  n = x ? x->u.data : new_bgp->cf->default_local_pref;
1140
  o = y ? y->u.data : old_bgp->cf->default_local_pref;
1141
  if (n > o)
1142
    return 1;
1143
  if (n < o)
1144
    return 0;
1145

    
1146
  /* RFC 4271 9.1.2.2. a)  Use AS path lengths */
1147
  if (new_bgp->cf->compare_path_lengths || old_bgp->cf->compare_path_lengths)
1148
    {
1149
      x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
1150
      y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
1151
      n = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN;
1152
      o = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN;
1153
      if (n < o)
1154
        return 1;
1155
      if (n > o)
1156
        return 0;
1157
    }
1158

    
1159
  /* RFC 4271 9.1.2.2. b) Use origins */
1160
  x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN));
1161
  y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN));
1162
  n = x ? x->u.data : ORIGIN_INCOMPLETE;
1163
  o = y ? y->u.data : ORIGIN_INCOMPLETE;
1164
  if (n < o)
1165
    return 1;
1166
  if (n > o)
1167
    return 0;
1168

    
1169
  /* RFC 4271 9.1.2.2. c) Compare MED's */
1170
  /* This is noncompliant. Proper RFC 4271 path selection cannot be
1171
   * interpreted as finding the best path in some ordering.
1172
   * Therefore, it cannot be implemented in BIRD without some ugly
1173
   * hacks. This is just an approximation, which in specific
1174
   * situations may lead to persistent routing loops, because it is
1175
   * nondeterministic - it depends on the order in which routes
1176
   * appeared. But it is also the same behavior as used by default in
1177
   * Cisco routers, so it is probably not a big issue.
1178
   */
1179
  if (new_bgp->cf->med_metric || old_bgp->cf->med_metric ||
1180
      (bgp_get_neighbor(new) == bgp_get_neighbor(old)))
1181
    {
1182
      x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
1183
      y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
1184
      n = x ? x->u.data : new_bgp->cf->default_med;
1185
      o = y ? y->u.data : old_bgp->cf->default_med;
1186
      if (n < o)
1187
        return 1;
1188
      if (n > o)
1189
        return 0;
1190
    }
1191

    
1192
  /* RFC 4271 9.1.2.2. d) Prefer external peers */
1193
  if (new_bgp->is_internal > old_bgp->is_internal)
1194
    return 0;
1195
  if (new_bgp->is_internal < old_bgp->is_internal)
1196
    return 1;
1197

    
1198
  /* RFC 4271 9.1.2.2. e) Compare IGP metrics */
1199
  n = new_bgp->cf->igp_metric ? new->attrs->igp_metric : 0;
1200
  o = old_bgp->cf->igp_metric ? old->attrs->igp_metric : 0;
1201
  if (n < o)
1202
    return 1;
1203
  if (n > o)
1204
    return 0;
1205

    
1206
  /* RFC 4271 9.1.2.2. f) Compare BGP identifiers */
1207
  /* RFC 4456 9. a) Use ORIGINATOR_ID instead of local neighor ID */
1208
  x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID));
1209
  y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID));
1210
  n = x ? x->u.data : new_bgp->remote_id;
1211
  o = y ? y->u.data : old_bgp->remote_id;
1212

    
1213
  /* RFC 5004 - prefer older routes */
1214
  /* (if both are external and from different peer) */
1215
  if ((new_bgp->cf->prefer_older || old_bgp->cf->prefer_older) &&
1216
      !new_bgp->is_internal && n != o)
1217
    return 0;
1218

    
1219
  /* rest of RFC 4271 9.1.2.2. f) */
1220
  if (n < o)
1221
    return 1;
1222
  if (n > o)
1223
    return 0;
1224

    
1225
  /* RFC 4456 9. b) Compare cluster list lengths */
1226
  x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST));
1227
  y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST));
1228
  n = x ? int_set_get_size(x->u.ptr) : 0;
1229
  o = y ? int_set_get_size(y->u.ptr) : 0;
1230
  if (n < o)
1231
    return 1;
1232
  if (n > o)
1233
    return 0;
1234

    
1235
  /* RFC 4271 9.1.2.2. g) Compare peer IP adresses */
1236
  return (ipa_compare(new_bgp->cf->remote_ip, old_bgp->cf->remote_ip) < 0);
1237
}
1238

    
1239
static struct adata *
1240
bgp_aggregator_convert_to_new(struct adata *old, struct linpool *pool)
1241
{
1242
  struct adata *newa = lp_alloc(pool, sizeof(struct adata) + 8);
1243
  newa->length = 8;
1244
  aggregator_convert_to_new(old, newa->data);
1245
  return newa;
1246
}
1247

    
1248

    
1249
/* Take last req_as ASNs from path old2 (in 2B format), convert to 4B format
1250
 * and append path old4 (in 4B format).
1251
 */
1252
static struct adata *
1253
bgp_merge_as_paths(struct adata *old2, struct adata *old4, int req_as, struct linpool *pool)
1254
{
1255
  byte buf[old2->length * 2];
1256

    
1257
  int ol = as_path_convert_to_new(old2, buf, req_as);
1258
  int nl = ol + (old4 ? old4->length : 0);
1259

    
1260
  struct adata *newa = lp_alloc(pool, sizeof(struct adata) + nl);
1261
  newa->length = nl;
1262
  memcpy(newa->data, buf, ol);
1263
  if (old4) memcpy(newa->data + ol, old4->data, old4->length);
1264

    
1265
  return newa;
1266
}
1267

    
1268
static int
1269
as4_aggregator_valid(struct adata *aggr)
1270
{
1271
  return aggr->length == 8;
1272
}
1273

    
1274

    
1275
/* Reconstruct 4B AS_PATH and AGGREGATOR according to RFC 4893 4.2.3 */
1276
static void
1277
bgp_reconstruct_4b_atts(struct bgp_proto *p, rta *a, struct linpool *pool)
1278
{
1279
  eattr *p2 =ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
1280
  eattr *p4 =ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS4_PATH));
1281
  eattr *a2 =ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AGGREGATOR));
1282
  eattr *a4 =ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS4_AGGREGATOR));
1283
  int a4_removed = 0;
1284

    
1285
  if (a4 && !as4_aggregator_valid(a4->u.ptr))
1286
    {
1287
      log(L_WARN "%s: AS4_AGGREGATOR attribute is invalid, skipping attribute", p->p.name);
1288
      a4 = NULL;
1289
      a4_removed = 1;
1290
    }
1291

    
1292
  if (a2)
1293
    {
1294
      u32 a2_as = get_u16(a2->u.ptr->data);
1295

    
1296
      if (a4)
1297
        {
1298
          if (a2_as != AS_TRANS)
1299
            {
1300
              /* Routes were aggregated by old router and therefore AS4_PATH
1301
               * and AS4_AGGREGATOR is invalid
1302
               *
1303
               * Convert AS_PATH and AGGREGATOR to 4B format and finish.
1304
               */
1305

    
1306
              a2->u.ptr = bgp_aggregator_convert_to_new(a2->u.ptr, pool);
1307
              p2->u.ptr = bgp_merge_as_paths(p2->u.ptr, NULL, AS_PATH_MAXLEN, pool);
1308

    
1309
              return;
1310
            }
1311
          else
1312
            {
1313
              /* Common case, use AS4_AGGREGATOR attribute */
1314
              a2->u.ptr = a4->u.ptr;
1315
            }
1316
        }
1317
      else
1318
        {
1319
          /* Common case, use old AGGREGATOR attribute */
1320
          a2->u.ptr = bgp_aggregator_convert_to_new(a2->u.ptr, pool);
1321

    
1322
          if ((a2_as == AS_TRANS) && !a4_removed)
1323
            log(L_WARN "%s: AGGREGATOR attribute contain AS_TRANS, but AS4_AGGREGATOR is missing", p->p.name);
1324
        }
1325
    }
1326
  else
1327
    if (a4)
1328
      log(L_WARN "%s: AS4_AGGREGATOR attribute received, but AGGREGATOR attribute is missing", p->p.name);
1329

    
1330
  int p2_len = as_path_getlen_int(p2->u.ptr, 2);
1331
  int p4_len = p4 ? validate_as4_path(p, p4->u.ptr) : -1;
1332

    
1333
  if (p4 && (p4_len < 0))
1334
    log(L_WARN "%s: AS4_PATH attribute is malformed, skipping attribute", p->p.name);
1335

    
1336
  if ((p4_len <= 0) || (p2_len < p4_len))
1337
    p2->u.ptr = bgp_merge_as_paths(p2->u.ptr, NULL, AS_PATH_MAXLEN, pool);
1338
  else
1339
    p2->u.ptr = bgp_merge_as_paths(p2->u.ptr, p4->u.ptr, p2_len - p4_len, pool);
1340
}
1341

    
1342
static void
1343
bgp_remove_as4_attrs(struct bgp_proto *p, rta *a)
1344
{
1345
  unsigned id1 = EA_CODE(EAP_BGP, BA_AS4_PATH);
1346
  unsigned id2 = EA_CODE(EAP_BGP, BA_AS4_AGGREGATOR);
1347
  ea_list **el = &(a->eattrs);
1348

    
1349
  /* We know that ea_lists constructed in bgp_decode attrs have one attribute per ea_list struct */
1350
  while (*el != NULL)
1351
    {
1352
      unsigned fid = (*el)->attrs[0].id;
1353

    
1354
      if ((fid == id1) || (fid == id2))
1355
        {
1356
          *el = (*el)->next;
1357
          if (p->as4_session)
1358
            log(L_WARN "%s: Unexpected AS4_* attributes received", p->p.name);
1359
        }
1360
      else
1361
        el = &((*el)->next);
1362
    }
1363
}
1364

    
1365
/**
1366
 * bgp_decode_attrs - check and decode BGP attributes
1367
 * @conn: connection
1368
 * @attr: start of attribute block
1369
 * @len: length of attribute block
1370
 * @pool: linear pool to make all the allocations in
1371
 * @mandatory: 1 iff presence of mandatory attributes has to be checked
1372
 *
1373
 * This function takes a BGP attribute block (a part of an Update message), checks
1374
 * its consistency and converts it to a list of BIRD route attributes represented
1375
 * by a &rta.
1376
 */
1377
struct rta *
1378
bgp_decode_attrs(struct bgp_conn *conn, byte *attr, unsigned int len, struct linpool *pool, int mandatory)
1379
{
1380
  struct bgp_proto *bgp = conn->bgp;
1381
  rta *a = lp_alloc(pool, sizeof(struct rta));
1382
  unsigned int flags, code, l, i, type;
1383
  int errcode;
1384
  byte *z, *attr_start;
1385
  byte seen[256/8];
1386
  ea_list *ea;
1387
  struct adata *ad;
1388
  int withdraw = 0;
1389

    
1390
  bzero(a, sizeof(rta));
1391
  a->proto = &bgp->p;
1392
  a->source = RTS_BGP;
1393
  a->scope = SCOPE_UNIVERSE;
1394
  a->cast = RTC_UNICAST;
1395
  /* a->dest = RTD_ROUTER;  -- set in bgp_set_next_hop() */
1396
  a->from = bgp->cf->remote_ip;
1397

    
1398
  /* Parse the attributes */
1399
  bzero(seen, sizeof(seen));
1400
  DBG("BGP: Parsing attributes\n");
1401
  while (len)
1402
    {
1403
      if (len < 2)
1404
        goto malformed;
1405
      attr_start = attr;
1406
      flags = *attr++;
1407
      code = *attr++;
1408
      len -= 2;
1409
      if (flags & BAF_EXT_LEN)
1410
        {
1411
          if (len < 2)
1412
            goto malformed;
1413
          l = get_u16(attr);
1414
          attr += 2;
1415
          len -= 2;
1416
        }
1417
      else
1418
        {
1419
          if (len < 1)
1420
            goto malformed;
1421
          l = *attr++;
1422
          len--;
1423
        }
1424
      if (l > len)
1425
        goto malformed;
1426
      len -= l;
1427
      z = attr;
1428
      attr += l;
1429
      DBG("Attr %02x %02x %d\n", code, flags, l);
1430
      if (seen[code/8] & (1 << (code%8)))
1431
        goto malformed;
1432
      if (ATTR_KNOWN(code))
1433
        {
1434
          struct attr_desc *desc = &bgp_attr_table[code];
1435
          if (desc->expected_length >= 0 && desc->expected_length != (int) l)
1436
            { errcode = 5; goto err; }
1437
          if ((desc->expected_flags ^ flags) & (BAF_OPTIONAL | BAF_TRANSITIVE))
1438
            { errcode = 4; goto err; }
1439
          if (!desc->allow_in_ebgp && !bgp->is_internal)
1440
            continue;
1441
          if (desc->validate)
1442
            {
1443
              errcode = desc->validate(bgp, z, l);
1444
              if (errcode > 0)
1445
                goto err;
1446
              if (errcode == IGNORE)
1447
                continue;
1448
              if (errcode <= WITHDRAW)
1449
                {
1450
                  log(L_WARN "%s: Attribute %s is malformed, withdrawing update",
1451
                      bgp->p.name, desc->name);
1452
                  withdraw = 1;
1453
                }
1454
            }
1455
          else if (code == BA_AS_PATH)
1456
            {
1457
              /* Special case as it might also trim the attribute */
1458
              if (validate_as_path(bgp, z, &l) < 0)
1459
                { errcode = 11; goto err; }
1460
            }
1461
          type = desc->type;
1462
        }
1463
      else                                /* Unknown attribute */
1464
        {
1465
          if (!(flags & BAF_OPTIONAL))
1466
            { errcode = 2; goto err; }
1467
          type = EAF_TYPE_OPAQUE;
1468
        }
1469
      
1470
      // Only OPTIONAL and TRANSITIVE attributes may have non-zero PARTIAL flag
1471
      // if (!((flags & BAF_OPTIONAL) && (flags & BAF_TRANSITIVE)) && (flags & BAF_PARTIAL))
1472
      //   { errcode = 4; goto err; }
1473

    
1474
      seen[code/8] |= (1 << (code%8));
1475
      ea = lp_alloc(pool, sizeof(ea_list) + sizeof(eattr));
1476
      ea->next = a->eattrs;
1477
      a->eattrs = ea;
1478
      ea->flags = 0;
1479
      ea->count = 1;
1480
      ea->attrs[0].id = EA_CODE(EAP_BGP, code);
1481
      ea->attrs[0].flags = flags;
1482
      ea->attrs[0].type = type;
1483
      if (type & EAF_EMBEDDED)
1484
        ad = NULL;
1485
      else
1486
        {
1487
          ad = lp_alloc(pool, sizeof(struct adata) + l);
1488
          ea->attrs[0].u.ptr = ad;
1489
          ad->length = l;
1490
          memcpy(ad->data, z, l);
1491
        }
1492
      switch (type)
1493
        {
1494
        case EAF_TYPE_ROUTER_ID:
1495
        case EAF_TYPE_INT:
1496
          if (l == 1)
1497
            ea->attrs[0].u.data = *z;
1498
          else
1499
            ea->attrs[0].u.data = get_u32(z);
1500
          break;
1501
        case EAF_TYPE_IP_ADDRESS:
1502
          ipa_ntoh(*(ip_addr *)ad->data);
1503
          break;
1504
        case EAF_TYPE_INT_SET:
1505
        case EAF_TYPE_EC_SET:
1506
          {
1507
            u32 *z = (u32 *) ad->data;
1508
            for(i=0; i<ad->length/4; i++)
1509
              z[i] = ntohl(z[i]);
1510
            break;
1511
          }
1512
        }
1513
    }
1514

    
1515
  if (withdraw)
1516
    goto withdraw;
1517

    
1518
#ifdef IPV6
1519
  /* If we received MP_REACH_NLRI we should check mandatory attributes */
1520
  if (bgp->mp_reach_len != 0)
1521
    mandatory = 1;
1522
#endif
1523

    
1524
  /* If there is no (reachability) NLRI, we should exit now */
1525
  if (! mandatory)
1526
    return a;
1527

    
1528
  /* Check if all mandatory attributes are present */
1529
  for(i=0; i < ARRAY_SIZE(bgp_mandatory_attrs); i++)
1530
    {
1531
      code = bgp_mandatory_attrs[i];
1532
      if (!(seen[code/8] & (1 << (code%8))))
1533
        {
1534
          bgp_error(conn, 3, 3, &bgp_mandatory_attrs[i], 1);
1535
          return NULL;
1536
        }
1537
    }
1538

    
1539
  /* When receiving attributes from non-AS4-aware BGP speaker,
1540
   * we have to reconstruct 4B AS_PATH and AGGREGATOR attributes
1541
   */
1542
  if (! bgp->as4_session)
1543
    bgp_reconstruct_4b_atts(bgp, a, pool);
1544

    
1545
  bgp_remove_as4_attrs(bgp, a);
1546

    
1547
  /* If the AS path attribute contains our AS, reject the routes */
1548
  if (bgp_as_path_loopy(bgp, a))
1549
    goto withdraw;
1550

    
1551
  /* Two checks for IBGP loops caused by route reflection, RFC 4456 */ 
1552
  if (bgp_originator_id_loopy(bgp, a) ||
1553
      bgp_cluster_list_loopy(bgp, a))
1554
    goto withdraw;
1555

    
1556
  /* If there's no local preference, define one */
1557
  if (!(seen[0] & (1 << BA_LOCAL_PREF)))
1558
    bgp_attach_attr(&a->eattrs, pool, BA_LOCAL_PREF, bgp->cf->default_local_pref);
1559

    
1560
  return a;
1561

    
1562
withdraw:
1563
  return NULL;
1564

    
1565
malformed:
1566
  bgp_error(conn, 3, 1, NULL, 0);
1567
  return NULL;
1568

    
1569
err:
1570
  bgp_error(conn, 3, errcode, attr_start, z+l-attr_start);
1571
  return NULL;
1572
}
1573

    
1574
int
1575
bgp_get_attr(eattr *a, byte *buf, int buflen)
1576
{
1577
  unsigned int i = EA_ID(a->id);
1578
  struct attr_desc *d;
1579

    
1580
  if (ATTR_KNOWN(i))
1581
    {
1582
      d = &bgp_attr_table[i];
1583
      buf += bsprintf(buf, "%s", d->name);
1584
      if (d->format)
1585
        {
1586
          *buf++ = ':';
1587
          *buf++ = ' ';
1588
          d->format(a, buf, buflen);
1589
          return GA_FULL;
1590
        }
1591
      return GA_NAME;
1592
    }
1593
  bsprintf(buf, "%02x%s", i, (a->flags & BAF_TRANSITIVE) ? " [t]" : "");
1594
  return GA_NAME;
1595
}
1596

    
1597
void
1598
bgp_attr_init(struct bgp_proto *p)
1599
{
1600
  p->hash_size = 256;
1601
  p->hash_limit = p->hash_size * 4;
1602
  p->bucket_hash = mb_allocz(p->p.pool, p->hash_size * sizeof(struct bgp_bucket *));
1603
  init_list(&p->bucket_queue);
1604
  p->withdraw_bucket = NULL;
1605
  fib_init(&p->prefix_fib, p->p.pool, sizeof(struct bgp_prefix), 0, bgp_init_prefix);
1606
}
1607

    
1608
void
1609
bgp_get_route_info(rte *e, byte *buf, ea_list *attrs)
1610
{
1611
  eattr *p = ea_find(attrs, EA_CODE(EAP_BGP, BA_AS_PATH));
1612
  eattr *o = ea_find(attrs, EA_CODE(EAP_BGP, BA_ORIGIN));
1613
  u32 origas;
1614

    
1615
  buf += bsprintf(buf, " (%d", e->pref);
1616
  if (e->attrs->hostentry)
1617
    {
1618
      if (!rte_resolvable(e))
1619
        buf += bsprintf(buf, "/-");
1620
      else if (e->attrs->igp_metric >= IGP_METRIC_UNKNOWN)
1621
        buf += bsprintf(buf, "/?");
1622
      else
1623
        buf += bsprintf(buf, "/%d", e->attrs->igp_metric);
1624
    }
1625
  buf += bsprintf(buf, ") [");
1626

    
1627
  if (p && as_path_get_last(p->u.ptr, &origas))
1628
    buf += bsprintf(buf, "AS%u", origas);
1629
  if (o)
1630
    buf += bsprintf(buf, "%c", "ie?"[o->u.data]);
1631
  strcpy(buf, "]");
1632
}