Statistics
| Branch: | Revision:

iof-bird-daemon / proto / bgp / packets.c @ 4847a894

History | View | Annotate | Download (22.9 KB)

1
/*
2
 *        BIRD -- BGP Packet Processing
3
 *
4
 *        (c) 2000 Martin Mares <mj@ucw.cz>
5
 *
6
 *        Can be freely distributed and used under the terms of the GNU GPL.
7
 */
8

    
9
#undef LOCAL_DEBUG
10

    
11
#include "nest/bird.h"
12
#include "nest/iface.h"
13
#include "nest/protocol.h"
14
#include "nest/route.h"
15
#include "nest/attrs.h"
16
#include "conf/conf.h"
17
#include "lib/unaligned.h"
18
#include "lib/socket.h"
19

    
20
#include "bgp.h"
21

    
22
/*
 * Write the body of a NOTIFICATION message into @buf: error code,
 * error subcode and the notify_size bytes of data stored in @conn.
 * Returns a pointer just past the last byte written.
 */
static byte *
bgp_create_notification(struct bgp_conn *conn, byte *buf)
{
  struct bgp_proto *p = conn->bgp;      /* presumably needed by BGP_TRACE */
  byte *payload = buf + 2;

  BGP_TRACE(D_PACKETS, "Sending NOTIFICATION(code=%d.%d)", conn->notify_code, conn->notify_subcode);
  *buf = conn->notify_code;
  *(buf + 1) = conn->notify_subcode;
  memcpy(payload, conn->notify_data, conn->notify_size);
  return payload + conn->notify_size;
}
33

    
34
#ifdef IPV6
35
static byte *
36
bgp_put_cap_ipv6(struct bgp_conn *conn UNUSED, byte *buf)
37
{
38
  *buf++ = 1;                /* Capability 1: Multiprotocol extensions */
39
  *buf++ = 4;                /* Capability data length */
40
  *buf++ = 0;                /* We support AF IPv6 */
41
  *buf++ = BGP_AF_IPV6;
42
  *buf++ = 0;                /* RFU */
43
  *buf++ = 1;                /* and SAFI 1 */
44
  return buf;
45
}
46
#endif
47

    
48
/*
 * Append the 4-octet AS number capability (code 65) carrying our local AS
 * to @buf.  Six bytes are written; the return value points just past them.
 */
static byte *
bgp_put_cap_as4(struct bgp_conn *conn, byte *buf)
{
  buf[0] = 65;                  /* Capability 65: Support for 4-octet AS number */
  buf[1] = 4;                   /* Capability data length */
  put_u32(buf + 2, conn->bgp->local_as);
  return buf + 6;
}
56

    
57
/*
 * Build the body of an OPEN message in @buf.
 *
 * Layout written here: version (1 B), AS (2 B), hold time (2 B),
 * router ID (4 B), optional parameters length (1 B) and, when at least
 * one capability is emitted, a single "capability list" option.
 * Returns a pointer just past the last byte written.
 */
static byte *
bgp_create_open(struct bgp_conn *conn, byte *buf)
{
  struct bgp_proto *p = conn->bgp;
  byte *caps_start = buf + 12;  /* 3 B skipped: params length + option header */
  byte *caps_end = caps_start;
  int caps_size;

  BGP_TRACE(D_PACKETS, "Sending OPEN(ver=%d,as=%d,hold=%d,id=%08x)",
            BGP_VERSION, p->local_as, p->cf->hold_time, p->local_id);

  buf[0] = BGP_VERSION;
  /* An AS number that does not fit the 16-bit field is replaced by AS_TRANS */
  if (p->local_as < 0xFFFF)
    put_u16(buf+1, p->local_as);
  else
    put_u16(buf+1, AS_TRANS);
  put_u16(buf+3, p->cf->hold_time);
  put_u32(buf+5, p->local_id);

#ifdef IPV6
  caps_end = bgp_put_cap_ipv6(conn, caps_end);
#endif
  if (bgp_as4_support)
    caps_end = bgp_put_cap_as4(conn, caps_end);

  caps_size = caps_end - caps_start;
  if (caps_size <= 0)
    {
      buf[9] = 0;               /* No optional parameters */
      return buf + 10;
    }

  buf[9]  = caps_size + 2;      /* Optional params len */
  buf[10] = 2;                  /* Option: Capability list */
  buf[11] = caps_size;          /* Option length */
  return caps_end;
}
93

    
94
static unsigned int
95
bgp_encode_prefixes(struct bgp_proto *p, byte *w, struct bgp_bucket *buck, unsigned int remains)
96
{
97
  byte *start = w;
98
  ip_addr a;
99
  int bytes;
100

    
101
  while (!EMPTY_LIST(buck->prefixes) && remains >= 5)
102
    {
103
      struct bgp_prefix *px = SKIP_BACK(struct bgp_prefix, bucket_node, HEAD(buck->prefixes));
104
      DBG("\tDequeued route %I/%d\n", px->n.prefix, px->n.pxlen);
105
      *w++ = px->n.pxlen;
106
      bytes = (px->n.pxlen + 7) / 8;
107
      a = px->n.prefix;
108
      ipa_hton(a);
109
      memcpy(w, &a, bytes);
110
      w += bytes;
111
      remains -= bytes + 1;
112
      rem_node(&px->bucket_node);
113
      fib_delete(&p->prefix_fib, px);
114
    }
115
  return w - start;
116
}
117

    
118
#ifndef IPV6                /* IPv4 version */
119

    
120
/*
 * IPv4 flavour: build the body of an UPDATE message in @buf.
 *
 * The body consists of the withdrawn-routes length and prefixes, the
 * path-attribute length and attributes, and finally the announced
 * prefixes.  Withdrawals are taken from p->withdraw_bucket; announcements
 * from the first non-empty bucket in p->bucket_queue (empty buckets found
 * on the way are freed).  Returns a pointer past the body, or NULL when
 * there was nothing to withdraw or announce.
 */
static byte *
bgp_create_update(struct bgp_conn *conn, byte *buf)
{
  struct bgp_proto *p = conn->bgp;
  struct bgp_bucket *bucket;
  /* The two 16-bit length fields are accounted for up front */
  int room = BGP_MAX_PACKET_LENGTH - BGP_HEADER_LENGTH - 4;
  byte *out = buf + 2;
  int withdrawn_bytes = 0;
  int nlri_bytes = 0;
  int attr_bytes = 0;

  bucket = p->withdraw_bucket;
  if (bucket && !EMPTY_LIST(bucket->prefixes))
    {
      DBG("Withdrawn routes:\n");
      withdrawn_bytes = bgp_encode_prefixes(p, out, bucket, room);
      out += withdrawn_bytes;
      room -= withdrawn_bytes;
    }
  put_u16(buf, withdrawn_bytes);

  if (room >= 2048)
    for (;;)
      {
        bucket = (struct bgp_bucket *) HEAD(p->bucket_queue);
        if (!bucket->send_node.next)
          break;                /* end of the queue */

        if (EMPTY_LIST(bucket->prefixes))
          {
            DBG("Deleting empty bucket %p\n", bucket);
            rem_node(&bucket->send_node);
            bgp_free_bucket(p, bucket);
            continue;
          }

        DBG("Processing bucket %p\n", bucket);
        attr_bytes = bgp_encode_attrs(p, out+2, bucket->eattrs, 1024);
        put_u16(out, attr_bytes);
        out += attr_bytes + 2;
        nlri_bytes = bgp_encode_prefixes(p, out, bucket, room - attr_bytes);
        out += nlri_bytes;
        break;                  /* only one bucket per message */
      }

  if (!attr_bytes)              /* Attributes not already encoded */
    {
      put_u16(out, 0);
      out += 2;
    }

  if (!withdrawn_bytes && !nlri_bytes)
    return NULL;

  BGP_TRACE(D_PACKETS, "Sending UPDATE");
  return out;
}
174

    
175
#else                /* IPv6 version */
176

    
177
/*
 * IPv6 flavour: build the body of an UPDATE message in @buf.
 *
 * The classic withdrawn-routes field is left empty; withdrawals and
 * announcements are carried in MP_UNREACH_NLRI / MP_REACH_NLRI attributes
 * built in temporary storage from bgp_linpool and then encoded into the
 * packet.  Announcements come from the first non-empty bucket of
 * p->bucket_queue (empty buckets found on the way are freed).
 *
 * Returns a pointer past the body, or NULL when no attribute was written.
 */
static byte *
bgp_create_update(struct bgp_conn *conn, byte *buf)
{
  struct bgp_proto *p = conn->bgp;
  struct bgp_bucket *buck;
  int size, is_ll;
  int remains = BGP_MAX_PACKET_LENGTH - BGP_HEADER_LENGTH - 4;
  byte *w, *tmp, *tstart;
  ip_addr ip, ip_ll;
  ea_list *ea;
  eattr *nh;
  neighbor *n;

  put_u16(buf, 0);              /* withdrawn routes length is always zero here */
  w = buf+4;

  if ((buck = p->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
    {
      DBG("Withdrawn routes:\n");
      tmp = bgp_attach_attr_wa(&ea, bgp_linpool, BA_MP_UNREACH_NLRI, remains-8);
      *tmp++ = 0;
      *tmp++ = BGP_AF_IPV6;
      *tmp++ = 1;
      /*
       * The attribute data starts with the 3-byte AFI/SAFI header written
       * above, so it has to be counted in the length as well (the reach
       * branch below does the same via tmp - tstart).  Without it the last
       * three bytes of the withdrawn prefixes were cut off.
       */
      ea->attrs[0].u.ptr->length = 3 + bgp_encode_prefixes(p, tmp, buck, remains-11);
      size = bgp_encode_attrs(p, w, ea, remains);
      w += size;
      remains -= size;
    }

  if (remains >= 2048)
    {
      while ((buck = (struct bgp_bucket *) HEAD(p->bucket_queue))->send_node.next)
        {
          if (EMPTY_LIST(buck->prefixes))
            {
              DBG("Deleting empty bucket %p\n", buck);
              rem_node(&buck->send_node);
              bgp_free_bucket(p, buck);
              continue;
            }
          DBG("Processing bucket %p\n", buck);
          size = bgp_encode_attrs(p, w, buck->eattrs, 1024);
          w += size;
          remains -= size;
          tstart = tmp = bgp_attach_attr_wa(&ea, bgp_linpool, BA_MP_REACH_NLRI, remains-8);
          *tmp++ = 0;
          *tmp++ = BGP_AF_IPV6;
          *tmp++ = 1;
          nh = ea_find(buck->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP));
          ASSERT(nh);
          ip = *(ip_addr *) nh->u.ptr->data;

          /* Decide whether a link-local next hop is sent along the global one */
          is_ll = 0;
          if (ipa_equal(ip, p->local_addr))
            {
              is_ll = 1;
              ip_ll = p->local_link;
            }
          else
            {
              n = neigh_find(&p->p, &ip, 0);
              if (n && n->iface == p->neigh->iface)
                {
                  /* FIXME: We are assuming the global scope addresses use the lower 64 bits
                   * as an interface identifier which hasn't necessarily to be true.
                   */
                  is_ll = 1;
                  ip_ll = ipa_or(ipa_build(0xfe800000,0,0,0), ipa_and(ip, ipa_build(0,0,~0,~0)));
                }
            }

          if (is_ll)
            {
              *tmp++ = 32;      /* next hop length: global + link-local */
              ipa_hton(ip);
              memcpy(tmp, &ip, 16);
              ipa_hton(ip_ll);
              memcpy(tmp+16, &ip_ll, 16);
              tmp += 32;
            }
          else
            {
              *tmp++ = 16;      /* next hop length: global only */
              ipa_hton(ip);
              memcpy(tmp, &ip, 16);
              tmp += 16;
            }
          *tmp++ = 0;                   /* No SNPA information */
          tmp += bgp_encode_prefixes(p, tmp, buck, remains - (8+3+32+1));
          ea->attrs[0].u.ptr->length = tmp - tstart;
          w += bgp_encode_attrs(p, w, ea, remains);
          break;                /* only one bucket per message */
        }
    }

  size = w - (buf+4);
  put_u16(buf+2, size);         /* total path attribute length */
  lp_flush(bgp_linpool);
  if (size)
    {
      BGP_TRACE(D_PACKETS, "Sending UPDATE");
      return w;
    }
  else
    return NULL;
}
281

    
282
#endif
283

    
284
static void
285
bgp_create_header(byte *buf, unsigned int len, unsigned int type)
286
{
287
  memset(buf, 0xff, 16);                /* Marker */
288
  put_u16(buf+16, len);
289
  buf[18] = type;
290
}
291

    
292
/**
293
 * bgp_fire_tx - transmit packets
294
 * @conn: connection
295
 *
296
 * Whenever the transmit buffers of the underlying TCP connection
297
 * are free and we have any packets queued for sending, the socket functions
298
 * call bgp_fire_tx() which takes care of selecting the highest priority packet
299
 * queued (Notification > Keepalive > Open > Update), assembling its header
300
 * and body and sending it to the connection.
301
 */
302
static int
303
bgp_fire_tx(struct bgp_conn *conn)
304
{
305
  struct bgp_proto *p = conn->bgp;
306
  unsigned int s = conn->packets_to_send;
307
  sock *sk = conn->sk;
308
  byte *buf, *pkt, *end;
309
  int type;
310

    
311
  if (!sk)
312
    {
313
      conn->packets_to_send = 0;
314
      return 0;
315
    }
316
  buf = sk->tbuf;
317
  pkt = buf + BGP_HEADER_LENGTH;
318

    
319
  if (s & (1 << PKT_SCHEDULE_CLOSE))
320
    {
321
      bgp_close_conn(conn);
322
      return 0;
323
    }
324
  if (s & (1 << PKT_NOTIFICATION))
325
    {
326
      s = 1 << PKT_SCHEDULE_CLOSE;
327
      type = PKT_NOTIFICATION;
328
      end = bgp_create_notification(conn, pkt);
329
    }
330
  else if (s & (1 << PKT_KEEPALIVE))
331
    {
332
      s &= ~(1 << PKT_KEEPALIVE);
333
      type = PKT_KEEPALIVE;
334
      end = pkt;                        /* Keepalives carry no data */
335
      BGP_TRACE(D_PACKETS, "Sending KEEPALIVE");
336
      bgp_start_timer(conn->keepalive_timer, conn->keepalive_time);
337
    }
338
  else if (s & (1 << PKT_OPEN))
339
    {
340
      s &= ~(1 << PKT_OPEN);
341
      type = PKT_OPEN;
342
      end = bgp_create_open(conn, pkt);
343
    }
344
  else if (s & (1 << PKT_UPDATE))
345
    {
346
      end = bgp_create_update(conn, pkt);
347
      type = PKT_UPDATE;
348
      if (!end)
349
        {
350
          conn->packets_to_send = 0;
351
          return 0;
352
        }
353
    }
354
  else
355
    return 0;
356
  conn->packets_to_send = s;
357
  bgp_create_header(buf, end - buf, type);
358
  return sk_send(sk, end - buf);
359
}
360

    
361
/**
362
 * bgp_schedule_packet - schedule a packet for transmission
363
 * @conn: connection
364
 * @type: packet type
365
 *
366
 * Schedule a packet of type @type to be sent as soon as possible.
367
 */
368
void
369
bgp_schedule_packet(struct bgp_conn *conn, int type)
370
{
371
  DBG("BGP: Scheduling packet type %d\n", type);
372
  conn->packets_to_send |= 1 << type;
373
  if (conn->sk && conn->sk->tpos == conn->sk->tbuf)
374
    while (bgp_fire_tx(conn))
375
      ;
376
}
377

    
378
void
379
bgp_tx(sock *sk)
380
{
381
  struct bgp_conn *conn = sk->data;
382

    
383
  DBG("BGP: TX hook\n");
384
  while (bgp_fire_tx(conn))
385
    ;
386
}
387

    
388
/* Capatibility negotiation as per RFC 2842 */
389

    
390
void
391
bgp_parse_capabilities(struct bgp_conn *conn, byte *opt, int len)
392
{
393
  struct bgp_proto *p = conn->bgp;
394
  int cl;
395
  u32 as;
396

    
397
  while (len > 0)
398
    {
399
      if (len < 2 || len < 2 + opt[1])
400
        goto err;
401
      
402
      cl = opt[1];
403

    
404
      switch (opt[0])
405
        {
406
        case 65:
407
          if (cl != 4)
408
            goto err;
409
          p->as4_support = 1;
410
          if (bgp_as4_support)
411
            conn->advertised_as = get_u32(opt + 2);
412
          break;
413

    
414
          /* We can safely ignore all other capabilities */
415
        }
416
      len -= 2 + cl;
417
      opt += 2 + cl;
418
    }
419
  return;
420

    
421
    err:
422
  bgp_error(conn, 2, 0, NULL, 0);
423
  return;
424
}
425

    
426
static int
427
bgp_parse_options(struct bgp_conn *conn, byte *opt, int len)
428
{
429
  int ol;
430

    
431
  while (len > 0)
432
    {
433
      if (len < 2 || len < 2 + opt[1])
434
        { bgp_error(conn, 2, 0, NULL, 0); return 0; }
435
#ifdef LOCAL_DEBUG
436
      {
437
        int i;
438
        DBG("\tOption %02x:", opt[0]);
439
        for(i=0; i<opt[1]; i++)
440
          DBG(" %02x", opt[2+i]);
441
        DBG("\n");
442
      }
443
#endif
444

    
445
      ol = opt[1];
446
      switch (opt[0])
447
        {
448
        case 2:
449
          bgp_parse_capabilities(conn, opt + 2, ol);
450
          break;
451

    
452
        default:
453
          /*
454
           *  BGP specs don't tell us to send which option
455
           *  we didn't recognize, but it's common practice
456
           *  to do so. Also, capability negotiation with
457
           *  Cisco routers doesn't work without that.
458
           */
459
          bgp_error(conn, 2, 4, opt, ol);
460
          return 0;
461
        }
462
      len -= 2 + ol;
463
      opt += 2 + ol;
464
    }
465
  return 0;
466
}
467

    
468
static void
469
bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len)
470
{
471
  struct bgp_conn *other;
472
  struct bgp_proto *p = conn->bgp;
473
  struct bgp_config *cf = p->cf;
474
  unsigned hold;
475
  u32 id;
476

    
477
  /* Check state */
478
  if (conn->state != BS_OPENSENT)
479
    { bgp_error(conn, 5, 0, NULL, 0); }
480

    
481
  /* Check message contents */
482
  if (len < 29 || len != 29 + pkt[28])
483
    { bgp_error(conn, 1, 2, pkt+16, 2); return; }
484
  if (pkt[19] != BGP_VERSION)
485
    { bgp_error(conn, 2, 1, pkt+19, 1); return; } /* RFC 1771 says 16 bits, draft-09 tells to use 8 */
486
  conn->advertised_as = get_u16(pkt+20);
487
  hold = get_u16(pkt+22);
488
  id = get_u32(pkt+24);
489
  BGP_TRACE(D_PACKETS, "Got OPEN(as=%d,hold=%d,id=%08x)", conn->advertised_as, hold, id);
490

    
491
  p->remote_id = id; // ???
492
  if (bgp_parse_options(conn, pkt+29, pkt[28]))
493
    return;
494

    
495
  if (hold > 0 && hold < 3)
496
    { bgp_error(conn, 2, 6, pkt+22, 2); return; }
497

    
498
  if (!id || id == 0xffffffff || id == p->local_id)
499
    { bgp_error(conn, 2, 3, pkt+24, -4); return; }
500

    
501

    
502
  if (conn->advertised_as != p->remote_as)
503
    {
504
      bgp_error(conn, 2, 2, (byte *) &(conn->advertised_as), -4); return;
505
    }
506

    
507
  /* Check the other connection */
508
  other = (conn == &p->outgoing_conn) ? &p->incoming_conn : &p->outgoing_conn;
509
  switch (other->state)
510
    {
511
    case BS_IDLE:
512
      break;
513
    case BS_CONNECT:
514
    case BS_ACTIVE:
515
    case BS_OPENSENT:
516
      BGP_TRACE(D_EVENTS, "Connection collision, giving up the other connection");
517
      bgp_close_conn(other);
518
      break;
519
    case BS_OPENCONFIRM:
520
      if ((p->local_id < id) == (conn == &p->incoming_conn))
521
        {
522
          /* Should close the other connection */
523
          BGP_TRACE(D_EVENTS, "Connection collision, giving up the other connection");
524
          bgp_error(other, 6, 0, NULL, 0);
525
          break;
526
        }
527
      /* Fall thru */
528
    case BS_ESTABLISHED:
529
      /* Should close this connection */
530
      BGP_TRACE(D_EVENTS, "Connection collision, giving up this connection");
531
      bgp_error(conn, 6, 0, NULL, 0);
532
      return;
533
    default:
534
      bug("bgp_rx_open: Unknown state");
535
    }
536

    
537
  /* Make this connection primary */
538
  conn->primary = 1;
539
  p->conn = conn;
540

    
541
  /* Update our local variables */
542
  if (hold < p->cf->hold_time)
543
    conn->hold_time = hold;
544
  else
545
    conn->hold_time = p->cf->hold_time;
546
  conn->keepalive_time = p->cf->keepalive_time ? : conn->hold_time / 3;
547
  // p->remote_as = conn->advertised_as;
548
  p->remote_id = id;
549
  DBG("BGP: Hold timer set to %d, keepalive to %d, AS to %d, ID to %x\n", conn->hold_time, conn->keepalive_time, p->remote_as, p->remote_id);
550

    
551
  bgp_schedule_packet(conn, PKT_KEEPALIVE);
552
  bgp_start_timer(conn->hold_timer, conn->hold_time);
553
  conn->state = BS_OPENCONFIRM;
554
}
555

    
556
/*
 * Decode one prefix in wire format (length-in-bits byte followed by the
 * minimum number of address bytes) from the cursor pp, with ll bytes left.
 * Both pp and ll are advanced.  The result is stored in the caller's
 * variables `prefix' and `pxlen'.  On failure the caller's `err' is set
 * (10 = prefix longer than an address, 1 = data truncated) and control
 * jumps to the caller's `bad' label.
 */
#define DECODE_PREFIX(pp, ll) do {                              \
  int px_bits = *pp++;                                          \
  int px_bytes = (px_bits + 7) / 8;                             \
  ll--;                                                         \
  if (px_bits > BITS_PER_IP_ADDRESS) { err=10; goto bad; }      \
  if (ll < px_bytes) { err=1; goto bad; }                       \
  memcpy(&prefix, pp, px_bytes);                                \
  pp += px_bytes;                                               \
  ll -= px_bytes;                                               \
  ipa_ntoh(prefix);                                             \
  prefix = ipa_and(prefix, ipa_mkmask(px_bits));                \
  pxlen = px_bits;                                              \
} while (0)
570

    
571
static inline int
572
bgp_get_nexthop(struct bgp_proto *bgp, rta *a)
573
{
574
  neighbor *neigh;
575
  ip_addr nexthop;
576
  struct eattr *nh = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP));
577
  ASSERT(nh);
578
  nexthop = *(ip_addr *) nh->u.ptr->data;
579
  neigh = neigh_find(&bgp->p, &nexthop, 0);
580
  if (neigh)
581
    {
582
      if (neigh->scope == SCOPE_HOST)
583
        {
584
          DBG("BGP: Loop!\n");
585
          return 0;
586
        }
587
    }
588
  else
589
    neigh = bgp->neigh;
590
  a->gw = neigh->addr;
591
  a->iface = neigh->iface;
592
  return 1;
593
}
594

    
595
#ifndef IPV6                /* IPv4 version */
596

    
597
static void
598
bgp_do_rx_update(struct bgp_conn *conn,
599
                 byte *withdrawn, int withdrawn_len,
600
                 byte *nlri, int nlri_len,
601
                 byte *attrs, int attr_len)
602
{
603
  struct bgp_proto *p = conn->bgp;
604
  rta *a0;
605
  rta *a = NULL;
606
  ip_addr prefix;
607
  net *n;
608
  int err = 0, pxlen;
609

    
610
  /* Withdraw routes */
611
  while (withdrawn_len)
612
    {
613
      DECODE_PREFIX(withdrawn, withdrawn_len);
614
      DBG("Withdraw %I/%d\n", prefix, pxlen);
615
      if (n = net_find(p->p.table, prefix, pxlen))
616
        rte_update(p->p.table, n, &p->p, NULL);
617
    }
618

    
619
  if (!attr_len && !nlri_len)                /* shortcut */
620
    return;
621

    
622
  a0 = bgp_decode_attrs(conn, attrs, attr_len, bgp_linpool, nlri_len);
623
  if (a0 && nlri_len && bgp_get_nexthop(p, a0))
624
    {
625
      a = rta_lookup(a0);
626
      while (nlri_len)
627
        {
628
          rte *e;
629
          DECODE_PREFIX(nlri, nlri_len);
630
          DBG("Add %I/%d\n", prefix, pxlen);
631
          e = rte_get_temp(rta_clone(a));
632
          n = net_get(p->p.table, prefix, pxlen);
633
          e->net = n;
634
          e->pflags = 0;
635
          rte_update(p->p.table, n, &p->p, e);
636
        }
637
    }
638
bad:
639
  if (a)
640
    rta_free(a);
641
  if (err)
642
    bgp_error(conn, 3, err, NULL, 0);
643
  return;
644
}
645

    
646
#else                        /* IPv6 version */
647

    
648
/*
 * Set up parsing of one multiprotocol NLRI attribute whose position and
 * size were stored in p->NAME_start / p->NAME_len.  Uses the caller's
 * variables start, x, len, len0, af and sub and its `bad' label.
 * The macro ends in an `if', so the statement following the invocation is
 * executed only when the attribute is present and carries AF IPv6.
 */
#define DO_NLRI(name)                                           \
  start = x = p->name##_start;                                  \
  len = len0 = p->name##_len;                                   \
  af = 0;                                                       \
  if (len)                                                      \
    {                                                           \
      if (len < 3) goto bad;                                    \
      af = get_u16(x);                                          \
      sub = x[2];                                               \
      x += 3;                                                   \
      len -= 3;                                                 \
      DBG("\tNLRI AF=%d sub=%d len=%d\n", af, sub, len);        \
    }                                                           \
  if (af == BGP_AF_IPV6)
663

    
664
static void
665
bgp_do_rx_update(struct bgp_conn *conn,
666
                 byte *withdrawn, int withdrawn_len,
667
                 byte *nlri, int nlri_len,
668
                 byte *attrs, int attr_len)
669
{
670
  struct bgp_proto *p = conn->bgp;
671
  byte *start, *x;
672
  int len, len0;
673
  unsigned af, sub;
674
  rta *a0;
675
  rta *a = NULL;
676
  ip_addr prefix;
677
  net *n;
678
  rte e;
679
  int err = 0, pxlen;
680

    
681
  p->mp_reach_len = 0;
682
  p->mp_unreach_len = 0;
683
  a0 = bgp_decode_attrs(conn, attrs, attr_len, bgp_linpool, 0);
684
  if (!a0)
685
    return;
686

    
687
  DO_NLRI(mp_unreach)
688
    {
689
      while (len)
690
        {
691
          DECODE_PREFIX(x, len);
692
          DBG("Withdraw %I/%d\n", prefix, pxlen);
693
          if (n = net_find(p->p.table, prefix, pxlen))
694
            rte_update(p->p.table, n, &p->p, NULL);
695
        }
696
    }
697

    
698
  DO_NLRI(mp_reach)
699
    {
700
      int i;
701

    
702
      /* Create fake NEXT_HOP attribute */
703
      if (len < 1 || (*x != 16 && *x != 32) || len < *x + 2)
704
        goto bad;
705
      bgp_attach_attr_ip(&a0->eattrs, bgp_linpool, BA_NEXT_HOP, x[1]);
706
      len -= *x + 2;
707
      x += *x + 1;
708

    
709
      /* Ignore SNPA info */
710
      i = *x++;
711
      while (i--)
712
        {
713
          if (len < 1 || len < 1 + *x)
714
            goto bad;
715
          len -= *x + 1;
716
          x += *x + 1;
717
        }
718

    
719
      if (bgp_get_nexthop(p, a0))
720
        {
721
          a = rta_lookup(a0);
722
          while (len)
723
            {
724
              rte *e;
725
              DECODE_PREFIX(x, len);
726
              DBG("Add %I/%d\n", prefix, pxlen);
727
              e = rte_get_temp(rta_clone(a));
728
              n = net_get(p->p.table, prefix, pxlen);
729
              e->net = n;
730
              e->pflags = 0;
731
              rte_update(p->p.table, n, &p->p, e);
732
            }
733
          rta_free(a);
734
        }
735
    }
736

    
737
  return;
738

    
739
bad:
740
  bgp_error(conn, 3, 9, start, len0);
741
  if (a)
742
    rta_free(a);
743
  return;
744
}
745

    
746
#endif
747

    
748
static void
749
bgp_rx_update(struct bgp_conn *conn, byte *pkt, int len)
750
{
751
  struct bgp_proto *p = conn->bgp;
752
  byte *withdrawn, *attrs, *nlri;
753
  int withdrawn_len, attr_len, nlri_len;
754

    
755
  BGP_TRACE(D_PACKETS, "Got UPDATE");
756
  if (conn->state != BS_ESTABLISHED)
757
    { bgp_error(conn, 5, 0, NULL, 0); return; }
758
  bgp_start_timer(conn->hold_timer, conn->hold_time);
759

    
760
  /* Find parts of the packet and check sizes */
761
  if (len < 23)
762
    {
763
      bgp_error(conn, 1, 2, pkt+16, 2);
764
      return;
765
    }
766
  withdrawn = pkt + 21;
767
  withdrawn_len = get_u16(pkt + 19);
768
  if (withdrawn_len + 23 > len)
769
    goto malformed;
770
  attrs = withdrawn + withdrawn_len + 2;
771
  attr_len = get_u16(attrs - 2);
772
  if (withdrawn_len + attr_len + 23 > len)
773
    goto malformed;
774
  nlri = attrs + attr_len;
775
  nlri_len = len - withdrawn_len - attr_len - 23;
776
  if (!attr_len && nlri_len)
777
    goto malformed;
778
  DBG("Sizes: withdrawn=%d, attrs=%d, NLRI=%d\n", withdrawn_len, attr_len, nlri_len);
779

    
780
  lp_flush(bgp_linpool);
781

    
782
  bgp_do_rx_update(conn, withdrawn, withdrawn_len, nlri, nlri_len, attrs, attr_len);
783
  return;
784

    
785
malformed:
786
  bgp_error(conn, 3, 1, NULL, 0);
787
}
788

    
789
static struct {
790
  byte major, minor;
791
  byte *msg;
792
} bgp_msg_table[] = {
793
  { 1, 0, "Invalid message header" },
794
  { 1, 1, "Connection not synchronized" },
795
  { 1, 2, "Bad message length" },
796
  { 1, 3, "Bad message type" },
797
  { 2, 0, "Invalid OPEN message" },
798
  { 2, 1, "Unsupported version number" },
799
  { 2, 2, "Bad peer AS" },
800
  { 2, 3, "Bad BGP identifier" },
801
  { 2, 4, "Unsupported optional parameter" },
802
  { 2, 5, "Authentication failure" },
803
  { 2, 6, "Unacceptable hold time" },
804
  { 2, 7, "Required capability missing" }, /* [RFC3392] */
805
  { 3, 0, "Invalid UPDATE message" },
806
  { 3, 1, "Malformed attribute list" },
807
  { 3, 2, "Unrecognized well-known attribute" },
808
  { 3, 3, "Missing mandatory attribute" },
809
  { 3, 4, "Invalid attribute flags" },
810
  { 3, 5, "Invalid attribute length" },
811
  { 3, 6, "Invalid ORIGIN attribute" },
812
  { 3, 7, "AS routing loop" },                /* Deprecated */
813
  { 3, 8, "Invalid NEXT_HOP attribute" },
814
  { 3, 9, "Optional attribute error" },
815
  { 3, 10, "Invalid network field" },
816
  { 3, 11, "Malformed AS_PATH" },
817
  { 4, 0, "Hold timer expired" },
818
  { 5, 0, "Finite state machine error" },
819
  { 6, 0, "Cease" }
820
};
821

    
822
void
823
bgp_log_error(struct bgp_proto *p, char *msg, unsigned code, unsigned subcode, byte *data, unsigned len)
824
{
825
  byte *name, namebuf[16];
826
  byte *t, argbuf[36];
827
  unsigned i;
828

    
829
  if (code == 6 && !subcode)                /* Don't report Cease messages */
830
    return;
831

    
832
  bsprintf(namebuf, "%d.%d", code, subcode);
833
  name = namebuf;
834
  for (i=0; i < ARRAY_SIZE(bgp_msg_table); i++)
835
    if (bgp_msg_table[i].major == code && bgp_msg_table[i].minor == subcode)
836
      {
837
        name = bgp_msg_table[i].msg;
838
        break;
839
      }
840
  t = argbuf;
841
  if (len)
842
    {
843
      *t++ = ':';
844
      *t++ = ' ';
845
      if (len > 16)
846
        len = 16;
847
      for (i=0; i<len; i++)
848
        t += bsprintf(t, "%02x", data[i]);
849
    }
850
  *t = 0;
851
  log(L_REMOTE "%s: %s: %s%s", p->p.name, msg, name, argbuf);
852
}
853

    
854
static void
855
bgp_rx_notification(struct bgp_conn *conn, byte *pkt, int len)
856
{
857
  if (len < 21)
858
    {
859
      bgp_error(conn, 1, 2, pkt+16, 2);
860
      return;
861
    }
862
  bgp_log_error(conn->bgp, "Received error notification", pkt[19], pkt[20], pkt+21, len-21);
863
  conn->error_flag = 1;
864
  if (conn->primary)
865
    proto_notify_state(&conn->bgp->p, PS_STOP);
866
  bgp_schedule_packet(conn, PKT_SCHEDULE_CLOSE);
867
}
868

    
869
static void
870
bgp_rx_keepalive(struct bgp_conn *conn)
871
{
872
  struct bgp_proto *p = conn->bgp;
873

    
874
  BGP_TRACE(D_PACKETS, "Got KEEPALIVE");
875
  bgp_start_timer(conn->hold_timer, conn->hold_time);
876
  switch (conn->state)
877
    {
878
    case BS_OPENCONFIRM:
879
      DBG("BGP: UP!!!\n");
880
      conn->state = BS_ESTABLISHED;
881
      bgp_attr_init(conn->bgp);
882
      proto_notify_state(&conn->bgp->p, PS_UP);
883
      break;
884
    case BS_ESTABLISHED:
885
      break;
886
    default:
887
      bgp_error(conn, 5, 0, NULL, 0);
888
    }
889
}
890

    
891
/**
892
 * bgp_rx_packet - handle a received packet
893
 * @conn: BGP connection
894
 * @pkt: start of the packet
895
 * @len: packet size
896
 *
897
 * bgp_rx_packet() takes a newly received packet and calls the corresponding
898
 * packet handler according to the packet type.
899
 */
900
static void
901
bgp_rx_packet(struct bgp_conn *conn, byte *pkt, unsigned len)
902
{
903
  DBG("BGP: Got packet %02x (%d bytes)\n", pkt[18], len);
904
  switch (pkt[18])
905
    {
906
    case PKT_OPEN:                return bgp_rx_open(conn, pkt, len);
907
    case PKT_UPDATE:                return bgp_rx_update(conn, pkt, len);
908
    case PKT_NOTIFICATION:      return bgp_rx_notification(conn, pkt, len);
909
    case PKT_KEEPALIVE:                return bgp_rx_keepalive(conn);
910
    default:                        bgp_error(conn, 1, 3, pkt+18, 1);
911
    }
912
}
913

    
914
/**
915
 * bgp_rx - handle received data
916
 * @sk: socket
917
 * @size: amount of data received
918
 *
919
 * bgp_rx() is called by the socket layer whenever new data arrive from
920
 * the underlying TCP connection. It assembles the data fragments to packets,
921
 * checks their headers and framing and passes complete packets to
922
 * bgp_rx_packet().
923
 */
924
int
925
bgp_rx(sock *sk, int size)
926
{
927
  struct bgp_conn *conn = sk->data;
928
  byte *pkt_start = sk->rbuf;
929
  byte *end = pkt_start + size;
930
  unsigned i, len;
931

    
932
  DBG("BGP: RX hook: Got %d bytes\n", size);
933
  while (end >= pkt_start + BGP_HEADER_LENGTH)
934
    {
935
      if (conn->error_flag)
936
        {
937
          /*
938
           *  We still need to remember the erroneous packet, so that
939
           *  we can generate error notifications properly.  To avoid
940
           *  subsequent reads rewriting the buffer, we just reset the
941
           *  rx_hook.
942
           */
943
          DBG("BGP: Error, dropping input\n");
944
          sk->rx_hook = NULL;
945
          return 0;
946
        }
947
      for(i=0; i<16; i++)
948
        if (pkt_start[i] != 0xff)
949
          {
950
            bgp_error(conn, 1, 1, NULL, 0);
951
            break;
952
          }
953
      len = get_u16(pkt_start+16);
954
      if (len < BGP_HEADER_LENGTH || len > BGP_MAX_PACKET_LENGTH)
955
        {
956
          bgp_error(conn, 1, 2, pkt_start+16, 2);
957
          break;
958
        }
959
      if (end < pkt_start + len)
960
        break;
961
      bgp_rx_packet(conn, pkt_start, len);
962
      pkt_start += len;
963
    }
964
  if (pkt_start != sk->rbuf)
965
    {
966
      memmove(sk->rbuf, pkt_start, end - pkt_start);
967
      sk->rpos = sk->rbuf + (end - pkt_start);
968
    }
969
  return 0;
970
}