Statistics
| Branch: | Revision:

iof-bird-daemon / proto / bgp / packets.c @ 4827b69f

History | View | Annotate | Download (26.6 KB)

1
/*
2
 *        BIRD -- BGP Packet Processing
3
 *
4
 *        (c) 2000 Martin Mares <mj@ucw.cz>
5
 *
6
 *        Can be freely distributed and used under the terms of the GNU GPL.
7
 */
8

    
9
#undef LOCAL_DEBUG
10

    
11
#include "nest/bird.h"
12
#include "nest/iface.h"
13
#include "nest/protocol.h"
14
#include "nest/route.h"
15
#include "nest/attrs.h"
16
#include "conf/conf.h"
17
#include "lib/unaligned.h"
18
#include "lib/socket.h"
19

    
20
#include "nest/cli.h"
21

    
22
#include "bgp.h"
23

    
24
/* Rate limiters handed to BGP_TRACE_RL for received and sent UPDATE traces */
static struct rate_limit rl_rcv_update;
static struct rate_limit rl_snd_update;
25

    
26
/*
 * Write the body of a NOTIFICATION message to @buf: the error code and
 * subcode stored in @conn followed by conn->notify_size bytes of
 * conn->notify_data. Returns a pointer just past the last byte written.
 */
static byte *
bgp_create_notification(struct bgp_conn *conn, byte *buf)
{
  /* Not referenced directly below; presumably BGP_TRACE expands to use `p' */
  struct bgp_proto *p = conn->bgp;
  byte *payload = buf + 2;

  BGP_TRACE(D_PACKETS, "Sending NOTIFICATION(code=%d.%d)", conn->notify_code, conn->notify_subcode);

  buf[0] = conn->notify_code;
  buf[1] = conn->notify_subcode;
  memcpy(payload, conn->notify_data, conn->notify_size);

  return payload + conn->notify_size;
}
37

    
38
#ifdef IPV6
39
static byte *
40
bgp_put_cap_ipv6(struct bgp_conn *conn UNUSED, byte *buf)
41
{
42
  *buf++ = 1;                /* Capability 1: Multiprotocol extensions */
43
  *buf++ = 4;                /* Capability data length */
44
  *buf++ = 0;                /* We support AF IPv6 */
45
  *buf++ = BGP_AF_IPV6;
46
  *buf++ = 0;                /* RFU */
47
  *buf++ = 1;                /* and SAFI 1 */
48
  return buf;
49
}
50

    
51
#else
52

    
53
static byte *
54
bgp_put_cap_ipv4(struct bgp_conn *conn UNUSED, byte *buf)
55
{
56
  *buf++ = 1;                /* Capability 1: Multiprotocol extensions */
57
  *buf++ = 4;                /* Capability data length */
58
  *buf++ = 0;                /* We support AF IPv4 */
59
  *buf++ = BGP_AF_IPV4;
60
  *buf++ = 0;                /* RFU */
61
  *buf++ = 1;                /* and SAFI 1 */
62
  return buf;
63
}
64
#endif
65

    
66
/*
 * Append capability 65 (support for 4-octet AS numbers) carrying our
 * local AS number at @buf and return the position following it.
 */
static byte *
bgp_put_cap_as4(struct bgp_conn *conn, byte *buf)
{
  buf[0] = 65;                /* Capability 65: Support for 4-octet AS number */
  buf[1] = 4;                 /* Capability data length */
  put_u32(buf + 2, conn->bgp->local_as);
  return buf + 6;
}
74

    
75
/*
 * Write the body of an OPEN message to @buf: version, 2-byte AS number
 * (AS_TRANS when the local AS is not below 0xFFFF), hold time and
 * router ID, followed by an optional Capabilities parameter.
 * Capabilities are left out entirely when the connection was started
 * in the BSS_CONNECT_NOCAP state. Returns a pointer past the last byte
 * written.
 */
static byte *
bgp_create_open(struct bgp_conn *conn, byte *buf)
{
  struct bgp_proto *p = conn->bgp;
  byte *caps_start, *caps_end;
  int caps_size;

  BGP_TRACE(D_PACKETS, "Sending OPEN(ver=%d,as=%d,hold=%d,id=%08x)",
            BGP_VERSION, p->local_as, p->cf->hold_time, p->local_id);

  /* Fixed part of the message */
  buf[0] = BGP_VERSION;
  put_u16(buf+1, (p->local_as < 0xFFFF) ? p->local_as : AS_TRANS);
  put_u16(buf+3, p->cf->hold_time);
  put_u32(buf+5, p->local_id);

  if (conn->start_state == BSS_CONNECT_NOCAP)
    {
      BGP_TRACE(D_PACKETS, "Skipping capabilities");
      buf[9] = 0;
      return buf + 10;
    }

  /*
   * Capabilities start at offset 12: byte 9 is the optional parameters
   * length and bytes 10-11 the Capabilities parameter header; all three
   * are filled in once the total size is known.
   */
  caps_start = buf + 12;
  caps_end = caps_start;

#ifdef IPV6
  caps_end = bgp_put_cap_ipv6(conn, caps_end);
#else
  if (p->cf->advertise_ipv4)
    caps_end = bgp_put_cap_ipv4(conn, caps_end);
#endif

  if (conn->want_as4_support)
    caps_end = bgp_put_cap_as4(conn, caps_end);

  caps_size = caps_end - caps_start;
  if (caps_size <= 0)
    {
      buf[9] = 0;                /* No optional parameters */
      return buf + 10;
    }

  buf[9]  = caps_size + 2;       /* Optional params len */
  buf[10] = 2;                   /* Option: Capability list */
  buf[11] = caps_size;           /* Option length */
  return caps_end;
}
125

    
126
static unsigned int
127
bgp_encode_prefixes(struct bgp_proto *p, byte *w, struct bgp_bucket *buck, unsigned int remains)
128
{
129
  byte *start = w;
130
  ip_addr a;
131
  int bytes;
132

    
133
  while (!EMPTY_LIST(buck->prefixes) && remains >= 5)
134
    {
135
      struct bgp_prefix *px = SKIP_BACK(struct bgp_prefix, bucket_node, HEAD(buck->prefixes));
136
      DBG("\tDequeued route %I/%d\n", px->n.prefix, px->n.pxlen);
137
      *w++ = px->n.pxlen;
138
      bytes = (px->n.pxlen + 7) / 8;
139
      a = px->n.prefix;
140
      ipa_hton(a);
141
      memcpy(w, &a, bytes);
142
      w += bytes;
143
      remains -= bytes + 1;
144
      rem_node(&px->bucket_node);
145
      fib_delete(&p->prefix_fib, px);
146
    }
147
  return w - start;
148
}
149

    
150
static void
151
bgp_flush_prefixes(struct bgp_proto *p, struct bgp_bucket *buck)
152
{
153
  while (!EMPTY_LIST(buck->prefixes))
154
    {
155
      struct bgp_prefix *px = SKIP_BACK(struct bgp_prefix, bucket_node, HEAD(buck->prefixes));
156
      log(L_ERR "%s: - route %I/%d skipped", p->p.name, px->n.prefix, px->n.pxlen);
157
      rem_node(&px->bucket_node);
158
      fib_delete(&p->prefix_fib, px);
159
    }
160
}
161

    
162
#ifndef IPV6                /* IPv4 version */
163

    
164
/*
 * Build the body of an UPDATE message (IPv4 version) at @buf.
 *
 * Written in order: 2-byte withdrawn routes length, withdrawn prefixes
 * taken from p->withdraw_bucket, 2-byte path attribute length, the
 * attributes of the first non-empty bucket in p->bucket_queue and as
 * many of that bucket's prefixes as fit. Empty buckets met on the way
 * are freed; buckets whose attributes cannot be encoded are logged,
 * flushed and freed.
 *
 * Returns a pointer past the last byte written, or NULL when neither
 * withdrawn nor reachable prefixes were encoded.
 */
static byte *
bgp_create_update(struct bgp_conn *conn, byte *buf)
{
  struct bgp_proto *p = conn->bgp;
  struct bgp_bucket *buck;
  int remains = BGP_MAX_PACKET_LENGTH - BGP_HEADER_LENGTH - 4;
  byte *w;
  int wd_size = 0;
  int r_size = 0;
  int a_size = 0;

  w = buf+2;
  if ((buck = p->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
    {
      DBG("Withdrawn routes:\n");
      wd_size = bgp_encode_prefixes(p, w, buck, remains);
      w += wd_size;
      remains -= wd_size;
    }
  put_u16(buf, wd_size);

  if (remains >= 3072)
    {
      while ((buck = (struct bgp_bucket *) HEAD(p->bucket_queue))->send_node.next)
        {
          if (EMPTY_LIST(buck->prefixes))
            {
              DBG("Deleting empty bucket %p\n", buck);
              rem_node(&buck->send_node);
              bgp_free_bucket(p, buck);
              continue;
            }

          DBG("Processing bucket %p\n", buck);
          a_size = bgp_encode_attrs(p, w+2, buck->eattrs, 2048);

          if (a_size < 0)
            {
              log(L_ERR "%s: Attribute list too long, skipping corresponding route group", p->p.name);
              bgp_flush_prefixes(p, buck);
              rem_node(&buck->send_node);
              bgp_free_bucket(p, buck);
              /*
               * Nothing was encoded for this bucket. Forget the error value
               * so that, if the queue runs out now, the empty attribute
               * length below is still written.
               */
              a_size = 0;
              continue;
            }

          put_u16(w, a_size);
          w += a_size + 2;
          r_size = bgp_encode_prefixes(p, w, buck, remains - a_size);
          w += r_size;
          break;
        }
    }
  if (!a_size)                                /* Attributes not already encoded */
    {
      put_u16(w, 0);
      w += 2;
    }
  if (wd_size || r_size)
    {
      BGP_TRACE_RL(&rl_snd_update, D_PACKETS, "Sending UPDATE");
      return w;
    }
  else
    return NULL;
}
229

    
230
#else                /* IPv6 version */
231

    
232
/*
 * Build the body of an UPDATE message (IPv6 version) at @buf.
 *
 * The withdrawn routes length at buf[0..1] is always zero; everything
 * is carried in path attributes starting at buf+4, whose total length
 * is stored at buf[2..3]. Withdrawn prefixes go into an MP_UNREACH_NLRI
 * attribute; the first non-empty bucket of p->bucket_queue contributes
 * its attributes plus an MP_REACH_NLRI attribute with next hop(s) and
 * prefixes.
 *
 * Returns a pointer past the last byte written, or NULL when no
 * attribute bytes were produced.
 */
static byte *
bgp_create_update(struct bgp_conn *conn, byte *buf)
{
  struct bgp_proto *p = conn->bgp;
  struct bgp_bucket *buck;
  int remains = BGP_MAX_PACKET_LENGTH - BGP_HEADER_LENGTH - 4;
  int len, has_ll;
  byte *w, *pos, *mp_start;
  ip_addr *hops, gw, gw_ll;
  ea_list *ea;
  eattr *nh;
  neighbor *nbr;

  put_u16(buf, 0);
  w = buf + 4;

  buck = p->withdraw_bucket;
  if (buck && !EMPTY_LIST(buck->prefixes))
    {
      DBG("Withdrawn routes:\n");
      pos = bgp_attach_attr_wa(&ea, bgp_linpool, BA_MP_UNREACH_NLRI, remains-8);

      /* AFI (2 bytes) and SAFI */
      pos[0] = 0;
      pos[1] = BGP_AF_IPV6;
      pos[2] = 1;
      pos += 3;

      ea->attrs[0].u.ptr->length = 3 + bgp_encode_prefixes(p, pos, buck, remains-11);
      len = bgp_encode_attrs(p, w, ea, remains);
      ASSERT(len >= 0);
      w += len;
      remains -= len;
    }

  if (remains >= 3072)
    {
      while ((buck = (struct bgp_bucket *) HEAD(p->bucket_queue))->send_node.next)
        {
          if (EMPTY_LIST(buck->prefixes))
            {
              DBG("Deleting empty bucket %p\n", buck);
              rem_node(&buck->send_node);
              bgp_free_bucket(p, buck);
              continue;
            }

          DBG("Processing bucket %p\n", buck);
          len = bgp_encode_attrs(p, w, buck->eattrs, 2048);

          if (len < 0)
            {
              log(L_ERR "%s: Attribute list too long, ignoring corresponding route group", p->p.name);
              bgp_flush_prefixes(p, buck);
              rem_node(&buck->send_node);
              bgp_free_bucket(p, buck);
              continue;
            }

          w += len;
          remains -= len;

          mp_start = pos = bgp_attach_attr_wa(&ea, bgp_linpool, BA_MP_REACH_NLRI, remains-8);

          /* AFI (2 bytes) and SAFI */
          *pos++ = 0;
          *pos++ = BGP_AF_IPV6;
          *pos++ = 1;

          nh = ea_find(buck->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP));
          ASSERT(nh);

          /* The NEXT_HOP attribute data holds two addresses: global and link-local */
          hops = (ip_addr *) nh->u.ptr->data;
          gw = hops[0];
          gw_ll = IPA_NONE;

          if (ipa_equal(gw, p->source_addr))
            gw_ll = p->local_link;
          else
            {
              /*
               * For a 'third party' next hop reachable on the same
               * interface as our peer, also send a link-local next hop.
               * We use the received one (second slot of the BA_NEXT_HOP
               * eattr). If none was received (e.g. a static route), no
               * link-local address is sent. This is contrary to RFC 2545,
               * but probably the only sane possibility.
               */
              nbr = neigh_find(&p->p, &gw, 0);
              if (nbr && nbr->iface == p->neigh->iface)
                gw_ll = hops[1];
            }

          /* Next hop length byte, then one or two 16-byte addresses */
          has_ll = ipa_nonzero(gw_ll);
          *pos++ = has_ll ? 32 : 16;

          ipa_hton(gw);
          memcpy(pos, &gw, 16);
          pos += 16;

          if (has_ll)
            {
              ipa_hton(gw_ll);
              memcpy(pos, &gw_ll, 16);
              pos += 16;
            }

          *pos++ = 0;                        /* No SNPA information */
          pos += bgp_encode_prefixes(p, pos, buck, remains - (8+3+32+1));
          ea->attrs[0].u.ptr->length = pos - mp_start;

          len = bgp_encode_attrs(p, w, ea, remains);
          ASSERT(len >= 0);
          w += len;
          break;
        }
    }

  len = w - (buf+4);
  put_u16(buf+2, len);
  lp_flush(bgp_linpool);

  if (!len)
    return NULL;

  BGP_TRACE_RL(&rl_snd_update, D_PACKETS, "Sending UPDATE");
  return w;
}
356

    
357
#endif
358

    
359
static void
360
bgp_create_header(byte *buf, unsigned int len, unsigned int type)
361
{
362
  memset(buf, 0xff, 16);                /* Marker */
363
  put_u16(buf+16, len);
364
  buf[18] = type;
365
}
366

    
367
/**
368
 * bgp_fire_tx - transmit packets
369
 * @conn: connection
370
 *
371
 * Whenever the transmit buffers of the underlying TCP connection
372
 * are free and we have any packets queued for sending, the socket functions
373
 * call bgp_fire_tx() which takes care of selecting the highest priority packet
374
 * queued (Notification > Keepalive > Open > Update), assembling its header
375
 * and body and sending it to the connection.
376
 */
377
static int
378
bgp_fire_tx(struct bgp_conn *conn)
379
{
380
  struct bgp_proto *p = conn->bgp;
381
  unsigned int s = conn->packets_to_send;
382
  sock *sk = conn->sk;
383
  byte *buf, *pkt, *end;
384
  int type;
385

    
386
  if (!sk)
387
    {
388
      conn->packets_to_send = 0;
389
      return 0;
390
    }
391
  buf = sk->tbuf;
392
  pkt = buf + BGP_HEADER_LENGTH;
393

    
394
  if (s & (1 << PKT_SCHEDULE_CLOSE))
395
    {
396
      /* We can finally close connection and enter idle state */
397
      bgp_conn_enter_idle_state(conn);
398
      return 0;
399
    }
400
  if (s & (1 << PKT_NOTIFICATION))
401
    {
402
      s = 1 << PKT_SCHEDULE_CLOSE;
403
      type = PKT_NOTIFICATION;
404
      end = bgp_create_notification(conn, pkt);
405
    }
406
  else if (s & (1 << PKT_KEEPALIVE))
407
    {
408
      s &= ~(1 << PKT_KEEPALIVE);
409
      type = PKT_KEEPALIVE;
410
      end = pkt;                        /* Keepalives carry no data */
411
      BGP_TRACE(D_PACKETS, "Sending KEEPALIVE");
412
      bgp_start_timer(conn->keepalive_timer, conn->keepalive_time);
413
    }
414
  else if (s & (1 << PKT_OPEN))
415
    {
416
      s &= ~(1 << PKT_OPEN);
417
      type = PKT_OPEN;
418
      end = bgp_create_open(conn, pkt);
419
    }
420
  else if (s & (1 << PKT_UPDATE))
421
    {
422
      end = bgp_create_update(conn, pkt);
423
      type = PKT_UPDATE;
424
      if (!end)
425
        {
426
          conn->packets_to_send = 0;
427
          return 0;
428
        }
429
    }
430
  else
431
    return 0;
432
  conn->packets_to_send = s;
433
  bgp_create_header(buf, end - buf, type);
434
  return sk_send(sk, end - buf);
435
}
436

    
437
/**
438
 * bgp_schedule_packet - schedule a packet for transmission
439
 * @conn: connection
440
 * @type: packet type
441
 *
442
 * Schedule a packet of type @type to be sent as soon as possible.
443
 */
444
void
445
bgp_schedule_packet(struct bgp_conn *conn, int type)
446
{
447
  DBG("BGP: Scheduling packet type %d\n", type);
448
  conn->packets_to_send |= 1 << type;
449
  if (conn->sk && conn->sk->tpos == conn->sk->tbuf)
450
    ev_schedule(conn->tx_ev);
451
}
452

    
453
/*
 * Hook taking the connection as an untyped pointer: keep calling
 * bgp_fire_tx() until it returns zero.
 */
void
bgp_kick_tx(void *vconn)
{
  struct bgp_conn *conn = vconn;

  DBG("BGP: kicking TX\n");
  for (;;)
    {
      if (!bgp_fire_tx(conn))
        break;
    }
}
462

    
463
void
464
bgp_tx(sock *sk)
465
{
466
  struct bgp_conn *conn = sk->data;
467

    
468
  DBG("BGP: TX hook\n");
469
  while (bgp_fire_tx(conn))
470
    ;
471
}
472

    
473
/* Capability negotiation as per RFC 2842 */
474

    
475
void
476
bgp_parse_capabilities(struct bgp_conn *conn, byte *opt, int len)
477
{
478
  struct bgp_proto *p = conn->bgp;
479
  int cl;
480
  u32 as;
481

    
482
  while (len > 0)
483
    {
484
      if (len < 2 || len < 2 + opt[1])
485
        goto err;
486
      
487
      cl = opt[1];
488

    
489
      switch (opt[0])
490
        {
491
        case 65:
492
          if (cl != 4)
493
            goto err;
494
          conn->peer_as4_support = 1;
495
          if (conn->want_as4_support)
496
            conn->advertised_as = get_u32(opt + 2);
497
          break;
498

    
499
          /* We can safely ignore all other capabilities */
500
        }
501
      len -= 2 + cl;
502
      opt += 2 + cl;
503
    }
504
  return;
505

    
506
    err:
507
  bgp_error(conn, 2, 0, NULL, 0);
508
  return;
509
}
510

    
511
static int
512
bgp_parse_options(struct bgp_conn *conn, byte *opt, int len)
513
{
514
  struct bgp_proto *p = conn->bgp;
515
  int ol;
516

    
517
  while (len > 0)
518
    {
519
      if (len < 2 || len < 2 + opt[1])
520
        { bgp_error(conn, 2, 0, NULL, 0); return 0; }
521
#ifdef LOCAL_DEBUG
522
      {
523
        int i;
524
        DBG("\tOption %02x:", opt[0]);
525
        for(i=0; i<opt[1]; i++)
526
          DBG(" %02x", opt[2+i]);
527
        DBG("\n");
528
      }
529
#endif
530

    
531
      ol = opt[1];
532
      switch (opt[0])
533
        {
534
        case 2:
535
          if (conn->start_state == BSS_CONNECT_NOCAP)
536
            BGP_TRACE(D_PACKETS, "Ignoring received capabilities");
537
          else
538
            bgp_parse_capabilities(conn, opt + 2, ol);
539
          break;
540

    
541
        default:
542
          /*
543
           *  BGP specs don't tell us to send which option
544
           *  we didn't recognize, but it's common practice
545
           *  to do so. Also, capability negotiation with
546
           *  Cisco routers doesn't work without that.
547
           */
548
          bgp_error(conn, 2, 4, opt, ol);
549
          return 0;
550
        }
551
      len -= 2 + ol;
552
      opt += 2 + ol;
553
    }
554
  return 0;
555
}
556

    
557
static void
558
bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len)
559
{
560
  struct bgp_conn *other;
561
  struct bgp_proto *p = conn->bgp;
562
  struct bgp_config *cf = p->cf;
563
  unsigned hold;
564
  u16 base_as;
565
  u32 id;
566

    
567
  /* Check state */
568
  if (conn->state != BS_OPENSENT)
569
    { bgp_error(conn, 5, 0, NULL, 0); return; }
570

    
571
  /* Check message contents */
572
  if (len < 29 || len != 29 + pkt[28])
573
    { bgp_error(conn, 1, 2, pkt+16, 2); return; }
574
  if (pkt[19] != BGP_VERSION)
575
    { bgp_error(conn, 2, 1, pkt+19, 1); return; } /* RFC 1771 says 16 bits, draft-09 tells to use 8 */
576
  conn->advertised_as = base_as = get_u16(pkt+20);
577
  hold = get_u16(pkt+22);
578
  id = get_u32(pkt+24);
579
  BGP_TRACE(D_PACKETS, "Got OPEN(as=%d,hold=%d,id=%08x)", conn->advertised_as, hold, id);
580

    
581
  if (bgp_parse_options(conn, pkt+29, pkt[28]))
582
    return;
583

    
584
  if (hold > 0 && hold < 3)
585
    { bgp_error(conn, 2, 6, pkt+22, 2); return; }
586

    
587
  if (!id || id == 0xffffffff || id == p->local_id)
588
    { bgp_error(conn, 2, 3, pkt+24, -4); return; }
589

    
590
  if ((conn->advertised_as != base_as) && (base_as != AS_TRANS))
591
    log(L_WARN "%s: Peer advertised inconsistent AS numbers", p->p.name);
592

    
593
  if (conn->advertised_as != p->remote_as)
594
    { bgp_error(conn, 2, 2, (byte *) &(conn->advertised_as), -4); return; }
595

    
596
  /* Check the other connection */
597
  other = (conn == &p->outgoing_conn) ? &p->incoming_conn : &p->outgoing_conn;
598
  switch (other->state)
599
    {
600
    case BS_IDLE:
601
    case BS_CONNECT:
602
    case BS_ACTIVE:
603
    case BS_OPENSENT:
604
    case BS_CLOSE:
605
      break;
606
    case BS_OPENCONFIRM:
607
      if ((p->local_id < id) == (conn == &p->incoming_conn))
608
        {
609
          /* Should close the other connection */
610
          BGP_TRACE(D_EVENTS, "Connection collision, giving up the other connection");
611
          bgp_error(other, 6, 0, NULL, 0);
612
          break;
613
        }
614
      /* Fall thru */
615
    case BS_ESTABLISHED:
616
      /* Should close this connection */
617
      BGP_TRACE(D_EVENTS, "Connection collision, giving up this connection");
618
      bgp_error(conn, 6, 0, NULL, 0);
619
      return;
620
    default:
621
      bug("bgp_rx_open: Unknown state");
622
    }
623

    
624
  /* Update our local variables */
625
  conn->hold_time = MIN(hold, p->cf->hold_time);
626
  conn->keepalive_time = p->cf->keepalive_time ? : conn->hold_time / 3;
627
  p->remote_id = id;
628
  p->as4_session = conn->want_as4_support && conn->peer_as4_support;
629

    
630
  DBG("BGP: Hold timer set to %d, keepalive to %d, AS to %d, ID to %x, AS4 session to %d\n", conn->hold_time, conn->keepalive_time, p->remote_as, p->remote_id, p->as4_session);
631

    
632
  bgp_schedule_packet(conn, PKT_KEEPALIVE);
633
  bgp_start_timer(conn->hold_timer, conn->hold_time);
634
  conn->state = BS_OPENCONFIRM;
635
}
636

    
637
/*
 * DECODE_PREFIX(pp, ll) - decode one prefix from a byte stream
 *
 * @pp is a byte pointer and @ll the number of bytes left; both are
 * advanced past the prefix. The expansion relies on the caller having
 * variables `prefix' (ip_addr), `pxlen' and `err' in scope and a label
 * `bad' to jump to: err is set to 10 when the prefix length exceeds
 * BITS_PER_IP_ADDRESS and to 1 when the prefix bytes run past the end.
 * On success `prefix' holds the decoded, masked prefix and `pxlen' its
 * length in bits.
 */
#define DECODE_PREFIX(pp, ll) do {                                \
  int px_bits = *(pp)++;                                          \
  int px_bytes = (px_bits + 7) / 8;                               \
  (ll)--;                                                         \
  if (px_bits > BITS_PER_IP_ADDRESS) { err=10; goto bad; }        \
  if ((ll) < px_bytes) { err=1; goto bad; }                       \
  memcpy(&prefix, (pp), px_bytes);                                \
  (pp) += px_bytes;                                               \
  (ll) -= px_bytes;                                               \
  ipa_ntoh(prefix);                                               \
  prefix = ipa_and(prefix, ipa_mkmask(px_bits));                  \
  pxlen = px_bits;                                                \
} while (0)
651

    
652
static inline int
653
bgp_get_nexthop(struct bgp_proto *bgp, rta *a)
654
{
655
  neighbor *neigh;
656
  ip_addr nexthop;
657
  struct eattr *nh = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP));
658
  ASSERT(nh);
659
  nexthop = *(ip_addr *) nh->u.ptr->data;
660
  neigh = neigh_find(&bgp->p, &nexthop, 0);
661
  if (neigh)
662
    {
663
      if (neigh->scope == SCOPE_HOST)
664
        {
665
          DBG("BGP: Loop!\n");
666
          return 0;
667
        }
668
    }
669
  else
670
    neigh = bgp->neigh;
671
  a->gw = neigh->addr;
672
  a->iface = neigh->iface;
673
  return 1;
674
}
675

    
676
#ifndef IPV6                /* IPv4 version */
677

    
678
static void
679
bgp_do_rx_update(struct bgp_conn *conn,
680
                 byte *withdrawn, int withdrawn_len,
681
                 byte *nlri, int nlri_len,
682
                 byte *attrs, int attr_len)
683
{
684
  struct bgp_proto *p = conn->bgp;
685
  rta *a0;
686
  rta *a = NULL;
687
  ip_addr prefix;
688
  net *n;
689
  int err = 0, pxlen;
690

    
691
  /* Withdraw routes */
692
  while (withdrawn_len)
693
    {
694
      DECODE_PREFIX(withdrawn, withdrawn_len);
695
      DBG("Withdraw %I/%d\n", prefix, pxlen);
696
      if (n = net_find(p->p.table, prefix, pxlen))
697
        rte_update(p->p.table, n, &p->p, NULL);
698
    }
699

    
700
  if (!attr_len && !nlri_len)                /* shortcut */
701
    return;
702

    
703
  a0 = bgp_decode_attrs(conn, attrs, attr_len, bgp_linpool, nlri_len);
704
  if (a0 && nlri_len && bgp_get_nexthop(p, a0))
705
    {
706
      a = rta_lookup(a0);
707
      while (nlri_len)
708
        {
709
          rte *e;
710
          DECODE_PREFIX(nlri, nlri_len);
711
          DBG("Add %I/%d\n", prefix, pxlen);
712
          e = rte_get_temp(rta_clone(a));
713
          n = net_get(p->p.table, prefix, pxlen);
714
          e->net = n;
715
          e->pflags = 0;
716
          rte_update(p->p.table, n, &p->p, e);
717
        }
718
    }
719
bad:
720
  if (a)
721
    rta_free(a);
722
  if (err)
723
    bgp_error(conn, 3, err, NULL, 0);
724
  return;
725
}
726

    
727
#else                        /* IPv6 version */
728

    
729
/*
 * DO_NLRI(name) - open processing of one multiprotocol NLRI attribute
 *
 * Loads p->name_start / p->name_len into the caller's `start', `x',
 * `len' and `len0'. If the attribute is present, its 3-byte AFI/SAFI
 * prefix is read into `af' and `sub' and skipped (jumping to `bad' when
 * fewer than 3 bytes are there); otherwise `af' is set to 0. The
 * expansion ends in an `if' on the IPv6 address family, so it must be
 * followed directly by the statement handling the NLRI data.
 */
#define DO_NLRI(name)                                        \
  x = p->name##_start;                                       \
  start = x;                                                 \
  len0 = p->name##_len;                                      \
  len = len0;                                                \
  if (!len)                                                  \
    af = 0;                                                  \
  else                                                       \
    {                                                        \
      if (len < 3) goto bad;                                 \
      af = get_u16(x);                                       \
      sub = x[2];                                            \
      x += 3;                                                \
      len -= 3;                                              \
      DBG("\tNLRI AF=%d sub=%d len=%d\n", af, sub, len);\
    }                                                        \
  if (af == BGP_AF_IPV6)
744

    
745
static void
746
bgp_do_rx_update(struct bgp_conn *conn,
747
                 byte *withdrawn, int withdrawn_len,
748
                 byte *nlri, int nlri_len,
749
                 byte *attrs, int attr_len)
750
{
751
  struct bgp_proto *p = conn->bgp;
752
  byte *start, *x;
753
  int len, len0;
754
  unsigned af, sub;
755
  rta *a0;
756
  rta *a = NULL;
757
  ip_addr prefix;
758
  net *n;
759
  rte e;
760
  int err = 0, pxlen;
761

    
762
  p->mp_reach_len = 0;
763
  p->mp_unreach_len = 0;
764
  a0 = bgp_decode_attrs(conn, attrs, attr_len, bgp_linpool, 0);
765
  if (!a0)
766
    return;
767

    
768
  DO_NLRI(mp_unreach)
769
    {
770
      while (len)
771
        {
772
          DECODE_PREFIX(x, len);
773
          DBG("Withdraw %I/%d\n", prefix, pxlen);
774
          if (n = net_find(p->p.table, prefix, pxlen))
775
            rte_update(p->p.table, n, &p->p, NULL);
776
        }
777
    }
778

    
779
  DO_NLRI(mp_reach)
780
    {
781
      int i;
782

    
783
      /* Create fake NEXT_HOP attribute */
784
      if (len < 1 || (*x != 16 && *x != 32) || len < *x + 2)
785
        goto bad;
786

    
787
      ip_addr *nh = (ip_addr *) bgp_attach_attr_wa(&a0->eattrs, bgp_linpool, BA_NEXT_HOP, NEXT_HOP_LENGTH);
788
      memcpy(nh, x+1, 16);
789
      ipa_ntoh(nh[0]);
790

    
791
      /* We store received link local address in the other part of BA_NEXT_HOP eattr. */
792
      if (*x == 32)
793
        {
794
          memcpy(nh+1, x+17, 16);
795
          ipa_ntoh(nh[1]);
796
        }
797
      else
798
        nh[1] = IPA_NONE;
799

    
800
      /* Also ignore one reserved byte */
801
      len -= *x + 2;
802
      x += *x + 2;
803

    
804
      if (bgp_get_nexthop(p, a0))
805
        {
806
          a = rta_lookup(a0);
807
          while (len)
808
            {
809
              rte *e;
810
              DECODE_PREFIX(x, len);
811
              DBG("Add %I/%d\n", prefix, pxlen);
812
              e = rte_get_temp(rta_clone(a));
813
              n = net_get(p->p.table, prefix, pxlen);
814
              e->net = n;
815
              e->pflags = 0;
816
              rte_update(p->p.table, n, &p->p, e);
817
            }
818
          rta_free(a);
819
        }
820
    }
821

    
822
  return;
823

    
824
bad:
825
  bgp_error(conn, 3, 9, start, len0);
826
  if (a)
827
    rta_free(a);
828
  return;
829
}
830

    
831
#endif
832

    
833
static void
834
bgp_rx_update(struct bgp_conn *conn, byte *pkt, int len)
835
{
836
  struct bgp_proto *p = conn->bgp;
837
  byte *withdrawn, *attrs, *nlri;
838
  int withdrawn_len, attr_len, nlri_len;
839

    
840
  BGP_TRACE_RL(&rl_rcv_update, D_PACKETS, "Got UPDATE");
841

    
842
  if (conn->state != BS_ESTABLISHED)
843
    { bgp_error(conn, 5, 0, NULL, 0); return; }
844
  bgp_start_timer(conn->hold_timer, conn->hold_time);
845

    
846
  /* Find parts of the packet and check sizes */
847
  if (len < 23)
848
    {
849
      bgp_error(conn, 1, 2, pkt+16, 2);
850
      return;
851
    }
852
  withdrawn = pkt + 21;
853
  withdrawn_len = get_u16(pkt + 19);
854
  if (withdrawn_len + 23 > len)
855
    goto malformed;
856
  attrs = withdrawn + withdrawn_len + 2;
857
  attr_len = get_u16(attrs - 2);
858
  if (withdrawn_len + attr_len + 23 > len)
859
    goto malformed;
860
  nlri = attrs + attr_len;
861
  nlri_len = len - withdrawn_len - attr_len - 23;
862
  if (!attr_len && nlri_len)
863
    goto malformed;
864
  DBG("Sizes: withdrawn=%d, attrs=%d, NLRI=%d\n", withdrawn_len, attr_len, nlri_len);
865

    
866
  lp_flush(bgp_linpool);
867

    
868
  bgp_do_rx_update(conn, withdrawn, withdrawn_len, nlri, nlri_len, attrs, attr_len);
869
  return;
870

    
871
malformed:
872
  bgp_error(conn, 3, 1, NULL, 0);
873
}
874

    
875
static struct {
876
  byte major, minor;
877
  byte *msg;
878
} bgp_msg_table[] = {
879
  { 1, 0, "Invalid message header" },
880
  { 1, 1, "Connection not synchronized" },
881
  { 1, 2, "Bad message length" },
882
  { 1, 3, "Bad message type" },
883
  { 2, 0, "Invalid OPEN message" },
884
  { 2, 1, "Unsupported version number" },
885
  { 2, 2, "Bad peer AS" },
886
  { 2, 3, "Bad BGP identifier" },
887
  { 2, 4, "Unsupported optional parameter" },
888
  { 2, 5, "Authentication failure" },
889
  { 2, 6, "Unacceptable hold time" },
890
  { 2, 7, "Required capability missing" }, /* [RFC3392] */
891
  { 3, 0, "Invalid UPDATE message" },
892
  { 3, 1, "Malformed attribute list" },
893
  { 3, 2, "Unrecognized well-known attribute" },
894
  { 3, 3, "Missing mandatory attribute" },
895
  { 3, 4, "Invalid attribute flags" },
896
  { 3, 5, "Invalid attribute length" },
897
  { 3, 6, "Invalid ORIGIN attribute" },
898
  { 3, 7, "AS routing loop" },                /* Deprecated */
899
  { 3, 8, "Invalid NEXT_HOP attribute" },
900
  { 3, 9, "Optional attribute error" },
901
  { 3, 10, "Invalid network field" },
902
  { 3, 11, "Malformed AS_PATH" },
903
  { 4, 0, "Hold timer expired" },
904
  { 5, 0, "Finite state machine error" },
905
  { 6, 0, "Cease" }, /* Subcodes are according to [RFC4486] */
906
  { 6, 1, "Maximum number of prefixes reached" },
907
  { 6, 2, "Administrative shutdown" },
908
  { 6, 3, "Peer de-configured" },
909
  { 6, 4, "Administrative reset" },
910
  { 6, 5, "Connection rejected" },
911
  { 6, 6, "Other configuration change" },
912
  { 6, 7, "Connection collision resolution" },
913
  { 6, 8, "Out of Resources" }
914
};
915

    
916
/**
917
 * bgp_error_dsc - return BGP error description
918
 * @buff: temporary buffer
919
 * @code: BGP error code
920
 * @subcode: BGP error subcode
921
 *
922
 * bgp_error_dsc() returns error description for BGP errors
923
 * which might be static string or given temporary buffer.
924
 */
925
const byte *
926
bgp_error_dsc(byte *buff, unsigned code, unsigned subcode)
927
{
928
  unsigned i;
929
  for (i=0; i < ARRAY_SIZE(bgp_msg_table); i++)
930
    if (bgp_msg_table[i].major == code && bgp_msg_table[i].minor == subcode)
931
      {
932
        return bgp_msg_table[i].msg;
933
      }
934

    
935
  bsprintf(buff, "Unknown error %d.%d", code, subcode);
936
  return buff;
937
}
938

    
939
void
940
bgp_log_error(struct bgp_proto *p, char *msg, unsigned code, unsigned subcode, byte *data, unsigned len)
941
{
942
  const byte *name;
943
  byte namebuf[32];
944
  byte *t, argbuf[36];
945
  unsigned i;
946

    
947
  if (code == 6 && !subcode)                /* Don't report Cease messages */
948
    return;
949

    
950
  name = bgp_error_dsc(namebuf, code, subcode);
951
  t = argbuf;
952
  if (len)
953
    {
954
      *t++ = ':';
955
      *t++ = ' ';
956
      if (len > 16)
957
        len = 16;
958
      for (i=0; i<len; i++)
959
        t += bsprintf(t, "%02x", data[i]);
960
    }
961
  *t = 0;
962
  log(L_REMOTE "%s: %s: %s%s", p->p.name, msg, name, argbuf);
963
}
964

    
965
static void
966
bgp_rx_notification(struct bgp_conn *conn, byte *pkt, int len)
967
{
968
  struct bgp_proto *p = conn->bgp;
969
  if (len < 21)
970
    {
971
      bgp_error(conn, 1, 2, pkt+16, 2);
972
      return;
973
    }
974

    
975
  unsigned code = pkt[19];
976
  unsigned subcode = pkt[20];
977
  int delay = 1;
978

    
979
  bgp_log_error(conn->bgp, "Received error notification", code, subcode, pkt+21, len-21);
980
  bgp_store_error(conn->bgp, conn, BE_BGP_RX, (code << 16) | subcode);
981

    
982
#ifndef IPV6
983
  if ((code == 2) && ((subcode == 4) || (subcode == 7))
984
      /* Error related to capability:
985
       * 4 - Peer does not support capabilities at all.
986
       * 7 - Peer request some capability. Strange unless it is IPv6 only peer.
987
       */
988
      && (p->cf->capabilities == 2)
989
      /* Capabilities are not explicitly enabled or disabled, therefore heuristic is used */
990
      && (conn->start_state == BSS_CONNECT)
991
      /* Failed connection attempt have used capabilities */
992
      && (p->cf->remote_as <= 0xFFFF))
993
      /* Not possible with disabled capabilities */
994
    {
995
      /* We try connect without capabilities */
996
      log(L_WARN "%s: Capability related error received, retry with capabilities disabled", p->p.name);
997
      conn->bgp->start_state = BSS_CONNECT_NOCAP;
998
      delay = 0;
999
    }
1000
#endif
1001

    
1002
  if (delay) bgp_update_startup_delay(conn->bgp, conn, code, subcode);
1003
  bgp_conn_enter_close_state(conn);
1004
  bgp_schedule_packet(conn, PKT_SCHEDULE_CLOSE);
1005
}
1006

    
1007
static void
1008
bgp_rx_keepalive(struct bgp_conn *conn)
1009
{
1010
  struct bgp_proto *p = conn->bgp;
1011

    
1012
  BGP_TRACE(D_PACKETS, "Got KEEPALIVE");
1013
  bgp_start_timer(conn->hold_timer, conn->hold_time);
1014
  switch (conn->state)
1015
    {
1016
    case BS_OPENCONFIRM:
1017
      bgp_conn_enter_established_state(conn);
1018
      break;
1019
    case BS_ESTABLISHED:
1020
      break;
1021
    default:
1022
      bgp_error(conn, 5, 0, NULL, 0);
1023
    }
1024
}
1025

    
1026
/**
1027
 * bgp_rx_packet - handle a received packet
1028
 * @conn: BGP connection
1029
 * @pkt: start of the packet
1030
 * @len: packet size
1031
 *
1032
 * bgp_rx_packet() takes a newly received packet and calls the corresponding
1033
 * packet handler according to the packet type.
1034
 */
1035
static void
1036
bgp_rx_packet(struct bgp_conn *conn, byte *pkt, unsigned len)
1037
{
1038
  DBG("BGP: Got packet %02x (%d bytes)\n", pkt[18], len);
1039
  switch (pkt[18])
1040
    {
1041
    case PKT_OPEN:                return bgp_rx_open(conn, pkt, len);
1042
    case PKT_UPDATE:                return bgp_rx_update(conn, pkt, len);
1043
    case PKT_NOTIFICATION:      return bgp_rx_notification(conn, pkt, len);
1044
    case PKT_KEEPALIVE:                return bgp_rx_keepalive(conn);
1045
    default:                        bgp_error(conn, 1, 3, pkt+18, 1);
1046
    }
1047
}
1048

    
1049
/**
1050
 * bgp_rx - handle received data
1051
 * @sk: socket
1052
 * @size: amount of data received
1053
 *
1054
 * bgp_rx() is called by the socket layer whenever new data arrive from
1055
 * the underlying TCP connection. It assembles the data fragments to packets,
1056
 * checks their headers and framing and passes complete packets to
1057
 * bgp_rx_packet().
1058
 */
1059
int
1060
bgp_rx(sock *sk, int size)
1061
{
1062
  struct bgp_conn *conn = sk->data;
1063
  byte *pkt_start = sk->rbuf;
1064
  byte *end = pkt_start + size;
1065
  unsigned i, len;
1066

    
1067
  DBG("BGP: RX hook: Got %d bytes\n", size);
1068
  while (end >= pkt_start + BGP_HEADER_LENGTH)
1069
    {
1070
      if ((conn->state == BS_CLOSE) || (conn->sk != sk))
1071
        return 0;
1072
      for(i=0; i<16; i++)
1073
        if (pkt_start[i] != 0xff)
1074
          {
1075
            bgp_error(conn, 1, 1, NULL, 0);
1076
            break;
1077
          }
1078
      len = get_u16(pkt_start+16);
1079
      if (len < BGP_HEADER_LENGTH || len > BGP_MAX_PACKET_LENGTH)
1080
        {
1081
          bgp_error(conn, 1, 2, pkt_start+16, 2);
1082
          break;
1083
        }
1084
      if (end < pkt_start + len)
1085
        break;
1086
      bgp_rx_packet(conn, pkt_start, len);
1087
      pkt_start += len;
1088
    }
1089
  if (pkt_start != sk->rbuf)
1090
    {
1091
      memmove(sk->rbuf, pkt_start, end - pkt_start);
1092
      sk->rpos = sk->rbuf + (end - pkt_start);
1093
    }
1094
  return 0;
1095
}