Statistics
| Branch: | Revision:

iof-bird-daemon / proto / bgp / packets.c @ bf47fe4b

History | View | Annotate | Download (29.5 KB)

1
/*
2
 *        BIRD -- BGP Packet Processing
3
 *
4
 *        (c) 2000 Martin Mares <mj@ucw.cz>
5
 *
6
 *        Can be freely distributed and used under the terms of the GNU GPL.
7
 */
8

    
9
#undef LOCAL_DEBUG
10

    
11
#include "nest/bird.h"
12
#include "nest/iface.h"
13
#include "nest/protocol.h"
14
#include "nest/route.h"
15
#include "nest/attrs.h"
16
#include "conf/conf.h"
17
#include "lib/unaligned.h"
18
#include "lib/socket.h"
19

    
20
#include "nest/cli.h"
21

    
22
#include "bgp.h"
23

    
24
static struct rate_limit rl_rcv_update,  rl_snd_update;
25

    
26
/*
 * bgp_create_notification - fill in the body of a NOTIFICATION message
 * @conn: connection whose pending error (notify_code, notify_subcode,
 *        notify_data, notify_size) is reported
 * @buf: start of the message body
 *
 * Writes the error code, the subcode and the attached data.
 * Returns a pointer just past the last byte of the body.
 */
static byte *
bgp_create_notification(struct bgp_conn *conn, byte *buf)
{
  struct bgp_proto *p = conn->bgp;

  BGP_TRACE(D_PACKETS, "Sending NOTIFICATION(code=%d.%d)", conn->notify_code, conn->notify_subcode);
  buf[0] = conn->notify_code;
  buf[1] = conn->notify_subcode;

  /*
   * Errors without attached data leave nothing to copy; memcpy() must not
   * be handed a null source pointer even when the length is zero.
   */
  if (conn->notify_size)
    memcpy(buf+2, conn->notify_data, conn->notify_size);

  return buf + 2 + conn->notify_size;
}
37

    
38
#ifdef IPV6
39
static byte *
40
bgp_put_cap_ipv6(struct bgp_conn *conn UNUSED, byte *buf)
41
{
42
  *buf++ = 1;                /* Capability 1: Multiprotocol extensions */
43
  *buf++ = 4;                /* Capability data length */
44
  *buf++ = 0;                /* We support AF IPv6 */
45
  *buf++ = BGP_AF_IPV6;
46
  *buf++ = 0;                /* RFU */
47
  *buf++ = 1;                /* and SAFI 1 */
48
  return buf;
49
}
50

    
51
#else
52

    
53
static byte *
54
bgp_put_cap_ipv4(struct bgp_conn *conn UNUSED, byte *buf)
55
{
56
  *buf++ = 1;                /* Capability 1: Multiprotocol extensions */
57
  *buf++ = 4;                /* Capability data length */
58
  *buf++ = 0;                /* We support AF IPv4 */
59
  *buf++ = BGP_AF_IPV4;
60
  *buf++ = 0;                /* RFU */
61
  *buf++ = 1;                /* and SAFI 1 */
62
  return buf;
63
}
64
#endif
65

    
66
static byte *
67
bgp_put_cap_rr(struct bgp_conn *conn UNUSED, byte *buf)
68
{
69
  *buf++ = 2;                /* Capability 2: Support for route refresh */
70
  *buf++ = 0;                /* Capability data length */
71
  return buf;
72
}
73

    
74
/*
 * Append the 4-octet AS number capability (code 65) carrying our
 * local AS number at @buf. Returns the first byte after it.
 */
static byte *
bgp_put_cap_as4(struct bgp_conn *conn, byte *buf)
{
  buf[0] = 65;               /* Capability 65: Support for 4-octet AS number */
  buf[1] = 4;                /* Capability data length */
  put_u32(buf + 2, conn->bgp->local_as);
  return buf + 6;
}
82

    
83
/*
 * bgp_create_open - fill in the body of an OPEN message
 * @conn: connection the message is built for
 * @buf: start of the message body
 *
 * Writes version, AS number (AS_TRANS when the local AS does not fit),
 * hold time and router ID, followed by a single Capabilities optional
 * parameter unless capabilities are disabled for this connection attempt
 * or none is to be advertised. Returns a pointer past the end of the body.
 */
static byte *
bgp_create_open(struct bgp_conn *conn, byte *buf)
{
  struct bgp_proto *p = conn->bgp;
  byte *caps_start, *caps_end;
  int caps_size;

  BGP_TRACE(D_PACKETS, "Sending OPEN(ver=%d,as=%d,hold=%d,id=%08x)",
            BGP_VERSION, p->local_as, p->cf->hold_time, p->local_id);
  buf[0] = BGP_VERSION;
  put_u16(buf+1, (p->local_as < 0xFFFF) ? p->local_as : AS_TRANS);
  put_u16(buf+3, p->cf->hold_time);
  put_u32(buf+5, p->local_id);

  if (conn->start_state == BSS_CONNECT_NOCAP) {
    BGP_TRACE(D_PACKETS, "Skipping capabilities");
    buf[9] = 0;
    return buf + 10;
  }

  /* Leave 3 bytes free: optional parameters length + parameter header */
  caps_start = buf + 12;
  caps_end = caps_start;

#ifdef IPV6
  caps_end = bgp_put_cap_ipv6(conn, caps_end);
#else
  if (p->cf->advertise_ipv4)
    caps_end = bgp_put_cap_ipv4(conn, caps_end);
#endif

  if (p->cf->enable_refresh)
    caps_end = bgp_put_cap_rr(conn, caps_end);

  if (conn->want_as4_support)
    caps_end = bgp_put_cap_as4(conn, caps_end);

  caps_size = caps_end - caps_start;
  if (caps_size <= 0) {
    buf[9] = 0;                      /* No optional parameters */
    return buf + 10;
  }

  buf[9]  = caps_size + 2;           /* Optional params len */
  buf[10] = 2;                       /* Option: Capability list */
  buf[11] = caps_size;               /* Option length */
  return caps_end;
}
136

    
137
static unsigned int
138
bgp_encode_prefixes(struct bgp_proto *p, byte *w, struct bgp_bucket *buck, unsigned int remains)
139
{
140
  byte *start = w;
141
  ip_addr a;
142
  int bytes;
143

    
144
  while (!EMPTY_LIST(buck->prefixes) && remains >= 5)
145
    {
146
      struct bgp_prefix *px = SKIP_BACK(struct bgp_prefix, bucket_node, HEAD(buck->prefixes));
147
      DBG("\tDequeued route %I/%d\n", px->n.prefix, px->n.pxlen);
148
      *w++ = px->n.pxlen;
149
      bytes = (px->n.pxlen + 7) / 8;
150
      a = px->n.prefix;
151
      ipa_hton(a);
152
      memcpy(w, &a, bytes);
153
      w += bytes;
154
      remains -= bytes + 1;
155
      rem_node(&px->bucket_node);
156
      fib_delete(&p->prefix_fib, px);
157
    }
158
  return w - start;
159
}
160

    
161
static void
162
bgp_flush_prefixes(struct bgp_proto *p, struct bgp_bucket *buck)
163
{
164
  while (!EMPTY_LIST(buck->prefixes))
165
    {
166
      struct bgp_prefix *px = SKIP_BACK(struct bgp_prefix, bucket_node, HEAD(buck->prefixes));
167
      log(L_ERR "%s: - route %I/%d skipped", p->p.name, px->n.prefix, px->n.pxlen);
168
      rem_node(&px->bucket_node);
169
      fib_delete(&p->prefix_fib, px);
170
    }
171
}
172

    
173
#ifndef IPV6                /* IPv4 version */
174

    
175
/*
 * bgp_create_update - fill in the body of an UPDATE message (IPv4)
 * @conn: connection the message is built for
 * @buf: start of the message body
 *
 * Layout written: withdrawn routes length, withdrawn routes, path
 * attributes length, path attributes, NLRI. Withdrawals come from the
 * withdraw bucket; announcements come from the first usable bucket in the
 * send queue. Empty buckets and buckets whose attributes cannot be encoded
 * are removed from the queue on the way.
 *
 * Returns a pointer past the end of the body, or NULL when there was
 * nothing to send.
 */
static byte *
bgp_create_update(struct bgp_conn *conn, byte *buf)
{
  struct bgp_proto *p = conn->bgp;
  struct bgp_bucket *buck;
  int remains = BGP_MAX_PACKET_LENGTH - BGP_HEADER_LENGTH - 4;
  byte *w;
  int wd_size = 0;
  int r_size = 0;
  int a_size = 0;
  int attrs_written = 0;        /* Set once the attribute length field is in place */

  w = buf+2;
  if ((buck = p->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
    {
      DBG("Withdrawn routes:\n");
      wd_size = bgp_encode_prefixes(p, w, buck, remains);
      w += wd_size;
      remains -= wd_size;
    }
  put_u16(buf, wd_size);

  if (remains >= 3072)
    {
      while ((buck = (struct bgp_bucket *) HEAD(p->bucket_queue))->send_node.next)
        {
          if (EMPTY_LIST(buck->prefixes))
            {
              DBG("Deleting empty bucket %p\n", buck);
              rem_node(&buck->send_node);
              bgp_free_bucket(p, buck);
              continue;
            }

          DBG("Processing bucket %p\n", buck);
          a_size = bgp_encode_attrs(p, w+2, buck->eattrs, 2048);

          if (a_size < 0)
            {
              log(L_ERR "%s: Attribute list too long, skipping corresponding routes", p->p.name);
              bgp_flush_prefixes(p, buck);
              rem_node(&buck->send_node);
              bgp_free_bucket(p, buck);
              continue;
            }

          put_u16(w, a_size);
          w += a_size + 2;
          attrs_written = 1;
          r_size = bgp_encode_prefixes(p, w, buck, remains - a_size);
          w += r_size;
          break;
        }
    }

  /*
   * The attribute length field is mandatory even without attributes.
   * Track it with a dedicated flag: a_size stays negative when the last
   * examined bucket was skipped, and may be zero after a successful
   * encode, so it cannot tell whether the field was already written.
   */
  if (!attrs_written)
    {
      put_u16(w, 0);
      w += 2;
    }

  if (wd_size || r_size)
    {
      BGP_TRACE_RL(&rl_snd_update, D_PACKETS, "Sending UPDATE");
      return w;
    }
  else
    return NULL;
}
240

    
241
#else                /* IPv6 version */
242

    
243
/*
 * bgp_create_update - fill in the body of an UPDATE message (IPv6)
 * @conn: connection the message is built for
 * @buf: start of the message body
 *
 * The withdrawn routes length is written as zero; withdrawals and
 * announcements are both carried inside path attributes (MP_UNREACH_NLRI
 * and MP_REACH_NLRI). Buckets that are empty, whose attributes cannot be
 * encoded, or that have to be dropped for a missing link-local next hop
 * are removed from the send queue on the way.
 *
 * Returns a pointer past the end of the body, or NULL when no attribute
 * was written.
 */
static byte *
bgp_create_update(struct bgp_conn *conn, byte *buf)
{
  struct bgp_proto *p = conn->bgp;
  struct bgp_bucket *buck;
  int size, has_second, saved_remains, with_ll;
  int remains = BGP_MAX_PACKET_LENGTH - BGP_HEADER_LENGTH - 4;
  byte *w, *saved_w, *pos, *mp_start;
  ip_addr *ipp, ip, ip_ll;
  ea_list *ea;
  eattr *nh;
  neighbor *n;

  put_u16(buf, 0);
  w = buf+4;

  buck = p->withdraw_bucket;
  if (buck && !EMPTY_LIST(buck->prefixes)) {
    DBG("Withdrawn routes:\n");
    pos = bgp_attach_attr_wa(&ea, bgp_linpool, BA_MP_UNREACH_NLRI, remains-8);
    *pos++ = 0;
    *pos++ = BGP_AF_IPV6;
    *pos++ = 1;
    ea->attrs[0].u.ptr->length = 3 + bgp_encode_prefixes(p, pos, buck, remains-11);
    size = bgp_encode_attrs(p, w, ea, remains);
    ASSERT(size >= 0);
    w += size;
    remains -= size;
  }

  if (remains >= 3072) {
    while ((buck = (struct bgp_bucket *) HEAD(p->bucket_queue))->send_node.next) {
      if (EMPTY_LIST(buck->prefixes)) {
        DBG("Deleting empty bucket %p\n", buck);
        rem_node(&buck->send_node);
        bgp_free_bucket(p, buck);
        continue;
      }

      DBG("Processing bucket %p\n", buck);

      /* Remember the position so a dropped bucket can be rolled back */
      saved_remains = remains;
      saved_w = w;

      size = bgp_encode_attrs(p, w, buck->eattrs, 2048);
      if (size < 0) {
        log(L_ERR "%s: Attribute list too long, skipping corresponding routes", p->p.name);
        bgp_flush_prefixes(p, buck);
        rem_node(&buck->send_node);
        bgp_free_bucket(p, buck);
        continue;
      }
      w += size;
      remains -= size;

      /* We have two addresses here in NEXT_HOP eattr. Really.
         Unless NEXT_HOP was modified by filter */
      nh = ea_find(buck->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP));
      ASSERT(nh);
      has_second = (nh->u.ptr->length == NEXT_HOP_LENGTH);
      ipp = (ip_addr *) nh->u.ptr->data;
      ip = ipp[0];
      ip_ll = IPA_NONE;

      if (ipa_equal(ip, p->source_addr)) {
        ip_ll = p->local_link;
      } else {
        /* If we send a route with a 'third party' next hop located on
         * the same interface, we should also send a link-local next hop
         * address. We use the received one (stored in the other part of
         * the BA_NEXT_HOP eattr). If we did not receive it (for example
         * it is a static route), we cannot use the 'third party' next
         * hop and have to use the local IP address as next hop. Sending
         * the original next hop address without a link-local address
         * seems to be a natural way to solve that problem, but it is
         * contrary to RFC 2545 and Quagga does not accept such routes.
         */
        n = neigh_find(&p->p, &ip, 0);
        if (n && n->iface == p->neigh->iface) {
          if (has_second && ipa_nonzero(ipp[1])) {
            ip_ll = ipp[1];
          } else if (p->cf->missing_lladdr == MLL_SELF) {
            ip = p->source_addr;
            ip_ll = p->local_link;
          } else if (p->cf->missing_lladdr == MLL_DROP) {
            log(L_ERR "%s: Missing link-local next hop address, skipping corresponding routes", p->p.name);
            w = saved_w;
            remains = saved_remains;
            bgp_flush_prefixes(p, buck);
            rem_node(&buck->send_node);
            bgp_free_bucket(p, buck);
            continue;
          }
          /* MLL_IGNORE: send the next hop without a link-local address */
        }
      }

      mp_start = pos = bgp_attach_attr_wa(&ea, bgp_linpool, BA_MP_REACH_NLRI, remains-8);
      *pos++ = 0;
      *pos++ = BGP_AF_IPV6;
      *pos++ = 1;

      /* Next hop length, global address, optional link-local address */
      with_ll = ipa_nonzero(ip_ll);
      *pos++ = with_ll ? 32 : 16;
      ipa_hton(ip);
      memcpy(pos, &ip, 16);
      pos += 16;
      if (with_ll) {
        ipa_hton(ip_ll);
        memcpy(pos, &ip_ll, 16);
        pos += 16;
      }

      *pos++ = 0;                        /* No SNPA information */
      pos += bgp_encode_prefixes(p, pos, buck, remains - (8+3+32+1));
      ea->attrs[0].u.ptr->length = pos - mp_start;
      size = bgp_encode_attrs(p, w, ea, remains);
      ASSERT(size >= 0);
      w += size;
      break;
    }
  }

  size = w - (buf+4);
  put_u16(buf+2, size);
  lp_flush(bgp_linpool);
  if (!size)
    return NULL;

  BGP_TRACE_RL(&rl_snd_update, D_PACKETS, "Sending UPDATE");
  return w;
}
397

    
398
#endif
399

    
400
/*
 * bgp_create_route_refresh - fill in the body of a ROUTE-REFRESH message
 * @conn: connection the message is built for
 * @buf: start of the message body
 *
 * Writes AFI (IPv6 or IPv4, depending on the build), a reserved byte and
 * SAFI 1. Returns a pointer past the end of the body.
 */
static byte *
bgp_create_route_refresh(struct bgp_conn *conn, byte *buf)
{
  struct bgp_proto *p = conn->bgp;
  BGP_TRACE(D_PACKETS, "Sending ROUTE-REFRESH");

  buf[0] = 0;                /* AFI, high byte */
#ifdef IPV6
  buf[1] = BGP_AF_IPV6;      /* AFI IPv6 */
#else
  buf[1] = BGP_AF_IPV4;      /* AFI IPv4 */
#endif
  buf[2] = 0;                /* RFU */
  buf[3] = 1;                /* and SAFI 1 */
  return buf + 4;
}
417

    
418
static void
419
bgp_create_header(byte *buf, unsigned int len, unsigned int type)
420
{
421
  memset(buf, 0xff, 16);                /* Marker */
422
  put_u16(buf+16, len);
423
  buf[18] = type;
424
}
425

    
426
/**
427
 * bgp_fire_tx - transmit packets
428
 * @conn: connection
429
 *
430
 * Whenever the transmit buffers of the underlying TCP connection
431
 * are free and we have any packets queued for sending, the socket functions
432
 * call bgp_fire_tx() which takes care of selecting the highest priority packet
433
 * queued (Notification > Keepalive > Open > Update), assembling its header
434
 * and body and sending it to the connection.
435
 */
436
static int
437
bgp_fire_tx(struct bgp_conn *conn)
438
{
439
  struct bgp_proto *p = conn->bgp;
440
  unsigned int s = conn->packets_to_send;
441
  sock *sk = conn->sk;
442
  byte *buf, *pkt, *end;
443
  int type;
444

    
445
  if (!sk)
446
    {
447
      conn->packets_to_send = 0;
448
      return 0;
449
    }
450
  buf = sk->tbuf;
451
  pkt = buf + BGP_HEADER_LENGTH;
452

    
453
  if (s & (1 << PKT_SCHEDULE_CLOSE))
454
    {
455
      /* We can finally close connection and enter idle state */
456
      bgp_conn_enter_idle_state(conn);
457
      return 0;
458
    }
459
  if (s & (1 << PKT_NOTIFICATION))
460
    {
461
      s = 1 << PKT_SCHEDULE_CLOSE;
462
      type = PKT_NOTIFICATION;
463
      end = bgp_create_notification(conn, pkt);
464
    }
465
  else if (s & (1 << PKT_KEEPALIVE))
466
    {
467
      s &= ~(1 << PKT_KEEPALIVE);
468
      type = PKT_KEEPALIVE;
469
      end = pkt;                        /* Keepalives carry no data */
470
      BGP_TRACE(D_PACKETS, "Sending KEEPALIVE");
471
      bgp_start_timer(conn->keepalive_timer, conn->keepalive_time);
472
    }
473
  else if (s & (1 << PKT_OPEN))
474
    {
475
      s &= ~(1 << PKT_OPEN);
476
      type = PKT_OPEN;
477
      end = bgp_create_open(conn, pkt);
478
    }
479
  else if (s & (1 << PKT_ROUTE_REFRESH))
480
    {
481
      s &= ~(1 << PKT_ROUTE_REFRESH);
482
      type = PKT_ROUTE_REFRESH;
483
      end = bgp_create_route_refresh(conn, pkt);
484
    }
485
  else if (s & (1 << PKT_UPDATE))
486
    {
487
      end = bgp_create_update(conn, pkt);
488
      type = PKT_UPDATE;
489
      if (!end)
490
        {
491
          conn->packets_to_send = 0;
492
          return 0;
493
        }
494
    }
495
  else
496
    return 0;
497
  conn->packets_to_send = s;
498
  bgp_create_header(buf, end - buf, type);
499
  return sk_send(sk, end - buf);
500
}
501

    
502
/**
503
 * bgp_schedule_packet - schedule a packet for transmission
504
 * @conn: connection
505
 * @type: packet type
506
 *
507
 * Schedule a packet of type @type to be sent as soon as possible.
508
 */
509
void
510
bgp_schedule_packet(struct bgp_conn *conn, int type)
511
{
512
  DBG("BGP: Scheduling packet type %d\n", type);
513
  conn->packets_to_send |= 1 << type;
514
  if (conn->sk && conn->sk->tpos == conn->sk->tbuf)
515
    ev_schedule(conn->tx_ev);
516
}
517

    
518
/*
 * Event hook: @vconn is the connection. Keeps calling bgp_fire_tx()
 * for as long as it returns non-zero.
 */
void
bgp_kick_tx(void *vconn)
{
  struct bgp_conn *conn = vconn;

  DBG("BGP: kicking TX\n");
  for (;;) {
    if (!bgp_fire_tx(conn))
      break;
  }
}
527

    
528
void
529
bgp_tx(sock *sk)
530
{
531
  struct bgp_conn *conn = sk->data;
532

    
533
  DBG("BGP: TX hook\n");
534
  while (bgp_fire_tx(conn))
535
    ;
536
}
537

    
538
/* Capatibility negotiation as per RFC 2842 */
539

    
540
void
541
bgp_parse_capabilities(struct bgp_conn *conn, byte *opt, int len)
542
{
543
  struct bgp_proto *p = conn->bgp;
544
  int cl;
545

    
546
  while (len > 0)
547
    {
548
      if (len < 2 || len < 2 + opt[1])
549
        goto err;
550
      
551
      cl = opt[1];
552

    
553
      switch (opt[0])
554
        {
555
        case 2:
556
          if (cl != 0)
557
            goto err;
558
          conn->peer_refresh_support = 1;
559
          break;
560
        case 65:
561
          if (cl != 4)
562
            goto err;
563
          conn->peer_as4_support = 1;
564
          if (conn->want_as4_support)
565
            conn->advertised_as = get_u32(opt + 2);
566
          break;
567

    
568
          /* We can safely ignore all other capabilities */
569
        }
570
      len -= 2 + cl;
571
      opt += 2 + cl;
572
    }
573
  return;
574

    
575
    err:
576
  bgp_error(conn, 2, 0, NULL, 0);
577
  return;
578
}
579

    
580
static int
581
bgp_parse_options(struct bgp_conn *conn, byte *opt, int len)
582
{
583
  struct bgp_proto *p = conn->bgp;
584
  int ol;
585

    
586
  while (len > 0)
587
    {
588
      if (len < 2 || len < 2 + opt[1])
589
        { bgp_error(conn, 2, 0, NULL, 0); return 0; }
590
#ifdef LOCAL_DEBUG
591
      {
592
        int i;
593
        DBG("\tOption %02x:", opt[0]);
594
        for(i=0; i<opt[1]; i++)
595
          DBG(" %02x", opt[2+i]);
596
        DBG("\n");
597
      }
598
#endif
599

    
600
      ol = opt[1];
601
      switch (opt[0])
602
        {
603
        case 2:
604
          if (conn->start_state == BSS_CONNECT_NOCAP)
605
            BGP_TRACE(D_PACKETS, "Ignoring received capabilities");
606
          else
607
            bgp_parse_capabilities(conn, opt + 2, ol);
608
          break;
609

    
610
        default:
611
          /*
612
           *  BGP specs don't tell us to send which option
613
           *  we didn't recognize, but it's common practice
614
           *  to do so. Also, capability negotiation with
615
           *  Cisco routers doesn't work without that.
616
           */
617
          bgp_error(conn, 2, 4, opt, ol);
618
          return 0;
619
        }
620
      len -= 2 + ol;
621
      opt += 2 + ol;
622
    }
623
  return 0;
624
}
625

    
626
static void
627
bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len)
628
{
629
  struct bgp_conn *other;
630
  struct bgp_proto *p = conn->bgp;
631
  unsigned hold;
632
  u16 base_as;
633
  u32 id;
634

    
635
  /* Check state */
636
  if (conn->state != BS_OPENSENT)
637
    { bgp_error(conn, 5, 0, NULL, 0); return; }
638

    
639
  /* Check message contents */
640
  if (len < 29 || len != 29 + pkt[28])
641
    { bgp_error(conn, 1, 2, pkt+16, 2); return; }
642
  if (pkt[19] != BGP_VERSION)
643
    { bgp_error(conn, 2, 1, pkt+19, 1); return; } /* RFC 1771 says 16 bits, draft-09 tells to use 8 */
644
  conn->advertised_as = base_as = get_u16(pkt+20);
645
  hold = get_u16(pkt+22);
646
  id = get_u32(pkt+24);
647
  BGP_TRACE(D_PACKETS, "Got OPEN(as=%d,hold=%d,id=%08x)", conn->advertised_as, hold, id);
648

    
649
  if (bgp_parse_options(conn, pkt+29, pkt[28]))
650
    return;
651

    
652
  if (hold > 0 && hold < 3)
653
    { bgp_error(conn, 2, 6, pkt+22, 2); return; }
654

    
655
  if (!id || id == 0xffffffff || id == p->local_id)
656
    { bgp_error(conn, 2, 3, pkt+24, -4); return; }
657

    
658
  if ((conn->advertised_as != base_as) && (base_as != AS_TRANS))
659
    log(L_WARN "%s: Peer advertised inconsistent AS numbers", p->p.name);
660

    
661
  if (conn->advertised_as != p->remote_as)
662
    {
663
      if (conn->peer_as4_support)
664
        {
665
          u32 val = htonl(conn->advertised_as);
666
          bgp_error(conn, 2, 2, (byte *) &val, 4);
667
        }
668
      else
669
        bgp_error(conn, 2, 2, pkt+20, 2);
670

    
671
      return;
672
    }
673

    
674
  /* Check the other connection */
675
  other = (conn == &p->outgoing_conn) ? &p->incoming_conn : &p->outgoing_conn;
676
  switch (other->state)
677
    {
678
    case BS_IDLE:
679
    case BS_CONNECT:
680
    case BS_ACTIVE:
681
    case BS_OPENSENT:
682
    case BS_CLOSE:
683
      break;
684
    case BS_OPENCONFIRM:
685
      if ((p->local_id < id) == (conn == &p->incoming_conn))
686
        {
687
          /* Should close the other connection */
688
          BGP_TRACE(D_EVENTS, "Connection collision, giving up the other connection");
689
          bgp_error(other, 6, 7, NULL, 0);
690
          break;
691
        }
692
      /* Fall thru */
693
    case BS_ESTABLISHED:
694
      /* Should close this connection */
695
      BGP_TRACE(D_EVENTS, "Connection collision, giving up this connection");
696
      bgp_error(conn, 6, 7, NULL, 0);
697
      return;
698
    default:
699
      bug("bgp_rx_open: Unknown state");
700
    }
701

    
702
  /* Update our local variables */
703
  conn->hold_time = MIN(hold, p->cf->hold_time);
704
  conn->keepalive_time = p->cf->keepalive_time ? : conn->hold_time / 3;
705
  p->remote_id = id;
706
  p->as4_session = conn->want_as4_support && conn->peer_as4_support;
707

    
708
  DBG("BGP: Hold timer set to %d, keepalive to %d, AS to %d, ID to %x, AS4 session to %d\n", conn->hold_time, conn->keepalive_time, p->remote_as, p->remote_id, p->as4_session);
709

    
710
  bgp_schedule_packet(conn, PKT_KEEPALIVE);
711
  bgp_start_timer(conn->hold_timer, conn->hold_time);
712
  conn->state = BS_OPENCONFIRM;
713
}
714

    
715
/*
 * Decode one prefix at pp (ll bytes left) into the caller's `prefix' and
 * `pxlen' variables, advancing pp and decreasing ll. On a prefix length
 * above BITS_PER_IP_ADDRESS or on truncated data it sets the caller's
 * `err' (10 or 1) and jumps to the caller's `bad' label.
 */
#define DECODE_PREFIX(pp, ll) do { \
  int b = *pp++; \
  int q = (b + 7) / 8; \
  ll--; \
  if (b > BITS_PER_IP_ADDRESS) { err = 10; goto bad; } \
  if (ll < q) { err = 1; goto bad; } \
  memcpy(&prefix, pp, q); \
  pp += q; \
  ll -= q; \
  ipa_ntoh(prefix); \
  pxlen = b; \
  prefix = ipa_and(prefix, ipa_mkmask(b)); \
} while (0)
729

    
730
static inline int
731
bgp_get_nexthop(struct bgp_proto *bgp, rta *a)
732
{
733
  neighbor *neigh;
734
  ip_addr nexthop;
735
  struct eattr *nh = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP));
736
  ASSERT(nh);
737
  nexthop = *(ip_addr *) nh->u.ptr->data;
738
  neigh = neigh_find(&bgp->p, &nexthop, 0);
739
  if (neigh)
740
    {
741
      if (neigh->scope == SCOPE_HOST)
742
        {
743
          DBG("BGP: Loop!\n");
744
          return 0;
745
        }
746
    }
747
  else
748
    neigh = bgp->neigh;
749
  a->gw = neigh->addr;
750
  a->iface = neigh->iface;
751
  return 1;
752
}
753

    
754
#ifndef IPV6                /* IPv4 version */
755

    
756
static void
757
bgp_do_rx_update(struct bgp_conn *conn,
758
                 byte *withdrawn, int withdrawn_len,
759
                 byte *nlri, int nlri_len,
760
                 byte *attrs, int attr_len)
761
{
762
  struct bgp_proto *p = conn->bgp;
763
  rta *a0;
764
  rta *a = NULL;
765
  ip_addr prefix;
766
  net *n;
767
  int err = 0, pxlen;
768

    
769
  /* Withdraw routes */
770
  while (withdrawn_len)
771
    {
772
      DECODE_PREFIX(withdrawn, withdrawn_len);
773
      DBG("Withdraw %I/%d\n", prefix, pxlen);
774
      if (n = net_find(p->p.table, prefix, pxlen))
775
        rte_update(p->p.table, n, &p->p, &p->p, NULL);
776
    }
777

    
778
  if (!attr_len && !nlri_len)                /* shortcut */
779
    return;
780

    
781
  a0 = bgp_decode_attrs(conn, attrs, attr_len, bgp_linpool, nlri_len);
782
  if (a0 && nlri_len && bgp_get_nexthop(p, a0))
783
    {
784
      a = rta_lookup(a0);
785
      while (nlri_len)
786
        {
787
          rte *e;
788
          DECODE_PREFIX(nlri, nlri_len);
789
          DBG("Add %I/%d\n", prefix, pxlen);
790
          e = rte_get_temp(rta_clone(a));
791
          n = net_get(p->p.table, prefix, pxlen);
792
          e->net = n;
793
          e->pflags = 0;
794
          rte_update(p->p.table, n, &p->p, &p->p, e);
795
          if (bgp_apply_limits(p) < 0)
796
            goto bad2;
797
        }
798
      rta_free(a);
799
    }
800

    
801
  return;
802

    
803
 bad:
804
  bgp_error(conn, 3, err, NULL, 0);
805
 bad2:
806
  if (a)
807
    rta_free(a);
808
  return;
809
}
810

    
811
#else                        /* IPv6 version */
812

    
813
/*
 * Set up parsing of the attribute stored in p->name_start / p->name_len:
 * initializes the caller's start, x, len and len0, reads the address
 * family and subsequent family (jumping to the caller's `bad' label when
 * fewer than 3 bytes are present) and opens an `if' that guards the
 * statement following the macro for the IPv6 address family.
 */
#define DO_NLRI(name) \
  start = x = p->name##_start; \
  len = len0 = p->name##_len; \
  af = 0; \
  if (len) \
    { \
      if (len < 3) goto bad; \
      af = get_u16(x); \
      sub = x[2]; \
      x += 3; \
      len -= 3; \
      DBG("\tNLRI AF=%d sub=%d len=%d\n", af, sub, len);\
    } \
  if (af == BGP_AF_IPV6)
828

    
829
static void
830
bgp_do_rx_update(struct bgp_conn *conn,
831
                 byte *withdrawn, int withdrawn_len,
832
                 byte *nlri, int nlri_len,
833
                 byte *attrs, int attr_len)
834
{
835
  struct bgp_proto *p = conn->bgp;
836
  byte *start, *x;
837
  int len, len0;
838
  unsigned af, sub;
839
  rta *a0;
840
  rta *a = NULL;
841
  ip_addr prefix;
842
  net *n;
843
  rte e;
844
  int err = 0, pxlen;
845

    
846
  p->mp_reach_len = 0;
847
  p->mp_unreach_len = 0;
848
  a0 = bgp_decode_attrs(conn, attrs, attr_len, bgp_linpool, 0);
849
  if (!a0)
850
    return;
851

    
852
  DO_NLRI(mp_unreach)
853
    {
854
      while (len)
855
        {
856
          DECODE_PREFIX(x, len);
857
          DBG("Withdraw %I/%d\n", prefix, pxlen);
858
          if (n = net_find(p->p.table, prefix, pxlen))
859
            rte_update(p->p.table, n, &p->p, &p->p, NULL);
860
        }
861
    }
862

    
863
  DO_NLRI(mp_reach)
864
    {
865
      int i;
866

    
867
      /* Create fake NEXT_HOP attribute */
868
      if (len < 1 || (*x != 16 && *x != 32) || len < *x + 2)
869
        goto bad;
870

    
871
      ip_addr *nh = (ip_addr *) bgp_attach_attr_wa(&a0->eattrs, bgp_linpool, BA_NEXT_HOP, NEXT_HOP_LENGTH);
872
      memcpy(nh, x+1, 16);
873
      ipa_ntoh(nh[0]);
874

    
875
      /* We store received link local address in the other part of BA_NEXT_HOP eattr. */
876
      if (*x == 32)
877
        {
878
          memcpy(nh+1, x+17, 16);
879
          ipa_ntoh(nh[1]);
880
        }
881
      else
882
        nh[1] = IPA_NONE;
883

    
884
      /* Also ignore one reserved byte */
885
      len -= *x + 2;
886
      x += *x + 2;
887

    
888
      if (bgp_get_nexthop(p, a0))
889
        {
890
          a = rta_lookup(a0);
891
          while (len)
892
            {
893
              rte *e;
894
              DECODE_PREFIX(x, len);
895
              DBG("Add %I/%d\n", prefix, pxlen);
896
              e = rte_get_temp(rta_clone(a));
897
              n = net_get(p->p.table, prefix, pxlen);
898
              e->net = n;
899
              e->pflags = 0;
900
              rte_update(p->p.table, n, &p->p, &p->p, e);
901
              if (bgp_apply_limits(p) < 0)
902
                goto bad2;
903
            }
904
          rta_free(a);
905
        }
906
    }
907

    
908
  return;
909

    
910
 bad:
911
  bgp_error(conn, 3, 9, start, len0);
912
 bad2:
913
  if (a)
914
    rta_free(a);
915
  return;
916
}
917

    
918
#endif
919

    
920
static void
921
bgp_rx_update(struct bgp_conn *conn, byte *pkt, int len)
922
{
923
  struct bgp_proto *p = conn->bgp;
924
  byte *withdrawn, *attrs, *nlri;
925
  int withdrawn_len, attr_len, nlri_len;
926

    
927
  BGP_TRACE_RL(&rl_rcv_update, D_PACKETS, "Got UPDATE");
928

    
929
  if (conn->state != BS_ESTABLISHED)
930
    { bgp_error(conn, 5, 0, NULL, 0); return; }
931
  bgp_start_timer(conn->hold_timer, conn->hold_time);
932

    
933
  /* Find parts of the packet and check sizes */
934
  if (len < 23)
935
    {
936
      bgp_error(conn, 1, 2, pkt+16, 2);
937
      return;
938
    }
939
  withdrawn = pkt + 21;
940
  withdrawn_len = get_u16(pkt + 19);
941
  if (withdrawn_len + 23 > len)
942
    goto malformed;
943
  attrs = withdrawn + withdrawn_len + 2;
944
  attr_len = get_u16(attrs - 2);
945
  if (withdrawn_len + attr_len + 23 > len)
946
    goto malformed;
947
  nlri = attrs + attr_len;
948
  nlri_len = len - withdrawn_len - attr_len - 23;
949
  if (!attr_len && nlri_len)
950
    goto malformed;
951
  DBG("Sizes: withdrawn=%d, attrs=%d, NLRI=%d\n", withdrawn_len, attr_len, nlri_len);
952

    
953
  lp_flush(bgp_linpool);
954

    
955
  bgp_do_rx_update(conn, withdrawn, withdrawn_len, nlri, nlri_len, attrs, attr_len);
956
  return;
957

    
958
malformed:
959
  bgp_error(conn, 3, 1, NULL, 0);
960
}
961

    
962
static struct {
963
  byte major, minor;
964
  byte *msg;
965
} bgp_msg_table[] = {
966
  { 1, 0, "Invalid message header" },
967
  { 1, 1, "Connection not synchronized" },
968
  { 1, 2, "Bad message length" },
969
  { 1, 3, "Bad message type" },
970
  { 2, 0, "Invalid OPEN message" },
971
  { 2, 1, "Unsupported version number" },
972
  { 2, 2, "Bad peer AS" },
973
  { 2, 3, "Bad BGP identifier" },
974
  { 2, 4, "Unsupported optional parameter" },
975
  { 2, 5, "Authentication failure" },
976
  { 2, 6, "Unacceptable hold time" },
977
  { 2, 7, "Required capability missing" }, /* [RFC3392] */
978
  { 3, 0, "Invalid UPDATE message" },
979
  { 3, 1, "Malformed attribute list" },
980
  { 3, 2, "Unrecognized well-known attribute" },
981
  { 3, 3, "Missing mandatory attribute" },
982
  { 3, 4, "Invalid attribute flags" },
983
  { 3, 5, "Invalid attribute length" },
984
  { 3, 6, "Invalid ORIGIN attribute" },
985
  { 3, 7, "AS routing loop" },                /* Deprecated */
986
  { 3, 8, "Invalid NEXT_HOP attribute" },
987
  { 3, 9, "Optional attribute error" },
988
  { 3, 10, "Invalid network field" },
989
  { 3, 11, "Malformed AS_PATH" },
990
  { 4, 0, "Hold timer expired" },
991
  { 5, 0, "Finite state machine error" },
992
  { 6, 0, "Cease" }, /* Subcodes are according to [RFC4486] */
993
  { 6, 1, "Maximum number of prefixes reached" },
994
  { 6, 2, "Administrative shutdown" },
995
  { 6, 3, "Peer de-configured" },
996
  { 6, 4, "Administrative reset" },
997
  { 6, 5, "Connection rejected" },
998
  { 6, 6, "Other configuration change" },
999
  { 6, 7, "Connection collision resolution" },
1000
  { 6, 8, "Out of Resources" }
1001
};
1002

    
1003
/**
1004
 * bgp_error_dsc - return BGP error description
1005
 * @buff: temporary buffer
1006
 * @code: BGP error code
1007
 * @subcode: BGP error subcode
1008
 *
1009
 * bgp_error_dsc() returns error description for BGP errors
1010
 * which might be static string or given temporary buffer.
1011
 */
1012
const byte *
1013
bgp_error_dsc(byte *buff, unsigned code, unsigned subcode)
1014
{
1015
  unsigned i;
1016
  for (i=0; i < ARRAY_SIZE(bgp_msg_table); i++)
1017
    if (bgp_msg_table[i].major == code && bgp_msg_table[i].minor == subcode)
1018
      {
1019
        return bgp_msg_table[i].msg;
1020
      }
1021

    
1022
  bsprintf(buff, "Unknown error %d.%d", code, subcode);
1023
  return buff;
1024
}
1025

    
1026
void
1027
bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsigned subcode, byte *data, unsigned len)
1028
{
1029
  const byte *name;
1030
  byte namebuf[32];
1031
  byte *t, argbuf[36];
1032
  unsigned i;
1033

    
1034
  /* Don't report Cease messages generated by myself */
1035
  if (code == 6 && class == BE_BGP_TX)
1036
    return;
1037

    
1038
  name = bgp_error_dsc(namebuf, code, subcode);
1039
  t = argbuf;
1040
  if (len)
1041
    {
1042
      *t++ = ':';
1043
      *t++ = ' ';
1044

    
1045
      if ((code == 2) && (subcode == 2) && ((len == 2) || (len == 4)))
1046
        {
1047
          /* Bad peer AS - we would like to print the AS */
1048
          t += bsprintf(t, "%d", (len == 2) ? get_u16(data) : get_u32(data));
1049
          goto done;
1050
        }
1051
      if (len > 16)
1052
        len = 16;
1053
      for (i=0; i<len; i++)
1054
        t += bsprintf(t, "%02x", data[i]);
1055
    }
1056
 done:
1057
  *t = 0;
1058
  log(L_REMOTE "%s: %s: %s%s", p->p.name, msg, name, argbuf);
1059
}
1060

    
1061
static void
1062
bgp_rx_notification(struct bgp_conn *conn, byte *pkt, int len)
1063
{
1064
  struct bgp_proto *p = conn->bgp;
1065
  if (len < 21)
1066
    {
1067
      bgp_error(conn, 1, 2, pkt+16, 2);
1068
      return;
1069
    }
1070

    
1071
  unsigned code = pkt[19];
1072
  unsigned subcode = pkt[20];
1073
  int err = (code != 6);
1074

    
1075
  bgp_log_error(p, BE_BGP_RX, "Received", code, subcode, pkt+21, len-21);
1076
  bgp_store_error(p, conn, BE_BGP_RX, (code << 16) | subcode);
1077

    
1078
#ifndef IPV6
1079
  if ((code == 2) && ((subcode == 4) || (subcode == 7))
1080
      /* Error related to capability:
1081
       * 4 - Peer does not support capabilities at all.
1082
       * 7 - Peer request some capability. Strange unless it is IPv6 only peer.
1083
       */
1084
      && (p->cf->capabilities == 2)
1085
      /* Capabilities are not explicitly enabled or disabled, therefore heuristic is used */
1086
      && (conn->start_state == BSS_CONNECT)
1087
      /* Failed connection attempt have used capabilities */
1088
      && (p->cf->remote_as <= 0xFFFF))
1089
      /* Not possible with disabled capabilities */
1090
    {
1091
      /* We try connect without capabilities */
1092
      log(L_WARN "%s: Capability related error received, retry with capabilities disabled", p->p.name);
1093
      p->start_state = BSS_CONNECT_NOCAP;
1094
      err = 0;
1095
    }
1096
#endif
1097

    
1098
  bgp_conn_enter_close_state(conn);
1099
  bgp_schedule_packet(conn, PKT_SCHEDULE_CLOSE);
1100

    
1101
  if (err) 
1102
    {
1103
      bgp_update_startup_delay(p);
1104
      bgp_stop(p, 0);
1105
    }
1106
}
1107

    
1108
static void
1109
bgp_rx_keepalive(struct bgp_conn *conn)
1110
{
1111
  struct bgp_proto *p = conn->bgp;
1112

    
1113
  BGP_TRACE(D_PACKETS, "Got KEEPALIVE");
1114
  bgp_start_timer(conn->hold_timer, conn->hold_time);
1115
  switch (conn->state)
1116
    {
1117
    case BS_OPENCONFIRM:
1118
      bgp_conn_enter_established_state(conn);
1119
      break;
1120
    case BS_ESTABLISHED:
1121
      break;
1122
    default:
1123
      bgp_error(conn, 5, 0, NULL, 0);
1124
    }
1125
}
1126

    
1127
static void
1128
bgp_rx_route_refresh(struct bgp_conn *conn, byte *pkt, int len)
1129
{
1130
  struct bgp_proto *p = conn->bgp;
1131

    
1132
  BGP_TRACE(D_PACKETS, "Got ROUTE-REFRESH");
1133

    
1134
  if (conn->state != BS_ESTABLISHED)
1135
    { bgp_error(conn, 5, 0, NULL, 0); return; }
1136

    
1137
  if (!p->cf->enable_refresh)
1138
    { bgp_error(conn, 1, 3, pkt+18, 1); return; }
1139

    
1140
  if (len != (BGP_HEADER_LENGTH + 4))
1141
    { bgp_error(conn, 1, 2, pkt+16, 2); return; }
1142

    
1143
  /* FIXME - we ignore AFI/SAFI values, as we support
1144
     just one value and even an error code for an invalid
1145
     request is not defined */
1146

    
1147
  proto_request_feeding(&p->p);
1148
}
1149

    
1150

    
1151
/**
1152
 * bgp_rx_packet - handle a received packet
1153
 * @conn: BGP connection
1154
 * @pkt: start of the packet
1155
 * @len: packet size
1156
 *
1157
 * bgp_rx_packet() takes a newly received packet and calls the corresponding
1158
 * packet handler according to the packet type.
1159
 */
1160
static void
1161
bgp_rx_packet(struct bgp_conn *conn, byte *pkt, unsigned len)
1162
{
1163
  DBG("BGP: Got packet %02x (%d bytes)\n", pkt[18], len);
1164
  switch (pkt[18])
1165
    {
1166
    case PKT_OPEN:                return bgp_rx_open(conn, pkt, len);
1167
    case PKT_UPDATE:                return bgp_rx_update(conn, pkt, len);
1168
    case PKT_NOTIFICATION:      return bgp_rx_notification(conn, pkt, len);
1169
    case PKT_KEEPALIVE:                return bgp_rx_keepalive(conn);
1170
    case PKT_ROUTE_REFRESH:        return bgp_rx_route_refresh(conn, pkt, len);
1171
    default:                        bgp_error(conn, 1, 3, pkt+18, 1);
1172
    }
1173
}
1174

    
1175
/**
1176
 * bgp_rx - handle received data
1177
 * @sk: socket
1178
 * @size: amount of data received
1179
 *
1180
 * bgp_rx() is called by the socket layer whenever new data arrive from
1181
 * the underlying TCP connection. It assembles the data fragments to packets,
1182
 * checks their headers and framing and passes complete packets to
1183
 * bgp_rx_packet().
1184
 */
1185
int
1186
bgp_rx(sock *sk, int size)
1187
{
1188
  struct bgp_conn *conn = sk->data;
1189
  byte *pkt_start = sk->rbuf;
1190
  byte *end = pkt_start + size;
1191
  unsigned i, len;
1192

    
1193
  DBG("BGP: RX hook: Got %d bytes\n", size);
1194
  while (end >= pkt_start + BGP_HEADER_LENGTH)
1195
    {
1196
      if ((conn->state == BS_CLOSE) || (conn->sk != sk))
1197
        return 0;
1198
      for(i=0; i<16; i++)
1199
        if (pkt_start[i] != 0xff)
1200
          {
1201
            bgp_error(conn, 1, 1, NULL, 0);
1202
            break;
1203
          }
1204
      len = get_u16(pkt_start+16);
1205
      if (len < BGP_HEADER_LENGTH || len > BGP_MAX_PACKET_LENGTH)
1206
        {
1207
          bgp_error(conn, 1, 2, pkt_start+16, 2);
1208
          break;
1209
        }
1210
      if (end < pkt_start + len)
1211
        break;
1212
      bgp_rx_packet(conn, pkt_start, len);
1213
      pkt_start += len;
1214
    }
1215
  if (pkt_start != sk->rbuf)
1216
    {
1217
      memmove(sk->rbuf, pkt_start, end - pkt_start);
1218
      sk->rpos = sk->rbuf + (end - pkt_start);
1219
    }
1220
  return 0;
1221
}