Statistics
| Branch: | Revision:

iof-bird-daemon / proto / bgp / packets.c @ 1389f369

History | View | Annotate | Download (22.8 KB)

1
/*
2
 *        BIRD -- BGP Packet Processing
3
 *
4
 *        (c) 2000 Martin Mares <mj@ucw.cz>
5
 *
6
 *        Can be freely distributed and used under the terms of the GNU GPL.
7
 */
8

    
9
#undef LOCAL_DEBUG
10

    
11
#include "nest/bird.h"
12
#include "nest/iface.h"
13
#include "nest/protocol.h"
14
#include "nest/route.h"
15
#include "nest/attrs.h"
16
#include "conf/conf.h"
17
#include "lib/unaligned.h"
18
#include "lib/socket.h"
19

    
20
#include "bgp.h"
21

    
22
/*
 * Write the body of a NOTIFICATION message to @buf: the error code, the
 * error subcode and then conn->notify_size bytes taken from
 * conn->notify_data. Returns a pointer just past the last byte written.
 */
static byte *
bgp_create_notification(struct bgp_conn *conn, byte *buf)
{
  struct bgp_proto *p = conn->bgp;
  byte *payload = buf + 2;

  BGP_TRACE(D_PACKETS, "Sending NOTIFICATION(code=%d.%d)", conn->notify_code, conn->notify_subcode);
  *buf = conn->notify_code;
  *(buf + 1) = conn->notify_subcode;
  memcpy(payload, conn->notify_data, conn->notify_size);
  return payload + conn->notify_size;
}
33

    
34
#ifdef IPV6
35
static byte *
36
bgp_put_cap_ipv6(struct bgp_conn *conn UNUSED, byte *buf)
37
{
38
  *buf++ = 1;                /* Capability 1: Multiprotocol extensions */
39
  *buf++ = 4;                /* Capability data length */
40
  *buf++ = 0;                /* We support AF IPv6 */
41
  *buf++ = BGP_AF_IPV6;
42
  *buf++ = 0;                /* RFU */
43
  *buf++ = 1;                /* and SAFI 1 */
44
  return buf;
45
}
46
#endif
47

    
48
/*
 * Append the 4-octet AS number capability, carrying our local AS number,
 * at @buf and return the position following it.
 */
static byte *
bgp_put_cap_as4(struct bgp_conn *conn, byte *buf)
{
  buf[0] = 65;                /* Capability 65: Support for 4-octet AS number */
  buf[1] = 4;                 /* Capability data length */
  put_u32(buf + 2, conn->bgp->local_as);
  return buf + 6;
}
56

    
57
/*
 * Build the body of an OPEN message at @buf: version, 16-bit AS number
 * (AS_TRANS when the local AS does not fit), hold time and router ID,
 * followed by an optional Capabilities parameter. Returns a pointer just
 * past the last byte written.
 */
static byte *
bgp_create_open(struct bgp_conn *conn, byte *buf)
{
  struct bgp_proto *p = conn->bgp;
  byte *caps_start = buf + 12;  /* 3 B skipped: params length + Capabilities parameter header */
  byte *caps_end = caps_start;
  int caps_size;

  BGP_TRACE(D_PACKETS, "Sending OPEN(ver=%d,as=%d,hold=%d,id=%08x)",
            BGP_VERSION, p->local_as, p->cf->hold_time, p->local_id);
  buf[0] = BGP_VERSION;
  put_u16(buf+1, (p->local_as < 0xFFFF) ? p->local_as : AS_TRANS);
  put_u16(buf+3, p->cf->hold_time);
  put_u32(buf+5, p->local_id);

#ifdef IPV6
  caps_end = bgp_put_cap_ipv6(conn, caps_end);
#endif
  if (p->cf->enable_as4)
    caps_end = bgp_put_cap_as4(conn, caps_end);

  caps_size = caps_end - caps_start;
  if (caps_size <= 0)
    {
      buf[9] = 0;                /* No optional parameters */
      return buf + 10;
    }

  buf[9]  = caps_size + 2;       /* Optional params len */
  buf[10] = 2;                   /* Option: Capability list */
  buf[11] = caps_size;           /* Option length */
  return caps_end;
}
93

    
94
static unsigned int
95
bgp_encode_prefixes(struct bgp_proto *p, byte *w, struct bgp_bucket *buck, unsigned int remains)
96
{
97
  byte *start = w;
98
  ip_addr a;
99
  int bytes;
100

    
101
  while (!EMPTY_LIST(buck->prefixes) && remains >= 5)
102
    {
103
      struct bgp_prefix *px = SKIP_BACK(struct bgp_prefix, bucket_node, HEAD(buck->prefixes));
104
      DBG("\tDequeued route %I/%d\n", px->n.prefix, px->n.pxlen);
105
      *w++ = px->n.pxlen;
106
      bytes = (px->n.pxlen + 7) / 8;
107
      a = px->n.prefix;
108
      ipa_hton(a);
109
      memcpy(w, &a, bytes);
110
      w += bytes;
111
      remains -= bytes + 1;
112
      rem_node(&px->bucket_node);
113
      fib_delete(&p->prefix_fib, px);
114
    }
115
  return w - start;
116
}
117

    
118
#ifndef IPV6                /* IPv4 version */
119

    
120
/*
 * Build the body of an IPv4 UPDATE message at @buf: withdrawn routes taken
 * from the withdraw bucket, then (if at least 2048 bytes remain) the
 * attributes and prefixes of the first non-empty bucket in the send queue.
 * Empty buckets met on the way are removed and freed. Returns a pointer
 * past the end of the body, or NULL when there was nothing to send.
 */
static byte *
bgp_create_update(struct bgp_conn *conn, byte *buf)
{
  struct bgp_proto *p = conn->bgp;
  struct bgp_bucket *buck = p->withdraw_bucket;
  int remains = BGP_MAX_PACKET_LENGTH - BGP_HEADER_LENGTH - 4;
  byte *w = buf + 2;
  int wd_size = 0;
  int r_size = 0;
  int a_size = 0;

  if (buck && !EMPTY_LIST(buck->prefixes))
    {
      DBG("Withdrawn routes:\n");
      wd_size = bgp_encode_prefixes(p, w, buck, remains);
      w += wd_size;
      remains -= wd_size;
    }
  put_u16(buf, wd_size);

  if (remains >= 2048)
    {
      for (;;)
        {
          buck = (struct bgp_bucket *) HEAD(p->bucket_queue);
          if (!buck->send_node.next)
            break;                        /* End of the queue */

          if (EMPTY_LIST(buck->prefixes))
            {
              DBG("Deleting empty bucket %p\n", buck);
              rem_node(&buck->send_node);
              bgp_free_bucket(p, buck);
              continue;
            }

          /* Only one bucket (one attribute set) goes into a single UPDATE */
          DBG("Processing bucket %p\n", buck);
          a_size = bgp_encode_attrs(p, w+2, buck->eattrs, 1024);
          put_u16(w, a_size);
          w += a_size + 2;
          r_size = bgp_encode_prefixes(p, w, buck, remains - a_size);
          w += r_size;
          break;
        }
    }

  if (!a_size)                                /* Attributes not already encoded */
    {
      put_u16(w, 0);
      w += 2;
    }

  if (!wd_size && !r_size)
    return NULL;

  BGP_TRACE(D_PACKETS, "Sending UPDATE");
  return w;
}
174

    
175
#else                /* IPv6 version */
176

    
177
/*
 * Build the body of an IPv6 UPDATE message at @buf. The classic withdrawn
 * routes field is left empty; withdrawals go into an MP_UNREACH_NLRI
 * attribute and announcements (if at least 2048 bytes remain) into an
 * MP_REACH_NLRI attribute appended after the attributes of the first
 * non-empty bucket in the send queue. Empty buckets met on the way are
 * removed and freed. Flushes bgp_linpool before returning. Returns a
 * pointer past the end of the body, or NULL when no attributes were
 * written.
 */
static byte *
bgp_create_update(struct bgp_conn *conn, byte *buf)
{
  struct bgp_proto *p = conn->bgp;
  struct bgp_bucket *buck;
  int size, is_ll;
  int remains = BGP_MAX_PACKET_LENGTH - BGP_HEADER_LENGTH - 4;
  byte *w, *tmp, *tstart;
  ip_addr ip, ip_ll;
  /*
   * NOTE(review): bgp_attach_attr_wa() receives &ea; it presumably links the
   * new list in front of *to, so do not hand it an indeterminate pointer.
   */
  ea_list *ea = NULL;
  eattr *nh;
  neighbor *n;

  put_u16(buf, 0);                        /* No classic withdrawn routes */
  w = buf+4;

  if ((buck = p->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
    {
      DBG("Withdrawn routes:\n");
      tmp = bgp_attach_attr_wa(&ea, bgp_linpool, BA_MP_UNREACH_NLRI, remains-8);
      *tmp++ = 0;
      *tmp++ = BGP_AF_IPV6;
      *tmp++ = 1;
      /*
       * The attribute data starts with the 3-byte AFI/SAFI header written
       * above, so its length is that header plus the encoded prefixes
       * (the MP_REACH_NLRI branch below likewise counts from the start of
       * the attribute data).
       */
      ea->attrs[0].u.ptr->length = 3 + bgp_encode_prefixes(p, tmp, buck, remains-11);
      size = bgp_encode_attrs(p, w, ea, remains);
      w += size;
      remains -= size;
    }

  if (remains >= 2048)
    {
      while ((buck = (struct bgp_bucket *) HEAD(p->bucket_queue))->send_node.next)
        {
          if (EMPTY_LIST(buck->prefixes))
            {
              DBG("Deleting empty bucket %p\n", buck);
              rem_node(&buck->send_node);
              bgp_free_bucket(p, buck);
              continue;
            }
          DBG("Processing bucket %p\n", buck);
          size = bgp_encode_attrs(p, w, buck->eattrs, 1024);
          w += size;
          remains -= size;
          tstart = tmp = bgp_attach_attr_wa(&ea, bgp_linpool, BA_MP_REACH_NLRI, remains-8);
          *tmp++ = 0;
          *tmp++ = BGP_AF_IPV6;
          *tmp++ = 1;
          nh = ea_find(buck->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP));
          ASSERT(nh);
          ip = *(ip_addr *) nh->u.ptr->data;

          /* Decide whether a link-local next hop accompanies the global one */
          is_ll = 0;
          if (ipa_equal(ip, p->local_addr))
            {
              is_ll = 1;
              ip_ll = p->local_link;
            }
          else
            {
              n = neigh_find(&p->p, &ip, 0);
              if (n && n->iface == p->neigh->iface)
                {
                  /* FIXME: We are assuming the global scope addresses use the lower 64 bits
                   * as an interface identifier which hasn't necessarily to be true.
                   */
                  is_ll = 1;
                  ip_ll = ipa_or(ipa_build(0xfe800000,0,0,0), ipa_and(ip, ipa_build(0,0,~0,~0)));
                }
            }

          if (is_ll)
            {
              *tmp++ = 32;                /* Next hop length: global + link-local */
              ipa_hton(ip);
              memcpy(tmp, &ip, 16);
              ipa_hton(ip_ll);
              memcpy(tmp+16, &ip_ll, 16);
              tmp += 32;
            }
          else
            {
              *tmp++ = 16;                /* Next hop length: global only */
              ipa_hton(ip);
              memcpy(tmp, &ip, 16);
              tmp += 16;
            }
          *tmp++ = 0;                        /* No SNPA information */
          tmp += bgp_encode_prefixes(p, tmp, buck, remains - (8+3+32+1));
          ea->attrs[0].u.ptr->length = tmp - tstart;
          w += bgp_encode_attrs(p, w, ea, remains);
          break;                        /* One bucket per UPDATE */
        }
    }

  size = w - (buf+4);
  put_u16(buf+2, size);                        /* Total path attribute length */
  lp_flush(bgp_linpool);
  if (size)
    {
      BGP_TRACE(D_PACKETS, "Sending UPDATE");
      return w;
    }
  else
    return NULL;
}
281

    
282
#endif
283

    
284
static void
285
bgp_create_header(byte *buf, unsigned int len, unsigned int type)
286
{
287
  memset(buf, 0xff, 16);                /* Marker */
288
  put_u16(buf+16, len);
289
  buf[18] = type;
290
}
291

    
292
/**
293
 * bgp_fire_tx - transmit packets
294
 * @conn: connection
295
 *
296
 * Whenever the transmit buffers of the underlying TCP connection
297
 * are free and we have any packets queued for sending, the socket functions
298
 * call bgp_fire_tx() which takes care of selecting the highest priority packet
299
 * queued (Notification > Keepalive > Open > Update), assembling its header
300
 * and body and sending it to the connection.
301
 */
302
static int
303
bgp_fire_tx(struct bgp_conn *conn)
304
{
305
  struct bgp_proto *p = conn->bgp;
306
  unsigned int s = conn->packets_to_send;
307
  sock *sk = conn->sk;
308
  byte *buf, *pkt, *end;
309
  int type;
310

    
311
  if (!sk)
312
    {
313
      conn->packets_to_send = 0;
314
      return 0;
315
    }
316
  buf = sk->tbuf;
317
  pkt = buf + BGP_HEADER_LENGTH;
318

    
319
  if (s & (1 << PKT_SCHEDULE_CLOSE))
320
    {
321
      bgp_close_conn(conn);
322
      return 0;
323
    }
324
  if (s & (1 << PKT_NOTIFICATION))
325
    {
326
      s = 1 << PKT_SCHEDULE_CLOSE;
327
      type = PKT_NOTIFICATION;
328
      end = bgp_create_notification(conn, pkt);
329
    }
330
  else if (s & (1 << PKT_KEEPALIVE))
331
    {
332
      s &= ~(1 << PKT_KEEPALIVE);
333
      type = PKT_KEEPALIVE;
334
      end = pkt;                        /* Keepalives carry no data */
335
      BGP_TRACE(D_PACKETS, "Sending KEEPALIVE");
336
      bgp_start_timer(conn->keepalive_timer, conn->keepalive_time);
337
    }
338
  else if (s & (1 << PKT_OPEN))
339
    {
340
      s &= ~(1 << PKT_OPEN);
341
      type = PKT_OPEN;
342
      end = bgp_create_open(conn, pkt);
343
    }
344
  else if (s & (1 << PKT_UPDATE))
345
    {
346
      end = bgp_create_update(conn, pkt);
347
      type = PKT_UPDATE;
348
      if (!end)
349
        {
350
          conn->packets_to_send = 0;
351
          return 0;
352
        }
353
    }
354
  else
355
    return 0;
356
  conn->packets_to_send = s;
357
  bgp_create_header(buf, end - buf, type);
358
  return sk_send(sk, end - buf);
359
}
360

    
361
/**
362
 * bgp_schedule_packet - schedule a packet for transmission
363
 * @conn: connection
364
 * @type: packet type
365
 *
366
 * Schedule a packet of type @type to be sent as soon as possible.
367
 */
368
void
369
bgp_schedule_packet(struct bgp_conn *conn, int type)
370
{
371
  DBG("BGP: Scheduling packet type %d\n", type);
372
  conn->packets_to_send |= 1 << type;
373
  if (conn->sk && conn->sk->tpos == conn->sk->tbuf)
374
    while (bgp_fire_tx(conn))
375
      ;
376
}
377

    
378
void
379
bgp_tx(sock *sk)
380
{
381
  struct bgp_conn *conn = sk->data;
382

    
383
  DBG("BGP: TX hook\n");
384
  while (bgp_fire_tx(conn))
385
    ;
386
}
387

    
388
/* Capability negotiation as per RFC 2842 */
389

    
390
void
391
bgp_parse_capabilities(struct bgp_conn *conn, byte *opt, int len)
392
{
393
  struct bgp_proto *p = conn->bgp;
394
  int cl;
395
  u32 as;
396

    
397
  while (len > 0)
398
    {
399
      if (len < 2 || len < 2 + opt[1])
400
        goto err;
401
      
402
      cl = opt[1];
403

    
404
      switch (opt[0])
405
        {
406
        case 65:
407
          if (cl != 4)
408
            goto err;
409
          p->as4_support = 1;
410
          p->as4_session = p->cf->enable_as4;
411
          if (p->as4_session)
412
            conn->advertised_as = get_u32(opt + 2);
413
          break;
414

    
415
          /* We can safely ignore all other capabilities */
416
        }
417
      len -= 2 + cl;
418
      opt += 2 + cl;
419
    }
420
  return;
421

    
422
    err:
423
  bgp_error(conn, 2, 0, NULL, 0);
424
  return;
425
}
426

    
427
static int
428
bgp_parse_options(struct bgp_conn *conn, byte *opt, int len)
429
{
430
  int ol;
431

    
432
  while (len > 0)
433
    {
434
      if (len < 2 || len < 2 + opt[1])
435
        { bgp_error(conn, 2, 0, NULL, 0); return 0; }
436
#ifdef LOCAL_DEBUG
437
      {
438
        int i;
439
        DBG("\tOption %02x:", opt[0]);
440
        for(i=0; i<opt[1]; i++)
441
          DBG(" %02x", opt[2+i]);
442
        DBG("\n");
443
      }
444
#endif
445

    
446
      ol = opt[1];
447
      switch (opt[0])
448
        {
449
        case 2:
450
          bgp_parse_capabilities(conn, opt + 2, ol);
451
          break;
452

    
453
        default:
454
          /*
455
           *  BGP specs don't tell us to send which option
456
           *  we didn't recognize, but it's common practice
457
           *  to do so. Also, capability negotiation with
458
           *  Cisco routers doesn't work without that.
459
           */
460
          bgp_error(conn, 2, 4, opt, ol);
461
          return 0;
462
        }
463
      len -= 2 + ol;
464
      opt += 2 + ol;
465
    }
466
  return 0;
467
}
468

    
469
static void
470
bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len)
471
{
472
  struct bgp_conn *other;
473
  struct bgp_proto *p = conn->bgp;
474
  struct bgp_config *cf = p->cf;
475
  unsigned hold;
476
  u32 id;
477

    
478
  /* Check state */
479
  if (conn->state != BS_OPENSENT)
480
    { bgp_error(conn, 5, 0, NULL, 0); }
481

    
482
  /* Check message contents */
483
  if (len < 29 || len != 29 + pkt[28])
484
    { bgp_error(conn, 1, 2, pkt+16, 2); return; }
485
  if (pkt[19] != BGP_VERSION)
486
    { bgp_error(conn, 2, 1, pkt+19, 1); return; } /* RFC 1771 says 16 bits, draft-09 tells to use 8 */
487
  conn->advertised_as = get_u16(pkt+20);
488
  hold = get_u16(pkt+22);
489
  id = get_u32(pkt+24);
490
  BGP_TRACE(D_PACKETS, "Got OPEN(as=%d,hold=%d,id=%08x)", conn->advertised_as, hold, id);
491

    
492
  p->remote_id = id; // ???
493
  if (bgp_parse_options(conn, pkt+29, pkt[28]))
494
    return;
495

    
496
  if (hold > 0 && hold < 3)
497
    { bgp_error(conn, 2, 6, pkt+22, 2); return; }
498

    
499
  if (!id || id == 0xffffffff || id == p->local_id)
500
    { bgp_error(conn, 2, 3, pkt+24, -4); return; }
501

    
502

    
503
  if (conn->advertised_as != p->remote_as)
504
    {
505
      bgp_error(conn, 2, 2, (byte *) &(conn->advertised_as), -4); return;
506
    }
507

    
508
  /* Check the other connection */
509
  other = (conn == &p->outgoing_conn) ? &p->incoming_conn : &p->outgoing_conn;
510
  switch (other->state)
511
    {
512
    case BS_IDLE:
513
    case BS_CONNECT:
514
    case BS_ACTIVE:
515
    case BS_OPENSENT:
516
      break;
517
    case BS_OPENCONFIRM:
518
      if ((p->local_id < id) == (conn == &p->incoming_conn))
519
        {
520
          /* Should close the other connection */
521
          BGP_TRACE(D_EVENTS, "Connection collision, giving up the other connection");
522
          bgp_error(other, 6, 0, NULL, 0);
523
          break;
524
        }
525
      /* Fall thru */
526
    case BS_ESTABLISHED:
527
      /* Should close this connection */
528
      BGP_TRACE(D_EVENTS, "Connection collision, giving up this connection");
529
      bgp_error(conn, 6, 0, NULL, 0);
530
      return;
531
    default:
532
      bug("bgp_rx_open: Unknown state");
533
    }
534

    
535
  /* Make this connection primary */
536
  conn->primary = 1;
537
  p->conn = conn;
538

    
539
  /* Update our local variables */
540
  if (hold < p->cf->hold_time)
541
    conn->hold_time = hold;
542
  else
543
    conn->hold_time = p->cf->hold_time;
544
  conn->keepalive_time = p->cf->keepalive_time ? : conn->hold_time / 3;
545
  // p->remote_as = conn->advertised_as;
546
  p->remote_id = id;
547
  DBG("BGP: Hold timer set to %d, keepalive to %d, AS to %d, ID to %x\n", conn->hold_time, conn->keepalive_time, p->remote_as, p->remote_id);
548

    
549
  bgp_schedule_packet(conn, PKT_KEEPALIVE);
550
  bgp_start_timer(conn->hold_timer, conn->hold_time);
551
  conn->state = BS_OPENCONFIRM;
552
}
553

    
554
/*
 * DECODE_PREFIX - take one encoded prefix from a byte stream
 *
 * Reads the prefix length byte at @pp followed by the minimal number of
 * address bytes, advancing @pp and decreasing the remaining length @ll.
 * The result is stored in the caller's `prefix' and `pxlen' variables,
 * with the address converted to host order and bits beyond the prefix
 * length cleared. On a too-long prefix length (err=10) or truncated data
 * (err=1) it sets the caller's `err' and jumps to the caller's `bad' label.
 */
#define DECODE_PREFIX(pp, ll) do {                        \
  int px_bits = *pp++;                                    \
  int px_bytes;                                           \
  ll--;                                                   \
  if (px_bits > BITS_PER_IP_ADDRESS)                      \
    { err=10; goto bad; }                                 \
  px_bytes = (px_bits+7) / 8;                             \
  if (ll < px_bytes)                                      \
    { err=1; goto bad; }                                  \
  memcpy(&prefix, pp, px_bytes);                          \
  pp += px_bytes;                                         \
  ll -= px_bytes;                                         \
  ipa_ntoh(prefix);                                       \
  prefix = ipa_and(prefix, ipa_mkmask(px_bits));          \
  pxlen = px_bits;                                        \
} while (0)
568

    
569
static inline int
570
bgp_get_nexthop(struct bgp_proto *bgp, rta *a)
571
{
572
  neighbor *neigh;
573
  ip_addr nexthop;
574
  struct eattr *nh = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP));
575
  ASSERT(nh);
576
  nexthop = *(ip_addr *) nh->u.ptr->data;
577
  neigh = neigh_find(&bgp->p, &nexthop, 0);
578
  if (neigh)
579
    {
580
      if (neigh->scope == SCOPE_HOST)
581
        {
582
          DBG("BGP: Loop!\n");
583
          return 0;
584
        }
585
    }
586
  else
587
    neigh = bgp->neigh;
588
  a->gw = neigh->addr;
589
  a->iface = neigh->iface;
590
  return 1;
591
}
592

    
593
#ifndef IPV6                /* IPv4 version */
594

    
595
static void
596
bgp_do_rx_update(struct bgp_conn *conn,
597
                 byte *withdrawn, int withdrawn_len,
598
                 byte *nlri, int nlri_len,
599
                 byte *attrs, int attr_len)
600
{
601
  struct bgp_proto *p = conn->bgp;
602
  rta *a0;
603
  rta *a = NULL;
604
  ip_addr prefix;
605
  net *n;
606
  int err = 0, pxlen;
607

    
608
  /* Withdraw routes */
609
  while (withdrawn_len)
610
    {
611
      DECODE_PREFIX(withdrawn, withdrawn_len);
612
      DBG("Withdraw %I/%d\n", prefix, pxlen);
613
      if (n = net_find(p->p.table, prefix, pxlen))
614
        rte_update(p->p.table, n, &p->p, NULL);
615
    }
616

    
617
  if (!attr_len && !nlri_len)                /* shortcut */
618
    return;
619

    
620
  a0 = bgp_decode_attrs(conn, attrs, attr_len, bgp_linpool, nlri_len);
621
  if (a0 && nlri_len && bgp_get_nexthop(p, a0))
622
    {
623
      a = rta_lookup(a0);
624
      while (nlri_len)
625
        {
626
          rte *e;
627
          DECODE_PREFIX(nlri, nlri_len);
628
          DBG("Add %I/%d\n", prefix, pxlen);
629
          e = rte_get_temp(rta_clone(a));
630
          n = net_get(p->p.table, prefix, pxlen);
631
          e->net = n;
632
          e->pflags = 0;
633
          rte_update(p->p.table, n, &p->p, e);
634
        }
635
    }
636
bad:
637
  if (a)
638
    rta_free(a);
639
  if (err)
640
    bgp_error(conn, 3, err, NULL, 0);
641
  return;
642
}
643

    
644
#else                        /* IPv6 version */
645

    
646
/*
 * DO_NLRI - set up parsing of a multiprotocol NLRI attribute
 *
 * Loads the start and length saved in p->@name_start / p->@name_len into
 * the caller's start, x, len and len0 variables. For a non-empty attribute
 * it reads the address family and the following sub-family byte into af
 * and sub and skips them (jumping to the caller's `bad' label when fewer
 * than 3 bytes are present); for an empty one af is set to 0. The macro
 * ends with an open `if', so the statement following it runs only for
 * the IPv6 address family.
 */
#define DO_NLRI(name)                                        \
  x = p->name##_start;                                       \
  start = x;                                                 \
  len0 = p->name##_len;                                      \
  len = len0;                                                \
  if (len)                                                   \
    {                                                        \
      if (len < 3) goto bad;                                 \
      af = get_u16(x);                                       \
      sub = *(x + 2);                                        \
      x += 3;                                                \
      len -= 3;                                              \
      DBG("\tNLRI AF=%d sub=%d len=%d\n", af, sub, len);\
    }                                                        \
  else                                                       \
    af = 0;                                                  \
  if (af == BGP_AF_IPV6)
661

    
662
static void
663
bgp_do_rx_update(struct bgp_conn *conn,
664
                 byte *withdrawn, int withdrawn_len,
665
                 byte *nlri, int nlri_len,
666
                 byte *attrs, int attr_len)
667
{
668
  struct bgp_proto *p = conn->bgp;
669
  byte *start, *x;
670
  int len, len0;
671
  unsigned af, sub;
672
  rta *a0;
673
  rta *a = NULL;
674
  ip_addr prefix;
675
  net *n;
676
  rte e;
677
  int err = 0, pxlen;
678

    
679
  p->mp_reach_len = 0;
680
  p->mp_unreach_len = 0;
681
  a0 = bgp_decode_attrs(conn, attrs, attr_len, bgp_linpool, 0);
682
  if (!a0)
683
    return;
684

    
685
  DO_NLRI(mp_unreach)
686
    {
687
      while (len)
688
        {
689
          DECODE_PREFIX(x, len);
690
          DBG("Withdraw %I/%d\n", prefix, pxlen);
691
          if (n = net_find(p->p.table, prefix, pxlen))
692
            rte_update(p->p.table, n, &p->p, NULL);
693
        }
694
    }
695

    
696
  DO_NLRI(mp_reach)
697
    {
698
      int i;
699

    
700
      /* Create fake NEXT_HOP attribute */
701
      if (len < 1 || (*x != 16 && *x != 32) || len < *x + 2)
702
        goto bad;
703
      memcpy(bgp_attach_attr_wa(&a0->eattrs, bgp_linpool, BA_NEXT_HOP, 16), x+1, 16);
704
      len -= *x + 2;
705
      x += *x + 1;
706

    
707
      /* Ignore SNPA info */
708
      i = *x++;
709
      while (i--)
710
        {
711
          if (len < 1 || len < 1 + *x)
712
            goto bad;
713
          len -= *x + 1;
714
          x += *x + 1;
715
        }
716

    
717
      if (bgp_get_nexthop(p, a0))
718
        {
719
          a = rta_lookup(a0);
720
          while (len)
721
            {
722
              rte *e;
723
              DECODE_PREFIX(x, len);
724
              DBG("Add %I/%d\n", prefix, pxlen);
725
              e = rte_get_temp(rta_clone(a));
726
              n = net_get(p->p.table, prefix, pxlen);
727
              e->net = n;
728
              e->pflags = 0;
729
              rte_update(p->p.table, n, &p->p, e);
730
            }
731
          rta_free(a);
732
        }
733
    }
734

    
735
  return;
736

    
737
bad:
738
  bgp_error(conn, 3, 9, start, len0);
739
  if (a)
740
    rta_free(a);
741
  return;
742
}
743

    
744
#endif
745

    
746
static void
747
bgp_rx_update(struct bgp_conn *conn, byte *pkt, int len)
748
{
749
  struct bgp_proto *p = conn->bgp;
750
  byte *withdrawn, *attrs, *nlri;
751
  int withdrawn_len, attr_len, nlri_len;
752

    
753
  BGP_TRACE(D_PACKETS, "Got UPDATE");
754
  if (conn->state != BS_ESTABLISHED)
755
    { bgp_error(conn, 5, 0, NULL, 0); return; }
756
  bgp_start_timer(conn->hold_timer, conn->hold_time);
757

    
758
  /* Find parts of the packet and check sizes */
759
  if (len < 23)
760
    {
761
      bgp_error(conn, 1, 2, pkt+16, 2);
762
      return;
763
    }
764
  withdrawn = pkt + 21;
765
  withdrawn_len = get_u16(pkt + 19);
766
  if (withdrawn_len + 23 > len)
767
    goto malformed;
768
  attrs = withdrawn + withdrawn_len + 2;
769
  attr_len = get_u16(attrs - 2);
770
  if (withdrawn_len + attr_len + 23 > len)
771
    goto malformed;
772
  nlri = attrs + attr_len;
773
  nlri_len = len - withdrawn_len - attr_len - 23;
774
  if (!attr_len && nlri_len)
775
    goto malformed;
776
  DBG("Sizes: withdrawn=%d, attrs=%d, NLRI=%d\n", withdrawn_len, attr_len, nlri_len);
777

    
778
  lp_flush(bgp_linpool);
779

    
780
  bgp_do_rx_update(conn, withdrawn, withdrawn_len, nlri, nlri_len, attrs, attr_len);
781
  return;
782

    
783
malformed:
784
  bgp_error(conn, 3, 1, NULL, 0);
785
}
786

    
787
static struct {
788
  byte major, minor;
789
  byte *msg;
790
} bgp_msg_table[] = {
791
  { 1, 0, "Invalid message header" },
792
  { 1, 1, "Connection not synchronized" },
793
  { 1, 2, "Bad message length" },
794
  { 1, 3, "Bad message type" },
795
  { 2, 0, "Invalid OPEN message" },
796
  { 2, 1, "Unsupported version number" },
797
  { 2, 2, "Bad peer AS" },
798
  { 2, 3, "Bad BGP identifier" },
799
  { 2, 4, "Unsupported optional parameter" },
800
  { 2, 5, "Authentication failure" },
801
  { 2, 6, "Unacceptable hold time" },
802
  { 2, 7, "Required capability missing" }, /* [RFC3392] */
803
  { 3, 0, "Invalid UPDATE message" },
804
  { 3, 1, "Malformed attribute list" },
805
  { 3, 2, "Unrecognized well-known attribute" },
806
  { 3, 3, "Missing mandatory attribute" },
807
  { 3, 4, "Invalid attribute flags" },
808
  { 3, 5, "Invalid attribute length" },
809
  { 3, 6, "Invalid ORIGIN attribute" },
810
  { 3, 7, "AS routing loop" },                /* Deprecated */
811
  { 3, 8, "Invalid NEXT_HOP attribute" },
812
  { 3, 9, "Optional attribute error" },
813
  { 3, 10, "Invalid network field" },
814
  { 3, 11, "Malformed AS_PATH" },
815
  { 4, 0, "Hold timer expired" },
816
  { 5, 0, "Finite state machine error" },
817
  { 6, 0, "Cease" }
818
};
819

    
820
void
821
bgp_log_error(struct bgp_proto *p, char *msg, unsigned code, unsigned subcode, byte *data, unsigned len)
822
{
823
  byte *name, namebuf[16];
824
  byte *t, argbuf[36];
825
  unsigned i;
826

    
827
  if (code == 6 && !subcode)                /* Don't report Cease messages */
828
    return;
829

    
830
  bsprintf(namebuf, "%d.%d", code, subcode);
831
  name = namebuf;
832
  for (i=0; i < ARRAY_SIZE(bgp_msg_table); i++)
833
    if (bgp_msg_table[i].major == code && bgp_msg_table[i].minor == subcode)
834
      {
835
        name = bgp_msg_table[i].msg;
836
        break;
837
      }
838
  t = argbuf;
839
  if (len)
840
    {
841
      *t++ = ':';
842
      *t++ = ' ';
843
      if (len > 16)
844
        len = 16;
845
      for (i=0; i<len; i++)
846
        t += bsprintf(t, "%02x", data[i]);
847
    }
848
  *t = 0;
849
  log(L_REMOTE "%s: %s: %s%s", p->p.name, msg, name, argbuf);
850
}
851

    
852
static void
853
bgp_rx_notification(struct bgp_conn *conn, byte *pkt, int len)
854
{
855
  if (len < 21)
856
    {
857
      bgp_error(conn, 1, 2, pkt+16, 2);
858
      return;
859
    }
860
  bgp_log_error(conn->bgp, "Received error notification", pkt[19], pkt[20], pkt+21, len-21);
861
  conn->error_flag = 1;
862
  if (conn->primary)
863
    proto_notify_state(&conn->bgp->p, PS_STOP);
864
  bgp_schedule_packet(conn, PKT_SCHEDULE_CLOSE);
865
}
866

    
867
static void
868
bgp_rx_keepalive(struct bgp_conn *conn)
869
{
870
  struct bgp_proto *p = conn->bgp;
871

    
872
  BGP_TRACE(D_PACKETS, "Got KEEPALIVE");
873
  bgp_start_timer(conn->hold_timer, conn->hold_time);
874
  switch (conn->state)
875
    {
876
    case BS_OPENCONFIRM:
877
      DBG("BGP: UP!!!\n");
878
      conn->state = BS_ESTABLISHED;
879
      bgp_attr_init(conn->bgp);
880
      proto_notify_state(&conn->bgp->p, PS_UP);
881
      break;
882
    case BS_ESTABLISHED:
883
      break;
884
    default:
885
      bgp_error(conn, 5, 0, NULL, 0);
886
    }
887
}
888

    
889
/**
890
 * bgp_rx_packet - handle a received packet
891
 * @conn: BGP connection
892
 * @pkt: start of the packet
893
 * @len: packet size
894
 *
895
 * bgp_rx_packet() takes a newly received packet and calls the corresponding
896
 * packet handler according to the packet type.
897
 */
898
static void
899
bgp_rx_packet(struct bgp_conn *conn, byte *pkt, unsigned len)
900
{
901
  DBG("BGP: Got packet %02x (%d bytes)\n", pkt[18], len);
902
  switch (pkt[18])
903
    {
904
    case PKT_OPEN:                return bgp_rx_open(conn, pkt, len);
905
    case PKT_UPDATE:                return bgp_rx_update(conn, pkt, len);
906
    case PKT_NOTIFICATION:      return bgp_rx_notification(conn, pkt, len);
907
    case PKT_KEEPALIVE:                return bgp_rx_keepalive(conn);
908
    default:                        bgp_error(conn, 1, 3, pkt+18, 1);
909
    }
910
}
911

    
912
/**
913
 * bgp_rx - handle received data
914
 * @sk: socket
915
 * @size: amount of data received
916
 *
917
 * bgp_rx() is called by the socket layer whenever new data arrive from
918
 * the underlying TCP connection. It assembles the data fragments to packets,
919
 * checks their headers and framing and passes complete packets to
920
 * bgp_rx_packet().
921
 */
922
int
923
bgp_rx(sock *sk, int size)
924
{
925
  struct bgp_conn *conn = sk->data;
926
  byte *pkt_start = sk->rbuf;
927
  byte *end = pkt_start + size;
928
  unsigned i, len;
929

    
930
  DBG("BGP: RX hook: Got %d bytes\n", size);
931
  while (end >= pkt_start + BGP_HEADER_LENGTH)
932
    {
933
      if (conn->error_flag)
934
        {
935
          /*
936
           *  We still need to remember the erroneous packet, so that
937
           *  we can generate error notifications properly.  To avoid
938
           *  subsequent reads rewriting the buffer, we just reset the
939
           *  rx_hook.
940
           */
941
          DBG("BGP: Error, dropping input\n");
942
          sk->rx_hook = NULL;
943
          return 0;
944
        }
945
      for(i=0; i<16; i++)
946
        if (pkt_start[i] != 0xff)
947
          {
948
            bgp_error(conn, 1, 1, NULL, 0);
949
            break;
950
          }
951
      len = get_u16(pkt_start+16);
952
      if (len < BGP_HEADER_LENGTH || len > BGP_MAX_PACKET_LENGTH)
953
        {
954
          bgp_error(conn, 1, 2, pkt_start+16, 2);
955
          break;
956
        }
957
      if (end < pkt_start + len)
958
        break;
959
      bgp_rx_packet(conn, pkt_start, len);
960
      pkt_start += len;
961
    }
962
  if (pkt_start != sk->rbuf)
963
    {
964
      memmove(sk->rbuf, pkt_start, end - pkt_start);
965
      sk->rpos = sk->rbuf + (end - pkt_start);
966
    }
967
  return 0;
968
}