Statistics
| Branch: | Revision:

iof-bird-daemon / proto / bgp / bgp.c @ e21423ba

History | View | Annotate | Download (18 KB)

1
/*
2
 *        BIRD -- The Border Gateway Protocol
3
 *
4
 *        (c) 2000 Martin Mares <mj@ucw.cz>
5
 *
6
 *        Can be freely distributed and used under the terms of the GNU GPL.
7
 */
8

    
9
/**
10
 * DOC: Border Gateway Protocol
11
 *
12
 * The BGP protocol is implemented in three parts: |bgp.c| which takes care of the
13
 * connection and most of the interface with BIRD core, |packets.c| handling
14
 * both incoming and outgoing BGP packets and |attrs.c| containing functions for
15
 * manipulation with BGP attribute lists.
16
 *
17
 * As opposed to the other existing routing daemons, BIRD has a sophisticated core
18
 * architecture which is able to keep all the information needed by BGP in the
19
 * primary routing table, therefore no complex data structures like a central
20
 * BGP table are needed. This increases memory footprint of a BGP router with
21
 * many connections, but not too much and, which is more important, it makes
22
 * BGP much easier to implement.
23
 *
24
 * Each instance of BGP (corresponding to a single BGP peer) is described by a &bgp_proto
25
 * structure to which are attached individual connections represented by &bgp_conn
26
 * (usually, there exists only one connection, but during BGP session setup, there
27
 * can be more of them). The connections are handled according to the BGP state machine
28
 * defined in the RFC with all the timers and all the parameters configurable.
29
 *
30
 * In incoming direction, we listen on the connection's socket and each time we receive
31
 * some input, we pass it to bgp_rx(). It decodes packet headers and the markers and
32
 * passes complete packets to bgp_rx_packet() which distributes the packet according
33
 * to its type.
34
 *
35
 * In outgoing direction, we gather all the routing updates and sort them to buckets
36
 * (&bgp_bucket) according to their attributes (we keep a hash table for fast comparison
37
 * of &rta's and a &fib which helps us to find if we already have another route for
38
 * the same destination queued for sending, so that we can replace it with the new one
39
 * immediately instead of sending both updates). There also exists a special bucket holding
40
 * all the route withdrawals which cannot be queued anywhere else as they don't have any
41
 * attributes. If we have any packet to send (due to either new routes or the connection
42
 * tracking code wanting to send an Open, Keepalive or Notification message), we call
43
 * bgp_schedule_packet() which sets the corresponding bit in a @packets_to_send
44
 * bit field in &bgp_conn and as soon as the transmit socket buffer becomes empty,
45
 * we call bgp_fire_tx(). It inspects state of all the packet type bits and calls
46
 * the corresponding bgp_create_xx() functions, eventually rescheduling the same packet
47
 * type if we have more data of the same type to send.
48
 *
49
 * The processing of attributes consists of two functions: bgp_decode_attrs() for checking
50
 * of the attribute blocks and translating them to the language of BIRD's extended attributes
51
 * and bgp_encode_attrs() which does the converse. Both functions are built around a
52
 * @bgp_attr_table array describing all important characteristics of all known attributes.
53
 * Unknown transitive attributes are attached to the route as %EAF_TYPE_OPAQUE byte streams.
54
 */
55

    
56
#undef LOCAL_DEBUG
57

    
58
#include "nest/bird.h"
59
#include "nest/iface.h"
60
#include "nest/protocol.h"
61
#include "nest/route.h"
62
#include "nest/locks.h"
63
#include "conf/conf.h"
64
#include "lib/socket.h"
65
#include "lib/resource.h"
66
#include "lib/string.h"
67

    
68
#include "bgp.h"
69

    
70
struct linpool *bgp_linpool;                /* Global temporary pool */
71
static sock *bgp_listen_sk;                /* Global listening socket */
72
static int bgp_counter;                        /* Number of protocol instances using the listening socket */
73
static char *bgp_state_names[] = { "Idle", "Connect", "Active", "OpenSent", "OpenConfirm", "Established" };
74

    
75
static void bgp_connect(struct bgp_proto *p);
76
static void bgp_initiate(struct bgp_proto *p);
77
static void bgp_setup_listen_sk(void);
78

    
79
void
80
bgp_close(struct bgp_proto *p UNUSED)
81
{
82
  ASSERT(bgp_counter);
83
  bgp_counter--;
84
  if (!bgp_counter)
85
    {
86
      rfree(bgp_listen_sk);
87
      bgp_listen_sk = NULL;
88
      rfree(bgp_linpool);
89
      bgp_linpool = NULL;
90
    }
91
}
92

    
93
/**
94
 * bgp_start_timer - start a BGP timer
95
 * @t: timer
96
 * @value: time to fire (0 to disable the timer)
97
 *
98
 * This functions calls tm_start() on @t with time @value and the
99
 * amount of randomization suggested by the BGP standard. Please use
100
 * it for all BGP timers.
101
 */
102
void
103
bgp_start_timer(timer *t, int value)
104
{
105
  if (value)
106
    {
107
      /* The randomization procedure is specified in RFC 1771: 9.2.3.3 */
108
      t->randomize = value / 4;
109
      tm_start(t, value - t->randomize);
110
    }
111
  else
112
    tm_stop(t);
113
}
114

    
115
/**
116
 * bgp_close_conn - close a BGP connection
117
 * @conn: connection to close
118
 *
119
 * This function takes a connection described by the &bgp_conn structure,
120
 * closes its socket and frees all resources associated with it.
121
 *
122
 * If the connection is being closed due to a protocol error, adjust
123
 * the connection restart timer as well according to the error recovery
124
 * policy set in the configuration.
125
 *
126
 * If the connection was marked as primary, it shuts down the protocol as well.
127
 */
128
void
129
bgp_close_conn(struct bgp_conn *conn)
130
{
131
  struct bgp_proto *p = conn->bgp;
132
  struct bgp_config *cf = p->cf;
133

    
134
  DBG("BGP: Closing connection\n");
135
  conn->packets_to_send = 0;
136
  rfree(conn->connect_retry_timer);
137
  conn->connect_retry_timer = NULL;
138
  rfree(conn->keepalive_timer);
139
  conn->keepalive_timer = NULL;
140
  rfree(conn->hold_timer);
141
  conn->hold_timer = NULL;
142
  rfree(conn->sk);
143
  conn->sk = NULL;
144
  conn->state = BS_IDLE;
145
  if (conn->error_flag > 1)
146
    {
147
      if (cf->disable_after_error)
148
        p->p.disabled = 1;
149
      if (p->last_connect && (bird_clock_t)(p->last_connect + cf->error_amnesia_time) < now)
150
        p->startup_delay = 0;
151
      if (!p->startup_delay)
152
        p->startup_delay = cf->error_delay_time_min;
153
      else
154
        {
155
          p->startup_delay *= 2;
156
          if (p->startup_delay > cf->error_delay_time_max)
157
            p->startup_delay = cf->error_delay_time_max;
158
        }
159
    }
160
  if (conn->primary)
161
    {
162
      bgp_close(p);
163
      p->conn = NULL;
164
      proto_notify_state(&p->p, PS_DOWN);
165
    }
166
  else if (conn->error_flag > 1)
167
    bgp_initiate(p);
168
}
169

    
170
static int
171
bgp_graceful_close_conn(struct bgp_conn *c)
172
{
173
  switch (c->state)
174
    {
175
    case BS_IDLE:
176
      return 0;
177
    case BS_CONNECT:
178
    case BS_ACTIVE:
179
      bgp_close_conn(c);
180
      return 1;
181
    case BS_OPENSENT:
182
    case BS_OPENCONFIRM:
183
    case BS_ESTABLISHED:
184
      bgp_error(c, 6, 0, NULL, 0);
185
      return 1;
186
    default:
187
      bug("bgp_graceful_close_conn: Unknown state %d", c->state);
188
    }
189
}
190

    
191
static void
192
bgp_send_open(struct bgp_conn *conn)
193
{
194
  DBG("BGP: Sending open\n");
195
  conn->sk->rx_hook = bgp_rx;
196
  conn->sk->tx_hook = bgp_tx;
197
  tm_stop(conn->connect_retry_timer);
198
  bgp_schedule_packet(conn, PKT_OPEN);
199
  conn->state = BS_OPENSENT;
200
  bgp_start_timer(conn->hold_timer, conn->bgp->cf->initial_hold_time);
201
}
202

    
203
static void
204
bgp_connected(sock *sk)
205
{
206
  struct bgp_conn *conn = sk->data;
207
  struct bgp_proto *p = conn->bgp;
208

    
209
  BGP_TRACE(D_EVENTS, "Connected");
210
  bgp_send_open(conn);
211
}
212

    
213
static void
214
bgp_connect_timeout(timer *t)
215
{
216
  struct bgp_conn *conn = t->data;
217
  struct bgp_proto *p = conn->bgp;
218

    
219
  DBG("BGP: connect_timeout\n");
220
  bgp_close_conn(conn);
221
  bgp_connect(p);
222
}
223

    
224
static void
225
bgp_sock_err(sock *sk, int err)
226
{
227
  struct bgp_conn *conn = sk->data;
228
  struct bgp_proto *p = conn->bgp;
229

    
230
  if (err)
231
    BGP_TRACE(D_EVENTS, "Connection lost (%M)", err);
232
  else
233
    BGP_TRACE(D_EVENTS, "Connection closed");
234
  switch (conn->state)
235
    {
236
    case BS_CONNECT:
237
    case BS_OPENSENT:
238
      rfree(conn->sk);
239
      conn->sk = NULL;
240
      conn->state = BS_ACTIVE;
241
      bgp_start_timer(conn->connect_retry_timer, p->cf->connect_retry_time);
242
      break;
243
    case BS_OPENCONFIRM:
244
    case BS_ESTABLISHED:
245
      bgp_close_conn(conn);
246
      break;
247
    default:
248
      bug("bgp_sock_err called in invalid state %d", conn->state);
249
    }
250
}
251

    
252
static void
253
bgp_hold_timeout(timer *t)
254
{
255
  struct bgp_conn *conn = t->data;
256

    
257
  DBG("BGP: Hold timeout, closing connection\n");
258
  bgp_error(conn, 4, 0, NULL, 0);
259
}
260

    
261
static void
262
bgp_keepalive_timeout(timer *t)
263
{
264
  struct bgp_conn *conn = t->data;
265

    
266
  DBG("BGP: Keepalive timer\n");
267
  bgp_schedule_packet(conn, PKT_KEEPALIVE);
268
}
269

    
270
static void
271
bgp_setup_conn(struct bgp_proto *p, struct bgp_conn *conn)
272
{
273
  timer *t;
274

    
275
  conn->sk = NULL;
276
  conn->bgp = p;
277
  conn->packets_to_send = 0;
278
  conn->error_flag = 0;
279
  conn->primary = 0;
280

    
281
  t = conn->connect_retry_timer = tm_new(p->p.pool);
282
  t->hook = bgp_connect_timeout;
283
  t->data = conn;
284
  t = conn->hold_timer = tm_new(p->p.pool);
285
  t->hook = bgp_hold_timeout;
286
  t->data = conn;
287
  t = conn->keepalive_timer = tm_new(p->p.pool);
288
  t->hook = bgp_keepalive_timeout;
289
  t->data = conn;
290
}
291

    
292
static void
293
bgp_setup_sk(struct bgp_proto *p, struct bgp_conn *conn, sock *s)
294
{
295
  s->data = conn;
296
  s->ttl = p->cf->multihop ? : 1;
297
  s->rbsize = BGP_RX_BUFFER_SIZE;
298
  s->tbsize = BGP_TX_BUFFER_SIZE;
299
  s->err_hook = bgp_sock_err;
300
  s->tos = IP_PREC_INTERNET_CONTROL;
301
  conn->sk = s;
302
}
303

    
304
/**
305
 * bgp_connect - initiate an outgoing connection
306
 * @p: BGP instance
307
 *
308
 * The bgp_connect() function creates a new &bgp_conn and initiates
309
 * a TCP connection to the peer. The rest of connection setup is governed
310
 * by the BGP state machine as described in the standard.
311
 */
312
static void
313
bgp_connect(struct bgp_proto *p)        /* Enter Connect state and start establishing connection */
314
{
315
  sock *s;
316
  struct bgp_conn *conn = &p->outgoing_conn;
317

    
318
  DBG("BGP: Connecting\n");
319
  p->last_connect = now;
320
  s = sk_new(p->p.pool);
321
  s->type = SK_TCP_ACTIVE;
322
  if (ipa_nonzero(p->cf->source_addr))
323
    s->saddr = p->cf->source_addr;
324
  else
325
    s->saddr = p->local_addr;
326
  s->daddr = p->cf->remote_ip;
327
  s->dport = BGP_PORT;
328
  BGP_TRACE(D_EVENTS, "Connecting to %I from local address %I", s->daddr, s->saddr);
329
  bgp_setup_conn(p, conn);
330
  bgp_setup_sk(p, conn, s);
331
  s->tx_hook = bgp_connected;
332
  conn->state = BS_CONNECT;
333
  if (sk_open(s))
334
    {
335
      bgp_sock_err(s, 0);
336
      return;
337
    }
338
  DBG("BGP: Waiting for connect success\n");
339
  bgp_start_timer(conn->connect_retry_timer, p->cf->connect_retry_time);
340
}
341

    
342
static void
343
bgp_initiate(struct bgp_proto *p)
344
{
345
  unsigned delay;
346

    
347
  delay = p->cf->start_delay_time;
348
  if (p->startup_delay > delay)
349
    delay = p->startup_delay;
350
  if (delay)
351
    {
352
      BGP_TRACE(D_EVENTS, "Connect delayed by %d seconds", delay);
353
      bgp_setup_conn(p, &p->outgoing_conn);
354
      bgp_start_timer(p->outgoing_conn.connect_retry_timer, delay);
355
    }
356
  else
357
    bgp_connect(p);
358
}
359

    
360
/**
361
 * bgp_incoming_connection - handle an incoming connection
362
 * @sk: TCP socket
363
 * @dummy: unused
364
 *
365
 * This function serves as a socket hook for accepting of new BGP
366
 * connections. It searches a BGP instance corresponding to the peer
367
 * which has connected and if such an instance exists, it creates a
368
 * &bgp_conn structure, attaches it to the instance and either sends
369
 * an Open message or (if there already is an active connection) it
370
 * closes the new connection by sending a Notification message.
371
 */
372
static int
373
bgp_incoming_connection(sock *sk, int dummy UNUSED)
374
{
375
  struct proto_config *pc;
376
  int match = 0;
377

    
378
  DBG("BGP: Incoming connection from %I port %d\n", sk->daddr, sk->dport);
379
  WALK_LIST(pc, config->protos)
380
    if (pc->protocol == &proto_bgp && pc->proto)
381
      {
382
        struct bgp_proto *p = (struct bgp_proto *) pc->proto;
383
        if (ipa_equal(p->cf->remote_ip, sk->daddr))
384
          {
385
            match = 1;
386
            if ((p->p.proto_state == PS_START || p->p.proto_state == PS_UP) && p->neigh && p->neigh->iface)
387
              {
388
                BGP_TRACE(D_EVENTS, "Incoming connection from %I port %d", sk->daddr, sk->dport);
389
                if (p->incoming_conn.sk)
390
                  {
391
                    DBG("BGP: But one incoming connection already exists, how is that possible?\n");
392
                    break;
393
                  }
394
                bgp_setup_conn(p, &p->incoming_conn);
395
                bgp_setup_sk(p, &p->incoming_conn, sk);
396
                bgp_send_open(&p->incoming_conn);
397
                return 0;
398
              }
399
          }
400
      }
401
  if (!match)
402
    log(L_AUTH "BGP: Unauthorized connect from %I port %d", sk->daddr, sk->dport);
403
  rfree(sk);
404
  return 0;
405
}
406

    
407
static void
408
bgp_setup_listen_sk(void)
409
{
410
  if (!bgp_listen_sk)
411
    {
412
      sock *s = sk_new(&root_pool);
413
      DBG("BGP: Creating incoming socket\n");
414
      s->type = SK_TCP_PASSIVE;
415
      s->sport = BGP_PORT;
416
      s->tos = IP_PREC_INTERNET_CONTROL;
417
      s->ttl = 1;
418
      s->rbsize = BGP_RX_BUFFER_SIZE;
419
      s->tbsize = BGP_TX_BUFFER_SIZE;
420
      s->rx_hook = bgp_incoming_connection;
421
      if (sk_open(s))
422
        {
423
          log(L_ERR "Unable to open incoming BGP socket");
424
          rfree(s);
425
        }
426
      else
427
        bgp_listen_sk = s;
428
    }
429
}
430

    
431
static void
432
bgp_start_neighbor(struct bgp_proto *p)
433
{
434
  p->local_addr = p->neigh->iface->addr->ip;
435
  DBG("BGP: local=%I remote=%I\n", p->local_addr, p->next_hop);
436
#ifdef IPV6
437
  {
438
    struct ifa *a;
439
    p->local_link = ipa_or(ipa_build(0xfe80,0,0,0), ipa_and(p->local_addr, ipa_build(0,0,~0,~0)));
440
    WALK_LIST(a, p->neigh->iface->addrs)
441
      if (a->scope == SCOPE_LINK)
442
        {
443
          p->local_link = a->ip;
444
          break;
445
        }
446
    DBG("BGP: Selected link-level address %I\n", p->local_link);
447
  }
448
#endif
449
  bgp_initiate(p);
450
}
451

    
452
static void
453
bgp_neigh_notify(neighbor *n)
454
{
455
  struct bgp_proto *p = (struct bgp_proto *) n->proto;
456

    
457
  if (n->iface)
458
    {
459
      BGP_TRACE(D_EVENTS, "Neighbor found");
460
      bgp_start_neighbor(p);
461
    }
462
  else
463
    {
464
      BGP_TRACE(D_EVENTS, "Neighbor lost");
465
      /* Send cease packets, but don't wait for them to be delivered */
466
      bgp_graceful_close_conn(&p->outgoing_conn);
467
      bgp_graceful_close_conn(&p->incoming_conn);
468
      proto_notify_state(&p->p, PS_DOWN);
469
    }
470
}
471

    
472
static void
473
bgp_start_locked(struct object_lock *lock)
474
{
475
  struct bgp_proto *p = lock->data;
476
  struct bgp_config *cf = p->cf;
477

    
478
  DBG("BGP: Got lock\n");
479
  p->local_id = cf->c.global->router_id;
480
  p->next_hop = cf->multihop ? cf->multihop_via : cf->remote_ip;
481
  p->neigh = neigh_find(&p->p, &p->next_hop, NEF_STICKY);
482
  if (!p->neigh)
483
    {
484
      log(L_ERR "%s: Invalid next hop %I", p->p.name, p->next_hop);
485
      p->p.disabled = 1;
486
      proto_notify_state(&p->p, PS_DOWN);
487
    }
488
  else if (p->neigh->iface)
489
    bgp_start_neighbor(p);
490
  else
491
    BGP_TRACE(D_EVENTS, "Waiting for %I to become my neighbor", p->next_hop);
492
}
493

    
494
static int
495
bgp_start(struct proto *P)
496
{
497
  struct bgp_proto *p = (struct bgp_proto *) P;
498
  struct object_lock *lock;
499

    
500
  DBG("BGP: Startup.\n");
501
  p->outgoing_conn.state = BS_IDLE;
502
  p->incoming_conn.state = BS_IDLE;
503
  p->startup_delay = 0;
504
  p->neigh = NULL;
505

    
506
  bgp_counter++;
507
  bgp_setup_listen_sk();
508
  if (!bgp_linpool)
509
    bgp_linpool = lp_new(&root_pool, 4080);
510

    
511
  /*
512
   *  Before attempting to create the connection, we need to lock the
513
   *  port, so that are sure we're the only instance attempting to talk
514
   *  with that neighbor.
515
   */
516

    
517
  lock = p->lock = olock_new(P->pool);
518
  lock->addr = p->cf->remote_ip;
519
  lock->type = OBJLOCK_TCP;
520
  lock->port = BGP_PORT;
521
  lock->iface = NULL;
522
  lock->hook = bgp_start_locked;
523
  lock->data = p;
524
  olock_acquire(lock);
525
  return PS_START;
526
}
527

    
528
static int
529
bgp_shutdown(struct proto *P)
530
{
531
  struct bgp_proto *p = (struct bgp_proto *) P;
532

    
533
  BGP_TRACE(D_EVENTS, "Shutdown requested");
534

    
535
  /*
536
   *  We want to send the Cease notification message to all connections
537
   *  we have open, but we don't want to wait for all of them to complete.
538
   *  We are willing to handle the primary connection carefully, but for
539
   *  the others we just try to send the packet and if there is no buffer
540
   *  space free, we'll gracefully finish.
541
   */
542

    
543
  proto_notify_state(&p->p, PS_STOP);
544
  if (!p->conn)
545
    {
546
      if (p->outgoing_conn.state != BS_IDLE)
547
        p->outgoing_conn.primary = 1;        /* Shuts protocol down after connection close */
548
      else if (p->incoming_conn.state != BS_IDLE)
549
        p->incoming_conn.primary = 1;
550
    }
551
  if (bgp_graceful_close_conn(&p->outgoing_conn) || bgp_graceful_close_conn(&p->incoming_conn))
552
    return p->p.proto_state;
553
  else
554
    {
555
      /* No connections open, shutdown automatically */
556
      bgp_close(p);
557
      return PS_DOWN;
558
    }
559
}
560

    
561
static struct proto *
562
bgp_init(struct proto_config *C)
563
{
564
  struct bgp_config *c = (struct bgp_config *) C;
565
  struct proto *P = proto_new(C, sizeof(struct bgp_proto));
566
  struct bgp_proto *p = (struct bgp_proto *) P;
567

    
568
  P->rt_notify = bgp_rt_notify;
569
  P->rte_better = bgp_rte_better;
570
  P->import_control = bgp_import_control;
571
  P->neigh_notify = bgp_neigh_notify;
572
  p->cf = c;
573
  p->local_as = c->local_as;
574
  p->remote_as = c->remote_as;
575
  p->is_internal = (c->local_as == c->remote_as);
576
  return P;
577
}
578

    
579
/**
580
 * bgp_error - report a protocol error
581
 * @c: connection
582
 * @code: error code (according to the RFC)
583
 * @subcode: error sub-code
584
 * @data: data to be passed in the Notification message
585
 * @len: length of the data
586
 *
587
 * bgp_error() sends a notification packet to tell the other side that a protocol
588
 * error has occurred (including the data considered erroneous if possible) and
589
 * closes the connection.
590
 */
591
void
592
bgp_error(struct bgp_conn *c, unsigned code, unsigned subcode, byte *data, int len)
593
{
594
  if (c->error_flag)
595
    return;
596
  bgp_log_error(c->bgp, "Error", code, subcode, data, (len > 0) ? len : -len);
597
  c->error_flag = 1 + (code != 6);
598
  c->notify_code = code;
599
  c->notify_subcode = subcode;
600
  c->notify_data = data;
601
  c->notify_size = (len > 0) ? len : 0;
602
  if (c->primary)
603
    proto_notify_state(&c->bgp->p, PS_STOP);
604
  bgp_schedule_packet(c, PKT_NOTIFICATION);
605
}
606

    
607
void
608
bgp_check(struct bgp_config *c)
609
{
610
  if (!c->local_as)
611
    cf_error("Local AS number must be set");
612
  if (!c->remote_as)
613
    cf_error("Neighbor must be configured");
614
}
615

    
616
static void
617
bgp_get_status(struct proto *P, byte *buf)
618
{
619
  struct bgp_proto *p = (struct bgp_proto *) P;
620

    
621
  if (P->proto_state == PS_DOWN)
622
    buf[0] = 0;
623
  else
624
    strcpy(buf, bgp_state_names[MAX(p->incoming_conn.state, p->outgoing_conn.state)]);
625
}
626

    
627
static int
628
bgp_reconfigure(struct proto *P, struct proto_config *C)
629
{
630
  struct bgp_config *new = (struct bgp_config *) C;
631
  struct bgp_proto *p = (struct bgp_proto *) P;
632
  struct bgp_config *old = p->cf;
633

    
634
  return !memcmp(((byte *) old) + sizeof(struct proto_config),
635
                 ((byte *) new) + sizeof(struct proto_config),
636
                 sizeof(struct bgp_config) - sizeof(struct proto_config));
637
}
638

    
639
struct protocol proto_bgp = {
640
  name:                        "BGP",
641
  template:                "bgp%d",
642
  attr_class:                EAP_BGP,
643
  init:                        bgp_init,
644
  start:                bgp_start,
645
  shutdown:                bgp_shutdown,
646
  get_status:                bgp_get_status,
647
  get_attr:                bgp_get_attr,
648
  reconfigure:                bgp_reconfigure,
649
  get_route_info:        bgp_get_route_info,
650
};