Statistics
| Branch: | Revision:

iof-bird-daemon / proto / bgp / bgp.c @ 5400c0e7

History | View | Annotate | Download (32.6 KB)

1
/*
2
 *        BIRD -- The Border Gateway Protocol
3
 *
4
 *        (c) 2000 Martin Mares <mj@ucw.cz>
5
 *
6
 *        Can be freely distributed and used under the terms of the GNU GPL.
7
 */
8

    
9
/**
10
 * DOC: Border Gateway Protocol
11
 *
12
 * The BGP protocol is implemented in three parts: |bgp.c| which takes care of the
13
 * connection and most of the interface with BIRD core, |packets.c| handling
14
 * both incoming and outgoing BGP packets and |attrs.c| containing functions for
15
 * manipulation with BGP attribute lists.
16
 *
17
 * As opposed to the other existing routing daemons, BIRD has a sophisticated core
18
 * architecture which is able to keep all the information needed by BGP in the
19
 * primary routing table, therefore no complex data structures like a central
20
 * BGP table are needed. This increases memory footprint of a BGP router with
21
 * many connections, but not too much and, which is more important, it makes
22
 * BGP much easier to implement.
23
 *
24
 * Each instance of BGP (corresponding to a single BGP peer) is described by a &bgp_proto
25
 * structure to which are attached individual connections represented by &bgp_conn
26
 * (usually, there exists only one connection, but during BGP session setup, there
27
 * can be more of them). The connections are handled according to the BGP state machine
28
 * defined in the RFC with all the timers and all the parameters configurable.
29
 *
30
 * In incoming direction, we listen on the connection's socket and each time we receive
31
 * some input, we pass it to bgp_rx(). It decodes packet headers and the markers and
32
 * passes complete packets to bgp_rx_packet() which distributes the packet according
33
 * to its type.
34
 *
35
 * In outgoing direction, we gather all the routing updates and sort them to buckets
36
 * (&bgp_bucket) according to their attributes (we keep a hash table for fast comparison
37
 * of &rta's and a &fib which helps us to find if we already have another route for
38
 * the same destination queued for sending, so that we can replace it with the new one
39
 * immediately instead of sending both updates). There also exists a special bucket holding
40
 * all the route withdrawals which cannot be queued anywhere else as they don't have any
41
 * attributes. If we have any packet to send (due to either new routes or the connection
42
 * tracking code wanting to send an Open, Keepalive or Notification message), we call
43
 * bgp_schedule_packet() which sets the corresponding bit in a @packets_to_send
44
 * bit field in &bgp_conn and as soon as the transmit socket buffer becomes empty,
45
 * we call bgp_fire_tx(). It inspects state of all the packet type bits and calls
46
 * the corresponding bgp_create_xx() functions, eventually rescheduling the same packet
47
 * type if we have more data of the same type to send.
48
 *
49
 * The processing of attributes consists of two functions: bgp_decode_attrs() for checking
50
 * of the attribute blocks and translating them to the language of BIRD's extended attributes
51
 * and bgp_encode_attrs() which does the converse. Both functions are built around a
52
 * @bgp_attr_table array describing all important characteristics of all known attributes.
53
 * Unknown transitive attributes are attached to the route as %EAF_TYPE_OPAQUE byte streams.
54
 */
55

    
56
#undef LOCAL_DEBUG
57

    
58
#include "nest/bird.h"
59
#include "nest/iface.h"
60
#include "nest/protocol.h"
61
#include "nest/route.h"
62
#include "nest/locks.h"
63
#include "nest/cli.h"
64
#include "conf/conf.h"
65
#include "lib/socket.h"
66
#include "lib/resource.h"
67
#include "lib/string.h"
68

    
69
#include "bgp.h"
70

    
71
struct linpool *bgp_linpool;                /* Global temporary pool */
72
static sock *bgp_listen_sk;                /* Global listening socket */
73
static int bgp_counter;                        /* Number of protocol instances using the listening socket */
74

    
75
static void bgp_close(struct bgp_proto *p, int apply_md5);
76
static void bgp_connect(struct bgp_proto *p);
77
static void bgp_active(struct bgp_proto *p);
78
static sock *bgp_setup_listen_sk(ip_addr addr, unsigned port, u32 flags);
79

    
80

    
81
/**
82
 * bgp_open - open a BGP instance
83
 * @p: BGP instance
84
 *
85
 * This function allocates and configures shared BGP resources.
86
 * Should be called as the last step during initialization
87
 * (when lock is acquired and neighbor is ready).
88
 * When error, state changed to PS_DOWN, -1 is returned and caller
89
 * should return immediately.
90
 */
91
static int
92
bgp_open(struct bgp_proto *p)
93
{
94
  struct config *cfg = p->cf->c.global;
95
  int errcode;
96

    
97
  bgp_counter++;
98

    
99
  if (!bgp_listen_sk)
100
    bgp_listen_sk = bgp_setup_listen_sk(cfg->listen_bgp_addr, cfg->listen_bgp_port, cfg->listen_bgp_flags);
101

    
102
  if (!bgp_listen_sk)
103
    {
104
      bgp_counter--;
105
      errcode = BEM_NO_SOCKET;
106
      goto err;
107
    }
108

    
109
  if (!bgp_linpool)
110
    bgp_linpool = lp_new(&root_pool, 4080);
111

    
112
  if (p->cf->password)
113
    {
114
      int rv = sk_set_md5_auth(bgp_listen_sk, p->cf->remote_ip, p->cf->iface, p->cf->password);
115
      if (rv < 0)
116
        {
117
          bgp_close(p, 0);
118
          errcode = BEM_INVALID_MD5;
119
          goto err;
120
        }
121
    }
122

    
123
  return 0;
124

    
125
err:
126
  p->p.disabled = 1;
127
  bgp_store_error(p, NULL, BE_MISC, errcode);
128
  proto_notify_state(&p->p, PS_DOWN);
129
  return -1;
130
}
131

    
132
static void
133
bgp_startup(struct bgp_proto *p)
134
{
135
  BGP_TRACE(D_EVENTS, "Started");
136
  p->start_state = p->cf->capabilities ? BSS_CONNECT : BSS_CONNECT_NOCAP;
137

    
138
  if (!p->cf->passive)
139
    bgp_active(p);
140
}
141

    
142
static void
143
bgp_startup_timeout(timer *t)
144
{
145
  bgp_startup(t->data);
146
}
147

    
148

    
149
static void
150
bgp_initiate(struct bgp_proto *p)
151
{
152
  int rv = bgp_open(p);
153
  if (rv < 0)
154
    return;
155

    
156
  if (p->startup_delay)
157
    {
158
      BGP_TRACE(D_EVENTS, "Startup delayed by %d seconds", p->startup_delay);
159
      bgp_start_timer(p->startup_timer, p->startup_delay);
160
    }
161
  else
162
    bgp_startup(p);
163
}
164

    
165
/**
166
 * bgp_close - close a BGP instance
167
 * @p: BGP instance
168
 * @apply_md5: 0 to disable unsetting MD5 auth
169
 *
170
 * This function frees and deconfigures shared BGP resources.
171
 * @apply_md5 is set to 0 when bgp_close is called as a cleanup
172
 * from failed bgp_open().
173
 */
174
static void
175
bgp_close(struct bgp_proto *p, int apply_md5)
176
{
177
  ASSERT(bgp_counter);
178
  bgp_counter--;
179

    
180
  if (p->cf->password && apply_md5)
181
    sk_set_md5_auth(bgp_listen_sk, p->cf->remote_ip, p->cf->iface, NULL);
182

    
183
  if (!bgp_counter)
184
    {
185
      rfree(bgp_listen_sk);
186
      bgp_listen_sk = NULL;
187
      rfree(bgp_linpool);
188
      bgp_linpool = NULL;
189
    }
190
}
191

    
192
/**
193
 * bgp_start_timer - start a BGP timer
194
 * @t: timer
195
 * @value: time to fire (0 to disable the timer)
196
 *
197
 * This functions calls tm_start() on @t with time @value and the
198
 * amount of randomization suggested by the BGP standard. Please use
199
 * it for all BGP timers.
200
 */
201
void
202
bgp_start_timer(timer *t, int value)
203
{
204
  if (value)
205
    {
206
      /* The randomization procedure is specified in RFC 1771: 9.2.3.3 */
207
      t->randomize = value / 4;
208
      tm_start(t, value - t->randomize);
209
    }
210
  else
211
    tm_stop(t);
212
}
213

    
214
/**
215
 * bgp_close_conn - close a BGP connection
216
 * @conn: connection to close
217
 *
218
 * This function takes a connection described by the &bgp_conn structure,
219
 * closes its socket and frees all resources associated with it.
220
 */
221
void
222
bgp_close_conn(struct bgp_conn *conn)
223
{
224
  // struct bgp_proto *p = conn->bgp;
225

    
226
  DBG("BGP: Closing connection\n");
227
  conn->packets_to_send = 0;
228
  rfree(conn->connect_retry_timer);
229
  conn->connect_retry_timer = NULL;
230
  rfree(conn->keepalive_timer);
231
  conn->keepalive_timer = NULL;
232
  rfree(conn->hold_timer);
233
  conn->hold_timer = NULL;
234
  rfree(conn->sk);
235
  conn->sk = NULL;
236
  rfree(conn->tx_ev);
237
  conn->tx_ev = NULL;
238
}
239

    
240

    
241
/**
242
 * bgp_update_startup_delay - update a startup delay
243
 * @p: BGP instance
244
 *
245
 * This function updates a startup delay that is used to postpone next BGP connect.
246
 * It also handles disable_after_error and might stop BGP instance when error
247
 * happened and disable_after_error is on.
248
 *
249
 * It should be called when BGP protocol error happened.
250
 */
251
void
252
bgp_update_startup_delay(struct bgp_proto *p)
253
{
254
  struct bgp_config *cf = p->cf;
255

    
256
  DBG("BGP: Updating startup delay\n");
257

    
258
  if (p->last_proto_error && ((now - p->last_proto_error) >= (int) cf->error_amnesia_time))
259
    p->startup_delay = 0;
260

    
261
  p->last_proto_error = now;
262

    
263
  if (cf->disable_after_error)
264
    {
265
      p->startup_delay = 0;
266
      p->p.disabled = 1;
267
      return;
268
    }
269

    
270
  if (!p->startup_delay)
271
    p->startup_delay = cf->error_delay_time_min;
272
  else
273
    p->startup_delay = MIN(2 * p->startup_delay, cf->error_delay_time_max);
274
}
275

    
276
static void
277
bgp_graceful_close_conn(struct bgp_conn *conn, unsigned subcode)
278
{
279
  switch (conn->state)
280
    {
281
    case BS_IDLE:
282
    case BS_CLOSE:
283
      return;
284
    case BS_CONNECT:
285
    case BS_ACTIVE:
286
      bgp_conn_enter_idle_state(conn);
287
      return;
288
    case BS_OPENSENT:
289
    case BS_OPENCONFIRM:
290
    case BS_ESTABLISHED:
291
      bgp_error(conn, 6, subcode, NULL, 0);
292
      return;
293
    default:
294
      bug("bgp_graceful_close_conn: Unknown state %d", conn->state);
295
    }
296
}
297

    
298
static void
299
bgp_down(struct bgp_proto *p)
300
{
301
  if (p->start_state > BSS_PREPARE)
302
    bgp_close(p, 1);
303

    
304
  BGP_TRACE(D_EVENTS, "Down");
305
  proto_notify_state(&p->p, PS_DOWN);
306
}
307

    
308
static void
309
bgp_decision(void *vp)
310
{
311
  struct bgp_proto *p = vp;
312

    
313
  DBG("BGP: Decision start\n");
314
  if ((p->p.proto_state == PS_START)
315
      && (p->outgoing_conn.state == BS_IDLE)
316
      && (!p->cf->passive))
317
    bgp_active(p);
318

    
319
  if ((p->p.proto_state == PS_STOP)
320
      && (p->outgoing_conn.state == BS_IDLE)
321
      && (p->incoming_conn.state == BS_IDLE))
322
    bgp_down(p);
323
}
324

    
325
void
326
bgp_stop(struct bgp_proto *p, unsigned subcode)
327
{
328
  proto_notify_state(&p->p, PS_STOP);
329
  bgp_graceful_close_conn(&p->outgoing_conn, subcode);
330
  bgp_graceful_close_conn(&p->incoming_conn, subcode);
331
  ev_schedule(p->event);
332
}
333

    
334
static inline void
335
bgp_conn_set_state(struct bgp_conn *conn, unsigned new_state)
336
{
337
  if (conn->bgp->p.mrtdump & MD_STATES)
338
    mrt_dump_bgp_state_change(conn, conn->state, new_state);
339

    
340
  conn->state = new_state;
341
}
342

    
343
void
344
bgp_conn_enter_openconfirm_state(struct bgp_conn *conn)
345
{
346
  /* Really, most of the work is done in bgp_rx_open(). */
347
  bgp_conn_set_state(conn, BS_OPENCONFIRM);
348
}
349

    
350
void
351
bgp_conn_enter_established_state(struct bgp_conn *conn)
352
{
353
  struct bgp_proto *p = conn->bgp;
354
 
355
  BGP_TRACE(D_EVENTS, "BGP session established");
356
  DBG("BGP: UP!!!\n");
357

    
358
  /* For multi-hop BGP sessions */
359
  if (ipa_zero(p->source_addr))
360
    p->source_addr = conn->sk->saddr; 
361

    
362
  p->conn = conn;
363
  p->last_error_class = 0;
364
  p->last_error_code = 0;
365
  bgp_attr_init(conn->bgp);
366
  bgp_conn_set_state(conn, BS_ESTABLISHED);
367
  proto_notify_state(&p->p, PS_UP);
368
}
369

    
370
static void
371
bgp_conn_leave_established_state(struct bgp_proto *p)
372
{
373
  BGP_TRACE(D_EVENTS, "BGP session closed");
374
  p->conn = NULL;
375

    
376
  if (p->p.proto_state == PS_UP)
377
    bgp_stop(p, 0);
378
}
379

    
380
void
381
bgp_conn_enter_close_state(struct bgp_conn *conn)
382
{
383
  struct bgp_proto *p = conn->bgp;
384
  int os = conn->state;
385

    
386
  bgp_conn_set_state(conn, BS_CLOSE);
387
  tm_stop(conn->hold_timer);
388
  tm_stop(conn->keepalive_timer);
389
  conn->sk->rx_hook = NULL;
390

    
391
  if (os == BS_ESTABLISHED)
392
    bgp_conn_leave_established_state(p);
393
}
394

    
395
void
396
bgp_conn_enter_idle_state(struct bgp_conn *conn)
397
{
398
  struct bgp_proto *p = conn->bgp;
399
  int os = conn->state;
400

    
401
  bgp_close_conn(conn);
402
  bgp_conn_set_state(conn, BS_IDLE);
403
  ev_schedule(p->event);
404

    
405
  if (os == BS_ESTABLISHED)
406
    bgp_conn_leave_established_state(p);
407
}
408

    
409
static void
410
bgp_send_open(struct bgp_conn *conn)
411
{
412
  conn->start_state = conn->bgp->start_state;
413
  conn->want_as4_support = conn->bgp->cf->enable_as4 && (conn->start_state != BSS_CONNECT_NOCAP);
414
  conn->peer_as4_support = 0;        // Default value, possibly changed by receiving capability.
415
  conn->advertised_as = 0;
416

    
417
  DBG("BGP: Sending open\n");
418
  conn->sk->rx_hook = bgp_rx;
419
  conn->sk->tx_hook = bgp_tx;
420
  tm_stop(conn->connect_retry_timer);
421
  bgp_schedule_packet(conn, PKT_OPEN);
422
  bgp_conn_set_state(conn, BS_OPENSENT);
423
  bgp_start_timer(conn->hold_timer, conn->bgp->cf->initial_hold_time);
424
}
425

    
426
static void
427
bgp_connected(sock *sk)
428
{
429
  struct bgp_conn *conn = sk->data;
430
  struct bgp_proto *p = conn->bgp;
431

    
432
  BGP_TRACE(D_EVENTS, "Connected");
433
  bgp_send_open(conn);
434
}
435

    
436
static void
437
bgp_connect_timeout(timer *t)
438
{
439
  struct bgp_conn *conn = t->data;
440
  struct bgp_proto *p = conn->bgp;
441

    
442
  DBG("BGP: connect_timeout\n");
443
  if (p->p.proto_state == PS_START)
444
    {
445
      bgp_close_conn(conn);
446
      bgp_connect(p);
447
    }
448
  else
449
    bgp_conn_enter_idle_state(conn);
450
}
451

    
452
static void
453
bgp_sock_err(sock *sk, int err)
454
{
455
  struct bgp_conn *conn = sk->data;
456
  struct bgp_proto *p = conn->bgp;
457

    
458
  /*
459
   * This error hook may be called either asynchronously from main
460
   * loop, or synchronously from sk_send().  But sk_send() is called
461
   * only from bgp_tx() and bgp_kick_tx(), which are both called
462
   * asynchronously from main loop. Moreover, they end if err hook is
463
   * called. Therefore, we could suppose that it is always called
464
   * asynchronously.
465
   */
466

    
467
  bgp_store_error(p, conn, BE_SOCKET, err);
468

    
469
  if (err)
470
    BGP_TRACE(D_EVENTS, "Connection lost (%M)", err);
471
  else
472
    BGP_TRACE(D_EVENTS, "Connection closed");
473

    
474
  bgp_conn_enter_idle_state(conn);
475
}
476

    
477
static void
478
bgp_hold_timeout(timer *t)
479
{
480
  struct bgp_conn *conn = t->data;
481

    
482
  DBG("BGP: Hold timeout\n");
483

    
484
  /* If there is something in input queue, we are probably congested
485
     and perhaps just not processed BGP packets in time. */
486

    
487
  if (sk_rx_ready(conn->sk) > 0)
488
    bgp_start_timer(conn->hold_timer, 10);
489
  else
490
    bgp_error(conn, 4, 0, NULL, 0);
491
}
492

    
493
static void
494
bgp_keepalive_timeout(timer *t)
495
{
496
  struct bgp_conn *conn = t->data;
497

    
498
  DBG("BGP: Keepalive timer\n");
499
  bgp_schedule_packet(conn, PKT_KEEPALIVE);
500
}
501

    
502
static void
503
bgp_setup_conn(struct bgp_proto *p, struct bgp_conn *conn)
504
{
505
  timer *t;
506

    
507
  conn->sk = NULL;
508
  conn->bgp = p;
509
  conn->packets_to_send = 0;
510

    
511
  t = conn->connect_retry_timer = tm_new(p->p.pool);
512
  t->hook = bgp_connect_timeout;
513
  t->data = conn;
514
  t = conn->hold_timer = tm_new(p->p.pool);
515
  t->hook = bgp_hold_timeout;
516
  t->data = conn;
517
  t = conn->keepalive_timer = tm_new(p->p.pool);
518
  t->hook = bgp_keepalive_timeout;
519
  t->data = conn;
520
  conn->tx_ev = ev_new(p->p.pool);
521
  conn->tx_ev->hook = bgp_kick_tx;
522
  conn->tx_ev->data = conn;
523
}
524

    
525
static void
526
bgp_setup_sk(struct bgp_conn *conn, sock *s)
527
{
528
  s->data = conn;
529
  s->err_hook = bgp_sock_err;
530
  conn->sk = s;
531
}
532

    
533
static void
534
bgp_active(struct bgp_proto *p)
535
{
536
  int delay = MAX(1, p->cf->start_delay_time);
537
  struct bgp_conn *conn = &p->outgoing_conn;
538

    
539
  BGP_TRACE(D_EVENTS, "Connect delayed by %d seconds", delay);
540
  bgp_setup_conn(p, conn);
541
  bgp_conn_set_state(conn, BS_ACTIVE);
542
  bgp_start_timer(conn->connect_retry_timer, delay);
543
}
544

    
545
/**
546
 * bgp_connect - initiate an outgoing connection
547
 * @p: BGP instance
548
 *
549
 * The bgp_connect() function creates a new &bgp_conn and initiates
550
 * a TCP connection to the peer. The rest of connection setup is governed
551
 * by the BGP state machine as described in the standard.
552
 */
553
static void
554
bgp_connect(struct bgp_proto *p)        /* Enter Connect state and start establishing connection */
555
{
556
  sock *s;
557
  struct bgp_conn *conn = &p->outgoing_conn;
558
  int hops = p->cf->multihop ? : 1;
559

    
560
  DBG("BGP: Connecting\n");
561
  s = sk_new(p->p.pool);
562
  s->type = SK_TCP_ACTIVE;
563
  s->saddr = p->source_addr;
564
  s->daddr = p->cf->remote_ip;
565
  s->iface = p->neigh ? p->neigh->iface : NULL;
566
  s->dport = BGP_PORT;
567
  s->ttl = p->cf->ttl_security ? 255 : hops;
568
  s->rbsize = BGP_RX_BUFFER_SIZE;
569
  s->tbsize = BGP_TX_BUFFER_SIZE;
570
  s->tos = IP_PREC_INTERNET_CONTROL;
571
  s->password = p->cf->password;
572
  s->tx_hook = bgp_connected;
573
  BGP_TRACE(D_EVENTS, "Connecting to %I%J from local address %I%J", s->daddr, p->cf->iface,
574
            s->saddr, ipa_has_link_scope(s->saddr) ? s->iface : NULL);
575
  bgp_setup_conn(p, conn);
576
  bgp_setup_sk(conn, s);
577
  bgp_conn_set_state(conn, BS_CONNECT);
578

    
579
  if (sk_open(s) < 0)
580
    {
581
      bgp_sock_err(s, 0);
582
      return;
583
    }
584

    
585
  /* Set minimal receive TTL if needed */
586
  if (p->cf->ttl_security)
587
  {
588
    DBG("Setting minimum received TTL to %d", 256 - hops);
589
    if (sk_set_min_ttl(s, 256 - hops) < 0)
590
    {
591
      log(L_ERR "TTL security configuration failed, closing session");
592
      bgp_sock_err(s, 0);
593
      return;
594
    }
595
  }
596

    
597
  DBG("BGP: Waiting for connect success\n");
598
  bgp_start_timer(conn->connect_retry_timer, p->cf->connect_retry_time);
599
}
600

    
601
/**
602
 * bgp_incoming_connection - handle an incoming connection
603
 * @sk: TCP socket
604
 * @dummy: unused
605
 *
606
 * This function serves as a socket hook for accepting of new BGP
607
 * connections. It searches a BGP instance corresponding to the peer
608
 * which has connected and if such an instance exists, it creates a
609
 * &bgp_conn structure, attaches it to the instance and either sends
610
 * an Open message or (if there already is an active connection) it
611
 * closes the new connection by sending a Notification message.
612
 */
613
static int
614
bgp_incoming_connection(sock *sk, int dummy UNUSED)
615
{
616
  struct proto_config *pc;
617

    
618
  DBG("BGP: Incoming connection from %I port %d\n", sk->daddr, sk->dport);
619
  WALK_LIST(pc, config->protos)
620
    if (pc->protocol == &proto_bgp && pc->proto)
621
      {
622
        struct bgp_proto *p = (struct bgp_proto *) pc->proto;
623
        if (ipa_equal(p->cf->remote_ip, sk->daddr) &&
624
            (!ipa_has_link_scope(sk->daddr) || (p->cf->iface == sk->iface)))
625
          {
626
            /* We are in proper state and there is no other incoming connection */
627
            int acc = (p->p.proto_state == PS_START || p->p.proto_state == PS_UP) &&
628
              (p->start_state >= BSS_CONNECT) && (!p->incoming_conn.sk);
629

    
630
            BGP_TRACE(D_EVENTS, "Incoming connection from %I%J (port %d) %s",
631
                      sk->daddr, ipa_has_link_scope(sk->daddr) ? sk->iface : NULL,
632
                      sk->dport, acc ? "accepted" : "rejected");
633

    
634
            if (!acc)
635
              goto err;
636

    
637
            int hops = p->cf->multihop ? : 1;
638
            if (p->cf->ttl_security)
639
            {
640
              /* TTL security support */
641
              if ((sk_set_ttl(sk, 255) < 0) ||
642
                  (sk_set_min_ttl(sk, 256 - hops) < 0))
643
              {
644
                log(L_ERR "TTL security configuration failed, closing session");
645
                goto err;
646
              }
647
            }
648
            else
649
              sk_set_ttl(sk, hops);
650

    
651
            bgp_setup_conn(p, &p->incoming_conn);
652
            bgp_setup_sk(&p->incoming_conn, sk);
653
            bgp_send_open(&p->incoming_conn);
654
            return 0;
655
          }
656
      }
657

    
658
  log(L_WARN "BGP: Unexpected connect from unknown address %I%J (port %d)",
659
      sk->daddr, ipa_has_link_scope(sk->daddr) ? sk->iface : NULL, sk->dport);
660
 err:
661
  rfree(sk);
662
  return 0;
663
}
664

    
665
static void
666
bgp_listen_sock_err(sock *sk UNUSED, int err)
667
{
668
  if (err == ECONNABORTED)
669
    log(L_WARN "BGP: Incoming connection aborted");
670
  else
671
    log(L_ERR "BGP: Error on listening socket: %M", err);
672
}
673

    
674
static sock *
675
bgp_setup_listen_sk(ip_addr addr, unsigned port, u32 flags)
676
{
677
  sock *s = sk_new(&root_pool);
678
  DBG("BGP: Creating listening socket\n");
679
  s->type = SK_TCP_PASSIVE;
680
  s->ttl = 255;
681
  s->saddr = addr;
682
  s->sport = port ? port : BGP_PORT;
683
  s->flags = flags ? 0 : SKF_V6ONLY;
684
  s->tos = IP_PREC_INTERNET_CONTROL;
685
  s->rbsize = BGP_RX_BUFFER_SIZE;
686
  s->tbsize = BGP_TX_BUFFER_SIZE;
687
  s->rx_hook = bgp_incoming_connection;
688
  s->err_hook = bgp_listen_sock_err;
689

    
690
  if (sk_open(s) < 0)
691
    {
692
      log(L_ERR "BGP: Unable to open listening socket");
693
      rfree(s);
694
      return NULL;
695
    }
696

    
697
  return s;
698
}
699

    
700
static void
701
bgp_start_neighbor(struct bgp_proto *p)
702
{
703
  /* Called only for single-hop BGP sessions */
704

    
705
  /* Remove this ? */
706
  if (ipa_zero(p->source_addr))
707
    p->source_addr = p->neigh->iface->addr->ip; 
708

    
709
#ifdef IPV6
710
  {
711
    struct ifa *a;
712
    p->local_link = IPA_NONE;
713
    WALK_LIST(a, p->neigh->iface->addrs)
714
      if (a->scope == SCOPE_LINK)
715
        {
716
          p->local_link = a->ip;
717
          break;
718
        }
719

    
720
    if (! ipa_nonzero(p->local_link))
721
      log(L_WARN "%s: Missing link local address on interface %s", p->p.name,  p->neigh->iface->name);
722

    
723
    DBG("BGP: Selected link-level address %I\n", p->local_link);
724
  }
725
#endif
726

    
727
  bgp_initiate(p);
728
}
729

    
730
static void
731
bgp_neigh_notify(neighbor *n)
732
{
733
  struct bgp_proto *p = (struct bgp_proto *) n->proto;
734

    
735
  if (n->scope > 0)
736
    {
737
      if ((p->p.proto_state == PS_START) && (p->start_state == BSS_PREPARE))
738
        {
739
          BGP_TRACE(D_EVENTS, "Neighbor found");
740
          bgp_start_neighbor(p);
741
        }
742
    }
743
  else
744
    {
745
      if ((p->p.proto_state == PS_START) || (p->p.proto_state == PS_UP))
746
        {
747
          BGP_TRACE(D_EVENTS, "Neighbor lost");
748
          bgp_store_error(p, NULL, BE_MISC, BEM_NEIGHBOR_LOST);
749
          bgp_stop(p, 0);
750
        }
751
    }
752
}
753

    
754
static int
755
bgp_reload_routes(struct proto *P)
756
{
757
  struct bgp_proto *p = (struct bgp_proto *) P;
758
  if (!p->conn || !p->conn->peer_refresh_support)
759
    return 0;
760

    
761
  bgp_schedule_packet(p->conn, PKT_ROUTE_REFRESH);
762
  return 1;
763
}
764

    
765
static void
766
bgp_start_locked(struct object_lock *lock)
767
{
768
  struct bgp_proto *p = lock->data;
769
  struct bgp_config *cf = p->cf;
770

    
771
  if (p->p.proto_state != PS_START)
772
    {
773
      DBG("BGP: Got lock in different state %d\n", p->p.proto_state);
774
      return;
775
    }
776

    
777
  DBG("BGP: Got lock\n");
778

    
779
  if (cf->multihop)
780
    {
781
      /* Multi-hop sessions do not use neighbor entries */
782
      bgp_initiate(p);
783
      return;
784
    }
785

    
786
  p->neigh = neigh_find2(&p->p, &cf->remote_ip, cf->iface, NEF_STICKY);
787
  if (!p->neigh || (p->neigh->scope == SCOPE_HOST))
788
    {
789
      log(L_ERR "%s: Invalid remote address %I%J", p->p.name, cf->remote_ip, cf->iface);
790
      /* As we do not start yet, we can just disable protocol */
791
      p->p.disabled = 1;
792
      bgp_store_error(p, NULL, BE_MISC, BEM_INVALID_NEXT_HOP);
793
      proto_notify_state(&p->p, PS_DOWN);
794
      return;
795
    }
796
  
797
  if (p->neigh->scope > 0)
798
    bgp_start_neighbor(p);
799
  else
800
    BGP_TRACE(D_EVENTS, "Waiting for %I%J to become my neighbor", cf->remote_ip, cf->iface);
801
}
802

    
803
static int
804
bgp_start(struct proto *P)
805
{
806
  struct bgp_proto *p = (struct bgp_proto *) P;
807
  struct object_lock *lock;
808

    
809
  DBG("BGP: Startup.\n");
810
  p->start_state = BSS_PREPARE;
811
  p->outgoing_conn.state = BS_IDLE;
812
  p->incoming_conn.state = BS_IDLE;
813
  p->neigh = NULL;
814

    
815
  rt_lock_table(p->igp_table);
816

    
817
  p->event = ev_new(p->p.pool);
818
  p->event->hook = bgp_decision;
819
  p->event->data = p;
820

    
821
  p->startup_timer = tm_new(p->p.pool);
822
  p->startup_timer->hook = bgp_startup_timeout;
823
  p->startup_timer->data = p;
824

    
825
  p->local_id = proto_get_router_id(P->cf);
826
  if (p->rr_client)
827
    p->rr_cluster_id = p->cf->rr_cluster_id ? p->cf->rr_cluster_id : p->local_id;
828

    
829
  p->remote_id = 0;
830
  p->source_addr = p->cf->source_addr;
831

    
832
  /*
833
   *  Before attempting to create the connection, we need to lock the
834
   *  port, so that are sure we're the only instance attempting to talk
835
   *  with that neighbor.
836
   */
837

    
838
  lock = p->lock = olock_new(P->pool);
839
  lock->addr = p->cf->remote_ip;
840
  lock->iface = p->cf->iface;
841
  lock->type = OBJLOCK_TCP;
842
  lock->port = BGP_PORT;
843
  lock->iface = NULL;
844
  lock->hook = bgp_start_locked;
845
  lock->data = p;
846
  olock_acquire(lock);
847

    
848
  return PS_START;
849
}
850

    
851
extern int proto_restart;
852

    
853
static int
854
bgp_shutdown(struct proto *P)
855
{
856
  struct bgp_proto *p = (struct bgp_proto *) P;
857
  unsigned subcode = 0;
858

    
859
  BGP_TRACE(D_EVENTS, "Shutdown requested");
860

    
861
  switch (P->down_code)
862
    {
863
    case PDC_CF_REMOVE:
864
    case PDC_CF_DISABLE:
865
      subcode = 3; // Errcode 6, 3 - peer de-configured
866
      break;
867

    
868
    case PDC_CF_RESTART:
869
      subcode = 6; // Errcode 6, 6 - other configuration change
870
      break;
871

    
872
    case PDC_CMD_DISABLE:
873
    case PDC_CMD_SHUTDOWN:
874
      subcode = 2; // Errcode 6, 2 - administrative shutdown
875
      break;
876

    
877
    case PDC_CMD_RESTART:
878
      subcode = 4; // Errcode 6, 4 - administrative reset
879
      break;
880

    
881
    case PDC_IN_LIMIT_HIT:
882
      subcode = 1; // Errcode 6, 1 - max number of prefixes reached
883
      /* log message for compatibility */
884
      log(L_WARN "%s: Route limit exceeded, shutting down", p->p.name);
885
      goto limit;
886

    
887
    case PDC_OUT_LIMIT_HIT:
888
      subcode = proto_restart ? 4 : 2; // Administrative reset or shutdown
889

    
890
    limit:
891
      bgp_store_error(p, NULL, BE_AUTO_DOWN, BEA_ROUTE_LIMIT_EXCEEDED);
892
      if (proto_restart)
893
        bgp_update_startup_delay(p);
894
      else
895
        p->startup_delay = 0;
896
      goto done;
897
    }
898

    
899
  bgp_store_error(p, NULL, BE_MAN_DOWN, 0);
900
  p->startup_delay = 0;
901

    
902
 done:
903
  bgp_stop(p, subcode);
904
  return p->p.proto_state;
905
}
906

    
907
static void
908
bgp_cleanup(struct proto *P)
909
{
910
  struct bgp_proto *p = (struct bgp_proto *) P;
911
  rt_unlock_table(p->igp_table);
912
}
913

    
914
static rtable *
915
get_igp_table(struct bgp_config *cf)
916
{
917
  return cf->igp_table ? cf->igp_table->table : cf->c.table->table;
918
}
919

    
920
static struct proto *
921
bgp_init(struct proto_config *C)
922
{
923
  struct bgp_config *c = (struct bgp_config *) C;
924
  struct proto *P = proto_new(C, sizeof(struct bgp_proto));
925
  struct bgp_proto *p = (struct bgp_proto *) P;
926

    
927
  P->accept_ra_types = c->secondary ? RA_ACCEPTED : RA_OPTIMAL;
928
  P->rt_notify = bgp_rt_notify;
929
  P->rte_better = bgp_rte_better;
930
  P->import_control = bgp_import_control;
931
  P->neigh_notify = bgp_neigh_notify;
932
  P->reload_routes = bgp_reload_routes;
933

    
934
  if (c->deterministic_med)
935
    P->rte_recalculate = bgp_rte_recalculate;
936

    
937
  p->cf = c;
938
  p->local_as = c->local_as;
939
  p->remote_as = c->remote_as;
940
  p->is_internal = (c->local_as == c->remote_as);
941
  p->rs_client = c->rs_client;
942
  p->rr_client = c->rr_client;
943
  p->igp_table = get_igp_table(c);
944

    
945
  return P;
946
}
947

    
948

    
949
void
950
bgp_check_config(struct bgp_config *c)
951
{
952
  int internal = (c->local_as == c->remote_as);
953

    
954
  /* Do not check templates at all */
955
  if (c->c.class == SYM_TEMPLATE)
956
    return;
957

    
958
  if (!c->local_as)
959
    cf_error("Local AS number must be set");
960

    
961
  if (!c->remote_as)
962
    cf_error("Neighbor must be configured");
963

    
964
  if (!(c->capabilities && c->enable_as4) && (c->remote_as > 0xFFFF))
965
    cf_error("Neighbor AS number out of range (AS4 not available)");
966

    
967
  if (!internal && c->rr_client)
968
    cf_error("Only internal neighbor can be RR client");
969

    
970
  if (internal && c->rs_client)
971
    cf_error("Only external neighbor can be RS client");
972

    
973

    
974
  if (c->multihop && (c->gw_mode == GW_DIRECT))
975
    cf_error("Multihop BGP cannot use direct gateway mode");
976

    
977
  if (c->multihop && (ipa_has_link_scope(c->remote_ip) || 
978
                      ipa_has_link_scope(c->source_addr)))
979
    cf_error("Multihop BGP cannot be used with link-local addresses");
980

    
981

    
982
  /* Different default based on rs_client */
983
  if (!c->missing_lladdr)
984
    c->missing_lladdr = c->rs_client ? MLL_IGNORE : MLL_SELF;
985

    
986
  /* Different default for gw_mode */
987
  if (!c->gw_mode)
988
    c->gw_mode = (c->multihop || internal) ? GW_RECURSIVE : GW_DIRECT;
989

    
990
  /* Disable after error incompatible with restart limit action */
991
  if (c->c.in_limit && (c->c.in_limit->action == PLA_RESTART) && c->disable_after_error)
992
    c->c.in_limit->action = PLA_DISABLE;
993

    
994

    
995
  if ((c->gw_mode == GW_RECURSIVE) && c->c.table->sorted)
996
    cf_error("BGP in recursive mode prohibits sorted table");
997

    
998
  if (c->deterministic_med && c->c.table->sorted)
999
    cf_error("BGP with deterministic MED prohibits sorted table");
1000

    
1001
  if (c->secondary && !c->c.table->sorted)
1002
    cf_error("BGP with secondary option requires sorted table");
1003
}
1004

    
1005
static int
1006
bgp_reconfigure(struct proto *P, struct proto_config *C)
1007
{
1008
  struct bgp_config *new = (struct bgp_config *) C;
1009
  struct bgp_proto *p = (struct bgp_proto *) P;
1010
  struct bgp_config *old = p->cf;
1011

    
1012
  int same = !memcmp(((byte *) old) + sizeof(struct proto_config),
1013
                     ((byte *) new) + sizeof(struct proto_config),
1014
                     // password item is last and must be checked separately
1015
                     OFFSETOF(struct bgp_config, password) - sizeof(struct proto_config))
1016
    && ((!old->password && !new->password)
1017
        || (old->password && new->password && !strcmp(old->password, new->password)))
1018
    && (get_igp_table(old) == get_igp_table(new));
1019

    
1020
  /* We should update our copy of configuration ptr as old configuration will be freed */
1021
  if (same)
1022
    p->cf = new;
1023

    
1024
  return same;
1025
}
1026

    
1027
static void
1028
bgp_copy_config(struct proto_config *dest, struct proto_config *src)
1029
{
1030
  /* Just a shallow copy */
1031
  proto_copy_rest(dest, src, sizeof(struct bgp_config));
1032
}
1033

    
1034

    
1035
/**
1036
 * bgp_error - report a protocol error
1037
 * @c: connection
1038
 * @code: error code (according to the RFC)
1039
 * @subcode: error sub-code
1040
 * @data: data to be passed in the Notification message
1041
 * @len: length of the data
1042
 *
1043
 * bgp_error() sends a notification packet to tell the other side that a protocol
1044
 * error has occurred (including the data considered erroneous if possible) and
1045
 * closes the connection.
1046
 */
1047
void
1048
bgp_error(struct bgp_conn *c, unsigned code, unsigned subcode, byte *data, int len)
1049
{
1050
  struct bgp_proto *p = c->bgp;
1051

    
1052
  if (c->state == BS_CLOSE)
1053
    return;
1054

    
1055
  bgp_log_error(p, BE_BGP_TX, "Error", code, subcode, data, (len > 0) ? len : -len);
1056
  bgp_store_error(p, c, BE_BGP_TX, (code << 16) | subcode);
1057
  bgp_conn_enter_close_state(c);
1058

    
1059
  c->notify_code = code;
1060
  c->notify_subcode = subcode;
1061
  c->notify_data = data;
1062
  c->notify_size = (len > 0) ? len : 0;
1063
  bgp_schedule_packet(c, PKT_NOTIFICATION);
1064

    
1065
  if (code != 6)
1066
    {
1067
      bgp_update_startup_delay(p);
1068
      bgp_stop(p, 0);
1069
    }
1070
}
1071

    
1072
/**
1073
 * bgp_store_error - store last error for status report
1074
 * @p: BGP instance
1075
 * @c: connection
1076
 * @class: error class (BE_xxx constants)
1077
 * @code: error code (class specific)
1078
 *
1079
 * bgp_store_error() decides whether given error is interesting enough
1080
 * and store that error to last_error variables of @p
1081
 */
1082
void
1083
bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code)
1084
{
1085
  /* During PS_UP, we ignore errors on secondary connection */
1086
  if ((p->p.proto_state == PS_UP) && c && (c != p->conn))
1087
    return;
1088

    
1089
  /* During PS_STOP, we ignore any errors, as we want to report
1090
   * the error that caused transition to PS_STOP
1091
   */
1092
  if (p->p.proto_state == PS_STOP)
1093
    return;
1094

    
1095
  p->last_error_class = class;
1096
  p->last_error_code = code;
1097
}
1098

    
1099
/* Connection state names, looked up by bgp_state_dsc() */
static char *bgp_state_names[] = {
  "Idle",
  "Connect",
  "Active",
  "OpenSent",
  "OpenConfirm",
  "Established",
  "Close"
};

/* Message prefixes, indexed by the stored last error class */
static char *bgp_err_classes[] = {
  "",
  "Error: ",
  "Socket: ",
  "Received: ",
  "BGP Error: ",
  "Automatic shutdown: ",
  ""
};

/* Messages for the BE_MISC class, indexed by the stored last error code */
static char *bgp_misc_errors[] = {
  "",
  "Neighbor lost",
  "Invalid next hop",
  "Kernel MD5 auth failed",
  "No listening socket"
};

/* Messages for the BE_AUTO_DOWN class, indexed by the stored last error code */
static char *bgp_auto_errors[] = {
  "",
  "Route limit exceeded"
};
1103

    
1104
static const char *
1105
bgp_last_errmsg(struct bgp_proto *p)
1106
{
1107
  switch (p->last_error_class)
1108
    {
1109
    case BE_MISC:
1110
      return bgp_misc_errors[p->last_error_code];
1111
    case BE_SOCKET:
1112
      return (p->last_error_code == 0) ? "Connection closed" : strerror(p->last_error_code);
1113
    case BE_BGP_RX:
1114
    case BE_BGP_TX:
1115
      return bgp_error_dsc(p->last_error_code >> 16, p->last_error_code & 0xFF);
1116
    case BE_AUTO_DOWN:
1117
      return bgp_auto_errors[p->last_error_code];
1118
    default:
1119
      return "";
1120
    }
1121
}
1122

    
1123
static const char *
1124
bgp_state_dsc(struct bgp_proto *p)
1125
{
1126
  if (p->p.proto_state == PS_DOWN)
1127
    return "Down";
1128

    
1129
  int state = MAX(p->incoming_conn.state, p->outgoing_conn.state);
1130
  if ((state == BS_IDLE) && (p->start_state >= BSS_CONNECT) && p->cf->passive)
1131
    return "Passive";
1132

    
1133
  return bgp_state_names[state];
1134
}
1135

    
1136
static void
1137
bgp_get_status(struct proto *P, byte *buf)
1138
{
1139
  struct bgp_proto *p = (struct bgp_proto *) P;
1140

    
1141
  const char *err1 = bgp_err_classes[p->last_error_class];
1142
  const char *err2 = bgp_last_errmsg(p);
1143

    
1144
  if (P->proto_state == PS_DOWN)
1145
    bsprintf(buf, "%s%s", err1, err2);
1146
  else
1147
    bsprintf(buf, "%-14s%s%s", bgp_state_dsc(p), err1, err2);
1148
}
1149

    
1150
static void
1151
bgp_show_proto_info(struct proto *P)
1152
{
1153
  struct bgp_proto *p = (struct bgp_proto *) P;
1154
  struct bgp_conn *c = p->conn;
1155

    
1156
  proto_show_basic_info(P);
1157

    
1158
  cli_msg(-1006, "  BGP state:          %s", bgp_state_dsc(p));
1159
  cli_msg(-1006, "    Neighbor address: %I%J", p->cf->remote_ip, p->cf->iface);
1160
  cli_msg(-1006, "    Neighbor AS:      %u", p->remote_as);
1161

    
1162
  if (P->proto_state == PS_START)
1163
    {
1164
      struct bgp_conn *oc = &p->outgoing_conn;
1165

    
1166
      if ((p->start_state < BSS_CONNECT) &&
1167
          (p->startup_timer->expires))
1168
        cli_msg(-1006, "    Error wait:       %d/%d", 
1169
                p->startup_timer->expires - now, p->startup_delay);
1170

    
1171
      if ((oc->state == BS_ACTIVE) &&
1172
          (oc->connect_retry_timer->expires))
1173
        cli_msg(-1006, "    Start delay:      %d/%d", 
1174
                oc->connect_retry_timer->expires - now, p->cf->start_delay_time);
1175
    }
1176
  else if (P->proto_state == PS_UP)
1177
    {
1178
      cli_msg(-1006, "    Neighbor ID:      %R", p->remote_id);
1179
      cli_msg(-1006, "    Neighbor caps:   %s%s",
1180
              c->peer_refresh_support ? " refresh" : "",
1181
              c->peer_as4_support ? " AS4" : "");
1182
      cli_msg(-1006, "    Session:          %s%s%s%s%s",
1183
              p->is_internal ? "internal" : "external",
1184
              p->cf->multihop ? " multihop" : "",
1185
              p->rr_client ? " route-reflector" : "",
1186
              p->rs_client ? " route-server" : "",
1187
              p->as4_session ? " AS4" : "");
1188
      cli_msg(-1006, "    Source address:   %I", p->source_addr);
1189
      if (P->cf->in_limit)
1190
        cli_msg(-1006, "    Route limit:      %d/%d",
1191
                p->p.stats.imp_routes, P->cf->in_limit->limit);
1192
      cli_msg(-1006, "    Hold timer:       %d/%d",
1193
              tm_remains(c->hold_timer), c->hold_time);
1194
      cli_msg(-1006, "    Keepalive timer:  %d/%d",
1195
              tm_remains(c->keepalive_timer), c->keepalive_time);
1196
    }
1197

    
1198
  if ((p->last_error_class != BE_NONE) && 
1199
      (p->last_error_class != BE_MAN_DOWN))
1200
    {
1201
      const char *err1 = bgp_err_classes[p->last_error_class];
1202
      const char *err2 = bgp_last_errmsg(p);
1203
      cli_msg(-1006, "    Last error:       %s%s", err1, err2);
1204
    }
1205
}
1206

    
1207
struct protocol proto_bgp = {
1208
  name:                        "BGP",
1209
  template:                "bgp%d",
1210
  attr_class:                EAP_BGP,
1211
  preference:                DEF_PREF_BGP,
1212
  init:                        bgp_init,
1213
  start:                bgp_start,
1214
  shutdown:                bgp_shutdown,
1215
  cleanup:                bgp_cleanup,
1216
  reconfigure:                bgp_reconfigure,
1217
  copy_config:                bgp_copy_config,
1218
  get_status:                bgp_get_status,
1219
  get_attr:                bgp_get_attr,
1220
  get_route_info:        bgp_get_route_info,
1221
  show_proto_info:        bgp_show_proto_info
1222
};