Statistics
| Branch: | Revision:

iof-bird-daemon / proto / bgp / bgp.c @ 2d0b7e24

History | View | Annotate | Download (34.2 KB)

1
/*
2
 *        BIRD -- The Border Gateway Protocol
3
 *
4
 *        (c) 2000 Martin Mares <mj@ucw.cz>
5
 *
6
 *        Can be freely distributed and used under the terms of the GNU GPL.
7
 */
8

    
9
/**
10
 * DOC: Border Gateway Protocol
11
 *
12
 * The BGP protocol is implemented in three parts: |bgp.c| which takes care of the
13
 * connection and most of the interface with BIRD core, |packets.c| handling
14
 * both incoming and outgoing BGP packets and |attrs.c| containing functions for
15
 * manipulation with BGP attribute lists.
16
 *
17
 * As opposed to the other existing routing daemons, BIRD has a sophisticated core
18
 * architecture which is able to keep all the information needed by BGP in the
19
 * primary routing table, therefore no complex data structures like a central
20
 * BGP table are needed. This increases memory footprint of a BGP router with
21
 * many connections, but not too much and, which is more important, it makes
22
 * BGP much easier to implement.
23
 *
24
 * Each instance of BGP (corresponding to a single BGP peer) is described by a &bgp_proto
25
 * structure to which are attached individual connections represented by &bgp_connection
26
 * (usually, there exists only one connection, but during BGP session setup, there
27
 * can be more of them). The connections are handled according to the BGP state machine
28
 * defined in the RFC with all the timers and all the parameters configurable.
29
 *
30
 * In incoming direction, we listen on the connection's socket and each time we receive
31
 * some input, we pass it to bgp_rx(). It decodes packet headers and the markers and
32
 * passes complete packets to bgp_rx_packet() which distributes the packet according
33
 * to its type.
34
 *
35
 * In outgoing direction, we gather all the routing updates and sort them to buckets
36
 * (&bgp_bucket) according to their attributes (we keep a hash table for fast comparison
37
 * of &rta's and a &fib which helps us to find if we already have another route for
38
 * the same destination queued for sending, so that we can replace it with the new one
39
 * immediately instead of sending both updates). There also exists a special bucket holding
40
 * all the route withdrawals which cannot be queued anywhere else as they don't have any
41
 * attributes. If we have any packet to send (due to either new routes or the connection
42
 * tracking code wanting to send an Open, Keepalive or Notification message), we call
43
 * bgp_schedule_packet() which sets the corresponding bit in a @packets_to_send
44
 * bit field in &bgp_conn and as soon as the transmit socket buffer becomes empty,
45
 * we call bgp_fire_tx(). It inspects state of all the packet type bits and calls
46
 * the corresponding bgp_create_xx() functions, eventually rescheduling the same packet
47
 * type if we have more data of the same type to send.
48
 *
49
 * The processing of attributes consists of two functions: bgp_decode_attrs() for checking
50
 * of the attribute blocks and translating them to the language of BIRD's extended attributes
51
 * and bgp_encode_attrs() which does the converse. Both functions are built around a
52
 * @bgp_attr_table array describing all important characteristics of all known attributes.
53
 * Unknown transitive attributes are attached to the route as %EAF_TYPE_OPAQUE byte streams.
54
 */
55

    
56
#undef LOCAL_DEBUG
57

    
58
#include "nest/bird.h"
59
#include "nest/iface.h"
60
#include "nest/protocol.h"
61
#include "nest/route.h"
62
#include "nest/cli.h"
63
#include "nest/locks.h"
64
#include "conf/conf.h"
65
#include "lib/socket.h"
66
#include "lib/resource.h"
67
#include "lib/string.h"
68

    
69
#include "bgp.h"
70

    
71
struct linpool *bgp_linpool;                /* Global temporary pool */
72
static sock *bgp_listen_sk;                /* Global listening socket */
73
static int bgp_counter;                        /* Number of protocol instances using the listening socket */
74

    
75
static void bgp_close(struct bgp_proto *p, int apply_md5);
76
static void bgp_connect(struct bgp_proto *p);
77
static void bgp_active(struct bgp_proto *p);
78
static sock *bgp_setup_listen_sk(ip_addr addr, unsigned port, u32 flags);
79
static void bgp_update_bfd(struct bgp_proto *p, int use_bfd);
80

    
81

    
82
/**
83
 * bgp_open - open a BGP instance
84
 * @p: BGP instance
85
 *
86
 * This function allocates and configures shared BGP resources.
87
 * Should be called as the last step during initialization
88
 * (when lock is acquired and neighbor is ready).
89
 * When error, state changed to PS_DOWN, -1 is returned and caller
90
 * should return immediately.
91
 */
92
static int
93
bgp_open(struct bgp_proto *p)
94
{
95
  struct config *cfg = p->cf->c.global;
96
  int errcode;
97

    
98
  bgp_counter++;
99

    
100
  if (!bgp_listen_sk)
101
    bgp_listen_sk = bgp_setup_listen_sk(cfg->listen_bgp_addr, cfg->listen_bgp_port, cfg->listen_bgp_flags);
102

    
103
  if (!bgp_listen_sk)
104
    {
105
      bgp_counter--;
106
      errcode = BEM_NO_SOCKET;
107
      goto err;
108
    }
109

    
110
  if (!bgp_linpool)
111
    bgp_linpool = lp_new(&root_pool, 4080);
112

    
113
  if (p->cf->password)
114
    {
115
      int rv = sk_set_md5_auth(bgp_listen_sk, p->cf->remote_ip, p->cf->iface, p->cf->password);
116
      if (rv < 0)
117
        {
118
          bgp_close(p, 0);
119
          errcode = BEM_INVALID_MD5;
120
          goto err;
121
        }
122
    }
123

    
124
  return 0;
125

    
126
err:
127
  p->p.disabled = 1;
128
  bgp_store_error(p, NULL, BE_MISC, errcode);
129
  proto_notify_state(&p->p, PS_DOWN);
130
  return -1;
131
}
132

    
133
static void
134
bgp_startup(struct bgp_proto *p)
135
{
136
  BGP_TRACE(D_EVENTS, "Started");
137
  p->start_state = p->cf->capabilities ? BSS_CONNECT : BSS_CONNECT_NOCAP;
138

    
139
  if (!p->cf->passive)
140
    bgp_active(p);
141
}
142

    
143
static void
144
bgp_startup_timeout(timer *t)
145
{
146
  bgp_startup(t->data);
147
}
148

    
149

    
150
static void
151
bgp_initiate(struct bgp_proto *p)
152
{
153
  int rv = bgp_open(p);
154
  if (rv < 0)
155
    return;
156

    
157
  if (p->cf->bfd)
158
    bgp_update_bfd(p, p->cf->bfd);
159

    
160
  if (p->startup_delay)
161
    {
162
      p->start_state = BSS_DELAY;
163
      BGP_TRACE(D_EVENTS, "Startup delayed by %d seconds", p->startup_delay);
164
      bgp_start_timer(p->startup_timer, p->startup_delay);
165
    }
166
  else
167
    bgp_startup(p);
168
}
169

    
170
/**
171
 * bgp_close - close a BGP instance
172
 * @p: BGP instance
173
 * @apply_md5: 0 to disable unsetting MD5 auth
174
 *
175
 * This function frees and deconfigures shared BGP resources.
176
 * @apply_md5 is set to 0 when bgp_close is called as a cleanup
177
 * from failed bgp_open().
178
 */
179
static void
180
bgp_close(struct bgp_proto *p, int apply_md5)
181
{
182
  ASSERT(bgp_counter);
183
  bgp_counter--;
184

    
185
  if (p->cf->password && apply_md5)
186
    sk_set_md5_auth(bgp_listen_sk, p->cf->remote_ip, p->cf->iface, NULL);
187

    
188
  if (!bgp_counter)
189
    {
190
      rfree(bgp_listen_sk);
191
      bgp_listen_sk = NULL;
192
      rfree(bgp_linpool);
193
      bgp_linpool = NULL;
194
    }
195
}
196

    
197
/**
198
 * bgp_start_timer - start a BGP timer
199
 * @t: timer
200
 * @value: time to fire (0 to disable the timer)
201
 *
202
 * This functions calls tm_start() on @t with time @value and the
203
 * amount of randomization suggested by the BGP standard. Please use
204
 * it for all BGP timers.
205
 */
206
void
207
bgp_start_timer(timer *t, int value)
208
{
209
  if (value)
210
    {
211
      /* The randomization procedure is specified in RFC 1771: 9.2.3.3 */
212
      t->randomize = value / 4;
213
      tm_start(t, value - t->randomize);
214
    }
215
  else
216
    tm_stop(t);
217
}
218

    
219
/**
220
 * bgp_close_conn - close a BGP connection
221
 * @conn: connection to close
222
 *
223
 * This function takes a connection described by the &bgp_conn structure,
224
 * closes its socket and frees all resources associated with it.
225
 */
226
void
227
bgp_close_conn(struct bgp_conn *conn)
228
{
229
  // struct bgp_proto *p = conn->bgp;
230

    
231
  DBG("BGP: Closing connection\n");
232
  conn->packets_to_send = 0;
233
  rfree(conn->connect_retry_timer);
234
  conn->connect_retry_timer = NULL;
235
  rfree(conn->keepalive_timer);
236
  conn->keepalive_timer = NULL;
237
  rfree(conn->hold_timer);
238
  conn->hold_timer = NULL;
239
  rfree(conn->sk);
240
  conn->sk = NULL;
241
  rfree(conn->tx_ev);
242
  conn->tx_ev = NULL;
243
}
244

    
245

    
246
/**
247
 * bgp_update_startup_delay - update a startup delay
248
 * @p: BGP instance
249
 *
250
 * This function updates a startup delay that is used to postpone next BGP connect.
251
 * It also handles disable_after_error and might stop BGP instance when error
252
 * happened and disable_after_error is on.
253
 *
254
 * It should be called when BGP protocol error happened.
255
 */
256
void
257
bgp_update_startup_delay(struct bgp_proto *p)
258
{
259
  struct bgp_config *cf = p->cf;
260

    
261
  DBG("BGP: Updating startup delay\n");
262

    
263
  if (p->last_proto_error && ((now - p->last_proto_error) >= (int) cf->error_amnesia_time))
264
    p->startup_delay = 0;
265

    
266
  p->last_proto_error = now;
267

    
268
  if (cf->disable_after_error)
269
    {
270
      p->startup_delay = 0;
271
      p->p.disabled = 1;
272
      return;
273
    }
274

    
275
  if (!p->startup_delay)
276
    p->startup_delay = cf->error_delay_time_min;
277
  else
278
    p->startup_delay = MIN(2 * p->startup_delay, cf->error_delay_time_max);
279
}
280

    
281
static void
282
bgp_graceful_close_conn(struct bgp_conn *conn, unsigned subcode)
283
{
284
  switch (conn->state)
285
    {
286
    case BS_IDLE:
287
    case BS_CLOSE:
288
      return;
289
    case BS_CONNECT:
290
    case BS_ACTIVE:
291
      bgp_conn_enter_idle_state(conn);
292
      return;
293
    case BS_OPENSENT:
294
    case BS_OPENCONFIRM:
295
    case BS_ESTABLISHED:
296
      bgp_error(conn, 6, subcode, NULL, 0);
297
      return;
298
    default:
299
      bug("bgp_graceful_close_conn: Unknown state %d", conn->state);
300
    }
301
}
302

    
303
static void
304
bgp_down(struct bgp_proto *p)
305
{
306
  if (p->start_state > BSS_PREPARE)
307
    bgp_close(p, 1);
308

    
309
  BGP_TRACE(D_EVENTS, "Down");
310
  proto_notify_state(&p->p, PS_DOWN);
311
}
312

    
313
static void
314
bgp_decision(void *vp)
315
{
316
  struct bgp_proto *p = vp;
317

    
318
  DBG("BGP: Decision start\n");
319
  if ((p->p.proto_state == PS_START)
320
      && (p->outgoing_conn.state == BS_IDLE)
321
      && (!p->cf->passive))
322
    bgp_active(p);
323

    
324
  if ((p->p.proto_state == PS_STOP)
325
      && (p->outgoing_conn.state == BS_IDLE)
326
      && (p->incoming_conn.state == BS_IDLE))
327
    bgp_down(p);
328
}
329

    
330
void
331
bgp_stop(struct bgp_proto *p, unsigned subcode)
332
{
333
  proto_notify_state(&p->p, PS_STOP);
334
  bgp_graceful_close_conn(&p->outgoing_conn, subcode);
335
  bgp_graceful_close_conn(&p->incoming_conn, subcode);
336
  ev_schedule(p->event);
337
}
338

    
339
static inline void
340
bgp_conn_set_state(struct bgp_conn *conn, unsigned new_state)
341
{
342
  if (conn->bgp->p.mrtdump & MD_STATES)
343
    mrt_dump_bgp_state_change(conn, conn->state, new_state);
344

    
345
  conn->state = new_state;
346
}
347

    
348
void
349
bgp_conn_enter_openconfirm_state(struct bgp_conn *conn)
350
{
351
  /* Really, most of the work is done in bgp_rx_open(). */
352
  bgp_conn_set_state(conn, BS_OPENCONFIRM);
353
}
354

    
355
void
356
bgp_conn_enter_established_state(struct bgp_conn *conn)
357
{
358
  struct bgp_proto *p = conn->bgp;
359
 
360
  BGP_TRACE(D_EVENTS, "BGP session established");
361
  DBG("BGP: UP!!!\n");
362

    
363
  /* For multi-hop BGP sessions */
364
  if (ipa_zero(p->source_addr))
365
    p->source_addr = conn->sk->saddr; 
366

    
367
  p->conn = conn;
368
  p->last_error_class = 0;
369
  p->last_error_code = 0;
370
  bgp_attr_init(conn->bgp);
371
  bgp_conn_set_state(conn, BS_ESTABLISHED);
372
  proto_notify_state(&p->p, PS_UP);
373
}
374

    
375
static void
376
bgp_conn_leave_established_state(struct bgp_proto *p)
377
{
378
  BGP_TRACE(D_EVENTS, "BGP session closed");
379
  p->conn = NULL;
380

    
381
  if (p->p.proto_state == PS_UP)
382
    bgp_stop(p, 0);
383
}
384

    
385
void
386
bgp_conn_enter_close_state(struct bgp_conn *conn)
387
{
388
  struct bgp_proto *p = conn->bgp;
389
  int os = conn->state;
390

    
391
  bgp_conn_set_state(conn, BS_CLOSE);
392
  tm_stop(conn->keepalive_timer);
393
  conn->sk->rx_hook = NULL;
394

    
395
  /* Timeout for CLOSE state, if we cannot send notification soon then we just hangup */
396
  bgp_start_timer(conn->hold_timer, 10);
397

    
398
  if (os == BS_ESTABLISHED)
399
    bgp_conn_leave_established_state(p);
400
}
401

    
402
void
403
bgp_conn_enter_idle_state(struct bgp_conn *conn)
404
{
405
  struct bgp_proto *p = conn->bgp;
406
  int os = conn->state;
407

    
408
  bgp_close_conn(conn);
409
  bgp_conn_set_state(conn, BS_IDLE);
410
  ev_schedule(p->event);
411

    
412
  if (os == BS_ESTABLISHED)
413
    bgp_conn_leave_established_state(p);
414
}
415

    
416
static void
417
bgp_send_open(struct bgp_conn *conn)
418
{
419
  conn->start_state = conn->bgp->start_state;
420
  conn->want_as4_support = conn->bgp->cf->enable_as4 && (conn->start_state != BSS_CONNECT_NOCAP);
421
  conn->peer_as4_support = 0;        // Default value, possibly changed by receiving capability.
422
  conn->advertised_as = 0;
423

    
424
  DBG("BGP: Sending open\n");
425
  conn->sk->rx_hook = bgp_rx;
426
  conn->sk->tx_hook = bgp_tx;
427
  tm_stop(conn->connect_retry_timer);
428
  bgp_schedule_packet(conn, PKT_OPEN);
429
  bgp_conn_set_state(conn, BS_OPENSENT);
430
  bgp_start_timer(conn->hold_timer, conn->bgp->cf->initial_hold_time);
431
}
432

    
433
static void
434
bgp_connected(sock *sk)
435
{
436
  struct bgp_conn *conn = sk->data;
437
  struct bgp_proto *p = conn->bgp;
438

    
439
  BGP_TRACE(D_EVENTS, "Connected");
440
  bgp_send_open(conn);
441
}
442

    
443
static void
444
bgp_connect_timeout(timer *t)
445
{
446
  struct bgp_conn *conn = t->data;
447
  struct bgp_proto *p = conn->bgp;
448

    
449
  DBG("BGP: connect_timeout\n");
450
  if (p->p.proto_state == PS_START)
451
    {
452
      bgp_close_conn(conn);
453
      bgp_connect(p);
454
    }
455
  else
456
    bgp_conn_enter_idle_state(conn);
457
}
458

    
459
static void
460
bgp_sock_err(sock *sk, int err)
461
{
462
  struct bgp_conn *conn = sk->data;
463
  struct bgp_proto *p = conn->bgp;
464

    
465
  /*
466
   * This error hook may be called either asynchronously from main
467
   * loop, or synchronously from sk_send().  But sk_send() is called
468
   * only from bgp_tx() and bgp_kick_tx(), which are both called
469
   * asynchronously from main loop. Moreover, they end if err hook is
470
   * called. Therefore, we could suppose that it is always called
471
   * asynchronously.
472
   */
473

    
474
  bgp_store_error(p, conn, BE_SOCKET, err);
475

    
476
  if (err)
477
    BGP_TRACE(D_EVENTS, "Connection lost (%M)", err);
478
  else
479
    BGP_TRACE(D_EVENTS, "Connection closed");
480

    
481
  bgp_conn_enter_idle_state(conn);
482
}
483

    
484
static void
485
bgp_hold_timeout(timer *t)
486
{
487
  struct bgp_conn *conn = t->data;
488
  struct bgp_proto *p = conn->bgp;
489

    
490
  DBG("BGP: Hold timeout\n");
491

    
492
  /* We are already closing the connection - just do hangup */
493
  if (conn->state == BS_CLOSE)
494
  {
495
    BGP_TRACE(D_EVENTS, "Connection stalled");
496
    bgp_conn_enter_idle_state(conn);
497
    return;
498
  }
499

    
500
  /* If there is something in input queue, we are probably congested
501
     and perhaps just not processed BGP packets in time. */
502

    
503
  if (sk_rx_ready(conn->sk) > 0)
504
    bgp_start_timer(conn->hold_timer, 10);
505
  else
506
    bgp_error(conn, 4, 0, NULL, 0);
507
}
508

    
509
static void
510
bgp_keepalive_timeout(timer *t)
511
{
512
  struct bgp_conn *conn = t->data;
513

    
514
  DBG("BGP: Keepalive timer\n");
515
  bgp_schedule_packet(conn, PKT_KEEPALIVE);
516
}
517

    
518
static void
519
bgp_setup_conn(struct bgp_proto *p, struct bgp_conn *conn)
520
{
521
  timer *t;
522

    
523
  conn->sk = NULL;
524
  conn->bgp = p;
525
  conn->packets_to_send = 0;
526

    
527
  t = conn->connect_retry_timer = tm_new(p->p.pool);
528
  t->hook = bgp_connect_timeout;
529
  t->data = conn;
530
  t = conn->hold_timer = tm_new(p->p.pool);
531
  t->hook = bgp_hold_timeout;
532
  t->data = conn;
533
  t = conn->keepalive_timer = tm_new(p->p.pool);
534
  t->hook = bgp_keepalive_timeout;
535
  t->data = conn;
536
  conn->tx_ev = ev_new(p->p.pool);
537
  conn->tx_ev->hook = bgp_kick_tx;
538
  conn->tx_ev->data = conn;
539
}
540

    
541
static void
542
bgp_setup_sk(struct bgp_conn *conn, sock *s)
543
{
544
  s->data = conn;
545
  s->err_hook = bgp_sock_err;
546
  conn->sk = s;
547
}
548

    
549
static void
550
bgp_active(struct bgp_proto *p)
551
{
552
  int delay = MAX(1, p->cf->start_delay_time);
553
  struct bgp_conn *conn = &p->outgoing_conn;
554

    
555
  BGP_TRACE(D_EVENTS, "Connect delayed by %d seconds", delay);
556
  bgp_setup_conn(p, conn);
557
  bgp_conn_set_state(conn, BS_ACTIVE);
558
  bgp_start_timer(conn->connect_retry_timer, delay);
559
}
560

    
561
/**
562
 * bgp_connect - initiate an outgoing connection
563
 * @p: BGP instance
564
 *
565
 * The bgp_connect() function creates a new &bgp_conn and initiates
566
 * a TCP connection to the peer. The rest of connection setup is governed
567
 * by the BGP state machine as described in the standard.
568
 */
569
static void
570
bgp_connect(struct bgp_proto *p)        /* Enter Connect state and start establishing connection */
571
{
572
  sock *s;
573
  struct bgp_conn *conn = &p->outgoing_conn;
574
  int hops = p->cf->multihop ? : 1;
575

    
576
  DBG("BGP: Connecting\n");
577
  s = sk_new(p->p.pool);
578
  s->type = SK_TCP_ACTIVE;
579
  s->saddr = p->source_addr;
580
  s->daddr = p->cf->remote_ip;
581
  s->iface = p->neigh ? p->neigh->iface : NULL;
582
  s->dport = BGP_PORT;
583
  s->ttl = p->cf->ttl_security ? 255 : hops;
584
  s->rbsize = BGP_RX_BUFFER_SIZE;
585
  s->tbsize = BGP_TX_BUFFER_SIZE;
586
  s->tos = IP_PREC_INTERNET_CONTROL;
587
  s->password = p->cf->password;
588
  s->tx_hook = bgp_connected;
589
  BGP_TRACE(D_EVENTS, "Connecting to %I%J from local address %I%J", s->daddr, p->cf->iface,
590
            s->saddr, ipa_has_link_scope(s->saddr) ? s->iface : NULL);
591
  bgp_setup_conn(p, conn);
592
  bgp_setup_sk(conn, s);
593
  bgp_conn_set_state(conn, BS_CONNECT);
594

    
595
  if (sk_open(s) < 0)
596
    {
597
      bgp_sock_err(s, 0);
598
      return;
599
    }
600

    
601
  /* Set minimal receive TTL if needed */
602
  if (p->cf->ttl_security)
603
  {
604
    DBG("Setting minimum received TTL to %d", 256 - hops);
605
    if (sk_set_min_ttl(s, 256 - hops) < 0)
606
    {
607
      log(L_ERR "TTL security configuration failed, closing session");
608
      bgp_sock_err(s, 0);
609
      return;
610
    }
611
  }
612

    
613
  DBG("BGP: Waiting for connect success\n");
614
  bgp_start_timer(conn->connect_retry_timer, p->cf->connect_retry_time);
615
}
616

    
617
/**
618
 * bgp_incoming_connection - handle an incoming connection
619
 * @sk: TCP socket
620
 * @dummy: unused
621
 *
622
 * This function serves as a socket hook for accepting of new BGP
623
 * connections. It searches a BGP instance corresponding to the peer
624
 * which has connected and if such an instance exists, it creates a
625
 * &bgp_conn structure, attaches it to the instance and either sends
626
 * an Open message or (if there already is an active connection) it
627
 * closes the new connection by sending a Notification message.
628
 */
629
static int
630
bgp_incoming_connection(sock *sk, int dummy UNUSED)
631
{
632
  struct proto_config *pc;
633

    
634
  DBG("BGP: Incoming connection from %I port %d\n", sk->daddr, sk->dport);
635
  WALK_LIST(pc, config->protos)
636
    if (pc->protocol == &proto_bgp && pc->proto)
637
      {
638
        struct bgp_proto *p = (struct bgp_proto *) pc->proto;
639
        if (ipa_equal(p->cf->remote_ip, sk->daddr) &&
640
            (!ipa_has_link_scope(sk->daddr) || (p->cf->iface == sk->iface)))
641
          {
642
            /* We are in proper state and there is no other incoming connection */
643
            int acc = (p->p.proto_state == PS_START || p->p.proto_state == PS_UP) &&
644
              (p->start_state >= BSS_CONNECT) && (!p->incoming_conn.sk);
645

    
646
            BGP_TRACE(D_EVENTS, "Incoming connection from %I%J (port %d) %s",
647
                      sk->daddr, ipa_has_link_scope(sk->daddr) ? sk->iface : NULL,
648
                      sk->dport, acc ? "accepted" : "rejected");
649

    
650
            if (!acc)
651
              goto err;
652

    
653
            int hops = p->cf->multihop ? : 1;
654
            if (p->cf->ttl_security)
655
            {
656
              /* TTL security support */
657
              if ((sk_set_ttl(sk, 255) < 0) ||
658
                  (sk_set_min_ttl(sk, 256 - hops) < 0))
659
              {
660
                log(L_ERR "TTL security configuration failed, closing session");
661
                goto err;
662
              }
663
            }
664
            else
665
              sk_set_ttl(sk, hops);
666

    
667
            bgp_setup_conn(p, &p->incoming_conn);
668
            bgp_setup_sk(&p->incoming_conn, sk);
669
            bgp_send_open(&p->incoming_conn);
670
            return 0;
671
          }
672
      }
673

    
674
  log(L_WARN "BGP: Unexpected connect from unknown address %I%J (port %d)",
675
      sk->daddr, ipa_has_link_scope(sk->daddr) ? sk->iface : NULL, sk->dport);
676
 err:
677
  rfree(sk);
678
  return 0;
679
}
680

    
681
static void
682
bgp_listen_sock_err(sock *sk UNUSED, int err)
683
{
684
  if (err == ECONNABORTED)
685
    log(L_WARN "BGP: Incoming connection aborted");
686
  else
687
    log(L_ERR "BGP: Error on listening socket: %M", err);
688
}
689

    
690
static sock *
691
bgp_setup_listen_sk(ip_addr addr, unsigned port, u32 flags)
692
{
693
  sock *s = sk_new(&root_pool);
694
  DBG("BGP: Creating listening socket\n");
695
  s->type = SK_TCP_PASSIVE;
696
  s->ttl = 255;
697
  s->saddr = addr;
698
  s->sport = port ? port : BGP_PORT;
699
  s->flags = flags ? 0 : SKF_V6ONLY;
700
  s->tos = IP_PREC_INTERNET_CONTROL;
701
  s->rbsize = BGP_RX_BUFFER_SIZE;
702
  s->tbsize = BGP_TX_BUFFER_SIZE;
703
  s->rx_hook = bgp_incoming_connection;
704
  s->err_hook = bgp_listen_sock_err;
705

    
706
  if (sk_open(s) < 0)
707
    {
708
      log(L_ERR "BGP: Unable to open listening socket");
709
      rfree(s);
710
      return NULL;
711
    }
712

    
713
  return s;
714
}
715

    
716
static void
717
bgp_start_neighbor(struct bgp_proto *p)
718
{
719
  /* Called only for single-hop BGP sessions */
720

    
721
  if (ipa_zero(p->source_addr))
722
    p->source_addr = p->neigh->ifa->ip; 
723

    
724
#ifdef IPV6
725
  {
726
    struct ifa *a;
727
    p->local_link = IPA_NONE;
728
    WALK_LIST(a, p->neigh->iface->addrs)
729
      if (a->scope == SCOPE_LINK)
730
        {
731
          p->local_link = a->ip;
732
          break;
733
        }
734

    
735
    if (! ipa_nonzero(p->local_link))
736
      log(L_WARN "%s: Missing link local address on interface %s", p->p.name,  p->neigh->iface->name);
737

    
738
    DBG("BGP: Selected link-level address %I\n", p->local_link);
739
  }
740
#endif
741

    
742
  bgp_initiate(p);
743
}
744

    
745
static void
746
bgp_neigh_notify(neighbor *n)
747
{
748
  struct bgp_proto *p = (struct bgp_proto *) n->proto;
749

    
750
  if (! (n->flags & NEF_STICKY))
751
    return;
752

    
753
  if (n->scope > 0)
754
    {
755
      if ((p->p.proto_state == PS_START) && (p->start_state == BSS_PREPARE))
756
        {
757
          BGP_TRACE(D_EVENTS, "Neighbor found");
758
          bgp_start_neighbor(p);
759
        }
760
    }
761
  else
762
    {
763
      if ((p->p.proto_state == PS_START) || (p->p.proto_state == PS_UP))
764
        {
765
          BGP_TRACE(D_EVENTS, "Neighbor lost");
766
          bgp_store_error(p, NULL, BE_MISC, BEM_NEIGHBOR_LOST);
767
          bgp_stop(p, 0);
768
        }
769
    }
770
}
771

    
772
static void
773
bgp_bfd_notify(struct bfd_request *req)
774
{
775
  struct bgp_proto *p = req->data;
776
  int ps = p->p.proto_state;
777

    
778
  if (req->down && ((ps == PS_START) || (ps == PS_UP)))
779
    {
780
      BGP_TRACE(D_EVENTS, "BFD session down");
781
      bgp_store_error(p, NULL, BE_MISC, BEM_BFD_DOWN);
782
      if (ps == PS_UP)
783
        bgp_update_startup_delay(p);
784
      bgp_stop(p, 0);
785
    }
786
}
787

    
788
static void
789
bgp_update_bfd(struct bgp_proto *p, int use_bfd)
790
{
791
  if (use_bfd && !p->bfd_req)
792
    p->bfd_req = bfd_request_session(p->p.pool, p->cf->remote_ip, p->source_addr,
793
                                     p->cf->multihop ? NULL : p->neigh->iface,
794
                                     bgp_bfd_notify, p);
795

    
796
  if (!use_bfd && p->bfd_req)
797
    {
798
      rfree(p->bfd_req);
799
      p->bfd_req = NULL;
800
    }
801
}
802

    
803
static int
804
bgp_reload_routes(struct proto *P)
805
{
806
  struct bgp_proto *p = (struct bgp_proto *) P;
807
  if (!p->conn || !p->conn->peer_refresh_support)
808
    return 0;
809

    
810
  bgp_schedule_packet(p->conn, PKT_ROUTE_REFRESH);
811
  return 1;
812
}
813

    
814
static void
815
bgp_start_locked(struct object_lock *lock)
816
{
817
  struct bgp_proto *p = lock->data;
818
  struct bgp_config *cf = p->cf;
819

    
820
  if (p->p.proto_state != PS_START)
821
    {
822
      DBG("BGP: Got lock in different state %d\n", p->p.proto_state);
823
      return;
824
    }
825

    
826
  DBG("BGP: Got lock\n");
827

    
828
  if (cf->multihop)
829
    {
830
      /* Multi-hop sessions do not use neighbor entries */
831
      bgp_initiate(p);
832
      return;
833
    }
834

    
835
  p->neigh = neigh_find2(&p->p, &cf->remote_ip, cf->iface, NEF_STICKY);
836
  if (!p->neigh || (p->neigh->scope == SCOPE_HOST))
837
    {
838
      log(L_ERR "%s: Invalid remote address %I%J", p->p.name, cf->remote_ip, cf->iface);
839
      /* As we do not start yet, we can just disable protocol */
840
      p->p.disabled = 1;
841
      bgp_store_error(p, NULL, BE_MISC, BEM_INVALID_NEXT_HOP);
842
      proto_notify_state(&p->p, PS_DOWN);
843
      return;
844
    }
845
  
846
  if (p->neigh->scope > 0)
847
    bgp_start_neighbor(p);
848
  else
849
    BGP_TRACE(D_EVENTS, "Waiting for %I%J to become my neighbor", cf->remote_ip, cf->iface);
850
}
851

    
852
static int
853
bgp_start(struct proto *P)
854
{
855
  struct bgp_proto *p = (struct bgp_proto *) P;
856
  struct object_lock *lock;
857

    
858
  DBG("BGP: Startup.\n");
859
  p->start_state = BSS_PREPARE;
860
  p->outgoing_conn.state = BS_IDLE;
861
  p->incoming_conn.state = BS_IDLE;
862
  p->neigh = NULL;
863
  p->bfd_req = NULL;
864

    
865
  rt_lock_table(p->igp_table);
866

    
867
  p->event = ev_new(p->p.pool);
868
  p->event->hook = bgp_decision;
869
  p->event->data = p;
870

    
871
  p->startup_timer = tm_new(p->p.pool);
872
  p->startup_timer->hook = bgp_startup_timeout;
873
  p->startup_timer->data = p;
874

    
875
  p->local_id = proto_get_router_id(P->cf);
876
  if (p->rr_client)
877
    p->rr_cluster_id = p->cf->rr_cluster_id ? p->cf->rr_cluster_id : p->local_id;
878

    
879
  p->remote_id = 0;
880
  p->source_addr = p->cf->source_addr;
881

    
882
  /*
883
   *  Before attempting to create the connection, we need to lock the
884
   *  port, so that are sure we're the only instance attempting to talk
885
   *  with that neighbor.
886
   */
887

    
888
  lock = p->lock = olock_new(P->pool);
889
  lock->addr = p->cf->remote_ip;
890
  lock->iface = p->cf->iface;
891
  lock->type = OBJLOCK_TCP;
892
  lock->port = BGP_PORT;
893
  lock->hook = bgp_start_locked;
894
  lock->data = p;
895
  olock_acquire(lock);
896

    
897
  return PS_START;
898
}
899

    
900
extern int proto_restart;
901

    
902
static int
903
bgp_shutdown(struct proto *P)
904
{
905
  struct bgp_proto *p = (struct bgp_proto *) P;
906
  unsigned subcode = 0;
907

    
908
  BGP_TRACE(D_EVENTS, "Shutdown requested");
909

    
910
  switch (P->down_code)
911
    {
912
    case PDC_CF_REMOVE:
913
    case PDC_CF_DISABLE:
914
      subcode = 3; // Errcode 6, 3 - peer de-configured
915
      break;
916

    
917
    case PDC_CF_RESTART:
918
      subcode = 6; // Errcode 6, 6 - other configuration change
919
      break;
920

    
921
    case PDC_CMD_DISABLE:
922
    case PDC_CMD_SHUTDOWN:
923
      subcode = 2; // Errcode 6, 2 - administrative shutdown
924
      break;
925

    
926
    case PDC_CMD_RESTART:
927
      subcode = 4; // Errcode 6, 4 - administrative reset
928
      break;
929

    
930
    case PDC_RX_LIMIT_HIT:
931
    case PDC_IN_LIMIT_HIT:
932
      subcode = 1; // Errcode 6, 1 - max number of prefixes reached
933
      /* log message for compatibility */
934
      log(L_WARN "%s: Route limit exceeded, shutting down", p->p.name);
935
      goto limit;
936

    
937
    case PDC_OUT_LIMIT_HIT:
938
      subcode = proto_restart ? 4 : 2; // Administrative reset or shutdown
939

    
940
    limit:
941
      bgp_store_error(p, NULL, BE_AUTO_DOWN, BEA_ROUTE_LIMIT_EXCEEDED);
942
      if (proto_restart)
943
        bgp_update_startup_delay(p);
944
      else
945
        p->startup_delay = 0;
946
      goto done;
947
    }
948

    
949
  bgp_store_error(p, NULL, BE_MAN_DOWN, 0);
950
  p->startup_delay = 0;
951

    
952
 done:
953
  bgp_stop(p, subcode);
954
  return p->p.proto_state;
955
}
956

    
957
static void
958
bgp_cleanup(struct proto *P)
959
{
960
  struct bgp_proto *p = (struct bgp_proto *) P;
961
  rt_unlock_table(p->igp_table);
962
}
963

    
964
static rtable *
965
get_igp_table(struct bgp_config *cf)
966
{
967
  return cf->igp_table ? cf->igp_table->table : cf->c.table->table;
968
}
969

    
970
static struct proto *
971
bgp_init(struct proto_config *C)
972
{
973
  struct bgp_config *c = (struct bgp_config *) C;
974
  struct proto *P = proto_new(C, sizeof(struct bgp_proto));
975
  struct bgp_proto *p = (struct bgp_proto *) P;
976

    
977
  P->accept_ra_types = c->secondary ? RA_ACCEPTED : RA_OPTIMAL;
978
  P->rt_notify = bgp_rt_notify;
979
  P->rte_better = bgp_rte_better;
980
  P->import_control = bgp_import_control;
981
  P->neigh_notify = bgp_neigh_notify;
982
  P->reload_routes = bgp_reload_routes;
983

    
984
  if (c->deterministic_med)
985
    P->rte_recalculate = bgp_rte_recalculate;
986

    
987
  p->cf = c;
988
  p->local_as = c->local_as;
989
  p->remote_as = c->remote_as;
990
  p->is_internal = (c->local_as == c->remote_as);
991
  p->rs_client = c->rs_client;
992
  p->rr_client = c->rr_client;
993
  p->igp_table = get_igp_table(c);
994

    
995
  return P;
996
}
997

    
998

    
999
void
1000
bgp_check_config(struct bgp_config *c)
1001
{
1002
  int internal = (c->local_as == c->remote_as);
1003

    
1004
  /* Do not check templates at all */
1005
  if (c->c.class == SYM_TEMPLATE)
1006
    return;
1007

    
1008

    
1009
  /* EBGP direct by default, IBGP multihop by default */
1010
  if (c->multihop < 0)
1011
    c->multihop = internal ? 64 : 0;
1012

    
1013
  /* Different default for gw_mode */
1014
  if (!c->gw_mode)
1015
    c->gw_mode = c->multihop ? GW_RECURSIVE : GW_DIRECT;
1016

    
1017
  /* Different default based on rs_client */
1018
  if (!c->missing_lladdr)
1019
    c->missing_lladdr = c->rs_client ? MLL_IGNORE : MLL_SELF;
1020

    
1021
  /* Disable after error incompatible with restart limit action */
1022
  if (c->c.in_limit && (c->c.in_limit->action == PLA_RESTART) && c->disable_after_error)
1023
    c->c.in_limit->action = PLA_DISABLE;
1024

    
1025

    
1026
  if (!c->local_as)
1027
    cf_error("Local AS number must be set");
1028

    
1029
  if (!c->remote_as)
1030
    cf_error("Neighbor must be configured");
1031

    
1032
  if (!(c->capabilities && c->enable_as4) && (c->remote_as > 0xFFFF))
1033
    cf_error("Neighbor AS number out of range (AS4 not available)");
1034

    
1035
  if (!internal && c->rr_client)
1036
    cf_error("Only internal neighbor can be RR client");
1037

    
1038
  if (internal && c->rs_client)
1039
    cf_error("Only external neighbor can be RS client");
1040

    
1041
  if (c->multihop && (c->gw_mode == GW_DIRECT))
1042
    cf_error("Multihop BGP cannot use direct gateway mode");
1043

    
1044
  if (c->multihop && (ipa_has_link_scope(c->remote_ip) || 
1045
                      ipa_has_link_scope(c->source_addr)))
1046
    cf_error("Multihop BGP cannot be used with link-local addresses");
1047

    
1048
  if (c->multihop && c->bfd && ipa_zero(c->source_addr))
1049
    cf_error("Multihop BGP with BFD requires specified source address");
1050

    
1051
  if ((c->gw_mode == GW_RECURSIVE) && c->c.table->sorted)
1052
    cf_error("BGP in recursive mode prohibits sorted table");
1053

    
1054
  if (c->deterministic_med && c->c.table->sorted)
1055
    cf_error("BGP with deterministic MED prohibits sorted table");
1056

    
1057
  if (c->secondary && !c->c.table->sorted)
1058
    cf_error("BGP with secondary option requires sorted table");
1059
}
1060

    
1061
static int
1062
bgp_reconfigure(struct proto *P, struct proto_config *C)
1063
{
1064
  struct bgp_config *new = (struct bgp_config *) C;
1065
  struct bgp_proto *p = (struct bgp_proto *) P;
1066
  struct bgp_config *old = p->cf;
1067

    
1068
  if (proto_get_router_id(C) != p->local_id)
1069
    return 0;
1070

    
1071
  int same = !memcmp(((byte *) old) + sizeof(struct proto_config),
1072
                     ((byte *) new) + sizeof(struct proto_config),
1073
                     // password item is last and must be checked separately
1074
                     OFFSETOF(struct bgp_config, password) - sizeof(struct proto_config))
1075
    && ((!old->password && !new->password)
1076
        || (old->password && new->password && !strcmp(old->password, new->password)))
1077
    && (get_igp_table(old) == get_igp_table(new));
1078

    
1079
  if (same && (p->start_state > BSS_PREPARE))
1080
    bgp_update_bfd(p, new->bfd);
1081

    
1082
  /* We should update our copy of configuration ptr as old configuration will be freed */
1083
  if (same)
1084
    p->cf = new;
1085

    
1086
  return same;
1087
}
1088

    
1089
static void
1090
bgp_copy_config(struct proto_config *dest, struct proto_config *src)
1091
{
1092
  /* Just a shallow copy */
1093
  proto_copy_rest(dest, src, sizeof(struct bgp_config));
1094
}
1095

    
1096

    
1097
/**
1098
 * bgp_error - report a protocol error
1099
 * @c: connection
1100
 * @code: error code (according to the RFC)
1101
 * @subcode: error sub-code
1102
 * @data: data to be passed in the Notification message
1103
 * @len: length of the data
1104
 *
1105
 * bgp_error() sends a notification packet to tell the other side that a protocol
1106
 * error has occurred (including the data considered erroneous if possible) and
1107
 * closes the connection.
1108
 */
1109
void
1110
bgp_error(struct bgp_conn *c, unsigned code, unsigned subcode, byte *data, int len)
1111
{
1112
  struct bgp_proto *p = c->bgp;
1113

    
1114
  if (c->state == BS_CLOSE)
1115
    return;
1116

    
1117
  bgp_log_error(p, BE_BGP_TX, "Error", code, subcode, data, (len > 0) ? len : -len);
1118
  bgp_store_error(p, c, BE_BGP_TX, (code << 16) | subcode);
1119
  bgp_conn_enter_close_state(c);
1120

    
1121
  c->notify_code = code;
1122
  c->notify_subcode = subcode;
1123
  c->notify_data = data;
1124
  c->notify_size = (len > 0) ? len : 0;
1125
  bgp_schedule_packet(c, PKT_NOTIFICATION);
1126

    
1127
  if (code != 6)
1128
    {
1129
      bgp_update_startup_delay(p);
1130
      bgp_stop(p, 0);
1131
    }
1132
}
1133

    
1134
/**
1135
 * bgp_store_error - store last error for status report
1136
 * @p: BGP instance
1137
 * @c: connection
1138
 * @class: error class (BE_xxx constants)
1139
 * @code: error code (class specific)
1140
 *
1141
 * bgp_store_error() decides whether given error is interesting enough
1142
 * and store that error to last_error variables of @p
1143
 */
1144
void
1145
bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code)
1146
{
1147
  /* During PS_UP, we ignore errors on secondary connection */
1148
  if ((p->p.proto_state == PS_UP) && c && (c != p->conn))
1149
    return;
1150

    
1151
  /* During PS_STOP, we ignore any errors, as we want to report
1152
   * the error that caused transition to PS_STOP
1153
   */
1154
  if (p->p.proto_state == PS_STOP)
1155
    return;
1156

    
1157
  p->last_error_class = class;
1158
  p->last_error_code = code;
1159
}
1160

    
1161
/* Connection state names, indexed by the connection state value */
static char *bgp_state_names[] = {
  "Idle",
  "Connect",
  "Active",
  "OpenSent",
  "OpenConfirm",
  "Established",
  "Close"
};

/* Prefixes printed before the last error message, indexed by error class */
static char *bgp_err_classes[] = {
  "",
  "Error: ",
  "Socket: ",
  "Received: ",
  "BGP Error: ",
  "Automatic shutdown: ",
  ""
};

/* Messages for errors of class BE_MISC, indexed by error code */
static char *bgp_misc_errors[] = {
  "",
  "Neighbor lost",
  "Invalid next hop",
  "Kernel MD5 auth failed",
  "No listening socket",
  "BFD session down"
};

/* Messages for errors of class BE_AUTO_DOWN, indexed by error code */
static char *bgp_auto_errors[] = {
  "",
  "Route limit exceeded"
};
1165

    
1166
static const char *
1167
bgp_last_errmsg(struct bgp_proto *p)
1168
{
1169
  switch (p->last_error_class)
1170
    {
1171
    case BE_MISC:
1172
      return bgp_misc_errors[p->last_error_code];
1173
    case BE_SOCKET:
1174
      return (p->last_error_code == 0) ? "Connection closed" : strerror(p->last_error_code);
1175
    case BE_BGP_RX:
1176
    case BE_BGP_TX:
1177
      return bgp_error_dsc(p->last_error_code >> 16, p->last_error_code & 0xFF);
1178
    case BE_AUTO_DOWN:
1179
      return bgp_auto_errors[p->last_error_code];
1180
    default:
1181
      return "";
1182
    }
1183
}
1184

    
1185
static const char *
1186
bgp_state_dsc(struct bgp_proto *p)
1187
{
1188
  if (p->p.proto_state == PS_DOWN)
1189
    return "Down";
1190

    
1191
  int state = MAX(p->incoming_conn.state, p->outgoing_conn.state);
1192
  if ((state == BS_IDLE) && (p->start_state >= BSS_CONNECT) && p->cf->passive)
1193
    return "Passive";
1194

    
1195
  return bgp_state_names[state];
1196
}
1197

    
1198
static void
1199
bgp_get_status(struct proto *P, byte *buf)
1200
{
1201
  struct bgp_proto *p = (struct bgp_proto *) P;
1202

    
1203
  const char *err1 = bgp_err_classes[p->last_error_class];
1204
  const char *err2 = bgp_last_errmsg(p);
1205

    
1206
  if (P->proto_state == PS_DOWN)
1207
    bsprintf(buf, "%s%s", err1, err2);
1208
  else
1209
    bsprintf(buf, "%-14s%s%s", bgp_state_dsc(p), err1, err2);
1210
}
1211

    
1212
static void
1213
bgp_show_proto_info(struct proto *P)
1214
{
1215
  struct bgp_proto *p = (struct bgp_proto *) P;
1216
  struct bgp_conn *c = p->conn;
1217

    
1218
  proto_show_basic_info(P);
1219

    
1220
  cli_msg(-1006, "  BGP state:          %s", bgp_state_dsc(p));
1221
  cli_msg(-1006, "    Neighbor address: %I%J", p->cf->remote_ip, p->cf->iface);
1222
  cli_msg(-1006, "    Neighbor AS:      %u", p->remote_as);
1223

    
1224
  if (P->proto_state == PS_START)
1225
    {
1226
      struct bgp_conn *oc = &p->outgoing_conn;
1227

    
1228
      if ((p->start_state < BSS_CONNECT) &&
1229
          (p->startup_timer->expires))
1230
        cli_msg(-1006, "    Error wait:       %d/%d", 
1231
                p->startup_timer->expires - now, p->startup_delay);
1232

    
1233
      if ((oc->state == BS_ACTIVE) &&
1234
          (oc->connect_retry_timer->expires))
1235
        cli_msg(-1006, "    Start delay:      %d/%d", 
1236
                oc->connect_retry_timer->expires - now, p->cf->start_delay_time);
1237
    }
1238
  else if (P->proto_state == PS_UP)
1239
    {
1240
      cli_msg(-1006, "    Neighbor ID:      %R", p->remote_id);
1241
      cli_msg(-1006, "    Neighbor caps:   %s%s",
1242
              c->peer_refresh_support ? " refresh" : "",
1243
              c->peer_as4_support ? " AS4" : "");
1244
      cli_msg(-1006, "    Session:          %s%s%s%s%s",
1245
              p->is_internal ? "internal" : "external",
1246
              p->cf->multihop ? " multihop" : "",
1247
              p->rr_client ? " route-reflector" : "",
1248
              p->rs_client ? " route-server" : "",
1249
              p->as4_session ? " AS4" : "");
1250
      cli_msg(-1006, "    Source address:   %I", p->source_addr);
1251
      if (P->cf->in_limit)
1252
        cli_msg(-1006, "    Route limit:      %d/%d",
1253
                p->p.stats.imp_routes + p->p.stats.filt_routes, P->cf->in_limit->limit);
1254
      cli_msg(-1006, "    Hold timer:       %d/%d",
1255
              tm_remains(c->hold_timer), c->hold_time);
1256
      cli_msg(-1006, "    Keepalive timer:  %d/%d",
1257
              tm_remains(c->keepalive_timer), c->keepalive_time);
1258
    }
1259

    
1260
  if ((p->last_error_class != BE_NONE) && 
1261
      (p->last_error_class != BE_MAN_DOWN))
1262
    {
1263
      const char *err1 = bgp_err_classes[p->last_error_class];
1264
      const char *err2 = bgp_last_errmsg(p);
1265
      cli_msg(-1006, "    Last error:       %s%s", err1, err2);
1266
    }
1267
}
1268

    
1269
struct protocol proto_bgp = {
1270
  name:                        "BGP",
1271
  template:                "bgp%d",
1272
  attr_class:                EAP_BGP,
1273
  preference:                DEF_PREF_BGP,
1274
  init:                        bgp_init,
1275
  start:                bgp_start,
1276
  shutdown:                bgp_shutdown,
1277
  cleanup:                bgp_cleanup,
1278
  reconfigure:                bgp_reconfigure,
1279
  copy_config:                bgp_copy_config,
1280
  get_status:                bgp_get_status,
1281
  get_attr:                bgp_get_attr,
1282
  get_route_info:        bgp_get_route_info,
1283
  show_proto_info:        bgp_show_proto_info
1284
};