Statistics
| Branch: | Revision:

iof-bird-daemon / proto / bgp / bgp.c @ 6cf72d7a

History | View | Annotate | Download (40.1 KB)

1
/*
2
 *        BIRD -- The Border Gateway Protocol
3
 *
4
 *        (c) 2000 Martin Mares <mj@ucw.cz>
5
 *
6
 *        Can be freely distributed and used under the terms of the GNU GPL.
7
 */
8

    
9
/**
10
 * DOC: Border Gateway Protocol
11
 *
12
 * The BGP protocol is implemented in three parts: |bgp.c| which takes care of the
13
 * connection and most of the interface with BIRD core, |packets.c| handling
14
 * both incoming and outgoing BGP packets and |attrs.c| containing functions for
15
 * manipulation with BGP attribute lists.
16
 *
17
 * As opposed to the other existing routing daemons, BIRD has a sophisticated core
18
 * architecture which is able to keep all the information needed by BGP in the
19
 * primary routing table, therefore no complex data structures like a central
20
 * BGP table are needed. This increases memory footprint of a BGP router with
21
 * many connections, but not too much and, which is more important, it makes
22
 * BGP much easier to implement.
23
 *
24
 * Each instance of BGP (corresponding to a single BGP peer) is described by a &bgp_proto
25
 * structure to which are attached individual connections represented by &bgp_conn
26
 * (usually, there exists only one connection, but during BGP session setup, there
27
 * can be more of them). The connections are handled according to the BGP state machine
28
 * defined in the RFC with all the timers and all the parameters configurable.
29
 *
30
 * In incoming direction, we listen on the connection's socket and each time we receive
31
 * some input, we pass it to bgp_rx(). It decodes packet headers and the markers and
32
 * passes complete packets to bgp_rx_packet() which distributes the packet according
33
 * to its type.
34
 *
35
 * In outgoing direction, we gather all the routing updates and sort them to buckets
36
 * (&bgp_bucket) according to their attributes (we keep a hash table for fast comparison
37
 * of &rta's and a &fib which helps us to find if we already have another route for
38
 * the same destination queued for sending, so that we can replace it with the new one
39
 * immediately instead of sending both updates). There also exists a special bucket holding
40
 * all the route withdrawals which cannot be queued anywhere else as they don't have any
41
 * attributes. If we have any packet to send (due to either new routes or the connection
42
 * tracking code wanting to send an Open, Keepalive or Notification message), we call
43
 * bgp_schedule_packet() which sets the corresponding bit in a @packets_to_send
44
 * bit field in &bgp_conn and as soon as the transmit socket buffer becomes empty,
45
 * we call bgp_fire_tx(). It inspects state of all the packet type bits and calls
46
 * the corresponding bgp_create_xx() functions, eventually rescheduling the same packet
47
 * type if we have more data of the same type to send.
48
 *
49
 * The processing of attributes consists of two functions: bgp_decode_attrs() for checking
50
 * of the attribute blocks and translating them to the language of BIRD's extended attributes
51
 * and bgp_encode_attrs() which does the converse. Both functions are built around a
52
 * @bgp_attr_table array describing all important characteristics of all known attributes.
53
 * Unknown transitive attributes are attached to the route as %EAF_TYPE_OPAQUE byte streams.
54
 *
55
 * BGP protocol implements graceful restart in both restarting (local restart)
56
 * and receiving (neighbor restart) roles. The first is handled mostly by the
57
 * graceful restart code in the nest, BGP protocol just handles capabilities,
58
 * sets @gr_wait and locks graceful restart until end-of-RIB mark is received.
59
 * The second is implemented by internal restart of the BGP state to %BS_IDLE
60
 * and protocol state to %PS_START, but keeping the protocol up from the core
61
 * point of view and therefore maintaining received routes. Routing table
62
 * refresh cycle (rt_refresh_begin(), rt_refresh_end()) is used for removing
63
 * stale routes after reestablishment of BGP session during graceful restart.
64
 */
65

    
66
#undef LOCAL_DEBUG
67

    
68
#include "nest/bird.h"
69
#include "nest/iface.h"
70
#include "nest/protocol.h"
71
#include "nest/route.h"
72
#include "nest/cli.h"
73
#include "nest/locks.h"
74
#include "conf/conf.h"
75
#include "lib/socket.h"
76
#include "lib/resource.h"
77
#include "lib/string.h"
78

    
79
#include "bgp.h"
80

    
81

    
82
struct linpool *bgp_linpool;                /* Global temporary pool */
83
static sock *bgp_listen_sk;                /* Global listening socket */
84
static int bgp_counter;                        /* Number of protocol instances using the listening socket */
85

    
86
static void bgp_close(struct bgp_proto *p, int apply_md5);
87
static void bgp_connect(struct bgp_proto *p);
88
static void bgp_active(struct bgp_proto *p);
89
static sock *bgp_setup_listen_sk(ip_addr addr, unsigned port, u32 flags);
90
static void bgp_update_bfd(struct bgp_proto *p, int use_bfd);
91

    
92

    
93
/**
94
 * bgp_open - open a BGP instance
95
 * @p: BGP instance
96
 *
97
 * This function allocates and configures shared BGP resources.
98
 * Should be called as the last step during initialization
99
 * (when lock is acquired and neighbor is ready).
100
 * When error, state changed to PS_DOWN, -1 is returned and caller
101
 * should return immediately.
102
 */
103
static int
104
bgp_open(struct bgp_proto *p)
105
{
106
  struct config *cfg = p->cf->c.global;
107
  int errcode;
108

    
109
  if (!bgp_listen_sk)
110
    bgp_listen_sk = bgp_setup_listen_sk(cfg->listen_bgp_addr, cfg->listen_bgp_port, cfg->listen_bgp_flags);
111

    
112
  if (!bgp_listen_sk)
113
    {
114
      errcode = BEM_NO_SOCKET;
115
      goto err;
116
    }
117

    
118
  if (!bgp_linpool)
119
    bgp_linpool = lp_new(&root_pool, 4080);
120

    
121
  bgp_counter++;
122

    
123
  if (p->cf->password)
124
    if (sk_set_md5_auth(bgp_listen_sk, p->cf->remote_ip, p->cf->iface, p->cf->password) < 0)
125
      {
126
        sk_log_error(bgp_listen_sk, p->p.name);
127
        bgp_close(p, 0);
128
        errcode = BEM_INVALID_MD5;
129
        goto err;
130
      }
131

    
132
  return 0;
133

    
134
err:
135
  p->p.disabled = 1;
136
  bgp_store_error(p, NULL, BE_MISC, errcode);
137
  proto_notify_state(&p->p, PS_DOWN);
138
  return -1;
139
}
140

    
141
static void
142
bgp_startup(struct bgp_proto *p)
143
{
144
  BGP_TRACE(D_EVENTS, "Started");
145
  p->start_state = p->cf->capabilities ? BSS_CONNECT : BSS_CONNECT_NOCAP;
146

    
147
  if (!p->cf->passive)
148
    bgp_active(p);
149
}
150

    
151
static void
152
bgp_startup_timeout(timer *t)
153
{
154
  bgp_startup(t->data);
155
}
156

    
157

    
158
static void
159
bgp_initiate(struct bgp_proto *p)
160
{
161
  int rv = bgp_open(p);
162
  if (rv < 0)
163
    return;
164

    
165
  if (p->cf->bfd)
166
    bgp_update_bfd(p, p->cf->bfd);
167

    
168
  if (p->startup_delay)
169
    {
170
      p->start_state = BSS_DELAY;
171
      BGP_TRACE(D_EVENTS, "Startup delayed by %d seconds due to errors", p->startup_delay);
172
      bgp_start_timer(p->startup_timer, p->startup_delay);
173
    }
174
  else
175
    bgp_startup(p);
176
}
177

    
178
/**
179
 * bgp_close - close a BGP instance
180
 * @p: BGP instance
181
 * @apply_md5: 0 to disable unsetting MD5 auth
182
 *
183
 * This function frees and deconfigures shared BGP resources.
184
 * @apply_md5 is set to 0 when bgp_close is called as a cleanup
185
 * from failed bgp_open().
186
 */
187
static void
188
bgp_close(struct bgp_proto *p, int apply_md5)
189
{
190
  ASSERT(bgp_counter);
191
  bgp_counter--;
192

    
193
  if (p->cf->password && apply_md5)
194
    if (sk_set_md5_auth(bgp_listen_sk, p->cf->remote_ip, p->cf->iface, NULL) < 0)
195
      sk_log_error(bgp_listen_sk, p->p.name);
196

    
197
  if (!bgp_counter)
198
    {
199
      rfree(bgp_listen_sk);
200
      bgp_listen_sk = NULL;
201
      rfree(bgp_linpool);
202
      bgp_linpool = NULL;
203
    }
204
}
205

    
206
/**
207
 * bgp_start_timer - start a BGP timer
208
 * @t: timer
209
 * @value: time to fire (0 to disable the timer)
210
 *
211
 * This functions calls tm_start() on @t with time @value and the
212
 * amount of randomization suggested by the BGP standard. Please use
213
 * it for all BGP timers.
214
 */
215
void
216
bgp_start_timer(timer *t, int value)
217
{
218
  if (value)
219
    {
220
      /* The randomization procedure is specified in RFC 1771: 9.2.3.3 */
221
      t->randomize = value / 4;
222
      tm_start(t, value - t->randomize);
223
    }
224
  else
225
    tm_stop(t);
226
}
227

    
228
/**
229
 * bgp_close_conn - close a BGP connection
230
 * @conn: connection to close
231
 *
232
 * This function takes a connection described by the &bgp_conn structure,
233
 * closes its socket and frees all resources associated with it.
234
 */
235
void
236
bgp_close_conn(struct bgp_conn *conn)
237
{
238
  // struct bgp_proto *p = conn->bgp;
239

    
240
  DBG("BGP: Closing connection\n");
241
  conn->packets_to_send = 0;
242
  rfree(conn->connect_retry_timer);
243
  conn->connect_retry_timer = NULL;
244
  rfree(conn->keepalive_timer);
245
  conn->keepalive_timer = NULL;
246
  rfree(conn->hold_timer);
247
  conn->hold_timer = NULL;
248
  rfree(conn->sk);
249
  conn->sk = NULL;
250
  rfree(conn->tx_ev);
251
  conn->tx_ev = NULL;
252
}
253

    
254

    
255
/**
256
 * bgp_update_startup_delay - update a startup delay
257
 * @p: BGP instance
258
 *
259
 * This function updates a startup delay that is used to postpone next BGP connect.
260
 * It also handles disable_after_error and might stop BGP instance when error
261
 * happened and disable_after_error is on.
262
 *
263
 * It should be called when BGP protocol error happened.
264
 */
265
void
266
bgp_update_startup_delay(struct bgp_proto *p)
267
{
268
  struct bgp_config *cf = p->cf;
269

    
270
  DBG("BGP: Updating startup delay\n");
271

    
272
  if (p->last_proto_error && ((now - p->last_proto_error) >= (int) cf->error_amnesia_time))
273
    p->startup_delay = 0;
274

    
275
  p->last_proto_error = now;
276

    
277
  if (cf->disable_after_error)
278
    {
279
      p->startup_delay = 0;
280
      p->p.disabled = 1;
281
      return;
282
    }
283

    
284
  if (!p->startup_delay)
285
    p->startup_delay = cf->error_delay_time_min;
286
  else
287
    p->startup_delay = MIN(2 * p->startup_delay, cf->error_delay_time_max);
288
}
289

    
290
static void
291
bgp_graceful_close_conn(struct bgp_conn *conn, unsigned subcode)
292
{
293
  switch (conn->state)
294
    {
295
    case BS_IDLE:
296
    case BS_CLOSE:
297
      return;
298
    case BS_CONNECT:
299
    case BS_ACTIVE:
300
      bgp_conn_enter_idle_state(conn);
301
      return;
302
    case BS_OPENSENT:
303
    case BS_OPENCONFIRM:
304
    case BS_ESTABLISHED:
305
      bgp_error(conn, 6, subcode, NULL, 0);
306
      return;
307
    default:
308
      bug("bgp_graceful_close_conn: Unknown state %d", conn->state);
309
    }
310
}
311

    
312
static void
313
bgp_down(struct bgp_proto *p)
314
{
315
  if (p->start_state > BSS_PREPARE)
316
    bgp_close(p, 1);
317

    
318
  BGP_TRACE(D_EVENTS, "Down");
319
  proto_notify_state(&p->p, PS_DOWN);
320
}
321

    
322
static void
323
bgp_decision(void *vp)
324
{
325
  struct bgp_proto *p = vp;
326

    
327
  DBG("BGP: Decision start\n");
328
  if ((p->p.proto_state == PS_START)
329
      && (p->outgoing_conn.state == BS_IDLE)
330
      && (p->incoming_conn.state != BS_OPENCONFIRM)
331
      && (!p->cf->passive))
332
    bgp_active(p);
333

    
334
  if ((p->p.proto_state == PS_STOP)
335
      && (p->outgoing_conn.state == BS_IDLE)
336
      && (p->incoming_conn.state == BS_IDLE))
337
    bgp_down(p);
338
}
339

    
340
void
341
bgp_stop(struct bgp_proto *p, unsigned subcode)
342
{
343
  proto_notify_state(&p->p, PS_STOP);
344
  bgp_graceful_close_conn(&p->outgoing_conn, subcode);
345
  bgp_graceful_close_conn(&p->incoming_conn, subcode);
346
  ev_schedule(p->event);
347
}
348

    
349
static inline void
350
bgp_conn_set_state(struct bgp_conn *conn, unsigned new_state)
351
{
352
  if (conn->bgp->p.mrtdump & MD_STATES)
353
    mrt_dump_bgp_state_change(conn, conn->state, new_state);
354

    
355
  conn->state = new_state;
356
}
357

    
358
void
359
bgp_conn_enter_openconfirm_state(struct bgp_conn *conn)
360
{
361
  /* Really, most of the work is done in bgp_rx_open(). */
362
  bgp_conn_set_state(conn, BS_OPENCONFIRM);
363
}
364

    
365
void
366
bgp_conn_enter_established_state(struct bgp_conn *conn)
367
{
368
  struct bgp_proto *p = conn->bgp;
369

    
370
  BGP_TRACE(D_EVENTS, "BGP session established");
371
  DBG("BGP: UP!!!\n");
372

    
373
  /* For multi-hop BGP sessions */
374
  if (ipa_zero(p->source_addr))
375
    p->source_addr = conn->sk->saddr;
376

    
377
  p->conn = conn;
378
  p->last_error_class = 0;
379
  p->last_error_code = 0;
380
  bgp_init_bucket_table(p);
381
  bgp_init_prefix_table(p, 8);
382

    
383
  int peer_gr_ready = conn->peer_gr_aware && !(conn->peer_gr_flags & BGP_GRF_RESTART);
384

    
385
  if (p->p.gr_recovery && !peer_gr_ready)
386
    proto_graceful_restart_unlock(&p->p);
387

    
388
  if (p->p.gr_recovery && (p->cf->gr_mode == BGP_GR_ABLE) && peer_gr_ready)
389
    p->p.gr_wait = 1;
390

    
391
  if (p->gr_active)
392
    tm_stop(p->gr_timer);
393

    
394
  if (p->gr_active && (!conn->peer_gr_able || !(conn->peer_gr_aflags & BGP_GRF_FORWARDING)))
395
    bgp_graceful_restart_done(p);
396

    
397
  bgp_conn_set_state(conn, BS_ESTABLISHED);
398
  proto_notify_state(&p->p, PS_UP);
399
}
400

    
401
static void
402
bgp_conn_leave_established_state(struct bgp_proto *p)
403
{
404
  BGP_TRACE(D_EVENTS, "BGP session closed");
405
  p->conn = NULL;
406

    
407
  if (p->p.proto_state == PS_UP)
408
    bgp_stop(p, 0);
409
}
410

    
411
void
412
bgp_conn_enter_close_state(struct bgp_conn *conn)
413
{
414
  struct bgp_proto *p = conn->bgp;
415
  int os = conn->state;
416

    
417
  bgp_conn_set_state(conn, BS_CLOSE);
418
  tm_stop(conn->keepalive_timer);
419
  conn->sk->rx_hook = NULL;
420

    
421
  /* Timeout for CLOSE state, if we cannot send notification soon then we just hangup */
422
  bgp_start_timer(conn->hold_timer, 10);
423

    
424
  if (os == BS_ESTABLISHED)
425
    bgp_conn_leave_established_state(p);
426
}
427

    
428
void
429
bgp_conn_enter_idle_state(struct bgp_conn *conn)
430
{
431
  struct bgp_proto *p = conn->bgp;
432
  int os = conn->state;
433

    
434
  bgp_close_conn(conn);
435
  bgp_conn_set_state(conn, BS_IDLE);
436
  ev_schedule(p->event);
437

    
438
  if (os == BS_ESTABLISHED)
439
    bgp_conn_leave_established_state(p);
440
}
441

    
442
/**
443
 * bgp_handle_graceful_restart - handle detected BGP graceful restart
444
 * @p: BGP instance
445
 *
446
 * This function is called when a BGP graceful restart of the neighbor is
447
 * detected (when the TCP connection fails or when a new TCP connection
448
 * appears). The function activates processing of the restart - starts routing
449
 * table refresh cycle and activates BGP restart timer. The protocol state goes
450
 * back to %PS_START, but changing BGP state back to %BS_IDLE is left for the
451
 * caller.
452
 */
453
void
454
bgp_handle_graceful_restart(struct bgp_proto *p)
455
{
456
  ASSERT(p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready);
457

    
458
  BGP_TRACE(D_EVENTS, "Neighbor graceful restart detected%s",
459
            p->gr_active ? " - already pending" : "");
460
  proto_notify_state(&p->p, PS_START);
461

    
462
  if (p->gr_active)
463
    rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook);
464

    
465
  p->gr_active = 1;
466
  bgp_start_timer(p->gr_timer, p->conn->peer_gr_time);
467
  rt_refresh_begin(p->p.main_ahook->table, p->p.main_ahook);
468
}
469

    
470
/**
471
 * bgp_graceful_restart_done - finish active BGP graceful restart
472
 * @p: BGP instance
473
 *
474
 * This function is called when the active BGP graceful restart of the neighbor
475
 * should be finished - either successfully (the neighbor sends all paths and
476
 * reports end-of-RIB on the new session) or unsuccessfully (the neighbor does
477
 * not support BGP graceful restart on the new session). The function ends
478
 * routing table refresh cycle and stops BGP restart timer.
479
 */
480
void
481
bgp_graceful_restart_done(struct bgp_proto *p)
482
{
483
  BGP_TRACE(D_EVENTS, "Neighbor graceful restart done");
484
  p->gr_active = 0;
485
  tm_stop(p->gr_timer);
486
  rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook);
487
}
488

    
489
/**
490
 * bgp_graceful_restart_timeout - timeout of graceful restart 'restart timer'
491
 * @t: timer
492
 *
493
 * This function is a timeout hook for @gr_timer, implementing BGP restart time
494
 * limit for reestablisment of the BGP session after the graceful restart. When
495
 * fired, we just proceed with the usual protocol restart.
496
 */
497

    
498
static void
499
bgp_graceful_restart_timeout(timer *t)
500
{
501
  struct bgp_proto *p = t->data;
502

    
503
  BGP_TRACE(D_EVENTS, "Neighbor graceful restart timeout");
504
  bgp_stop(p, 0);
505
}
506

    
507
static void
508
bgp_send_open(struct bgp_conn *conn)
509
{
510
  conn->start_state = conn->bgp->start_state;
511

    
512
  // Default values, possibly changed by receiving capabilities.
513
  conn->advertised_as = 0;
514
  conn->peer_refresh_support = 0;
515
  conn->peer_as4_support = 0;
516
  conn->peer_add_path = 0;
517
  conn->peer_gr_aware = 0;
518
  conn->peer_gr_able = 0;
519
  conn->peer_gr_time = 0;
520
  conn->peer_gr_flags = 0;
521
  conn->peer_gr_aflags = 0;
522

    
523
  DBG("BGP: Sending open\n");
524
  conn->sk->rx_hook = bgp_rx;
525
  conn->sk->tx_hook = bgp_tx;
526
  tm_stop(conn->connect_retry_timer);
527
  bgp_schedule_packet(conn, PKT_OPEN);
528
  bgp_conn_set_state(conn, BS_OPENSENT);
529
  bgp_start_timer(conn->hold_timer, conn->bgp->cf->initial_hold_time);
530
}
531

    
532
static void
533
bgp_connected(sock *sk)
534
{
535
  struct bgp_conn *conn = sk->data;
536
  struct bgp_proto *p = conn->bgp;
537

    
538
  BGP_TRACE(D_EVENTS, "Connected");
539
  bgp_send_open(conn);
540
}
541

    
542
static void
543
bgp_connect_timeout(timer *t)
544
{
545
  struct bgp_conn *conn = t->data;
546
  struct bgp_proto *p = conn->bgp;
547

    
548
  DBG("BGP: connect_timeout\n");
549
  if (p->p.proto_state == PS_START)
550
    {
551
      bgp_close_conn(conn);
552
      bgp_connect(p);
553
    }
554
  else
555
    bgp_conn_enter_idle_state(conn);
556
}
557

    
558
static void
559
bgp_sock_err(sock *sk, int err)
560
{
561
  struct bgp_conn *conn = sk->data;
562
  struct bgp_proto *p = conn->bgp;
563

    
564
  /*
565
   * This error hook may be called either asynchronously from main
566
   * loop, or synchronously from sk_send().  But sk_send() is called
567
   * only from bgp_tx() and bgp_kick_tx(), which are both called
568
   * asynchronously from main loop. Moreover, they end if err hook is
569
   * called. Therefore, we could suppose that it is always called
570
   * asynchronously.
571
   */
572

    
573
  bgp_store_error(p, conn, BE_SOCKET, err);
574

    
575
  if (err)
576
    BGP_TRACE(D_EVENTS, "Connection lost (%M)", err);
577
  else
578
    BGP_TRACE(D_EVENTS, "Connection closed");
579

    
580
  if ((conn->state == BS_ESTABLISHED) && p->gr_ready)
581
    bgp_handle_graceful_restart(p);
582

    
583
  bgp_conn_enter_idle_state(conn);
584
}
585

    
586
static void
587
bgp_hold_timeout(timer *t)
588
{
589
  struct bgp_conn *conn = t->data;
590
  struct bgp_proto *p = conn->bgp;
591

    
592
  DBG("BGP: Hold timeout\n");
593

    
594
  /* We are already closing the connection - just do hangup */
595
  if (conn->state == BS_CLOSE)
596
  {
597
    BGP_TRACE(D_EVENTS, "Connection stalled");
598
    bgp_conn_enter_idle_state(conn);
599
    return;
600
  }
601

    
602
  /* If there is something in input queue, we are probably congested
603
     and perhaps just not processed BGP packets in time. */
604

    
605
  if (sk_rx_ready(conn->sk) > 0)
606
    bgp_start_timer(conn->hold_timer, 10);
607
  else
608
    bgp_error(conn, 4, 0, NULL, 0);
609
}
610

    
611
static void
612
bgp_keepalive_timeout(timer *t)
613
{
614
  struct bgp_conn *conn = t->data;
615

    
616
  DBG("BGP: Keepalive timer\n");
617
  bgp_schedule_packet(conn, PKT_KEEPALIVE);
618
}
619

    
620
static void
621
bgp_setup_conn(struct bgp_proto *p, struct bgp_conn *conn)
622
{
623
  timer *t;
624

    
625
  conn->sk = NULL;
626
  conn->bgp = p;
627
  conn->packets_to_send = 0;
628

    
629
  t = conn->connect_retry_timer = tm_new(p->p.pool);
630
  t->hook = bgp_connect_timeout;
631
  t->data = conn;
632
  t = conn->hold_timer = tm_new(p->p.pool);
633
  t->hook = bgp_hold_timeout;
634
  t->data = conn;
635
  t = conn->keepalive_timer = tm_new(p->p.pool);
636
  t->hook = bgp_keepalive_timeout;
637
  t->data = conn;
638
  conn->tx_ev = ev_new(p->p.pool);
639
  conn->tx_ev->hook = bgp_kick_tx;
640
  conn->tx_ev->data = conn;
641
}
642

    
643
static void
644
bgp_setup_sk(struct bgp_conn *conn, sock *s)
645
{
646
  s->data = conn;
647
  s->err_hook = bgp_sock_err;
648
  conn->sk = s;
649
}
650

    
651
static void
652
bgp_active(struct bgp_proto *p)
653
{
654
  int delay = MAX(1, p->cf->connect_delay_time);
655
  struct bgp_conn *conn = &p->outgoing_conn;
656

    
657
  BGP_TRACE(D_EVENTS, "Connect delayed by %d seconds", delay);
658
  bgp_setup_conn(p, conn);
659
  bgp_conn_set_state(conn, BS_ACTIVE);
660
  bgp_start_timer(conn->connect_retry_timer, delay);
661
}
662

    
663
/**
664
 * bgp_connect - initiate an outgoing connection
665
 * @p: BGP instance
666
 *
667
 * The bgp_connect() function creates a new &bgp_conn and initiates
668
 * a TCP connection to the peer. The rest of connection setup is governed
669
 * by the BGP state machine as described in the standard.
670
 */
671
static void
672
bgp_connect(struct bgp_proto *p)        /* Enter Connect state and start establishing connection */
673
{
674
  sock *s;
675
  struct bgp_conn *conn = &p->outgoing_conn;
676
  int hops = p->cf->multihop ? : 1;
677

    
678
  DBG("BGP: Connecting\n");
679
  s = sk_new(p->p.pool);
680
  s->type = SK_TCP_ACTIVE;
681
  s->saddr = p->source_addr;
682
  s->daddr = p->cf->remote_ip;
683
  s->dport = p->cf->remote_port;
684
  s->iface = p->neigh ? p->neigh->iface : NULL;
685
  s->ttl = p->cf->ttl_security ? 255 : hops;
686
  s->rbsize = BGP_RX_BUFFER_SIZE;
687
  s->tbsize = BGP_TX_BUFFER_SIZE;
688
  s->tos = IP_PREC_INTERNET_CONTROL;
689
  s->password = p->cf->password;
690
  s->tx_hook = bgp_connected;
691
  BGP_TRACE(D_EVENTS, "Connecting to %I%J from local address %I%J", s->daddr, p->cf->iface,
692
            s->saddr, ipa_is_link_local(s->saddr) ? s->iface : NULL);
693
  bgp_setup_conn(p, conn);
694
  bgp_setup_sk(conn, s);
695
  bgp_conn_set_state(conn, BS_CONNECT);
696

    
697
  if (sk_open(s) < 0)
698
    goto err;
699

    
700
  /* Set minimal receive TTL if needed */
701
  if (p->cf->ttl_security)
702
    if (sk_set_min_ttl(s, 256 - hops) < 0)
703
      goto err;
704

    
705
  DBG("BGP: Waiting for connect success\n");
706
  bgp_start_timer(conn->connect_retry_timer, p->cf->connect_retry_time);
707
  return;
708

    
709
 err:
710
  sk_log_error(s, p->p.name);
711
  bgp_sock_err(s, 0);
712
  return;
713
}
714

    
715
/**
716
 * bgp_find_proto - find existing proto for incoming connection
717
 * @sk: TCP socket
718
 *
719
 */
720
static struct bgp_proto *
721
bgp_find_proto(sock *sk)
722
{
723
  struct proto_config *pc;
724

    
725
  WALK_LIST(pc, config->protos)
726
    if ((pc->protocol == &proto_bgp) && pc->proto)
727
      {
728
        struct bgp_proto *p = (struct bgp_proto *) pc->proto;
729
        if (ipa_equal(p->cf->remote_ip, sk->daddr) &&
730
            (!ipa_is_link_local(sk->daddr) || (p->cf->iface == sk->iface)))
731
          return p;
732
      }
733

    
734
  return NULL;
735
}
736

    
737
/**
738
 * bgp_incoming_connection - handle an incoming connection
739
 * @sk: TCP socket
740
 * @dummy: unused
741
 *
742
 * This function serves as a socket hook for accepting of new BGP
743
 * connections. It searches a BGP instance corresponding to the peer
744
 * which has connected and if such an instance exists, it creates a
745
 * &bgp_conn structure, attaches it to the instance and either sends
746
 * an Open message or (if there already is an active connection) it
747
 * closes the new connection by sending a Notification message.
748
 */
749
static int
750
bgp_incoming_connection(sock *sk, int dummy UNUSED)
751
{
752
  struct bgp_proto *p;
753
  int acc, hops;
754

    
755
  DBG("BGP: Incoming connection from %I port %d\n", sk->daddr, sk->dport);
756
  p = bgp_find_proto(sk);
757
  if (!p)
758
    {
759
      log(L_WARN "BGP: Unexpected connect from unknown address %I%J (port %d)",
760
          sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL, sk->dport);
761
      rfree(sk);
762
      return 0;
763
    }
764

    
765
  /* We are in proper state and there is no other incoming connection */
766
  acc = (p->p.proto_state == PS_START || p->p.proto_state == PS_UP) &&
767
    (p->start_state >= BSS_CONNECT) && (!p->incoming_conn.sk);
768

    
769
  if (p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready)
770
    {
771
      bgp_store_error(p, NULL, BE_MISC, BEM_GRACEFUL_RESTART);
772
      bgp_handle_graceful_restart(p);
773
      bgp_conn_enter_idle_state(p->conn);
774
      acc = 1;
775
    }
776

    
777
  BGP_TRACE(D_EVENTS, "Incoming connection from %I%J (port %d) %s",
778
            sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL,
779
            sk->dport, acc ? "accepted" : "rejected");
780

    
781
  if (!acc)
782
    {
783
      rfree(sk);
784
      return 0;
785
    }
786

    
787
  hops = p->cf->multihop ? : 1;
788

    
789
  if (sk_set_ttl(sk, p->cf->ttl_security ? 255 : hops) < 0)
790
    goto err;
791

    
792
  if (p->cf->ttl_security)
793
    if (sk_set_min_ttl(sk, 256 - hops) < 0)
794
      goto err;
795

    
796
  bgp_setup_conn(p, &p->incoming_conn);
797
  bgp_setup_sk(&p->incoming_conn, sk);
798
  bgp_send_open(&p->incoming_conn);
799
  return 0;
800

    
801
err:
802
  sk_log_error(sk, p->p.name);
803
  log(L_ERR "%s: Incoming connection aborted", p->p.name);
804
  rfree(sk);
805
  return 0;
806
}
807

    
808
static void
809
bgp_listen_sock_err(sock *sk UNUSED, int err)
810
{
811
  if (err == ECONNABORTED)
812
    log(L_WARN "BGP: Incoming connection aborted");
813
  else
814
    log(L_ERR "BGP: Error on listening socket: %M", err);
815
}
816

    
817
static sock *
818
bgp_setup_listen_sk(ip_addr addr, unsigned port, u32 flags)
819
{
820
  sock *s = sk_new(&root_pool);
821
  DBG("BGP: Creating listening socket\n");
822
  s->type = SK_TCP_PASSIVE;
823
  s->ttl = 255;
824
  s->saddr = addr;
825
  s->sport = port ? port : BGP_PORT;
826
  s->flags = flags ? 0 : SKF_V6ONLY;
827
  s->tos = IP_PREC_INTERNET_CONTROL;
828
  s->rbsize = BGP_RX_BUFFER_SIZE;
829
  s->tbsize = BGP_TX_BUFFER_SIZE;
830
  s->rx_hook = bgp_incoming_connection;
831
  s->err_hook = bgp_listen_sock_err;
832

    
833
  if (sk_open(s) < 0)
834
    goto err;
835

    
836
  return s;
837

    
838
 err:
839
  sk_log_error(s, "BGP");
840
  log(L_ERR "BGP: Cannot open listening socket");
841
  rfree(s);
842
  return NULL;
843
}
844

    
845
static void
846
bgp_start_neighbor(struct bgp_proto *p)
847
{
848
  /* Called only for single-hop BGP sessions */
849

    
850
  if (ipa_zero(p->source_addr))
851
    p->source_addr = p->neigh->ifa->ip;
852

    
853
#ifdef IPV6
854
  {
855
    struct ifa *a;
856
    p->local_link = IPA_NONE;
857
    WALK_LIST(a, p->neigh->iface->addrs)
858
      if (a->scope == SCOPE_LINK)
859
        {
860
          p->local_link = a->ip;
861
          break;
862
        }
863

    
864
    if (! ipa_nonzero(p->local_link))
865
      log(L_WARN "%s: Missing link local address on interface %s", p->p.name,  p->neigh->iface->name);
866

    
867
    DBG("BGP: Selected link-level address %I\n", p->local_link);
868
  }
869
#endif
870

    
871
  bgp_initiate(p);
872
}
873

    
874
static void
875
bgp_neigh_notify(neighbor *n)
876
{
877
  struct bgp_proto *p = (struct bgp_proto *) n->proto;
878
  int ps = p->p.proto_state;
879

    
880
  if (n != p->neigh)
881
    return;
882

    
883
  if ((ps == PS_DOWN) || (ps == PS_STOP))
884
    return;
885

    
886
  int prepare = (ps == PS_START) && (p->start_state == BSS_PREPARE);
887

    
888
  if (n->scope <= 0)
889
    {
890
      if (!prepare)
891
        {
892
          BGP_TRACE(D_EVENTS, "Neighbor lost");
893
          bgp_store_error(p, NULL, BE_MISC, BEM_NEIGHBOR_LOST);
894
          /* Perhaps also run bgp_update_startup_delay(p)? */
895
          bgp_stop(p, 0);
896
        }
897
    }
898
  else if (p->cf->check_link && !(n->iface->flags & IF_LINK_UP))
899
    {
900
      if (!prepare)
901
        {
902
          BGP_TRACE(D_EVENTS, "Link down");
903
          bgp_store_error(p, NULL, BE_MISC, BEM_LINK_DOWN);
904
          if (ps == PS_UP)
905
            bgp_update_startup_delay(p);
906
          bgp_stop(p, 0);
907
        }
908
    }
909
  else
910
    {
911
      if (prepare)
912
        {
913
          BGP_TRACE(D_EVENTS, "Neighbor ready");
914
          bgp_start_neighbor(p);
915
        }
916
    }
917
}
918

    
919
static void
920
bgp_bfd_notify(struct bfd_request *req)
921
{
922
  struct bgp_proto *p = req->data;
923
  int ps = p->p.proto_state;
924

    
925
  if (req->down && ((ps == PS_START) || (ps == PS_UP)))
926
    {
927
      BGP_TRACE(D_EVENTS, "BFD session down");
928
      bgp_store_error(p, NULL, BE_MISC, BEM_BFD_DOWN);
929
      if (ps == PS_UP)
930
        bgp_update_startup_delay(p);
931
      bgp_stop(p, 0);
932
    }
933
}
934

    
935
static void
936
bgp_update_bfd(struct bgp_proto *p, int use_bfd)
937
{
938
  if (use_bfd && !p->bfd_req)
939
    p->bfd_req = bfd_request_session(p->p.pool, p->cf->remote_ip, p->source_addr,
940
                                     p->cf->multihop ? NULL : p->neigh->iface,
941
                                     bgp_bfd_notify, p);
942

    
943
  if (!use_bfd && p->bfd_req)
944
    {
945
      rfree(p->bfd_req);
946
      p->bfd_req = NULL;
947
    }
948
}
949

    
950
static int
951
bgp_reload_routes(struct proto *P)
952
{
953
  struct bgp_proto *p = (struct bgp_proto *) P;
954
  if (!p->conn || !p->conn->peer_refresh_support)
955
    return 0;
956

    
957
  bgp_schedule_packet(p->conn, PKT_ROUTE_REFRESH);
958
  return 1;
959
}
960

    
961
static void
962
bgp_feed_done(struct proto *P)
963
{
964
  struct bgp_proto *p = (struct bgp_proto *) P;
965
  if (!p->conn || !p->cf->gr_mode || p->p.refeeding)
966
    return;
967

    
968
  p->send_end_mark = 1;
969
  bgp_schedule_packet(p->conn, PKT_UPDATE);
970
}
971

    
972
static void
973
bgp_start_locked(struct object_lock *lock)
974
{
975
  struct bgp_proto *p = lock->data;
976
  struct bgp_config *cf = p->cf;
977

    
978
  if (p->p.proto_state != PS_START)
979
    {
980
      DBG("BGP: Got lock in different state %d\n", p->p.proto_state);
981
      return;
982
    }
983

    
984
  DBG("BGP: Got lock\n");
985

    
986
  if (cf->multihop)
987
    {
988
      /* Multi-hop sessions do not use neighbor entries */
989
      bgp_initiate(p);
990
      return;
991
    }
992

    
993
  neighbor *n = neigh_find2(&p->p, &cf->remote_ip, cf->iface, NEF_STICKY);
994
  if (!n)
995
    {
996
      log(L_ERR "%s: Invalid remote address %I%J", p->p.name, cf->remote_ip, cf->iface);
997
      /* As we do not start yet, we can just disable protocol */
998
      p->p.disabled = 1;
999
      bgp_store_error(p, NULL, BE_MISC, BEM_INVALID_NEXT_HOP);
1000
      proto_notify_state(&p->p, PS_DOWN);
1001
      return;
1002
    }
1003

    
1004
  p->neigh = n;
1005

    
1006
  if (n->scope <= 0)
1007
    BGP_TRACE(D_EVENTS, "Waiting for %I%J to become my neighbor", cf->remote_ip, cf->iface);
1008
  else if (p->cf->check_link && !(n->iface->flags & IF_LINK_UP))
1009
    BGP_TRACE(D_EVENTS, "Waiting for link on %s", n->iface->name);
1010
  else
1011
    bgp_start_neighbor(p);
1012
}
1013

    
1014
static int
1015
bgp_start(struct proto *P)
1016
{
1017
  struct bgp_proto *p = (struct bgp_proto *) P;
1018
  struct object_lock *lock;
1019

    
1020
  DBG("BGP: Startup.\n");
1021
  p->start_state = BSS_PREPARE;
1022
  p->outgoing_conn.state = BS_IDLE;
1023
  p->incoming_conn.state = BS_IDLE;
1024
  p->neigh = NULL;
1025
  p->bfd_req = NULL;
1026
  p->gr_ready = 0;
1027
  p->gr_active = 0;
1028

    
1029
  rt_lock_table(p->igp_table);
1030

    
1031
  p->event = ev_new(p->p.pool);
1032
  p->event->hook = bgp_decision;
1033
  p->event->data = p;
1034

    
1035
  p->startup_timer = tm_new(p->p.pool);
1036
  p->startup_timer->hook = bgp_startup_timeout;
1037
  p->startup_timer->data = p;
1038

    
1039
  p->gr_timer = tm_new(p->p.pool);
1040
  p->gr_timer->hook = bgp_graceful_restart_timeout;
1041
  p->gr_timer->data = p;
1042

    
1043
  p->local_id = proto_get_router_id(P->cf);
1044
  if (p->rr_client)
1045
    p->rr_cluster_id = p->cf->rr_cluster_id ? p->cf->rr_cluster_id : p->local_id;
1046

    
1047
  p->remote_id = 0;
1048
  p->source_addr = p->cf->source_addr;
1049

    
1050
  if (p->p.gr_recovery && p->cf->gr_mode)
1051
    proto_graceful_restart_lock(P);
1052

    
1053
  /*
1054
   *  Before attempting to create the connection, we need to lock the
1055
   *  port, so that are sure we're the only instance attempting to talk
1056
   *  with that neighbor.
1057
   */
1058

    
1059
  lock = p->lock = olock_new(P->pool);
1060
  lock->addr = p->cf->remote_ip;
1061
  lock->port = p->cf->remote_port;
1062
  lock->iface = p->cf->iface;
1063
  lock->type = OBJLOCK_TCP;
1064
  lock->hook = bgp_start_locked;
1065
  lock->data = p;
1066
  olock_acquire(lock);
1067

    
1068
  return PS_START;
1069
}
1070

    
1071
extern int proto_restart;
1072

    
1073
static int
1074
bgp_shutdown(struct proto *P)
1075
{
1076
  struct bgp_proto *p = (struct bgp_proto *) P;
1077
  unsigned subcode = 0;
1078

    
1079
  BGP_TRACE(D_EVENTS, "Shutdown requested");
1080

    
1081
  switch (P->down_code)
1082
    {
1083
    case PDC_CF_REMOVE:
1084
    case PDC_CF_DISABLE:
1085
      subcode = 3; // Errcode 6, 3 - peer de-configured
1086
      break;
1087

    
1088
    case PDC_CF_RESTART:
1089
      subcode = 6; // Errcode 6, 6 - other configuration change
1090
      break;
1091

    
1092
    case PDC_CMD_DISABLE:
1093
    case PDC_CMD_SHUTDOWN:
1094
      subcode = 2; // Errcode 6, 2 - administrative shutdown
1095
      break;
1096

    
1097
    case PDC_CMD_RESTART:
1098
      subcode = 4; // Errcode 6, 4 - administrative reset
1099
      break;
1100

    
1101
    case PDC_RX_LIMIT_HIT:
1102
    case PDC_IN_LIMIT_HIT:
1103
      subcode = 1; // Errcode 6, 1 - max number of prefixes reached
1104
      /* log message for compatibility */
1105
      log(L_WARN "%s: Route limit exceeded, shutting down", p->p.name);
1106
      goto limit;
1107

    
1108
    case PDC_OUT_LIMIT_HIT:
1109
      subcode = proto_restart ? 4 : 2; // Administrative reset or shutdown
1110

    
1111
    limit:
1112
      bgp_store_error(p, NULL, BE_AUTO_DOWN, BEA_ROUTE_LIMIT_EXCEEDED);
1113
      if (proto_restart)
1114
        bgp_update_startup_delay(p);
1115
      else
1116
        p->startup_delay = 0;
1117
      goto done;
1118
    }
1119

    
1120
  bgp_store_error(p, NULL, BE_MAN_DOWN, 0);
1121
  p->startup_delay = 0;
1122

    
1123
 done:
1124
  bgp_stop(p, subcode);
1125
  return p->p.proto_state;
1126
}
1127

    
1128
static void
1129
bgp_cleanup(struct proto *P)
1130
{
1131
  struct bgp_proto *p = (struct bgp_proto *) P;
1132
  rt_unlock_table(p->igp_table);
1133
}
1134

    
1135
static rtable *
1136
get_igp_table(struct bgp_config *cf)
1137
{
1138
  return cf->igp_table ? cf->igp_table->table : cf->c.table->table;
1139
}
1140

    
1141
static struct proto *
1142
bgp_init(struct proto_config *C)
1143
{
1144
  struct proto *P = proto_new(C, sizeof(struct bgp_proto));
1145
  struct bgp_config *c = (struct bgp_config *) C;
1146
  struct bgp_proto *p = (struct bgp_proto *) P;
1147

    
1148
  P->accept_ra_types = c->secondary ? RA_ACCEPTED : RA_OPTIMAL;
1149
  P->rt_notify = bgp_rt_notify;
1150
  P->import_control = bgp_import_control;
1151
  P->neigh_notify = bgp_neigh_notify;
1152
  P->reload_routes = bgp_reload_routes;
1153
  P->feed_done = bgp_feed_done;
1154
  P->rte_better = bgp_rte_better;
1155
  P->rte_recalculate = c->deterministic_med ? bgp_rte_recalculate : NULL;
1156

    
1157
  p->cf = c;
1158
  p->local_as = c->local_as;
1159
  p->remote_as = c->remote_as;
1160
  p->is_internal = (c->local_as == c->remote_as);
1161
  p->rs_client = c->rs_client;
1162
  p->rr_client = c->rr_client;
1163
  p->igp_table = get_igp_table(c);
1164

    
1165
  return P;
1166
}
1167

    
1168

    
1169
void
1170
bgp_check_config(struct bgp_config *c)
1171
{
1172
  int internal = (c->local_as == c->remote_as);
1173

    
1174
  /* Do not check templates at all */
1175
  if (c->c.class == SYM_TEMPLATE)
1176
    return;
1177

    
1178

    
1179
  /* EBGP direct by default, IBGP multihop by default */
1180
  if (c->multihop < 0)
1181
    c->multihop = internal ? 64 : 0;
1182

    
1183
  /* Different default for gw_mode */
1184
  if (!c->gw_mode)
1185
    c->gw_mode = c->multihop ? GW_RECURSIVE : GW_DIRECT;
1186

    
1187
  /* Different default based on rs_client */
1188
  if (!c->missing_lladdr)
1189
    c->missing_lladdr = c->rs_client ? MLL_IGNORE : MLL_SELF;
1190

    
1191
  /* Disable after error incompatible with restart limit action */
1192
  if (c->c.in_limit && (c->c.in_limit->action == PLA_RESTART) && c->disable_after_error)
1193
    c->c.in_limit->action = PLA_DISABLE;
1194

    
1195

    
1196
  if (!c->local_as)
1197
    cf_error("Local AS number must be set");
1198

    
1199
  if (ipa_zero(c->remote_ip))
1200
    cf_error("Neighbor must be configured");
1201

    
1202
  if (!c->remote_as)
1203
    cf_error("Remote AS number must be set");
1204

    
1205
  // if (ipa_is_link_local(c->remote_ip) && !c->iface)
1206
  //   cf_error("Link-local neighbor address requires specified interface");
1207

    
1208
  if (!ipa_is_link_local(c->remote_ip) != !c->iface)
1209
    cf_error("Link-local address and interface scope must be used together");
1210

    
1211
  if (!(c->capabilities && c->enable_as4) && (c->remote_as > 0xFFFF))
1212
    cf_error("Neighbor AS number out of range (AS4 not available)");
1213

    
1214
  if (!internal && c->rr_client)
1215
    cf_error("Only internal neighbor can be RR client");
1216

    
1217
  if (internal && c->rs_client)
1218
    cf_error("Only external neighbor can be RS client");
1219

    
1220
  if (c->multihop && (c->gw_mode == GW_DIRECT))
1221
    cf_error("Multihop BGP cannot use direct gateway mode");
1222

    
1223
  if (c->multihop && (ipa_is_link_local(c->remote_ip) ||
1224
                      ipa_is_link_local(c->source_addr)))
1225
    cf_error("Multihop BGP cannot be used with link-local addresses");
1226

    
1227
  if (c->multihop && c->check_link)
1228
    cf_error("Multihop BGP cannot depend on link state");
1229

    
1230
  if (c->multihop && c->bfd && ipa_zero(c->source_addr))
1231
    cf_error("Multihop BGP with BFD requires specified source address");
1232

    
1233
  if ((c->gw_mode == GW_RECURSIVE) && c->c.table->sorted)
1234
    cf_error("BGP in recursive mode prohibits sorted table");
1235

    
1236
  if (c->deterministic_med && c->c.table->sorted)
1237
    cf_error("BGP with deterministic MED prohibits sorted table");
1238

    
1239
  if (c->secondary && !c->c.table->sorted)
1240
    cf_error("BGP with secondary option requires sorted table");
1241
}
1242

    
1243
static int
1244
bgp_reconfigure(struct proto *P, struct proto_config *C)
1245
{
1246
  struct bgp_config *new = (struct bgp_config *) C;
1247
  struct bgp_proto *p = (struct bgp_proto *) P;
1248
  struct bgp_config *old = p->cf;
1249

    
1250
  if (proto_get_router_id(C) != p->local_id)
1251
    return 0;
1252

    
1253
  int same = !memcmp(((byte *) old) + sizeof(struct proto_config),
1254
                     ((byte *) new) + sizeof(struct proto_config),
1255
                     // password item is last and must be checked separately
1256
                     OFFSETOF(struct bgp_config, password) - sizeof(struct proto_config))
1257
    && ((!old->password && !new->password)
1258
        || (old->password && new->password && !strcmp(old->password, new->password)))
1259
    && (get_igp_table(old) == get_igp_table(new));
1260

    
1261
  if (same && (p->start_state > BSS_PREPARE))
1262
    bgp_update_bfd(p, new->bfd);
1263

    
1264
  /* We should update our copy of configuration ptr as old configuration will be freed */
1265
  if (same)
1266
    p->cf = new;
1267

    
1268
  return same;
1269
}
1270

    
1271
static void
1272
bgp_copy_config(struct proto_config *dest, struct proto_config *src)
1273
{
1274
  /* Just a shallow copy */
1275
  proto_copy_rest(dest, src, sizeof(struct bgp_config));
1276
}
1277

    
1278

    
1279
/**
1280
 * bgp_error - report a protocol error
1281
 * @c: connection
1282
 * @code: error code (according to the RFC)
1283
 * @subcode: error sub-code
1284
 * @data: data to be passed in the Notification message
1285
 * @len: length of the data
1286
 *
1287
 * bgp_error() sends a notification packet to tell the other side that a protocol
1288
 * error has occurred (including the data considered erroneous if possible) and
1289
 * closes the connection.
1290
 */
1291
void
1292
bgp_error(struct bgp_conn *c, unsigned code, unsigned subcode, byte *data, int len)
1293
{
1294
  struct bgp_proto *p = c->bgp;
1295

    
1296
  if (c->state == BS_CLOSE)
1297
    return;
1298

    
1299
  bgp_log_error(p, BE_BGP_TX, "Error", code, subcode, data, (len > 0) ? len : -len);
1300
  bgp_store_error(p, c, BE_BGP_TX, (code << 16) | subcode);
1301
  bgp_conn_enter_close_state(c);
1302

    
1303
  c->notify_code = code;
1304
  c->notify_subcode = subcode;
1305
  c->notify_data = data;
1306
  c->notify_size = (len > 0) ? len : 0;
1307
  bgp_schedule_packet(c, PKT_NOTIFICATION);
1308

    
1309
  if (code != 6)
1310
    {
1311
      bgp_update_startup_delay(p);
1312
      bgp_stop(p, 0);
1313
    }
1314
}
1315

    
1316
/**
1317
 * bgp_store_error - store last error for status report
1318
 * @p: BGP instance
1319
 * @c: connection
1320
 * @class: error class (BE_xxx constants)
1321
 * @code: error code (class specific)
1322
 *
1323
 * bgp_store_error() decides whether given error is interesting enough
1324
 * and store that error to last_error variables of @p
1325
 */
1326
void
1327
bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code)
1328
{
1329
  /* During PS_UP, we ignore errors on secondary connection */
1330
  if ((p->p.proto_state == PS_UP) && c && (c != p->conn))
1331
    return;
1332

    
1333
  /* During PS_STOP, we ignore any errors, as we want to report
1334
   * the error that caused transition to PS_STOP
1335
   */
1336
  if (p->p.proto_state == PS_STOP)
1337
    return;
1338

    
1339
  p->last_error_class = class;
1340
  p->last_error_code = code;
1341
}
1342

    
1343
/* Connection state names, indexed by connection state (see bgp_state_dsc()) */
static char *bgp_state_names[] = {
  "Idle",
  "Connect",
  "Active",
  "OpenSent",
  "OpenConfirm",
  "Established",
  "Close"
};

/* Message prefixes, indexed by last_error_class */
static char *bgp_err_classes[] = {
  "",
  "Error: ",
  "Socket: ",
  "Received: ",
  "BGP Error: ",
  "Automatic shutdown: ",
  ""
};

/* Messages for class BE_MISC, indexed by last_error_code */
static char *bgp_misc_errors[] = {
  "",
  "Neighbor lost",
  "Invalid next hop",
  "Kernel MD5 auth failed",
  "No listening socket",
  "Link down",
  "BFD session down",
  "Graceful restart"
};

/* Messages for class BE_AUTO_DOWN, indexed by last_error_code */
static char *bgp_auto_errors[] = {
  "",
  "Route limit exceeded"
};
1347

    
1348
static const char *
1349
bgp_last_errmsg(struct bgp_proto *p)
1350
{
1351
  switch (p->last_error_class)
1352
    {
1353
    case BE_MISC:
1354
      return bgp_misc_errors[p->last_error_code];
1355
    case BE_SOCKET:
1356
      return (p->last_error_code == 0) ? "Connection closed" : strerror(p->last_error_code);
1357
    case BE_BGP_RX:
1358
    case BE_BGP_TX:
1359
      return bgp_error_dsc(p->last_error_code >> 16, p->last_error_code & 0xFF);
1360
    case BE_AUTO_DOWN:
1361
      return bgp_auto_errors[p->last_error_code];
1362
    default:
1363
      return "";
1364
    }
1365
}
1366

    
1367
static const char *
1368
bgp_state_dsc(struct bgp_proto *p)
1369
{
1370
  if (p->p.proto_state == PS_DOWN)
1371
    return "Down";
1372

    
1373
  int state = MAX(p->incoming_conn.state, p->outgoing_conn.state);
1374
  if ((state == BS_IDLE) && (p->start_state >= BSS_CONNECT) && p->cf->passive)
1375
    return "Passive";
1376

    
1377
  return bgp_state_names[state];
1378
}
1379

    
1380
static void
1381
bgp_get_status(struct proto *P, byte *buf)
1382
{
1383
  struct bgp_proto *p = (struct bgp_proto *) P;
1384

    
1385
  const char *err1 = bgp_err_classes[p->last_error_class];
1386
  const char *err2 = bgp_last_errmsg(p);
1387

    
1388
  if (P->proto_state == PS_DOWN)
1389
    bsprintf(buf, "%s%s", err1, err2);
1390
  else
1391
    bsprintf(buf, "%-14s%s%s", bgp_state_dsc(p), err1, err2);
1392
}
1393

    
1394
static void
1395
bgp_show_proto_info(struct proto *P)
1396
{
1397
  struct bgp_proto *p = (struct bgp_proto *) P;
1398
  struct bgp_conn *c = p->conn;
1399

    
1400
  proto_show_basic_info(P);
1401

    
1402
  cli_msg(-1006, "  BGP state:          %s", bgp_state_dsc(p));
1403
  cli_msg(-1006, "    Neighbor address: %I%J", p->cf->remote_ip, p->cf->iface);
1404
  cli_msg(-1006, "    Neighbor AS:      %u", p->remote_as);
1405

    
1406
  if (p->gr_active)
1407
    cli_msg(-1006, "    Neighbor graceful restart active");
1408

    
1409
  if (P->proto_state == PS_START)
1410
    {
1411
      struct bgp_conn *oc = &p->outgoing_conn;
1412

    
1413
      if ((p->start_state < BSS_CONNECT) &&
1414
          (p->startup_timer->expires))
1415
        cli_msg(-1006, "    Error wait:       %d/%d",
1416
                p->startup_timer->expires - now, p->startup_delay);
1417

    
1418
      if ((oc->state == BS_ACTIVE) &&
1419
          (oc->connect_retry_timer->expires))
1420
        cli_msg(-1006, "    Connect delay:    %d/%d",
1421
                oc->connect_retry_timer->expires - now, p->cf->connect_delay_time);
1422

    
1423
      if (p->gr_active && p->gr_timer->expires)
1424
        cli_msg(-1006, "    Restart timer:    %d/-", p->gr_timer->expires - now);
1425
    }
1426
  else if (P->proto_state == PS_UP)
1427
    {
1428
      cli_msg(-1006, "    Neighbor ID:      %R", p->remote_id);
1429
      cli_msg(-1006, "    Neighbor caps:   %s%s%s%s%s",
1430
              c->peer_refresh_support ? " refresh" : "",
1431
              c->peer_gr_able ? " restart-able" : (c->peer_gr_aware ? " restart-aware" : ""),
1432
              c->peer_as4_support ? " AS4" : "",
1433
              (c->peer_add_path & ADD_PATH_RX) ? " add-path-rx" : "",
1434
              (c->peer_add_path & ADD_PATH_TX) ? " add-path-tx" : "");
1435
      cli_msg(-1006, "    Session:          %s%s%s%s%s%s%s",
1436
              p->is_internal ? "internal" : "external",
1437
              p->cf->multihop ? " multihop" : "",
1438
              p->rr_client ? " route-reflector" : "",
1439
              p->rs_client ? " route-server" : "",
1440
              p->as4_session ? " AS4" : "",
1441
              p->add_path_rx ? " add-path-rx" : "",
1442
              p->add_path_tx ? " add-path-tx" : "");
1443
      cli_msg(-1006, "    Source address:   %I", p->source_addr);
1444
      if (P->cf->in_limit)
1445
        cli_msg(-1006, "    Route limit:      %d/%d",
1446
                p->p.stats.imp_routes + p->p.stats.filt_routes, P->cf->in_limit->limit);
1447
      cli_msg(-1006, "    Hold timer:       %d/%d",
1448
              tm_remains(c->hold_timer), c->hold_time);
1449
      cli_msg(-1006, "    Keepalive timer:  %d/%d",
1450
              tm_remains(c->keepalive_timer), c->keepalive_time);
1451
    }
1452

    
1453
  if ((p->last_error_class != BE_NONE) &&
1454
      (p->last_error_class != BE_MAN_DOWN))
1455
    {
1456
      const char *err1 = bgp_err_classes[p->last_error_class];
1457
      const char *err2 = bgp_last_errmsg(p);
1458
      cli_msg(-1006, "    Last error:       %s%s", err1, err2);
1459
    }
1460
}
1461

    
1462
struct protocol proto_bgp = {
1463
  .name =                 "BGP",
1464
  .template =                 "bgp%d",
1465
  .attr_class =         EAP_BGP,
1466
  .preference =         DEF_PREF_BGP,
1467
  .config_size =        sizeof(struct bgp_config),
1468
  .init =                 bgp_init,
1469
  .start =                 bgp_start,
1470
  .shutdown =                 bgp_shutdown,
1471
  .cleanup =                 bgp_cleanup,
1472
  .reconfigure =         bgp_reconfigure,
1473
  .copy_config =         bgp_copy_config,
1474
  .get_status =         bgp_get_status,
1475
  .get_attr =                 bgp_get_attr,
1476
  .get_route_info =         bgp_get_route_info,
1477
  .show_proto_info =         bgp_show_proto_info
1478
};