Statistics
| Branch: | Revision:

iof-bird-daemon / proto / bgp / bgp.c @ ccee67ca

History | View | Annotate | Download (57.5 KB)

1
/*
2
 *        BIRD -- The Border Gateway Protocol
3
 *
4
 *        (c) 2000 Martin Mares <mj@ucw.cz>
5
 *        (c) 2008--2016 Ondrej Zajicek <santiago@crfreenet.org>
6
 *        (c) 2008--2016 CZ.NIC z.s.p.o.
7
 *
8
 *        Can be freely distributed and used under the terms of the GNU GPL.
9
 */
10

    
11
/**
12
 * DOC: Border Gateway Protocol
13
 *
14
 * The BGP protocol is implemented in three parts: |bgp.c| which takes care of
15
 * the connection and most of the interface with BIRD core, |packets.c| handling
16
 * both incoming and outgoing BGP packets and |attrs.c| containing functions for
17
 * manipulation with BGP attribute lists.
18
 *
19
 * As opposed to the other existing routing daemons, BIRD has a sophisticated
20
 * core architecture which is able to keep all the information needed by BGP in
21
 * the primary routing table, therefore no complex data structures like a
22
 * central BGP table are needed. This increases memory footprint of a BGP router
23
 * with many connections, but not too much and, which is more important, it
24
 * makes BGP much easier to implement.
25
 *
26
 * Each instance of BGP (corresponding to a single BGP peer) is described by a
27
 * &bgp_proto structure to which are attached individual connections represented
28
 * by &bgp_connection (usually, there exists only one connection, but during BGP
29
 * session setup, there can be more of them). The connections are handled
30
 * according to the BGP state machine defined in the RFC with all the timers and
31
 * all the parameters configurable.
32
 *
33
 * In incoming direction, we listen on the connection's socket and each time we
34
 * receive some input, we pass it to bgp_rx(). It decodes packet headers and the
35
 * markers and passes complete packets to bgp_rx_packet() which distributes the
36
 * packet according to its type.
37
 *
38
 * In outgoing direction, we gather all the routing updates and sort them to
39
 * buckets (&bgp_bucket) according to their attributes (we keep a hash table for
40
 * fast comparison of &rta's and a &fib which helps us to find if we already
41
 * have another route for the same destination queued for sending, so that we
42
 * can replace it with the new one immediately instead of sending both
43
 * updates). There also exists a special bucket holding all the route
44
 * withdrawals which cannot be queued anywhere else as they don't have any
45
 * attributes. If we have any packet to send (due to either new routes or the
46
 * connection tracking code wanting to send an Open, Keepalive or Notification
47
 * message), we call bgp_schedule_packet() which sets the corresponding bit in a
48
 * @packets_to_send bit field in &bgp_conn and as soon as the transmit socket
49
 * buffer becomes empty, we call bgp_fire_tx(). It inspects state of all the
50
 * packet type bits and calls the corresponding bgp_create_xx() functions,
51
 * eventually rescheduling the same packet type if we have more data of the same
52
 * type to send.
53
 *
54
 * The processing of attributes consists of two functions: bgp_decode_attrs()
55
 * for checking of the attribute blocks and translating them to the language of
56
 * BIRD's extended attributes and bgp_encode_attrs() which does the
57
 * converse. Both functions are built around a @bgp_attr_table array describing
58
 * all important characteristics of all known attributes.  Unknown transitive
59
 * attributes are attached to the route as %EAF_TYPE_OPAQUE byte streams.
60
 *
61
 * BGP protocol implements graceful restart in both restarting (local restart)
62
 * and receiving (neighbor restart) roles. The first is handled mostly by the
63
 * graceful restart code in the nest, BGP protocol just handles capabilities,
64
 * sets @gr_wait and locks graceful restart until end-of-RIB mark is received.
65
 * The second is implemented by internal restart of the BGP state to %BS_IDLE
66
 * and protocol state to %PS_START, but keeping the protocol up from the core
67
 * point of view and therefore maintaining received routes. Routing table
68
 * refresh cycle (rt_refresh_begin(), rt_refresh_end()) is used for removing
69
 * stale routes after reestablishment of BGP session during graceful restart.
70
 *
71
 * Supported standards:
72
 * <itemize>
73
 * <item> <rfc id="4271"> - Border Gateway Protocol 4 (BGP)
74
 * <item> <rfc id="1997"> - BGP Communities Attribute
75
 * <item> <rfc id="2385"> - Protection of BGP Sessions via TCP MD5 Signature
76
 * <item> <rfc id="2545"> - Use of BGP Multiprotocol Extensions for IPv6
77
 * <item> <rfc id="2918"> - Route Refresh Capability
78
 * <item> <rfc id="3107"> - Carrying Label Information in BGP
79
 * <item> <rfc id="4360"> - BGP Extended Communities Attribute
80
 * <item> <rfc id="4364"> - BGP/MPLS IPv4 Virtual Private Networks
81
 * <item> <rfc id="4456"> - BGP Route Reflection
82
 * <item> <rfc id="4486"> - Subcodes for BGP Cease Notification Message
83
 * <item> <rfc id="4659"> - BGP/MPLS IPv6 Virtual Private Networks
84
 * <item> <rfc id="4724"> - Graceful Restart Mechanism for BGP
85
 * <item> <rfc id="4760"> - Multiprotocol extensions for BGP
86
 * <item> <rfc id="4798"> - Connecting IPv6 Islands over IPv4 MPLS
87
 * <item> <rfc id="5065"> - AS confederations for BGP
88
 * <item> <rfc id="5082"> - Generalized TTL Security Mechanism
89
 * <item> <rfc id="5492"> - Capabilities Advertisement with BGP
90
 * <item> <rfc id="5549"> - Advertising IPv4 NLRI with an IPv6 Next Hop
91
 * <item> <rfc id="5575"> - Dissemination of Flow Specification Rules
92
 * <item> <rfc id="5668"> - 4-Octet AS Specific BGP Extended Community
93
 * <item> <rfc id="6286"> - AS-Wide Unique BGP Identifier
94
 * <item> <rfc id="6608"> - Subcodes for BGP Finite State Machine Error
95
 * <item> <rfc id="6793"> - BGP Support for 4-Octet AS Numbers
96
 * <item> <rfc id="7313"> - Enhanced Route Refresh Capability for BGP
97
 * <item> <rfc id="7606"> - Revised Error Handling for BGP UPDATE Messages
98
 * <item> <rfc id="7911"> - Advertisement of Multiple Paths in BGP
99
 * <item> <rfc id="7947"> - Internet Exchange BGP Route Server
100
 * <item> <rfc id="8092"> - BGP Large Communities Attribute
101
 * </itemize>
102
*/
103

    
104
#undef LOCAL_DEBUG
105

    
106
#include <stdlib.h>
107

    
108
#include "nest/bird.h"
109
#include "nest/iface.h"
110
#include "nest/protocol.h"
111
#include "nest/route.h"
112
#include "nest/cli.h"
113
#include "nest/locks.h"
114
#include "conf/conf.h"
115
#include "lib/socket.h"
116
#include "lib/resource.h"
117
#include "lib/string.h"
118

    
119
#include "bgp.h"
120

    
121

    
122
struct linpool *bgp_linpool;                /* Global temporary pool */
123
struct linpool *bgp_linpool2;                /* Global temporary pool for bgp_rt_notify() */
124
static list bgp_sockets;                /* Global list of listening sockets */
125

    
126

    
127
static void bgp_connect(struct bgp_proto *p);
128
static void bgp_active(struct bgp_proto *p);
129
static void bgp_update_bfd(struct bgp_proto *p, int use_bfd);
130

    
131
static int bgp_incoming_connection(sock *sk, uint dummy UNUSED);
132
static void bgp_listen_sock_err(sock *sk UNUSED, int err);
133

    
134
/**
135
 * bgp_open - open a BGP instance
136
 * @p: BGP instance
137
 *
138
 * This function allocates and configures shared BGP resources, mainly listening
139
 * sockets. Should be called as the last step during initialization (when lock
140
 * is acquired and neighbor is ready). When error, caller should change state to
141
 * PS_DOWN and return immediately.
142
 */
143
static int
144
bgp_open(struct bgp_proto *p)
145
{
146
  struct bgp_socket *bs = NULL;
147
  struct iface *ifa = p->cf->strict_bind ? p->cf->iface : NULL;
148
  ip_addr addr = p->cf->strict_bind ? p->cf->local_ip :
149
    (ipa_is_ip4(p->cf->remote_ip) ? IPA_NONE4 : IPA_NONE6);
150
  uint port = p->cf->local_port;
151

    
152
  /* FIXME: Add some global init? */
153
  if (!bgp_linpool)
154
    init_list(&bgp_sockets);
155

    
156
  /* We assume that cf->iface is defined iff cf->local_ip is link-local */
157

    
158
  WALK_LIST(bs, bgp_sockets)
159
    if (ipa_equal(bs->sk->saddr, addr) && (bs->sk->iface == ifa) && (bs->sk->sport == port))
160
    {
161
      bs->uc++;
162
      p->sock = bs;
163
      return 0;
164
    }
165

    
166
  sock *sk = sk_new(proto_pool);
167
  sk->type = SK_TCP_PASSIVE;
168
  sk->ttl = 255;
169
  sk->saddr = addr;
170
  sk->sport = port;
171
  sk->flags = 0;
172
  sk->tos = IP_PREC_INTERNET_CONTROL;
173
  sk->rbsize = BGP_RX_BUFFER_SIZE;
174
  sk->tbsize = BGP_TX_BUFFER_SIZE;
175
  sk->rx_hook = bgp_incoming_connection;
176
  sk->err_hook = bgp_listen_sock_err;
177

    
178
  if (sk_open(sk) < 0)
179
    goto err;
180

    
181
  bs = mb_allocz(proto_pool, sizeof(struct bgp_socket));
182
  bs->sk = sk;
183
  bs->uc = 1;
184
  p->sock = bs;
185

    
186
  add_tail(&bgp_sockets, &bs->n);
187

    
188
  if (!bgp_linpool)
189
  {
190
    bgp_linpool  = lp_new_default(proto_pool);
191
    bgp_linpool2 = lp_new_default(proto_pool);
192
  }
193

    
194
  return 0;
195

    
196
err:
197
  sk_log_error(sk, p->p.name);
198
  log(L_ERR "%s: Cannot open listening socket", p->p.name);
199
  rfree(sk);
200
  return -1;
201
}
202

    
203
/**
204
 * bgp_close - close a BGP instance
205
 * @p: BGP instance
206
 *
207
 * This function frees and deconfigures shared BGP resources.
208
 */
209
static void
210
bgp_close(struct bgp_proto *p)
211
{
212
  struct bgp_socket *bs = p->sock;
213

    
214
  ASSERT(bs && bs->uc);
215

    
216
  if (--bs->uc)
217
    return;
218

    
219
  rfree(bs->sk);
220
  rem_node(&bs->n);
221
  mb_free(bs);
222

    
223
  if (!EMPTY_LIST(bgp_sockets))
224
    return;
225

    
226
  rfree(bgp_linpool);
227
  bgp_linpool = NULL;
228

    
229
  rfree(bgp_linpool2);
230
  bgp_linpool2 = NULL;
231
}
232

    
233
static inline int
234
bgp_setup_auth(struct bgp_proto *p, int enable)
235
{
236
  if (p->cf->password)
237
  {
238
    int rv = sk_set_md5_auth(p->sock->sk,
239
                             p->cf->local_ip, p->cf->remote_ip, p->cf->iface,
240
                             enable ? p->cf->password : NULL, p->cf->setkey);
241

    
242
    if (rv < 0)
243
      sk_log_error(p->sock->sk, p->p.name);
244

    
245
    return rv;
246
  }
247
  else
248
    return 0;
249
}
250

    
251
static inline struct bgp_channel *
252
bgp_find_channel(struct bgp_proto *p, u32 afi)
253
{
254
  struct bgp_channel *c;
255
  WALK_LIST(c, p->p.channels)
256
    if (c->afi == afi)
257
      return c;
258

    
259
  return NULL;
260
}
261

    
262
static void
263
bgp_startup(struct bgp_proto *p)
264
{
265
  BGP_TRACE(D_EVENTS, "Started");
266
  p->start_state = BSS_CONNECT;
267

    
268
  if (!p->cf->passive)
269
    bgp_active(p);
270
}
271

    
272
static void
273
bgp_startup_timeout(timer *t)
274
{
275
  bgp_startup(t->data);
276
}
277

    
278

    
279
static void
280
bgp_initiate(struct bgp_proto *p)
281
{
282
  int err_val;
283

    
284
  if (bgp_open(p) < 0)
285
  { err_val = BEM_NO_SOCKET; goto err1; }
286

    
287
  if (bgp_setup_auth(p, 1) < 0)
288
  { err_val = BEM_INVALID_MD5; goto err2; }
289

    
290
  if (p->cf->bfd)
291
    bgp_update_bfd(p, p->cf->bfd);
292

    
293
  if (p->startup_delay)
294
  {
295
    p->start_state = BSS_DELAY;
296
    BGP_TRACE(D_EVENTS, "Startup delayed by %d seconds due to errors", p->startup_delay);
297
    bgp_start_timer(p->startup_timer, p->startup_delay);
298
  }
299
  else
300
    bgp_startup(p);
301

    
302
  return;
303

    
304
err2:
305
  bgp_close(p);
306
err1:
307
  p->p.disabled = 1;
308
  bgp_store_error(p, NULL, BE_MISC, err_val);
309
  proto_notify_state(&p->p, PS_DOWN);
310

    
311
  return;
312
}
313

    
314
/**
315
 * bgp_start_timer - start a BGP timer
316
 * @t: timer
317
 * @value: time (in seconds) to fire (0 to disable the timer)
318
 *
319
 * This function calls tm_start() on @t with time @value and the amount of
320
 * randomization suggested by the BGP standard. Please use it for all BGP
321
 * timers.
322
 */
323
void
324
bgp_start_timer(timer *t, uint value)
325
{
326
  if (value)
327
  {
328
    /* The randomization procedure is specified in RFC 4271 section 10 */
329
    btime time = value S;
330
    btime randomize = random() % ((time / 4) + 1);
331
    tm_start(t, time - randomize);
332
  }
333
  else
334
    tm_stop(t);
335
}
336

    
337
/**
338
 * bgp_close_conn - close a BGP connection
339
 * @conn: connection to close
340
 *
341
 * This function takes a connection described by the &bgp_conn structure, closes
342
 * its socket and frees all resources associated with it.
343
 */
344
void
345
bgp_close_conn(struct bgp_conn *conn)
346
{
347
  // struct bgp_proto *p = conn->bgp;
348

    
349
  DBG("BGP: Closing connection\n");
350
  conn->packets_to_send = 0;
351
  conn->channels_to_send = 0;
352
  rfree(conn->connect_timer);
353
  conn->connect_timer = NULL;
354
  rfree(conn->keepalive_timer);
355
  conn->keepalive_timer = NULL;
356
  rfree(conn->hold_timer);
357
  conn->hold_timer = NULL;
358
  rfree(conn->tx_ev);
359
  conn->tx_ev = NULL;
360
  rfree(conn->sk);
361
  conn->sk = NULL;
362

    
363
  mb_free(conn->local_caps);
364
  conn->local_caps = NULL;
365
  mb_free(conn->remote_caps);
366
  conn->remote_caps = NULL;
367
}
368

    
369

    
370
/**
371
 * bgp_update_startup_delay - update a startup delay
372
 * @p: BGP instance
373
 *
374
 * This function updates a startup delay that is used to postpone next BGP
375
 * connect. It also handles disable_after_error and might stop BGP instance
376
 * when error happened and disable_after_error is on.
377
 *
378
 * It should be called when BGP protocol error happened.
379
 */
380
void
381
bgp_update_startup_delay(struct bgp_proto *p)
382
{
383
  struct bgp_config *cf = p->cf;
384

    
385
  DBG("BGP: Updating startup delay\n");
386

    
387
  if (p->last_proto_error && ((current_time() - p->last_proto_error) >= cf->error_amnesia_time S))
388
    p->startup_delay = 0;
389

    
390
  p->last_proto_error = current_time();
391

    
392
  if (cf->disable_after_error)
393
  {
394
    p->startup_delay = 0;
395
    p->p.disabled = 1;
396
    return;
397
  }
398

    
399
  if (!p->startup_delay)
400
    p->startup_delay = cf->error_delay_time_min;
401
  else
402
    p->startup_delay = MIN(2 * p->startup_delay, cf->error_delay_time_max);
403
}
404

    
405
static void
406
bgp_graceful_close_conn(struct bgp_conn *conn, uint subcode, byte *data, uint len)
407
{
408
  switch (conn->state)
409
  {
410
  case BS_IDLE:
411
  case BS_CLOSE:
412
    return;
413

    
414
  case BS_CONNECT:
415
  case BS_ACTIVE:
416
    bgp_conn_enter_idle_state(conn);
417
    return;
418

    
419
  case BS_OPENSENT:
420
  case BS_OPENCONFIRM:
421
  case BS_ESTABLISHED:
422
    bgp_error(conn, 6, subcode, data, len);
423
    return;
424

    
425
  default:
426
    bug("bgp_graceful_close_conn: Unknown state %d", conn->state);
427
  }
428
}
429

    
430
static void
431
bgp_down(struct bgp_proto *p)
432
{
433
  if (p->start_state > BSS_PREPARE)
434
  {
435
    bgp_setup_auth(p, 0);
436
    bgp_close(p);
437
  }
438

    
439
  BGP_TRACE(D_EVENTS, "Down");
440
  proto_notify_state(&p->p, PS_DOWN);
441
}
442

    
443
static void
444
bgp_decision(void *vp)
445
{
446
  struct bgp_proto *p = vp;
447

    
448
  DBG("BGP: Decision start\n");
449
  if ((p->p.proto_state == PS_START) &&
450
      (p->outgoing_conn.state == BS_IDLE) &&
451
      (p->incoming_conn.state != BS_OPENCONFIRM) &&
452
      !p->cf->passive)
453
    bgp_active(p);
454

    
455
  if ((p->p.proto_state == PS_STOP) &&
456
      (p->outgoing_conn.state == BS_IDLE) &&
457
      (p->incoming_conn.state == BS_IDLE))
458
    bgp_down(p);
459
}
460

    
461
void
462
bgp_stop(struct bgp_proto *p, uint subcode, byte *data, uint len)
463
{
464
  proto_notify_state(&p->p, PS_STOP);
465
  bgp_graceful_close_conn(&p->outgoing_conn, subcode, data, len);
466
  bgp_graceful_close_conn(&p->incoming_conn, subcode, data, len);
467
  ev_schedule(p->event);
468
}
469

    
470
static inline void
471
bgp_conn_set_state(struct bgp_conn *conn, uint new_state)
472
{
473
  if (conn->bgp->p.mrtdump & MD_STATES)
474
    mrt_dump_bgp_state_change(conn, conn->state, new_state);
475

    
476
  conn->state = new_state;
477
}
478

    
479
void
480
bgp_conn_enter_openconfirm_state(struct bgp_conn *conn)
481
{
482
  /* Really, most of the work is done in bgp_rx_open(). */
483
  bgp_conn_set_state(conn, BS_OPENCONFIRM);
484
}
485

    
486
static const struct bgp_af_caps dummy_af_caps = { };
487

    
488
void
489
bgp_conn_enter_established_state(struct bgp_conn *conn)
490
{
491
  struct bgp_proto *p = conn->bgp;
492
  struct bgp_caps *local = conn->local_caps;
493
  struct bgp_caps *peer = conn->remote_caps;
494
  struct bgp_channel *c;
495

    
496
  BGP_TRACE(D_EVENTS, "BGP session established");
497

    
498
  /* For multi-hop BGP sessions */
499
  if (ipa_zero(p->source_addr))
500
    p->source_addr = conn->sk->saddr;
501

    
502
  conn->sk->fast_rx = 0;
503

    
504
  p->conn = conn;
505
  p->last_error_class = 0;
506
  p->last_error_code = 0;
507

    
508
  p->as4_session = conn->as4_session;
509

    
510
  p->route_refresh = peer->route_refresh;
511
  p->enhanced_refresh = local->enhanced_refresh && peer->enhanced_refresh;
512

    
513
  /* Whether we may handle possible GR of peer (it has some AF GR-able) */
514
  p->gr_ready = 0;        /* Updated later */
515

    
516
  /* Whether peer is ready to handle our GR recovery */
517
  int peer_gr_ready = peer->gr_aware && !(peer->gr_flags & BGP_GRF_RESTART);
518

    
519
  if (p->gr_active_num)
520
    tm_stop(p->gr_timer);
521

    
522
  /* Number of active channels */
523
  int num = 0;
524

    
525
  WALK_LIST(c, p->p.channels)
526
  {
527
    const struct bgp_af_caps *loc = bgp_find_af_caps(local, c->afi);
528
    const struct bgp_af_caps *rem = bgp_find_af_caps(peer,  c->afi);
529

    
530
    /* Ignore AFIs that were not announced in multiprotocol capability */
531
    if (!loc || !loc->ready)
532
      loc = &dummy_af_caps;
533

    
534
    if (!rem || !rem->ready)
535
      rem = &dummy_af_caps;
536

    
537
    int active = loc->ready && rem->ready;
538
    c->c.disabled = !active;
539
    c->c.reloadable = p->route_refresh;
540

    
541
    c->index = active ? num++ : 0;
542

    
543
    c->feed_state = BFS_NONE;
544
    c->load_state = BFS_NONE;
545

    
546
    /* Channels where peer may do GR */
547
    c->gr_ready = active && local->gr_aware && rem->gr_able;
548
    p->gr_ready = p->gr_ready || c->gr_ready;
549

    
550
    /* Channels not able to recover gracefully */
551
    if (p->p.gr_recovery && (!active || !peer_gr_ready))
552
      channel_graceful_restart_unlock(&c->c);
553

    
554
    /* Channels waiting for local convergence */
555
    if (p->p.gr_recovery && loc->gr_able && peer_gr_ready)
556
      c->c.gr_wait = 1;
557

    
558
    /* Channels where peer is not able to recover gracefully */
559
    if (c->gr_active && ! (c->gr_ready && (rem->gr_af_flags & BGP_GRF_FORWARDING)))
560
      bgp_graceful_restart_done(c);
561

    
562
    /* GR capability implies that neighbor will send End-of-RIB */
563
    if (peer->gr_aware)
564
      c->load_state = BFS_LOADING;
565

    
566
    c->ext_next_hop = c->cf->ext_next_hop && (bgp_channel_is_ipv6(c) || rem->ext_next_hop);
567
    c->add_path_rx = (loc->add_path & BGP_ADD_PATH_RX) && (rem->add_path & BGP_ADD_PATH_TX);
568
    c->add_path_tx = (loc->add_path & BGP_ADD_PATH_TX) && (rem->add_path & BGP_ADD_PATH_RX);
569

    
570
    /* Update RA mode */
571
    if (c->add_path_tx)
572
      c->c.ra_mode = RA_ANY;
573
    else if (c->cf->secondary)
574
      c->c.ra_mode = RA_ACCEPTED;
575
    else
576
      c->c.ra_mode = RA_OPTIMAL;
577
  }
578

    
579
  p->afi_map = mb_alloc(p->p.pool, num * sizeof(u32));
580
  p->channel_map = mb_alloc(p->p.pool, num * sizeof(void *));
581
  p->channel_count = num;
582

    
583
  WALK_LIST(c, p->p.channels)
584
  {
585
    if (c->c.disabled)
586
      continue;
587

    
588
    p->afi_map[c->index] = c->afi;
589
    p->channel_map[c->index] = c;
590
  }
591

    
592
  /* proto_notify_state() will likely call bgp_feed_begin(), setting c->feed_state */
593

    
594
  bgp_conn_set_state(conn, BS_ESTABLISHED);
595
  proto_notify_state(&p->p, PS_UP);
596
}
597

    
598
static void
599
bgp_conn_leave_established_state(struct bgp_proto *p)
600
{
601
  BGP_TRACE(D_EVENTS, "BGP session closed");
602
  p->conn = NULL;
603

    
604
  // XXXX free these tables to avoid memory leak during graceful restart
605
  // bgp_free_prefix_table(p);
606
  // bgp_free_bucket_table(p);
607

    
608
  if (p->p.proto_state == PS_UP)
609
    bgp_stop(p, 0, NULL, 0);
610
}
611

    
612
void
613
bgp_conn_enter_close_state(struct bgp_conn *conn)
614
{
615
  struct bgp_proto *p = conn->bgp;
616
  int os = conn->state;
617

    
618
  bgp_conn_set_state(conn, BS_CLOSE);
619
  tm_stop(conn->keepalive_timer);
620
  conn->sk->rx_hook = NULL;
621

    
622
  /* Timeout for CLOSE state, if we cannot send notification soon then we just hangup */
623
  bgp_start_timer(conn->hold_timer, 10);
624

    
625
  if (os == BS_ESTABLISHED)
626
    bgp_conn_leave_established_state(p);
627
}
628

    
629
void
630
bgp_conn_enter_idle_state(struct bgp_conn *conn)
631
{
632
  struct bgp_proto *p = conn->bgp;
633
  int os = conn->state;
634

    
635
  bgp_close_conn(conn);
636
  bgp_conn_set_state(conn, BS_IDLE);
637
  ev_schedule(p->event);
638

    
639
  if (os == BS_ESTABLISHED)
640
    bgp_conn_leave_established_state(p);
641
}
642

    
643
/**
644
 * bgp_handle_graceful_restart - handle detected BGP graceful restart
645
 * @p: BGP instance
646
 *
647
 * This function is called when a BGP graceful restart of the neighbor is
648
 * detected (when the TCP connection fails or when a new TCP connection
649
 * appears). The function activates processing of the restart - starts routing
650
 * table refresh cycle and activates BGP restart timer. The protocol state goes
651
 * back to %PS_START, but changing BGP state back to %BS_IDLE is left for the
652
 * caller.
653
 */
654
void
655
bgp_handle_graceful_restart(struct bgp_proto *p)
656
{
657
  ASSERT(p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready);
658

    
659
  BGP_TRACE(D_EVENTS, "Neighbor graceful restart detected%s",
660
            p->gr_active_num ? " - already pending" : "");
661

    
662
  p->gr_active_num = 0;
663

    
664
  struct bgp_channel *c;
665
  WALK_LIST(c, p->p.channels)
666
  {
667
    if (c->gr_ready)
668
    {
669
      if (c->gr_active)
670
        rt_refresh_end(c->c.table, &c->c);
671

    
672
      c->gr_active = 1;
673
      p->gr_active_num++;
674
      rt_refresh_begin(c->c.table, &c->c);
675
    }
676
    else
677
    {
678
      /* Just flush the routes */
679
      rt_refresh_begin(c->c.table, &c->c);
680
      rt_refresh_end(c->c.table, &c->c);
681
    }
682
  }
683

    
684
  proto_notify_state(&p->p, PS_START);
685
  bgp_start_timer(p->gr_timer, p->conn->local_caps->gr_time);
686
}
687

    
688
/**
689
 * bgp_graceful_restart_done - finish active BGP graceful restart
690
 * @c: BGP channel
691
 *
692
 * This function is called when the active BGP graceful restart of the neighbor
693
 * should be finished for channel @c - either successfully (the neighbor sends
694
 * all paths and reports end-of-RIB for given AFI/SAFI on the new session) or
695
 * unsuccessfully (the neighbor does not support BGP graceful restart on the new
696
 * session). The function ends the routing table refresh cycle.
697
 */
698
void
699
bgp_graceful_restart_done(struct bgp_channel *c)
700
{
701
  struct bgp_proto *p = (void *) c->c.proto;
702

    
703
  ASSERT(c->gr_active);
704
  c->gr_active = 0;
705
  p->gr_active_num--;
706

    
707
  if (!p->gr_active_num)
708
    BGP_TRACE(D_EVENTS, "Neighbor graceful restart done");
709

    
710
  rt_refresh_end(c->c.table, &c->c);
711
}
712

    
713
/**
714
 * bgp_graceful_restart_timeout - timeout of graceful restart 'restart timer'
715
 * @t: timer
716
 *
717
 * This function is a timeout hook for @gr_timer, implementing BGP restart time
718
 * limit for reestablishment of the BGP session after the graceful restart. When
719
 * fired, we just proceed with the usual protocol restart.
720
 */
721

    
722
static void
723
bgp_graceful_restart_timeout(timer *t)
724
{
725
  struct bgp_proto *p = t->data;
726

    
727
  BGP_TRACE(D_EVENTS, "Neighbor graceful restart timeout");
728
  bgp_stop(p, 0, NULL, 0);
729
}
730

    
731

    
732
/**
733
 * bgp_refresh_begin - start incoming enhanced route refresh sequence
734
 * @c: BGP channel
735
 *
736
 * This function is called when an incoming enhanced route refresh sequence is
737
 * started by the neighbor, demarcated by the BoRR packet. The function updates
738
 * the load state and starts the routing table refresh cycle. Note that graceful
739
 * restart also uses routing table refresh cycle, but RFC 7313 and load states
740
 * ensure that these two sequences do not overlap.
741
 */
742
void
743
bgp_refresh_begin(struct bgp_channel *c)
744
{
745
  struct bgp_proto *p = (void *) c->c.proto;
746

    
747
  if (c->load_state == BFS_LOADING)
748
  { log(L_WARN "%s: BEGIN-OF-RR received before END-OF-RIB, ignoring", p->p.name); return; }
749

    
750
  c->load_state = BFS_REFRESHING;
751
  rt_refresh_begin(c->c.table, &c->c);
752
}
753

    
754
/**
755
 * bgp_refresh_end - finish incoming enhanced route refresh sequence
756
 * @c: BGP channel
757
 *
758
 * This function is called when an incoming enhanced route refresh sequence is
759
 * finished by the neighbor, demarcated by the EoRR packet. The function updates
760
 * the load state and ends the routing table refresh cycle. Routes not received
761
 * during the sequence are removed by the nest.
762
 */
763
void
764
bgp_refresh_end(struct bgp_channel *c)
765
{
766
  struct bgp_proto *p = (void *) c->c.proto;
767

    
768
  if (c->load_state != BFS_REFRESHING)
769
  { log(L_WARN "%s: END-OF-RR received without prior BEGIN-OF-RR, ignoring", p->p.name); return; }
770

    
771
  c->load_state = BFS_NONE;
772
  rt_refresh_end(c->c.table, &c->c);
773
}
774

    
775

    
776
static void
777
bgp_send_open(struct bgp_conn *conn)
778
{
779
  DBG("BGP: Sending open\n");
780
  conn->sk->rx_hook = bgp_rx;
781
  conn->sk->tx_hook = bgp_tx;
782
  tm_stop(conn->connect_timer);
783
  bgp_schedule_packet(conn, NULL, PKT_OPEN);
784
  bgp_conn_set_state(conn, BS_OPENSENT);
785
  bgp_start_timer(conn->hold_timer, conn->bgp->cf->initial_hold_time);
786
}
787

    
788
static void
789
bgp_connected(sock *sk)
790
{
791
  struct bgp_conn *conn = sk->data;
792
  struct bgp_proto *p = conn->bgp;
793

    
794
  BGP_TRACE(D_EVENTS, "Connected");
795
  bgp_send_open(conn);
796
}
797

    
798
static void
799
bgp_connect_timeout(timer *t)
800
{
801
  struct bgp_conn *conn = t->data;
802
  struct bgp_proto *p = conn->bgp;
803

    
804
  DBG("BGP: connect_timeout\n");
805
  if (p->p.proto_state == PS_START)
806
  {
807
    bgp_close_conn(conn);
808
    bgp_connect(p);
809
  }
810
  else
811
    bgp_conn_enter_idle_state(conn);
812
}
813

    
814
static void
815
bgp_sock_err(sock *sk, int err)
816
{
817
  struct bgp_conn *conn = sk->data;
818
  struct bgp_proto *p = conn->bgp;
819

    
820
  /*
821
   * This error hook may be called either asynchronously from main
822
   * loop, or synchronously from sk_send().  But sk_send() is called
823
   * only from bgp_tx() and bgp_kick_tx(), which are both called
824
   * asynchronously from main loop. Moreover, they end if err hook is
825
   * called. Therefore, we could suppose that it is always called
826
   * asynchronously.
827
   */
828

    
829
  bgp_store_error(p, conn, BE_SOCKET, err);
830

    
831
  if (err)
832
    BGP_TRACE(D_EVENTS, "Connection lost (%M)", err);
833
  else
834
    BGP_TRACE(D_EVENTS, "Connection closed");
835

    
836
  if ((conn->state == BS_ESTABLISHED) && p->gr_ready)
837
    bgp_handle_graceful_restart(p);
838

    
839
  bgp_conn_enter_idle_state(conn);
840
}
841

    
842
static void
843
bgp_hold_timeout(timer *t)
844
{
845
  struct bgp_conn *conn = t->data;
846
  struct bgp_proto *p = conn->bgp;
847

    
848
  DBG("BGP: Hold timeout\n");
849

    
850
  /* We are already closing the connection - just do hangup */
851
  if (conn->state == BS_CLOSE)
852
  {
853
    BGP_TRACE(D_EVENTS, "Connection stalled");
854
    bgp_conn_enter_idle_state(conn);
855
    return;
856
  }
857

    
858
  /* If there is something in input queue, we are probably congested
859
     and perhaps just not processed BGP packets in time. */
860

    
861
  if (sk_rx_ready(conn->sk) > 0)
862
    bgp_start_timer(conn->hold_timer, 10);
863
  else
864
    bgp_error(conn, 4, 0, NULL, 0);
865
}
866

    
867
static void
868
bgp_keepalive_timeout(timer *t)
869
{
870
  struct bgp_conn *conn = t->data;
871

    
872
  DBG("BGP: Keepalive timer\n");
873
  bgp_schedule_packet(conn, NULL, PKT_KEEPALIVE);
874

    
875
  /* Kick TX a bit faster */
876
  if (ev_active(conn->tx_ev))
877
    ev_run(conn->tx_ev);
878
}
879

    
880
static void
881
bgp_setup_conn(struct bgp_proto *p, struct bgp_conn *conn)
882
{
883
  conn->sk = NULL;
884
  conn->bgp = p;
885

    
886
  conn->packets_to_send = 0;
887
  conn->channels_to_send = 0;
888
  conn->last_channel = 0;
889
  conn->last_channel_count = 0;
890

    
891
  conn->connect_timer        = tm_new_init(p->p.pool, bgp_connect_timeout,         conn, 0, 0);
892
  conn->hold_timer         = tm_new_init(p->p.pool, bgp_hold_timeout,         conn, 0, 0);
893
  conn->keepalive_timer        = tm_new_init(p->p.pool, bgp_keepalive_timeout, conn, 0, 0);
894

    
895
  conn->tx_ev = ev_new(p->p.pool);
896
  conn->tx_ev->hook = bgp_kick_tx;
897
  conn->tx_ev->data = conn;
898
}
899

    
900
static void
901
bgp_setup_sk(struct bgp_conn *conn, sock *s)
902
{
903
  s->data = conn;
904
  s->err_hook = bgp_sock_err;
905
  s->fast_rx = 1;
906
  conn->sk = s;
907
}
908

    
909
static void
910
bgp_active(struct bgp_proto *p)
911
{
912
  int delay = MAX(1, p->cf->connect_delay_time);
913
  struct bgp_conn *conn = &p->outgoing_conn;
914

    
915
  BGP_TRACE(D_EVENTS, "Connect delayed by %d seconds", delay);
916
  bgp_setup_conn(p, conn);
917
  bgp_conn_set_state(conn, BS_ACTIVE);
918
  bgp_start_timer(conn->connect_timer, delay);
919
}
920

    
921
/**
922
 * bgp_connect - initiate an outgoing connection
923
 * @p: BGP instance
924
 *
925
 * The bgp_connect() function creates a new &bgp_conn and initiates
926
 * a TCP connection to the peer. The rest of connection setup is governed
927
 * by the BGP state machine as described in the standard.
928
 */
929
static void
930
bgp_connect(struct bgp_proto *p)        /* Enter Connect state and start establishing connection */
931
{
932
  struct bgp_conn *conn = &p->outgoing_conn;
933
  int hops = p->cf->multihop ? : 1;
934

    
935
  DBG("BGP: Connecting\n");
936
  sock *s = sk_new(p->p.pool);
937
  s->type = SK_TCP_ACTIVE;
938
  s->saddr = p->source_addr;
939
  s->daddr = p->cf->remote_ip;
940
  s->dport = p->cf->remote_port;
941
  s->iface = p->neigh ? p->neigh->iface : NULL;
942
  s->vrf = p->p.vrf;
943
  s->ttl = p->cf->ttl_security ? 255 : hops;
944
  s->rbsize = p->cf->enable_extended_messages ? BGP_RX_BUFFER_EXT_SIZE : BGP_RX_BUFFER_SIZE;
945
  s->tbsize = p->cf->enable_extended_messages ? BGP_TX_BUFFER_EXT_SIZE : BGP_TX_BUFFER_SIZE;
946
  s->tos = IP_PREC_INTERNET_CONTROL;
947
  s->password = p->cf->password;
948
  s->tx_hook = bgp_connected;
949
  BGP_TRACE(D_EVENTS, "Connecting to %I%J from local address %I%J", s->daddr, p->cf->iface,
950
            s->saddr, ipa_is_link_local(s->saddr) ? s->iface : NULL);
951
  bgp_setup_conn(p, conn);
952
  bgp_setup_sk(conn, s);
953
  bgp_conn_set_state(conn, BS_CONNECT);
954

    
955
  if (sk_open(s) < 0)
956
    goto err;
957

    
958
  /* Set minimal receive TTL if needed */
959
  if (p->cf->ttl_security)
960
    if (sk_set_min_ttl(s, 256 - hops) < 0)
961
      goto err;
962

    
963
  DBG("BGP: Waiting for connect success\n");
964
  bgp_start_timer(conn->connect_timer, p->cf->connect_retry_time);
965
  return;
966

    
967
err:
968
  sk_log_error(s, p->p.name);
969
  bgp_sock_err(s, 0);
970
  return;
971
}
972

    
973
/**
974
 * bgp_find_proto - find existing proto for incoming connection
975
 * @sk: TCP socket
976
 *
977
 */
978
static struct bgp_proto *
979
bgp_find_proto(sock *sk)
980
{
981
  struct bgp_proto *p;
982

    
983
  WALK_LIST(p, proto_list)
984
    if ((p->p.proto == &proto_bgp) &&
985
        ipa_equal(p->cf->remote_ip, sk->daddr) &&
986
        (!p->cf->iface  || (p->cf->iface == sk->iface)) &&
987
        (ipa_zero(p->cf->local_ip) || ipa_equal(p->cf->local_ip, sk->saddr)) &&
988
        (p->cf->local_port == sk->sport))
989
      return p;
990

    
991
  return NULL;
992
}
993

    
994
/**
995
 * bgp_incoming_connection - handle an incoming connection
996
 * @sk: TCP socket
997
 * @dummy: unused
998
 *
999
 * This function serves as a socket hook for accepting of new BGP
1000
 * connections. It searches a BGP instance corresponding to the peer
1001
 * which has connected and if such an instance exists, it creates a
1002
 * &bgp_conn structure, attaches it to the instance and either sends
1003
 * an Open message or (if there already is an active connection) it
1004
 * closes the new connection by sending a Notification message.
1005
 */
1006
static int
1007
bgp_incoming_connection(sock *sk, uint dummy UNUSED)
1008
{
1009
  struct bgp_proto *p;
1010
  int acc, hops;
1011

    
1012
  DBG("BGP: Incoming connection from %I port %d\n", sk->daddr, sk->dport);
1013
  p = bgp_find_proto(sk);
1014
  if (!p)
1015
  {
1016
    log(L_WARN "BGP: Unexpected connect from unknown address %I%J (port %d)",
1017
        sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL, sk->dport);
1018
    rfree(sk);
1019
    return 0;
1020
  }
1021

    
1022
  /*
1023
   * BIRD should keep multiple incoming connections in OpenSent state (for
1024
   * details RFC 4271 8.2.1 par 3), but it keeps just one. Duplicate incoming
1025
   * connections are rejected istead. The exception is the case where an
1026
   * incoming connection triggers a graceful restart.
1027
   */
1028

    
1029
  acc = (p->p.proto_state == PS_START || p->p.proto_state == PS_UP) &&
1030
    (p->start_state >= BSS_CONNECT) && (!p->incoming_conn.sk);
1031

    
1032
  if (p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready)
1033
  {
1034
    bgp_store_error(p, NULL, BE_MISC, BEM_GRACEFUL_RESTART);
1035
    bgp_handle_graceful_restart(p);
1036
    bgp_conn_enter_idle_state(p->conn);
1037
    acc = 1;
1038

    
1039
    /* There might be separate incoming connection in OpenSent state */
1040
    if (p->incoming_conn.state > BS_ACTIVE)
1041
      bgp_close_conn(&p->incoming_conn);
1042
  }
1043

    
1044
  BGP_TRACE(D_EVENTS, "Incoming connection from %I%J (port %d) %s",
1045
            sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL,
1046
            sk->dport, acc ? "accepted" : "rejected");
1047

    
1048
  if (!acc)
1049
  {
1050
    rfree(sk);
1051
    return 0;
1052
  }
1053

    
1054
  hops = p->cf->multihop ? : 1;
1055

    
1056
  if (sk_set_ttl(sk, p->cf->ttl_security ? 255 : hops) < 0)
1057
    goto err;
1058

    
1059
  if (p->cf->ttl_security)
1060
    if (sk_set_min_ttl(sk, 256 - hops) < 0)
1061
      goto err;
1062

    
1063
  if (p->cf->enable_extended_messages)
1064
  {
1065
    sk->rbsize = BGP_RX_BUFFER_EXT_SIZE;
1066
    sk->tbsize = BGP_TX_BUFFER_EXT_SIZE;
1067
    sk_reallocate(sk);
1068
  }
1069

    
1070
  bgp_setup_conn(p, &p->incoming_conn);
1071
  bgp_setup_sk(&p->incoming_conn, sk);
1072
  bgp_send_open(&p->incoming_conn);
1073
  return 0;
1074

    
1075
err:
1076
  sk_log_error(sk, p->p.name);
1077
  log(L_ERR "%s: Incoming connection aborted", p->p.name);
1078
  rfree(sk);
1079
  return 0;
1080
}
1081

    
1082
static void
1083
bgp_listen_sock_err(sock *sk UNUSED, int err)
1084
{
1085
  if (err == ECONNABORTED)
1086
    log(L_WARN "BGP: Incoming connection aborted");
1087
  else
1088
    log(L_ERR "BGP: Error on listening socket: %M", err);
1089
}
1090

    
1091
static void
1092
bgp_start_neighbor(struct bgp_proto *p)
1093
{
1094
  /* Called only for single-hop BGP sessions */
1095

    
1096
  if (ipa_zero(p->source_addr))
1097
    p->source_addr = p->neigh->ifa->ip;
1098

    
1099
  if (ipa_is_link_local(p->source_addr))
1100
    p->link_addr = p->source_addr;
1101
  else if (p->neigh->iface->llv6)
1102
    p->link_addr = p->neigh->iface->llv6->ip;
1103

    
1104
  bgp_initiate(p);
1105
}
1106

    
1107
static void
1108
bgp_neigh_notify(neighbor *n)
1109
{
1110
  struct bgp_proto *p = (struct bgp_proto *) n->proto;
1111
  int ps = p->p.proto_state;
1112

    
1113
  if (n != p->neigh)
1114
    return;
1115

    
1116
  if ((ps == PS_DOWN) || (ps == PS_STOP))
1117
    return;
1118

    
1119
  int prepare = (ps == PS_START) && (p->start_state == BSS_PREPARE);
1120

    
1121
  if (n->scope <= 0)
1122
  {
1123
    if (!prepare)
1124
    {
1125
      BGP_TRACE(D_EVENTS, "Neighbor lost");
1126
      bgp_store_error(p, NULL, BE_MISC, BEM_NEIGHBOR_LOST);
1127
      /* Perhaps also run bgp_update_startup_delay(p)? */
1128
      bgp_stop(p, 0, NULL, 0);
1129
    }
1130
  }
1131
  else if (p->cf->check_link && !(n->iface->flags & IF_LINK_UP))
1132
  {
1133
    if (!prepare)
1134
    {
1135
      BGP_TRACE(D_EVENTS, "Link down");
1136
      bgp_store_error(p, NULL, BE_MISC, BEM_LINK_DOWN);
1137
      if (ps == PS_UP)
1138
        bgp_update_startup_delay(p);
1139
      bgp_stop(p, 0, NULL, 0);
1140
    }
1141
  }
1142
  else
1143
  {
1144
    if (prepare)
1145
    {
1146
      BGP_TRACE(D_EVENTS, "Neighbor ready");
1147
      bgp_start_neighbor(p);
1148
    }
1149
  }
1150
}
1151

    
1152
static void
1153
bgp_bfd_notify(struct bfd_request *req)
1154
{
1155
  struct bgp_proto *p = req->data;
1156
  int ps = p->p.proto_state;
1157

    
1158
  if (req->down && ((ps == PS_START) || (ps == PS_UP)))
1159
  {
1160
    BGP_TRACE(D_EVENTS, "BFD session down");
1161
    bgp_store_error(p, NULL, BE_MISC, BEM_BFD_DOWN);
1162
    if (ps == PS_UP)
1163
      bgp_update_startup_delay(p);
1164
    bgp_stop(p, 0, NULL, 0);
1165
  }
1166
}
1167

    
1168
static void
1169
bgp_update_bfd(struct bgp_proto *p, int use_bfd)
1170
{
1171
  if (use_bfd && !p->bfd_req)
1172
    p->bfd_req = bfd_request_session(p->p.pool, p->cf->remote_ip, p->source_addr,
1173
                                     p->cf->multihop ? NULL : p->neigh->iface,
1174
                                     bgp_bfd_notify, p);
1175

    
1176
  if (!use_bfd && p->bfd_req)
1177
  {
1178
    rfree(p->bfd_req);
1179
    p->bfd_req = NULL;
1180
  }
1181
}
1182

    
1183
static void
1184
bgp_reload_routes(struct channel *C)
1185
{
1186
  struct bgp_proto *p = (void *) C->proto;
1187
  struct bgp_channel *c = (void *) C;
1188

    
1189
  ASSERT(p->conn && p->route_refresh);
1190

    
1191
  bgp_schedule_packet(p->conn, c, PKT_ROUTE_REFRESH);
1192
}
1193

    
1194
static void
1195
bgp_feed_begin(struct channel *C, int initial)
1196
{
1197
  struct bgp_proto *p = (void *) C->proto;
1198
  struct bgp_channel *c = (void *) C;
1199

    
1200
  /* This should not happen */
1201
  if (!p->conn)
1202
    return;
1203

    
1204
  if (initial && p->cf->gr_mode)
1205
    c->feed_state = BFS_LOADING;
1206

    
1207
  /* It is refeed and both sides support enhanced route refresh */
1208
  if (!initial && p->enhanced_refresh)
1209
  {
1210
    /* BoRR must not be sent before End-of-RIB */
1211
    if (c->feed_state == BFS_LOADING || c->feed_state == BFS_LOADED)
1212
      return;
1213

    
1214
    c->feed_state = BFS_REFRESHING;
1215
    bgp_schedule_packet(p->conn, c, PKT_BEGIN_REFRESH);
1216
  }
1217
}
1218

    
1219
static void
1220
bgp_feed_end(struct channel *C)
1221
{
1222
  struct bgp_proto *p = (void *) C->proto;
1223
  struct bgp_channel *c = (void *) C;
1224

    
1225
  /* This should not happen */
1226
  if (!p->conn)
1227
    return;
1228

    
1229
  /* Non-demarcated feed ended, nothing to do */
1230
  if (c->feed_state == BFS_NONE)
1231
    return;
1232

    
1233
  /* Schedule End-of-RIB packet */
1234
  if (c->feed_state == BFS_LOADING)
1235
    c->feed_state = BFS_LOADED;
1236

    
1237
  /* Schedule EoRR packet */
1238
  if (c->feed_state == BFS_REFRESHING)
1239
    c->feed_state = BFS_REFRESHED;
1240

    
1241
  /* Kick TX hook */
1242
  bgp_schedule_packet(p->conn, c, PKT_UPDATE);
1243
}
1244

    
1245

    
1246
static void
1247
bgp_start_locked(struct object_lock *lock)
1248
{
1249
  struct bgp_proto *p = lock->data;
1250
  struct bgp_config *cf = p->cf;
1251

    
1252
  if (p->p.proto_state != PS_START)
1253
  {
1254
    DBG("BGP: Got lock in different state %d\n", p->p.proto_state);
1255
    return;
1256
  }
1257

    
1258
  DBG("BGP: Got lock\n");
1259

    
1260
  if (cf->multihop)
1261
  {
1262
    /* Multi-hop sessions do not use neighbor entries */
1263
    bgp_initiate(p);
1264
    return;
1265
  }
1266

    
1267
  neighbor *n = neigh_find2(&p->p, &cf->remote_ip, cf->iface, NEF_STICKY);
1268
  if (!n)
1269
  {
1270
    log(L_ERR "%s: Invalid remote address %I%J", p->p.name, cf->remote_ip, cf->iface);
1271
    /* As we do not start yet, we can just disable protocol */
1272
    p->p.disabled = 1;
1273
    bgp_store_error(p, NULL, BE_MISC, BEM_INVALID_NEXT_HOP);
1274
    proto_notify_state(&p->p, PS_DOWN);
1275
    return;
1276
  }
1277

    
1278
  p->neigh = n;
1279

    
1280
  if (n->scope <= 0)
1281
    BGP_TRACE(D_EVENTS, "Waiting for %I%J to become my neighbor", cf->remote_ip, cf->iface);
1282
  else if (p->cf->check_link && !(n->iface->flags & IF_LINK_UP))
1283
    BGP_TRACE(D_EVENTS, "Waiting for link on %s", n->iface->name);
1284
  else
1285
    bgp_start_neighbor(p);
1286
}
1287

    
1288
static int
1289
bgp_start(struct proto *P)
1290
{
1291
  struct bgp_proto *p = (struct bgp_proto *) P;
1292
  struct object_lock *lock;
1293

    
1294
  DBG("BGP: Startup.\n");
1295
  p->start_state = BSS_PREPARE;
1296
  p->outgoing_conn.state = BS_IDLE;
1297
  p->incoming_conn.state = BS_IDLE;
1298
  p->neigh = NULL;
1299
  p->bfd_req = NULL;
1300
  p->gr_ready = 0;
1301
  p->gr_active_num = 0;
1302

    
1303
  p->event = ev_new(p->p.pool);
1304
  p->event->hook = bgp_decision;
1305
  p->event->data = p;
1306

    
1307
  p->startup_timer = tm_new_init(p->p.pool, bgp_startup_timeout, p, 0, 0);
1308
  p->gr_timer = tm_new_init(p->p.pool, bgp_graceful_restart_timeout, p, 0, 0);
1309

    
1310
  p->local_id = proto_get_router_id(P->cf);
1311
  if (p->rr_client)
1312
    p->rr_cluster_id = p->cf->rr_cluster_id ? p->cf->rr_cluster_id : p->local_id;
1313

    
1314
  p->remote_id = 0;
1315
  p->source_addr = p->cf->local_ip;
1316
  p->link_addr = IPA_NONE;
1317

    
1318
  /* XXXX */
1319
  if (p->p.gr_recovery && p->cf->gr_mode)
1320
  {
1321
    struct bgp_channel *c;
1322
    WALK_LIST(c, p->p.channels)
1323
      channel_graceful_restart_lock(&c->c);
1324
  }
1325

    
1326
  /*
1327
   * Before attempting to create the connection, we need to lock the port,
1328
   * so that we are the only instance attempting to talk with that neighbor.
1329
   */
1330

    
1331
  lock = p->lock = olock_new(P->pool);
1332
  lock->addr = p->cf->remote_ip;
1333
  lock->port = p->cf->remote_port;
1334
  lock->iface = p->cf->iface;
1335
  lock->vrf = p->cf->iface ? NULL : p->p.vrf;
1336
  lock->type = OBJLOCK_TCP;
1337
  lock->hook = bgp_start_locked;
1338
  lock->data = p;
1339
  olock_acquire(lock);
1340

    
1341
  return PS_START;
1342
}
1343

    
1344
extern int proto_restart;
1345

    
1346
static int
1347
bgp_shutdown(struct proto *P)
1348
{
1349
  struct bgp_proto *p = (struct bgp_proto *) P;
1350
  uint subcode = 0;
1351

    
1352
  char *message = NULL;
1353
  byte *data = NULL;
1354
  uint len = 0;
1355

    
1356
  BGP_TRACE(D_EVENTS, "Shutdown requested");
1357

    
1358
  switch (P->down_code)
1359
  {
1360
  case PDC_CF_REMOVE:
1361
  case PDC_CF_DISABLE:
1362
    subcode = 3; // Errcode 6, 3 - peer de-configured
1363
    break;
1364

    
1365
  case PDC_CF_RESTART:
1366
    subcode = 6; // Errcode 6, 6 - other configuration change
1367
    break;
1368

    
1369
  case PDC_CMD_DISABLE:
1370
  case PDC_CMD_SHUTDOWN:
1371
    subcode = 2; // Errcode 6, 2 - administrative shutdown
1372
    message = P->message;
1373
    break;
1374

    
1375
  case PDC_CMD_RESTART:
1376
    subcode = 4; // Errcode 6, 4 - administrative reset
1377
    message = P->message;
1378
    break;
1379

    
1380
  case PDC_RX_LIMIT_HIT:
1381
  case PDC_IN_LIMIT_HIT:
1382
    subcode = 1; // Errcode 6, 1 - max number of prefixes reached
1383
    /* log message for compatibility */
1384
    log(L_WARN "%s: Route limit exceeded, shutting down", p->p.name);
1385
    goto limit;
1386

    
1387
  case PDC_OUT_LIMIT_HIT:
1388
    subcode = proto_restart ? 4 : 2; // Administrative reset or shutdown
1389

    
1390
  limit:
1391
    bgp_store_error(p, NULL, BE_AUTO_DOWN, BEA_ROUTE_LIMIT_EXCEEDED);
1392
    if (proto_restart)
1393
      bgp_update_startup_delay(p);
1394
    else
1395
      p->startup_delay = 0;
1396
    goto done;
1397
  }
1398

    
1399
  bgp_store_error(p, NULL, BE_MAN_DOWN, 0);
1400
  p->startup_delay = 0;
1401

    
1402
  /* RFC 8203 - shutdown communication */
1403
  if (message)
1404
  {
1405
    uint msg_len = strlen(message);
1406
    msg_len = MIN(msg_len, 128);
1407

    
1408
    /* Buffer will be freed automatically by protocol shutdown */
1409
    data = mb_alloc(p->p.pool, msg_len + 1);
1410
    len = msg_len + 1;
1411

    
1412
    data[0] = msg_len;
1413
    memcpy(data+1, message, msg_len);
1414
  }
1415

    
1416
done:
1417
  bgp_stop(p, subcode, data, len);
1418
  return p->p.proto_state;
1419
}
1420

    
1421
static struct proto *
1422
bgp_init(struct proto_config *CF)
1423
{
1424
  struct proto *P = proto_new(CF);
1425
  struct bgp_proto *p = (struct bgp_proto *) P;
1426
  struct bgp_config *cf = (struct bgp_config *) CF;
1427

    
1428
  P->rt_notify = bgp_rt_notify;
1429
  P->import_control = bgp_import_control;
1430
  P->neigh_notify = bgp_neigh_notify;
1431
  P->reload_routes = bgp_reload_routes;
1432
  P->feed_begin = bgp_feed_begin;
1433
  P->feed_end = bgp_feed_end;
1434
  P->rte_better = bgp_rte_better;
1435
  P->rte_mergable = bgp_rte_mergable;
1436
  P->rte_recalculate = cf->deterministic_med ? bgp_rte_recalculate : NULL;
1437

    
1438
  p->cf = cf;
1439
  p->local_as = cf->local_as;
1440
  p->remote_as = cf->remote_as;
1441
  p->public_as = cf->local_as;
1442
  p->is_internal = (cf->local_as == cf->remote_as);
1443
  p->is_interior = p->is_internal || cf->confederation_member;
1444
  p->rs_client = cf->rs_client;
1445
  p->rr_client = cf->rr_client;
1446

    
1447
  /* Confederation ID is used for truly external peers */
1448
  if (cf->confederation && !p->is_interior)
1449
    p->public_as = cf->confederation;
1450

    
1451
  /* Add all channels */
1452
  struct bgp_channel_config *cc;
1453
  WALK_LIST(cc, CF->channels)
1454
    proto_add_channel(P, &cc->c);
1455

    
1456
  return P;
1457
}
1458

    
1459
static void
1460
bgp_channel_init(struct channel *C, struct channel_config *CF)
1461
{
1462
  struct bgp_channel *c = (void *) C;
1463
  struct bgp_channel_config *cf = (void *) CF;
1464

    
1465
  c->cf = cf;
1466
  c->afi = cf->afi;
1467
  c->desc = cf->desc;
1468

    
1469
  if (cf->igp_table_ip4)
1470
    c->igp_table_ip4 = cf->igp_table_ip4->table;
1471

    
1472
  if (cf->igp_table_ip6)
1473
    c->igp_table_ip6 = cf->igp_table_ip6->table;
1474
}
1475

    
1476
static int
1477
bgp_channel_start(struct channel *C)
1478
{
1479
  struct bgp_proto *p = (void *) C->proto;
1480
  struct bgp_channel *c = (void *) C;
1481
  ip_addr src = p->source_addr;
1482

    
1483
  if (c->igp_table_ip4)
1484
    rt_lock_table(c->igp_table_ip4);
1485

    
1486
  if (c->igp_table_ip6)
1487
    rt_lock_table(c->igp_table_ip6);
1488

    
1489
  c->pool = p->p.pool; // XXXX
1490
  bgp_init_bucket_table(c);
1491
  bgp_init_prefix_table(c);
1492

    
1493
  c->next_hop_addr = c->cf->next_hop_addr;
1494
  c->link_addr = IPA_NONE;
1495
  c->packets_to_send = 0;
1496

    
1497
  /* Try to use source address as next hop address */
1498
  if (ipa_zero(c->next_hop_addr))
1499
  {
1500
    if (bgp_channel_is_ipv4(c) && (ipa_is_ip4(src) || c->ext_next_hop))
1501
      c->next_hop_addr = src;
1502

    
1503
    if (bgp_channel_is_ipv6(c) && (ipa_is_ip6(src) || c->ext_next_hop))
1504
      c->next_hop_addr = src;
1505
  }
1506

    
1507
  /* Use preferred addresses associated with interface / source address */
1508
  if (ipa_zero(c->next_hop_addr))
1509
  {
1510
    /* We know the iface for single-hop, we make lookup for multihop */
1511
    struct neighbor *nbr = p->neigh ?: neigh_find2(&p->p, &src, NULL, 0);
1512
    struct iface *iface = nbr ? nbr->iface : NULL;
1513

    
1514
    if (bgp_channel_is_ipv4(c) && iface && iface->addr4)
1515
      c->next_hop_addr = iface->addr4->ip;
1516

    
1517
    if (bgp_channel_is_ipv6(c) && iface && iface->addr6)
1518
      c->next_hop_addr = iface->addr6->ip;
1519
  }
1520

    
1521
  /* Exit if no feasible next hop address is found */
1522
  if (ipa_zero(c->next_hop_addr))
1523
  {
1524
    log(L_WARN "%s: Missing next hop address", p->p.name);
1525
    return 0;
1526
  }
1527

    
1528
  /* Set link-local address for IPv6 single-hop BGP */
1529
  if (ipa_is_ip6(c->next_hop_addr) && p->neigh)
1530
  {
1531
    c->link_addr = p->link_addr;
1532

    
1533
    if (ipa_zero(c->link_addr))
1534
      log(L_WARN "%s: Missing link-local address", p->p.name);
1535
  }
1536

    
1537
  /* Link local address is already in c->link_addr */
1538
  if (ipa_is_link_local(c->next_hop_addr))
1539
    c->next_hop_addr = IPA_NONE;
1540

    
1541
  return 0; /* XXXX: Currently undefined */
1542
}
1543

    
1544
static void
1545
bgp_channel_shutdown(struct channel *C)
1546
{
1547
  struct bgp_channel *c = (void *) C;
1548

    
1549
  /* XXXX: cleanup bucket and prefix tables */
1550

    
1551
  c->next_hop_addr = IPA_NONE;
1552
  c->link_addr = IPA_NONE;
1553
}
1554

    
1555
static void
1556
bgp_channel_cleanup(struct channel *C)
1557
{
1558
  struct bgp_channel *c = (void *) C;
1559

    
1560
  if (c->igp_table_ip4)
1561
    rt_unlock_table(c->igp_table_ip4);
1562

    
1563
  if (c->igp_table_ip6)
1564
    rt_unlock_table(c->igp_table_ip6);
1565
}
1566

    
1567
static inline struct bgp_channel_config *
1568
bgp_find_channel_config(struct bgp_config *cf, u32 afi)
1569
{
1570
  struct bgp_channel_config *cc;
1571

    
1572
  WALK_LIST(cc, cf->c.channels)
1573
    if (cc->afi == afi)
1574
      return cc;
1575

    
1576
  return NULL;
1577
}
1578

    
1579
struct rtable_config *
1580
bgp_default_igp_table(struct bgp_config *cf, struct bgp_channel_config *cc, u32 type)
1581
{
1582
  struct bgp_channel_config *cc2;
1583
  struct rtable_config *tab;
1584

    
1585
  /* First, try table connected by the channel */
1586
  if (cc->c.table->addr_type == type)
1587
    return cc->c.table;
1588

    
1589
  /* Find paired channel with the same SAFI but the other AFI */
1590
  u32 afi2 = cc->afi ^ 0x30000;
1591
  cc2 = bgp_find_channel_config(cf, afi2);
1592

    
1593
  /* Second, try IGP table configured in the paired channel */
1594
  if (cc2 && (tab = (type == NET_IP4) ? cc2->igp_table_ip4 : cc2->igp_table_ip6))
1595
    return tab;
1596

    
1597
  /* Third, try table connected by the paired channel */
1598
  if (cc2 && (cc2->c.table->addr_type == type))
1599
    return cc2->c.table;
1600

    
1601
  /* Last, try default table of given type */
1602
  if (tab = cf->c.global->def_tables[type])
1603
    return tab;
1604

    
1605
  cf_error("Undefined IGP table");
1606
}
1607

    
1608

    
1609
void
1610
bgp_postconfig(struct proto_config *CF)
1611
{
1612
  struct bgp_config *cf = (void *) CF;
1613
  int internal = (cf->local_as == cf->remote_as);
1614

    
1615
  /* Do not check templates at all */
1616
  if (cf->c.class == SYM_TEMPLATE)
1617
    return;
1618

    
1619

    
1620
  /* EBGP direct by default, IBGP multihop by default */
1621
  if (cf->multihop < 0)
1622
    cf->multihop = internal ? 64 : 0;
1623

    
1624

    
1625
  if (!cf->local_as)
1626
    cf_error("Local AS number must be set");
1627

    
1628
  if (ipa_zero(cf->remote_ip))
1629
    cf_error("Neighbor must be configured");
1630

    
1631
  if (!cf->remote_as)
1632
    cf_error("Remote AS number must be set");
1633

    
1634
  if (ipa_is_link_local(cf->remote_ip) && !cf->iface)
1635
    cf_error("Link-local neighbor address requires specified interface");
1636

    
1637
  if (!(cf->capabilities && cf->enable_as4) && (cf->remote_as > 0xFFFF))
1638
    cf_error("Neighbor AS number out of range (AS4 not available)");
1639

    
1640
  if (!internal && cf->rr_client)
1641
    cf_error("Only internal neighbor can be RR client");
1642

    
1643
  if (internal && cf->rs_client)
1644
    cf_error("Only external neighbor can be RS client");
1645

    
1646
  if (!cf->confederation && cf->confederation_member)
1647
    cf_error("Confederation ID must be set for member sessions");
1648

    
1649
  if (cf->multihop && (ipa_is_link_local(cf->local_ip) ||
1650
                       ipa_is_link_local(cf->remote_ip)))
1651
    cf_error("Multihop BGP cannot be used with link-local addresses");
1652

    
1653
  if (cf->multihop && cf->iface)
1654
    cf_error("Multihop BGP cannot be bound to interface");
1655

    
1656
  if (cf->multihop && cf->check_link)
1657
    cf_error("Multihop BGP cannot depend on link state");
1658

    
1659
  if (cf->multihop && cf->bfd && ipa_zero(cf->local_ip))
1660
    cf_error("Multihop BGP with BFD requires specified local address");
1661

    
1662

    
1663
  struct bgp_channel_config *cc;
1664
  WALK_LIST(cc, CF->channels)
1665
  {
1666
    /* Disable after error incompatible with restart limit action */
1667
    if ((cc->c.in_limit.action == PLA_RESTART) && cf->disable_after_error)
1668
      cc->c.in_limit.action = PLA_DISABLE;
1669

    
1670
    /* Different default based on rs_client */
1671
    if (!cc->missing_lladdr)
1672
      cc->missing_lladdr = cf->rs_client ? MLL_IGNORE : MLL_SELF;
1673

    
1674
    /* Different default for gw_mode */
1675
    if (!cc->gw_mode)
1676
      cc->gw_mode = cf->multihop ? GW_RECURSIVE : GW_DIRECT;
1677

    
1678
    /* Default based on proto config */
1679
    if (cc->gr_able == 0xff)
1680
      cc->gr_able = (cf->gr_mode == BGP_GR_ABLE);
1681

    
1682
    /* Default values of IGP tables */
1683
    if ((cc->gw_mode == GW_RECURSIVE) && !cc->desc->no_igp)
1684
    {
1685
      if (!cc->igp_table_ip4 && (bgp_cc_is_ipv4(cc) || cc->ext_next_hop))
1686
        cc->igp_table_ip4 = bgp_default_igp_table(cf, cc, NET_IP4);
1687

    
1688
      if (!cc->igp_table_ip6 && (bgp_cc_is_ipv6(cc) || cc->ext_next_hop))
1689
        cc->igp_table_ip6 = bgp_default_igp_table(cf, cc, NET_IP6);
1690

    
1691
      if (cc->igp_table_ip4 && bgp_cc_is_ipv6(cc) && !cc->ext_next_hop)
1692
        cf_error("Mismatched IGP table type");
1693

    
1694
      if (cc->igp_table_ip6 && bgp_cc_is_ipv4(cc) && !cc->ext_next_hop)
1695
        cf_error("Mismatched IGP table type");
1696
    }
1697

    
1698
    if (cf->multihop && (cc->gw_mode == GW_DIRECT))
1699
      cf_error("Multihop BGP cannot use direct gateway mode");
1700

    
1701
    if ((cc->gw_mode == GW_RECURSIVE) && cc->c.table->sorted)
1702
      cf_error("BGP in recursive mode prohibits sorted table");
1703

    
1704
    if (cf->deterministic_med && cc->c.table->sorted)
1705
      cf_error("BGP with deterministic MED prohibits sorted table");
1706

    
1707
    if (cc->secondary && !cc->c.table->sorted)
1708
      cf_error("BGP with secondary option requires sorted table");
1709
  }
1710
}
1711

    
1712
static int
1713
bgp_reconfigure(struct proto *P, struct proto_config *CF)
1714
{
1715
  struct bgp_proto *p = (void *) P;
1716
  struct bgp_config *new = (void *) CF;
1717
  struct bgp_config *old = p->cf;
1718

    
1719
  if (proto_get_router_id(CF) != p->local_id)
1720
    return 0;
1721

    
1722
  int same = !memcmp(((byte *) old) + sizeof(struct proto_config),
1723
                     ((byte *) new) + sizeof(struct proto_config),
1724
                     // password item is last and must be checked separately
1725
                     OFFSETOF(struct bgp_config, password) - sizeof(struct proto_config))
1726
    && ((!old->password && !new->password)
1727
        || (old->password && new->password && !strcmp(old->password, new->password)));
1728

    
1729
  /* FIXME: Move channel reconfiguration to generic protocol code ? */
1730
  struct channel *C, *C2;
1731
  struct bgp_channel_config *cc;
1732

    
1733
  WALK_LIST(C, p->p.channels)
1734
    C->stale = 1;
1735

    
1736
  WALK_LIST(cc, new->c.channels)
1737
  {
1738
    C = (struct channel *) bgp_find_channel(p, cc->afi);
1739
    same = proto_configure_channel(P, &C, &cc->c) && same;
1740
    C->stale = 0;
1741
  }
1742

    
1743
  WALK_LIST_DELSAFE(C, C2, p->p.channels)
1744
    if (C->stale)
1745
      same = proto_configure_channel(P, &C, NULL) && same;
1746

    
1747

    
1748
  if (same && (p->start_state > BSS_PREPARE))
1749
    bgp_update_bfd(p, new->bfd);
1750

    
1751
  /* We should update our copy of configuration ptr as old configuration will be freed */
1752
  if (same)
1753
    p->cf = new;
1754

    
1755
  return same;
1756
}
1757

    
1758
#define IGP_TABLE(cf, sym) ((cf)->igp_table_##sym ? (cf)->igp_table_##sym ->table : NULL )
1759

    
1760
static int
1761
bgp_channel_reconfigure(struct channel *C, struct channel_config *CC)
1762
{
1763
  struct bgp_channel *c = (void *) C;
1764
  struct bgp_channel_config *new = (void *) CC;
1765
  struct bgp_channel_config *old = c->cf;
1766

    
1767
  if (memcmp(((byte *) old) + sizeof(struct channel_config),
1768
             ((byte *) new) + sizeof(struct channel_config),
1769
             /* Remaining items must be checked separately */
1770
             OFFSETOF(struct bgp_channel_config, rest) - sizeof(struct channel_config)))
1771
    return 0;
1772

    
1773
  /* Check change in IGP tables */
1774
  if ((IGP_TABLE(old, ip4) != IGP_TABLE(new, ip4)) ||
1775
      (IGP_TABLE(old, ip6) != IGP_TABLE(new, ip6)))
1776
    return 0;
1777

    
1778
  c->cf = new;
1779
  return 1;
1780
}
1781

    
1782
static void
1783
bgp_copy_config(struct proto_config *dest UNUSED, struct proto_config *src UNUSED)
1784
{
1785
  /* Just a shallow copy */
1786
}
1787

    
1788

    
1789
/**
1790
 * bgp_error - report a protocol error
1791
 * @c: connection
1792
 * @code: error code (according to the RFC)
1793
 * @subcode: error sub-code
1794
 * @data: data to be passed in the Notification message
1795
 * @len: length of the data
1796
 *
1797
 * bgp_error() sends a notification packet to tell the other side that a protocol
1798
 * error has occurred (including the data considered erroneous if possible) and
1799
 * closes the connection.
1800
 */
1801
void
1802
bgp_error(struct bgp_conn *c, uint code, uint subcode, byte *data, int len)
1803
{
1804
  struct bgp_proto *p = c->bgp;
1805

    
1806
  if (c->state == BS_CLOSE)
1807
    return;
1808

    
1809
  bgp_log_error(p, BE_BGP_TX, "Error", code, subcode, data, ABS(len));
1810
  bgp_store_error(p, c, BE_BGP_TX, (code << 16) | subcode);
1811
  bgp_conn_enter_close_state(c);
1812

    
1813
  c->notify_code = code;
1814
  c->notify_subcode = subcode;
1815
  c->notify_data = data;
1816
  c->notify_size = (len > 0) ? len : 0;
1817
  bgp_schedule_packet(c, NULL, PKT_NOTIFICATION);
1818

    
1819
  if (code != 6)
1820
  {
1821
    bgp_update_startup_delay(p);
1822
    bgp_stop(p, 0, NULL, 0);
1823
  }
1824
}
1825

    
1826
/**
1827
 * bgp_store_error - store last error for status report
1828
 * @p: BGP instance
1829
 * @c: connection
1830
 * @class: error class (BE_xxx constants)
1831
 * @code: error code (class specific)
1832
 *
1833
 * bgp_store_error() decides whether given error is interesting enough
1834
 * and store that error to last_error variables of @p
1835
 */
1836
void
1837
bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code)
1838
{
1839
  /* During PS_UP, we ignore errors on secondary connection */
1840
  if ((p->p.proto_state == PS_UP) && c && (c != p->conn))
1841
    return;
1842

    
1843
  /* During PS_STOP, we ignore any errors, as we want to report
1844
   * the error that caused transition to PS_STOP
1845
   */
1846
  if (p->p.proto_state == PS_STOP)
1847
    return;
1848

    
1849
  p->last_error_class = class;
1850
  p->last_error_code = code;
1851
}
1852

    
1853
/* Names of connection states, used by bgp_state_dsc() */
static char *bgp_state_names[] = {
  "Idle",
  "Connect",
  "Active",
  "OpenSent",
  "OpenConfirm",
  "Established",
  "Close"
};

/* Status prefixes, indexed by the stored last error class */
static char *bgp_err_classes[] = {
  "",
  "Error: ",
  "Socket: ",
  "Received: ",
  "BGP Error: ",
  "Automatic shutdown: ",
  ""
};

/* Messages for the BE_MISC error class, indexed by the stored error code */
static char *bgp_misc_errors[] = {
  "",
  "Neighbor lost",
  "Invalid next hop",
  "Kernel MD5 auth failed",
  "No listening socket",
  "Link down",
  "BFD session down",
  "Graceful restart"
};

/* Messages for the BE_AUTO_DOWN error class, indexed by the stored error code */
static char *bgp_auto_errors[] = {
  "",
  "Route limit exceeded"
};
1857

    
1858
static const char *
1859
bgp_last_errmsg(struct bgp_proto *p)
1860
{
1861
  switch (p->last_error_class)
1862
  {
1863
  case BE_MISC:
1864
    return bgp_misc_errors[p->last_error_code];
1865
  case BE_SOCKET:
1866
    return (p->last_error_code == 0) ? "Connection closed" : strerror(p->last_error_code);
1867
  case BE_BGP_RX:
1868
  case BE_BGP_TX:
1869
    return bgp_error_dsc(p->last_error_code >> 16, p->last_error_code & 0xFF);
1870
  case BE_AUTO_DOWN:
1871
    return bgp_auto_errors[p->last_error_code];
1872
  default:
1873
    return "";
1874
  }
1875
}
1876

    
1877
static const char *
1878
bgp_state_dsc(struct bgp_proto *p)
1879
{
1880
  if (p->p.proto_state == PS_DOWN)
1881
    return "Down";
1882

    
1883
  int state = MAX(p->incoming_conn.state, p->outgoing_conn.state);
1884
  if ((state == BS_IDLE) && (p->start_state >= BSS_CONNECT) && p->cf->passive)
1885
    return "Passive";
1886

    
1887
  return bgp_state_names[state];
1888
}
1889

    
1890
static void
1891
bgp_get_status(struct proto *P, byte *buf)
1892
{
1893
  struct bgp_proto *p = (struct bgp_proto *) P;
1894

    
1895
  const char *err1 = bgp_err_classes[p->last_error_class];
1896
  const char *err2 = bgp_last_errmsg(p);
1897

    
1898
  if (P->proto_state == PS_DOWN)
1899
    bsprintf(buf, "%s%s", err1, err2);
1900
  else
1901
    bsprintf(buf, "%-14s%s%s", bgp_state_dsc(p), err1, err2);
1902
}
1903

    
1904
static void
1905
bgp_show_afis(int code, char *s, u32 *afis, uint count)
1906
{
1907
  buffer b;
1908
  LOG_BUFFER_INIT(b);
1909

    
1910
  buffer_puts(&b, s);
1911

    
1912
  for (u32 *af = afis; af < (afis + count); af++)
1913
  {
1914
    const struct bgp_af_desc *desc = bgp_get_af_desc(*af);
1915
    if (desc)
1916
      buffer_print(&b, " %s", desc->name);
1917
    else
1918
      buffer_print(&b, " <%u/%u>", BGP_AFI(*af), BGP_SAFI(*af));
1919
  }
1920

    
1921
  if (b.pos == b.end)
1922
    strcpy(b.end - 32, " ... <too long>");
1923

    
1924
  cli_msg(code, b.start);
1925
}
1926

    
1927
static void
1928
bgp_show_capabilities(struct bgp_proto *p UNUSED, struct bgp_caps *caps)
1929
{
1930
  struct bgp_af_caps *ac;
1931
  uint any_mp_bgp = 0;
1932
  uint any_gr_able = 0;
1933
  uint any_add_path = 0;
1934
  uint any_ext_next_hop = 0;
1935
  u32 *afl1 = alloca(caps->af_count * sizeof(u32));
1936
  u32 *afl2 = alloca(caps->af_count * sizeof(u32));
1937
  uint afn1, afn2;
1938

    
1939
  WALK_AF_CAPS(caps, ac)
1940
  {
1941
    any_mp_bgp |= ac->ready;
1942
    any_gr_able |= ac->gr_able;
1943
    any_add_path |= ac->add_path;
1944
    any_ext_next_hop |= ac->ext_next_hop;
1945
  }
1946

    
1947
  if (any_mp_bgp)
1948
  {
1949
    cli_msg(-1006, "      Multiprotocol");
1950

    
1951
    afn1 = 0;
1952
    WALK_AF_CAPS(caps, ac)
1953
      if (ac->ready)
1954
        afl1[afn1++] = ac->afi;
1955

    
1956
    bgp_show_afis(-1006, "        AF announced:", afl1, afn1);
1957
  }
1958

    
1959
  if (caps->route_refresh)
1960
    cli_msg(-1006, "      Route refresh");
1961

    
1962
  if (any_ext_next_hop)
1963
  {
1964
    cli_msg(-1006, "      Extended next hop");
1965

    
1966
    afn1 = 0;
1967
    WALK_AF_CAPS(caps, ac)
1968
      if (ac->ext_next_hop)
1969
        afl1[afn1++] = ac->afi;
1970

    
1971
    bgp_show_afis(-1006, "        IPv6 nexthop:", afl1, afn1);
1972
  }
1973

    
1974
  if (caps->ext_messages)
1975
    cli_msg(-1006, "      Extended message");
1976

    
1977
  if (caps->gr_aware)
1978
    cli_msg(-1006, "      Graceful restart");
1979

    
1980
  if (any_gr_able)
1981
  {
1982
    /* Continues from gr_aware */
1983
    cli_msg(-1006, "        Restart time: %u", caps->gr_time);
1984
    if (caps->gr_flags & BGP_GRF_RESTART)
1985
      cli_msg(-1006, "        Restart recovery");
1986

    
1987
    afn1 = afn2 = 0;
1988
    WALK_AF_CAPS(caps, ac)
1989
    {
1990
      if (ac->gr_able)
1991
        afl1[afn1++] = ac->afi;
1992

    
1993
      if (ac->gr_af_flags & BGP_GRF_FORWARDING)
1994
        afl2[afn2++] = ac->afi;
1995
    }
1996

    
1997
    bgp_show_afis(-1006, "        AF supported:", afl1, afn1);
1998
    bgp_show_afis(-1006, "        AF preserved:", afl2, afn2);
1999
  }
2000

    
2001
  if (caps->as4_support)
2002
    cli_msg(-1006, "      4-octet AS numbers");
2003

    
2004
  if (any_add_path)
2005
  {
2006
    cli_msg(-1006, "      ADD-PATH");
2007

    
2008
    afn1 = afn2 = 0;
2009
    WALK_AF_CAPS(caps, ac)
2010
    {
2011
      if (ac->add_path & BGP_ADD_PATH_RX)
2012
        afl1[afn1++] = ac->afi;
2013

    
2014
      if (ac->add_path & BGP_ADD_PATH_TX)
2015
        afl2[afn2++] = ac->afi;
2016
    }
2017

    
2018
    bgp_show_afis(-1006, "        RX:", afl1, afn1);
2019
    bgp_show_afis(-1006, "        TX:", afl2, afn2);
2020
  }
2021

    
2022
  if (caps->enhanced_refresh)
2023
    cli_msg(-1006, "      Enhanced refresh");
2024
}
2025

    
2026
static void
2027
bgp_show_proto_info(struct proto *P)
2028
{
2029
  struct bgp_proto *p = (struct bgp_proto *) P;
2030

    
2031
  cli_msg(-1006, "  BGP state:          %s", bgp_state_dsc(p));
2032
  cli_msg(-1006, "    Neighbor address: %I%J", p->cf->remote_ip, p->cf->iface);
2033
  cli_msg(-1006, "    Neighbor AS:      %u", p->remote_as);
2034

    
2035
  if (p->gr_active_num)
2036
    cli_msg(-1006, "    Neighbor graceful restart active");
2037

    
2038
  if (P->proto_state == PS_START)
2039
  {
2040
    struct bgp_conn *oc = &p->outgoing_conn;
2041

    
2042
    if ((p->start_state < BSS_CONNECT) &&
2043
        (tm_active(p->startup_timer)))
2044
      cli_msg(-1006, "    Error wait:       %t/%u",
2045
              tm_remains(p->startup_timer), p->startup_delay);
2046

    
2047
    if ((oc->state == BS_ACTIVE) &&
2048
        (tm_active(oc->connect_timer)))
2049
      cli_msg(-1006, "    Connect delay:    %t/%u",
2050
              tm_remains(oc->connect_timer), p->cf->connect_delay_time);
2051

    
2052
    if (p->gr_active_num && tm_active(p->gr_timer))
2053
      cli_msg(-1006, "    Restart timer:    %t/-",
2054
              tm_remains(p->gr_timer));
2055
  }
2056
  else if (P->proto_state == PS_UP)
2057
  {
2058
    cli_msg(-1006, "    Neighbor ID:      %R", p->remote_id);
2059
    cli_msg(-1006, "    Local capabilities");
2060
    bgp_show_capabilities(p, p->conn->local_caps);
2061
    cli_msg(-1006, "    Neighbor capabilities");
2062
    bgp_show_capabilities(p, p->conn->remote_caps);
2063
/* XXXX
2064
      cli_msg(-1006, "    Session:          %s%s%s%s%s%s%s%s",
2065
              p->is_internal ? "internal" : "external",
2066
              p->cf->multihop ? " multihop" : "",
2067
              p->rr_client ? " route-reflector" : "",
2068
              p->rs_client ? " route-server" : "",
2069
              p->as4_session ? " AS4" : "",
2070
              p->add_path_rx ? " add-path-rx" : "",
2071
              p->add_path_tx ? " add-path-tx" : "",
2072
              p->ext_messages ? " ext-messages" : "");
2073
*/
2074
    cli_msg(-1006, "    Source address:   %I", p->source_addr);
2075
    cli_msg(-1006, "    Hold timer:       %t/%u",
2076
            tm_remains(p->conn->hold_timer), p->conn->hold_time);
2077
    cli_msg(-1006, "    Keepalive timer:  %t/%u",
2078
            tm_remains(p->conn->keepalive_timer), p->conn->keepalive_time);
2079
  }
2080

    
2081
  if ((p->last_error_class != BE_NONE) &&
2082
      (p->last_error_class != BE_MAN_DOWN))
2083
  {
2084
    const char *err1 = bgp_err_classes[p->last_error_class];
2085
    const char *err2 = bgp_last_errmsg(p);
2086
    cli_msg(-1006, "    Last error:       %s%s", err1, err2);
2087
  }
2088

    
2089
  {
2090
    /* XXXX ?? */
2091
    struct bgp_channel *c;
2092
    WALK_LIST(c, p->p.channels)
2093
    {
2094
      channel_show_info(&c->c);
2095

    
2096
      if (ipa_zero(c->link_addr))
2097
        cli_msg(-1006, "    BGP Next hop:   %I", c->next_hop_addr);
2098
      else
2099
        cli_msg(-1006, "    BGP Next hop:   %I %I", c->next_hop_addr, c->link_addr);
2100

    
2101
      if (c->igp_table_ip4)
2102
        cli_msg(-1006, "    IGP IPv4 table: %s", c->igp_table_ip4->name);
2103

    
2104
      if (c->igp_table_ip6)
2105
        cli_msg(-1006, "    IGP IPv6 table: %s", c->igp_table_ip6->name);
2106
    }
2107
  }
2108
}
2109

    
2110
struct channel_class channel_bgp = {
2111
  .channel_size =        sizeof(struct bgp_channel),
2112
  .config_size =        sizeof(struct bgp_channel_config),
2113
  .init =                bgp_channel_init,
2114
  .start =                bgp_channel_start,
2115
  .shutdown =                bgp_channel_shutdown,
2116
  .cleanup =                bgp_channel_cleanup,
2117
  .reconfigure =        bgp_channel_reconfigure,
2118
};
2119

    
2120
struct protocol proto_bgp = {
2121
  .name =                 "BGP",
2122
  .template =                 "bgp%d",
2123
  .attr_class =         EAP_BGP,
2124
  .preference =         DEF_PREF_BGP,
2125
  .channel_mask =        NB_IP | NB_VPN | NB_FLOW,
2126
  .proto_size =                sizeof(struct bgp_proto),
2127
  .config_size =        sizeof(struct bgp_config),
2128
  .postconfig =                bgp_postconfig,
2129
  .init =                 bgp_init,
2130
  .start =                 bgp_start,
2131
  .shutdown =                 bgp_shutdown,
2132
  .reconfigure =         bgp_reconfigure,
2133
  .copy_config =         bgp_copy_config,
2134
  .get_status =         bgp_get_status,
2135
  .get_attr =                 bgp_get_attr,
2136
  .get_route_info =         bgp_get_route_info,
2137
  .show_proto_info =         bgp_show_proto_info
2138
};