Statistics
| Branch: | Revision:

iof-bird-daemon / proto / bgp / bgp.c @ f4a60a9b

History | View | Annotate | Download (43.2 KB)

1
/*
2
 *        BIRD -- The Border Gateway Protocol
3
 *
4
 *        (c) 2000 Martin Mares <mj@ucw.cz>
5
 *
6
 *        Can be freely distributed and used under the terms of the GNU GPL.
7
 */
8

    
9
/**
10
 * DOC: Border Gateway Protocol
11
 *
12
 * The BGP protocol is implemented in three parts: |bgp.c| which takes care of the
13
 * connection and most of the interface with BIRD core, |packets.c| handling
14
 * both incoming and outgoing BGP packets and |attrs.c| containing functions for
15
 * manipulation with BGP attribute lists.
16
 *
17
 * As opposed to the other existing routing daemons, BIRD has a sophisticated core
18
 * architecture which is able to keep all the information needed by BGP in the
19
 * primary routing table, therefore no complex data structures like a central
20
 * BGP table are needed. This increases memory footprint of a BGP router with
21
 * many connections, but not too much and, which is more important, it makes
22
 * BGP much easier to implement.
23
 *
24
 * Each instance of BGP (corresponding to a single BGP peer) is described by a &bgp_proto
25
 * structure to which are attached individual connections represented by &bgp_conn
26
 * (usually, there exists only one connection, but during BGP session setup, there
27
 * can be more of them). The connections are handled according to the BGP state machine
28
 * defined in the RFC with all the timers and all the parameters configurable.
29
 *
30
 * In incoming direction, we listen on the connection's socket and each time we receive
31
 * some input, we pass it to bgp_rx(). It decodes packet headers and the markers and
32
 * passes complete packets to bgp_rx_packet() which distributes the packet according
33
 * to its type.
34
 *
35
 * In outgoing direction, we gather all the routing updates and sort them to buckets
36
 * (&bgp_bucket) according to their attributes (we keep a hash table for fast comparison
37
 * of &rta's and a &fib which helps us to find if we already have another route for
38
 * the same destination queued for sending, so that we can replace it with the new one
39
 * immediately instead of sending both updates). There also exists a special bucket holding
40
 * all the route withdrawals which cannot be queued anywhere else as they don't have any
41
 * attributes. If we have any packet to send (due to either new routes or the connection
42
 * tracking code wanting to send an Open, Keepalive or Notification message), we call
43
 * bgp_schedule_packet() which sets the corresponding bit in a @packets_to_send
44
 * bit field in &bgp_conn and as soon as the transmit socket buffer becomes empty,
45
 * we call bgp_fire_tx(). It inspects state of all the packet type bits and calls
46
 * the corresponding bgp_create_xx() functions, eventually rescheduling the same packet
47
 * type if we have more data of the same type to send.
48
 *
49
 * The processing of attributes consists of two functions: bgp_decode_attrs() for checking
50
 * of the attribute blocks and translating them to the language of BIRD's extended attributes
51
 * and bgp_encode_attrs() which does the converse. Both functions are built around a
52
 * @bgp_attr_table array describing all important characteristics of all known attributes.
53
 * Unknown transitive attributes are attached to the route as %EAF_TYPE_OPAQUE byte streams.
54
 *
55
 * BGP protocol implements graceful restart in both restarting (local restart)
56
 * and receiving (neighbor restart) roles. The first is handled mostly by the
57
 * graceful restart code in the nest, BGP protocol just handles capabilities,
58
 * sets @gr_wait and locks graceful restart until end-of-RIB mark is received.
59
 * The second is implemented by internal restart of the BGP state to %BS_IDLE
60
 * and protocol state to %PS_START, but keeping the protocol up from the core
61
 * point of view and therefore maintaining received routes. Routing table
62
 * refresh cycle (rt_refresh_begin(), rt_refresh_end()) is used for removing
63
 * stale routes after reestablishment of BGP session during graceful restart.
64
 */
65

    
66
#undef LOCAL_DEBUG
67

    
68
#include "nest/bird.h"
69
#include "nest/iface.h"
70
#include "nest/protocol.h"
71
#include "nest/route.h"
72
#include "nest/cli.h"
73
#include "nest/locks.h"
74
#include "conf/conf.h"
75
#include "lib/socket.h"
76
#include "lib/resource.h"
77
#include "lib/string.h"
78

    
79
#include "bgp.h"
80

    
81

    
82
struct linpool *bgp_linpool;                /* Global temporary pool */
83
static sock *bgp_listen_sk;                /* Global listening socket */
84
static int bgp_counter;                        /* Number of protocol instances using the listening socket */
85

    
86
static void bgp_close(struct bgp_proto *p, int apply_md5);
87
static void bgp_connect(struct bgp_proto *p);
88
static void bgp_active(struct bgp_proto *p);
89
static sock *bgp_setup_listen_sk(ip_addr addr, unsigned port, u32 flags);
90
static void bgp_update_bfd(struct bgp_proto *p, int use_bfd);
91

    
92

    
93
/**
94
 * bgp_open - open a BGP instance
95
 * @p: BGP instance
96
 *
97
 * This function allocates and configures shared BGP resources.
98
 * Should be called as the last step during initialization
99
 * (when lock is acquired and neighbor is ready).
100
 * When error, state changed to PS_DOWN, -1 is returned and caller
101
 * should return immediately.
102
 */
103
static int
104
bgp_open(struct bgp_proto *p)
105
{
106
  struct config *cfg = p->cf->c.global;
107
  int errcode;
108

    
109
  if (!bgp_listen_sk)
110
    bgp_listen_sk = bgp_setup_listen_sk(cfg->listen_bgp_addr, cfg->listen_bgp_port, cfg->listen_bgp_flags);
111

    
112
  if (!bgp_listen_sk)
113
    {
114
      errcode = BEM_NO_SOCKET;
115
      goto err;
116
    }
117

    
118
  if (!bgp_linpool)
119
    bgp_linpool = lp_new(&root_pool, 4080);
120

    
121
  bgp_counter++;
122

    
123
  if (p->cf->password)
124
    if (sk_set_md5_auth(bgp_listen_sk, p->cf->remote_ip, p->cf->iface, p->cf->password) < 0)
125
      {
126
        sk_log_error(bgp_listen_sk, p->p.name);
127
        bgp_close(p, 0);
128
        errcode = BEM_INVALID_MD5;
129
        goto err;
130
      }
131

    
132
  return 0;
133

    
134
err:
135
  p->p.disabled = 1;
136
  bgp_store_error(p, NULL, BE_MISC, errcode);
137
  proto_notify_state(&p->p, PS_DOWN);
138
  return -1;
139
}
140

    
141
static void
142
bgp_startup(struct bgp_proto *p)
143
{
144
  BGP_TRACE(D_EVENTS, "Started");
145
  p->start_state = p->cf->capabilities ? BSS_CONNECT : BSS_CONNECT_NOCAP;
146

    
147
  if (!p->cf->passive)
148
    bgp_active(p);
149
}
150

    
151
static void
152
bgp_startup_timeout(timer *t)
153
{
154
  bgp_startup(t->data);
155
}
156

    
157

    
158
static void
159
bgp_initiate(struct bgp_proto *p)
160
{
161
  int rv = bgp_open(p);
162
  if (rv < 0)
163
    return;
164

    
165
  if (p->cf->bfd)
166
    bgp_update_bfd(p, p->cf->bfd);
167

    
168
  if (p->startup_delay)
169
    {
170
      p->start_state = BSS_DELAY;
171
      BGP_TRACE(D_EVENTS, "Startup delayed by %d seconds due to errors", p->startup_delay);
172
      bgp_start_timer(p->startup_timer, p->startup_delay);
173
    }
174
  else
175
    bgp_startup(p);
176
}
177

    
178
/**
179
 * bgp_close - close a BGP instance
180
 * @p: BGP instance
181
 * @apply_md5: 0 to disable unsetting MD5 auth
182
 *
183
 * This function frees and deconfigures shared BGP resources.
184
 * @apply_md5 is set to 0 when bgp_close is called as a cleanup
185
 * from failed bgp_open().
186
 */
187
static void
188
bgp_close(struct bgp_proto *p, int apply_md5)
189
{
190
  ASSERT(bgp_counter);
191
  bgp_counter--;
192

    
193
  if (p->cf->password && apply_md5)
194
    if (sk_set_md5_auth(bgp_listen_sk, p->cf->remote_ip, p->cf->iface, NULL) < 0)
195
      sk_log_error(bgp_listen_sk, p->p.name);
196

    
197
  if (!bgp_counter)
198
    {
199
      rfree(bgp_listen_sk);
200
      bgp_listen_sk = NULL;
201
      rfree(bgp_linpool);
202
      bgp_linpool = NULL;
203
    }
204
}
205

    
206
/**
207
 * bgp_start_timer - start a BGP timer
208
 * @t: timer
209
 * @value: time to fire (0 to disable the timer)
210
 *
211
 * This functions calls tm_start() on @t with time @value and the
212
 * amount of randomization suggested by the BGP standard. Please use
213
 * it for all BGP timers.
214
 */
215
void
216
bgp_start_timer(timer *t, int value)
217
{
218
  if (value)
219
    {
220
      /* The randomization procedure is specified in RFC 1771: 9.2.3.3 */
221
      t->randomize = value / 4;
222
      tm_start(t, value - t->randomize);
223
    }
224
  else
225
    tm_stop(t);
226
}
227

    
228
/**
229
 * bgp_close_conn - close a BGP connection
230
 * @conn: connection to close
231
 *
232
 * This function takes a connection described by the &bgp_conn structure,
233
 * closes its socket and frees all resources associated with it.
234
 */
235
void
236
bgp_close_conn(struct bgp_conn *conn)
237
{
238
  // struct bgp_proto *p = conn->bgp;
239

    
240
  DBG("BGP: Closing connection\n");
241
  conn->packets_to_send = 0;
242
  rfree(conn->connect_retry_timer);
243
  conn->connect_retry_timer = NULL;
244
  rfree(conn->keepalive_timer);
245
  conn->keepalive_timer = NULL;
246
  rfree(conn->hold_timer);
247
  conn->hold_timer = NULL;
248
  rfree(conn->sk);
249
  conn->sk = NULL;
250
  rfree(conn->tx_ev);
251
  conn->tx_ev = NULL;
252
}
253

    
254

    
255
/**
256
 * bgp_update_startup_delay - update a startup delay
257
 * @p: BGP instance
258
 *
259
 * This function updates a startup delay that is used to postpone next BGP connect.
260
 * It also handles disable_after_error and might stop BGP instance when error
261
 * happened and disable_after_error is on.
262
 *
263
 * It should be called when BGP protocol error happened.
264
 */
265
void
266
bgp_update_startup_delay(struct bgp_proto *p)
267
{
268
  struct bgp_config *cf = p->cf;
269

    
270
  DBG("BGP: Updating startup delay\n");
271

    
272
  if (p->last_proto_error && ((now - p->last_proto_error) >= (int) cf->error_amnesia_time))
273
    p->startup_delay = 0;
274

    
275
  p->last_proto_error = now;
276

    
277
  if (cf->disable_after_error)
278
    {
279
      p->startup_delay = 0;
280
      p->p.disabled = 1;
281
      return;
282
    }
283

    
284
  if (!p->startup_delay)
285
    p->startup_delay = cf->error_delay_time_min;
286
  else
287
    p->startup_delay = MIN(2 * p->startup_delay, cf->error_delay_time_max);
288
}
289

    
290
static void
291
bgp_graceful_close_conn(struct bgp_conn *conn, unsigned subcode)
292
{
293
  switch (conn->state)
294
    {
295
    case BS_IDLE:
296
    case BS_CLOSE:
297
      return;
298
    case BS_CONNECT:
299
    case BS_ACTIVE:
300
      bgp_conn_enter_idle_state(conn);
301
      return;
302
    case BS_OPENSENT:
303
    case BS_OPENCONFIRM:
304
    case BS_ESTABLISHED:
305
      bgp_error(conn, 6, subcode, NULL, 0);
306
      return;
307
    default:
308
      bug("bgp_graceful_close_conn: Unknown state %d", conn->state);
309
    }
310
}
311

    
312
static void
313
bgp_down(struct bgp_proto *p)
314
{
315
  if (p->start_state > BSS_PREPARE)
316
    bgp_close(p, 1);
317

    
318
  BGP_TRACE(D_EVENTS, "Down");
319
  proto_notify_state(&p->p, PS_DOWN);
320
}
321

    
322
static void
323
bgp_decision(void *vp)
324
{
325
  struct bgp_proto *p = vp;
326

    
327
  DBG("BGP: Decision start\n");
328
  if ((p->p.proto_state == PS_START)
329
      && (p->outgoing_conn.state == BS_IDLE)
330
      && (p->incoming_conn.state != BS_OPENCONFIRM)
331
      && (!p->cf->passive))
332
    bgp_active(p);
333

    
334
  if ((p->p.proto_state == PS_STOP)
335
      && (p->outgoing_conn.state == BS_IDLE)
336
      && (p->incoming_conn.state == BS_IDLE))
337
    bgp_down(p);
338
}
339

    
340
void
341
bgp_stop(struct bgp_proto *p, unsigned subcode)
342
{
343
  proto_notify_state(&p->p, PS_STOP);
344
  bgp_graceful_close_conn(&p->outgoing_conn, subcode);
345
  bgp_graceful_close_conn(&p->incoming_conn, subcode);
346
  ev_schedule(p->event);
347
}
348

    
349
static inline void
350
bgp_conn_set_state(struct bgp_conn *conn, unsigned new_state)
351
{
352
  if (conn->bgp->p.mrtdump & MD_STATES)
353
    mrt_dump_bgp_state_change(conn, conn->state, new_state);
354

    
355
  conn->state = new_state;
356
}
357

    
358
void
359
bgp_conn_enter_openconfirm_state(struct bgp_conn *conn)
360
{
361
  /* Really, most of the work is done in bgp_rx_open(). */
362
  bgp_conn_set_state(conn, BS_OPENCONFIRM);
363
}
364

    
365
void
366
bgp_conn_enter_established_state(struct bgp_conn *conn)
367
{
368
  struct bgp_proto *p = conn->bgp;
369

    
370
  BGP_TRACE(D_EVENTS, "BGP session established");
371
  DBG("BGP: UP!!!\n");
372

    
373
  /* For multi-hop BGP sessions */
374
  if (ipa_zero(p->source_addr))
375
    p->source_addr = conn->sk->saddr;
376

    
377
  p->conn = conn;
378
  p->last_error_class = 0;
379
  p->last_error_code = 0;
380
  p->feed_state = BFS_NONE;
381
  p->load_state = BFS_NONE;
382
  bgp_init_bucket_table(p);
383
  bgp_init_prefix_table(p, 8);
384

    
385
  int peer_gr_ready = conn->peer_gr_aware && !(conn->peer_gr_flags & BGP_GRF_RESTART);
386

    
387
  if (p->p.gr_recovery && !peer_gr_ready)
388
    proto_graceful_restart_unlock(&p->p);
389

    
390
  if (p->p.gr_recovery && (p->cf->gr_mode == BGP_GR_ABLE) && peer_gr_ready)
391
    p->p.gr_wait = 1;
392

    
393
  if (p->gr_active)
394
    tm_stop(p->gr_timer);
395

    
396
  if (p->gr_active && (!conn->peer_gr_able || !(conn->peer_gr_aflags & BGP_GRF_FORWARDING)))
397
    bgp_graceful_restart_done(p);
398

    
399
  /* GR capability implies that neighbor will send End-of-RIB */
400
  if (conn->peer_gr_aware)
401
    p->load_state = BFS_LOADING;
402

    
403
  /* proto_notify_state() will likely call bgp_feed_begin(), setting p->feed_state */
404

    
405
  bgp_conn_set_state(conn, BS_ESTABLISHED);
406
  proto_notify_state(&p->p, PS_UP);
407
}
408

    
409
static void
410
bgp_conn_leave_established_state(struct bgp_proto *p)
411
{
412
  BGP_TRACE(D_EVENTS, "BGP session closed");
413
  p->conn = NULL;
414

    
415
  if (p->p.proto_state == PS_UP)
416
    bgp_stop(p, 0);
417
}
418

    
419
void
420
bgp_conn_enter_close_state(struct bgp_conn *conn)
421
{
422
  struct bgp_proto *p = conn->bgp;
423
  int os = conn->state;
424

    
425
  bgp_conn_set_state(conn, BS_CLOSE);
426
  tm_stop(conn->keepalive_timer);
427
  conn->sk->rx_hook = NULL;
428

    
429
  /* Timeout for CLOSE state, if we cannot send notification soon then we just hangup */
430
  bgp_start_timer(conn->hold_timer, 10);
431

    
432
  if (os == BS_ESTABLISHED)
433
    bgp_conn_leave_established_state(p);
434
}
435

    
436
void
437
bgp_conn_enter_idle_state(struct bgp_conn *conn)
438
{
439
  struct bgp_proto *p = conn->bgp;
440
  int os = conn->state;
441

    
442
  bgp_close_conn(conn);
443
  bgp_conn_set_state(conn, BS_IDLE);
444
  ev_schedule(p->event);
445

    
446
  if (os == BS_ESTABLISHED)
447
    bgp_conn_leave_established_state(p);
448
}
449

    
450
/**
451
 * bgp_handle_graceful_restart - handle detected BGP graceful restart
452
 * @p: BGP instance
453
 *
454
 * This function is called when a BGP graceful restart of the neighbor is
455
 * detected (when the TCP connection fails or when a new TCP connection
456
 * appears). The function activates processing of the restart - starts routing
457
 * table refresh cycle and activates BGP restart timer. The protocol state goes
458
 * back to %PS_START, but changing BGP state back to %BS_IDLE is left for the
459
 * caller.
460
 */
461
void
462
bgp_handle_graceful_restart(struct bgp_proto *p)
463
{
464
  ASSERT(p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready);
465

    
466
  BGP_TRACE(D_EVENTS, "Neighbor graceful restart detected%s",
467
            p->gr_active ? " - already pending" : "");
468
  proto_notify_state(&p->p, PS_START);
469

    
470
  if (p->gr_active)
471
    rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook);
472

    
473
  p->gr_active = 1;
474
  bgp_start_timer(p->gr_timer, p->conn->peer_gr_time);
475
  rt_refresh_begin(p->p.main_ahook->table, p->p.main_ahook);
476
}
477

    
478
/**
479
 * bgp_graceful_restart_done - finish active BGP graceful restart
480
 * @p: BGP instance
481
 *
482
 * This function is called when the active BGP graceful restart of the neighbor
483
 * should be finished - either successfully (the neighbor sends all paths and
484
 * reports end-of-RIB on the new session) or unsuccessfully (the neighbor does
485
 * not support BGP graceful restart on the new session). The function ends
486
 * routing table refresh cycle and stops BGP restart timer.
487
 */
488
void
489
bgp_graceful_restart_done(struct bgp_proto *p)
490
{
491
  BGP_TRACE(D_EVENTS, "Neighbor graceful restart done");
492
  p->gr_active = 0;
493
  tm_stop(p->gr_timer);
494
  rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook);
495
}
496

    
497
/**
498
 * bgp_graceful_restart_timeout - timeout of graceful restart 'restart timer'
499
 * @t: timer
500
 *
501
 * This function is a timeout hook for @gr_timer, implementing BGP restart time
502
 * limit for reestablisment of the BGP session after the graceful restart. When
503
 * fired, we just proceed with the usual protocol restart.
504
 */
505

    
506
static void
507
bgp_graceful_restart_timeout(timer *t)
508
{
509
  struct bgp_proto *p = t->data;
510

    
511
  BGP_TRACE(D_EVENTS, "Neighbor graceful restart timeout");
512
  bgp_stop(p, 0);
513
}
514

    
515

    
516
/**
517
 * bgp_refresh_begin - start incoming enhanced route refresh sequence
518
 * @p: BGP instance
519
 *
520
 * This function is called when an incoming enhanced route refresh sequence is
521
 * started by the neighbor, demarcated by the BoRR packet. The function updates
522
 * the load state and starts the routing table refresh cycle. Note that graceful
523
 * restart also uses routing table refresh cycle, but RFC 7313 and load states
524
 * ensure that these two sequences do not overlap.
525
 */
526
void
527
bgp_refresh_begin(struct bgp_proto *p)
528
{
529
  if (p->load_state == BFS_LOADING)
530
    { log(L_WARN "%s: BEGIN-OF-RR received before END-OF-RIB, ignoring", p->p.name); return; }
531

    
532
  p->load_state = BFS_REFRESHING;
533
  rt_refresh_begin(p->p.main_ahook->table, p->p.main_ahook);
534
}
535

    
536
/**
537
 * bgp_refresh_end - finish incoming enhanced route refresh sequence
538
 * @p: BGP instance
539
 *
540
 * This function is called when an incoming enhanced route refresh sequence is
541
 * finished by the neighbor, demarcated by the EoRR packet. The function updates
542
 * the load state and ends the routing table refresh cycle. Routes not received
543
 * during the sequence are removed by the nest.
544
 */
545
void
546
bgp_refresh_end(struct bgp_proto *p)
547
{
548
  if (p->load_state != BFS_REFRESHING)
549
    { log(L_WARN "%s: END-OF-RR received without prior BEGIN-OF-RR, ignoring", p->p.name); return; }
550

    
551
  p->load_state = BFS_NONE;
552
  rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook);
553
}
554

    
555

    
556
static void
557
bgp_send_open(struct bgp_conn *conn)
558
{
559
  conn->start_state = conn->bgp->start_state;
560

    
561
  // Default values, possibly changed by receiving capabilities.
562
  conn->advertised_as = 0;
563
  conn->peer_refresh_support = 0;
564
  conn->peer_as4_support = 0;
565
  conn->peer_add_path = 0;
566
  conn->peer_enhanced_refresh_support = 0;
567
  conn->peer_gr_aware = 0;
568
  conn->peer_gr_able = 0;
569
  conn->peer_gr_time = 0;
570
  conn->peer_gr_flags = 0;
571
  conn->peer_gr_aflags = 0;
572
  conn->peer_ext_messages_support = 0;
573

    
574
  DBG("BGP: Sending open\n");
575
  conn->sk->rx_hook = bgp_rx;
576
  conn->sk->tx_hook = bgp_tx;
577
  tm_stop(conn->connect_retry_timer);
578
  bgp_schedule_packet(conn, PKT_OPEN);
579
  bgp_conn_set_state(conn, BS_OPENSENT);
580
  bgp_start_timer(conn->hold_timer, conn->bgp->cf->initial_hold_time);
581
}
582

    
583
static void
584
bgp_connected(sock *sk)
585
{
586
  struct bgp_conn *conn = sk->data;
587
  struct bgp_proto *p = conn->bgp;
588

    
589
  BGP_TRACE(D_EVENTS, "Connected");
590
  bgp_send_open(conn);
591
}
592

    
593
static void
594
bgp_connect_timeout(timer *t)
595
{
596
  struct bgp_conn *conn = t->data;
597
  struct bgp_proto *p = conn->bgp;
598

    
599
  DBG("BGP: connect_timeout\n");
600
  if (p->p.proto_state == PS_START)
601
    {
602
      bgp_close_conn(conn);
603
      bgp_connect(p);
604
    }
605
  else
606
    bgp_conn_enter_idle_state(conn);
607
}
608

    
609
static void
610
bgp_sock_err(sock *sk, int err)
611
{
612
  struct bgp_conn *conn = sk->data;
613
  struct bgp_proto *p = conn->bgp;
614

    
615
  /*
616
   * This error hook may be called either asynchronously from main
617
   * loop, or synchronously from sk_send().  But sk_send() is called
618
   * only from bgp_tx() and bgp_kick_tx(), which are both called
619
   * asynchronously from main loop. Moreover, they end if err hook is
620
   * called. Therefore, we could suppose that it is always called
621
   * asynchronously.
622
   */
623

    
624
  bgp_store_error(p, conn, BE_SOCKET, err);
625

    
626
  if (err)
627
    BGP_TRACE(D_EVENTS, "Connection lost (%M)", err);
628
  else
629
    BGP_TRACE(D_EVENTS, "Connection closed");
630

    
631
  if ((conn->state == BS_ESTABLISHED) && p->gr_ready)
632
    bgp_handle_graceful_restart(p);
633

    
634
  bgp_conn_enter_idle_state(conn);
635
}
636

    
637
static void
638
bgp_hold_timeout(timer *t)
639
{
640
  struct bgp_conn *conn = t->data;
641
  struct bgp_proto *p = conn->bgp;
642

    
643
  DBG("BGP: Hold timeout\n");
644

    
645
  /* We are already closing the connection - just do hangup */
646
  if (conn->state == BS_CLOSE)
647
  {
648
    BGP_TRACE(D_EVENTS, "Connection stalled");
649
    bgp_conn_enter_idle_state(conn);
650
    return;
651
  }
652

    
653
  /* If there is something in input queue, we are probably congested
654
     and perhaps just not processed BGP packets in time. */
655

    
656
  if (sk_rx_ready(conn->sk) > 0)
657
    bgp_start_timer(conn->hold_timer, 10);
658
  else
659
    bgp_error(conn, 4, 0, NULL, 0);
660
}
661

    
662
static void
663
bgp_keepalive_timeout(timer *t)
664
{
665
  struct bgp_conn *conn = t->data;
666

    
667
  DBG("BGP: Keepalive timer\n");
668
  bgp_schedule_packet(conn, PKT_KEEPALIVE);
669
}
670

    
671
static void
672
bgp_setup_conn(struct bgp_proto *p, struct bgp_conn *conn)
673
{
674
  timer *t;
675

    
676
  conn->sk = NULL;
677
  conn->bgp = p;
678
  conn->packets_to_send = 0;
679

    
680
  t = conn->connect_retry_timer = tm_new(p->p.pool);
681
  t->hook = bgp_connect_timeout;
682
  t->data = conn;
683
  t = conn->hold_timer = tm_new(p->p.pool);
684
  t->hook = bgp_hold_timeout;
685
  t->data = conn;
686
  t = conn->keepalive_timer = tm_new(p->p.pool);
687
  t->hook = bgp_keepalive_timeout;
688
  t->data = conn;
689
  conn->tx_ev = ev_new(p->p.pool);
690
  conn->tx_ev->hook = bgp_kick_tx;
691
  conn->tx_ev->data = conn;
692
}
693

    
694
static void
695
bgp_setup_sk(struct bgp_conn *conn, sock *s)
696
{
697
  s->data = conn;
698
  s->err_hook = bgp_sock_err;
699
  conn->sk = s;
700
}
701

    
702
static void
703
bgp_active(struct bgp_proto *p)
704
{
705
  int delay = MAX(1, p->cf->connect_delay_time);
706
  struct bgp_conn *conn = &p->outgoing_conn;
707

    
708
  BGP_TRACE(D_EVENTS, "Connect delayed by %d seconds", delay);
709
  bgp_setup_conn(p, conn);
710
  bgp_conn_set_state(conn, BS_ACTIVE);
711
  bgp_start_timer(conn->connect_retry_timer, delay);
712
}
713

    
714
/**
715
 * bgp_connect - initiate an outgoing connection
716
 * @p: BGP instance
717
 *
718
 * The bgp_connect() function creates a new &bgp_conn and initiates
719
 * a TCP connection to the peer. The rest of connection setup is governed
720
 * by the BGP state machine as described in the standard.
721
 */
722
static void
723
bgp_connect(struct bgp_proto *p)        /* Enter Connect state and start establishing connection */
724
{
725
  sock *s;
726
  struct bgp_conn *conn = &p->outgoing_conn;
727
  int hops = p->cf->multihop ? : 1;
728

    
729
  DBG("BGP: Connecting\n");
730
  s = sk_new(p->p.pool);
731
  s->type = SK_TCP_ACTIVE;
732
  s->saddr = p->source_addr;
733
  s->daddr = p->cf->remote_ip;
734
  s->dport = p->cf->remote_port;
735
  s->iface = p->neigh ? p->neigh->iface : NULL;
736
  s->ttl = p->cf->ttl_security ? 255 : hops;
737
  s->rbsize = p->cf->enable_extended_messages ? BGP_RX_BUFFER_EXT_SIZE : BGP_RX_BUFFER_SIZE;
738
  s->tbsize = p->cf->enable_extended_messages ? BGP_TX_BUFFER_EXT_SIZE : BGP_TX_BUFFER_SIZE;
739
  s->tos = IP_PREC_INTERNET_CONTROL;
740
  s->password = p->cf->password;
741
  s->tx_hook = bgp_connected;
742
  BGP_TRACE(D_EVENTS, "Connecting to %I%J from local address %I%J", s->daddr, p->cf->iface,
743
            s->saddr, ipa_is_link_local(s->saddr) ? s->iface : NULL);
744
  bgp_setup_conn(p, conn);
745
  bgp_setup_sk(conn, s);
746
  bgp_conn_set_state(conn, BS_CONNECT);
747

    
748
  if (sk_open(s) < 0)
749
    goto err;
750

    
751
  /* Set minimal receive TTL if needed */
752
  if (p->cf->ttl_security)
753
    if (sk_set_min_ttl(s, 256 - hops) < 0)
754
      goto err;
755

    
756
  DBG("BGP: Waiting for connect success\n");
757
  bgp_start_timer(conn->connect_retry_timer, p->cf->connect_retry_time);
758
  return;
759

    
760
 err:
761
  sk_log_error(s, p->p.name);
762
  bgp_sock_err(s, 0);
763
  return;
764
}
765

    
766
/**
767
 * bgp_find_proto - find existing proto for incoming connection
768
 * @sk: TCP socket
769
 *
770
 */
771
static struct bgp_proto *
772
bgp_find_proto(sock *sk)
773
{
774
  struct proto_config *pc;
775

    
776
  WALK_LIST(pc, config->protos)
777
    if ((pc->protocol == &proto_bgp) && pc->proto)
778
      {
779
        struct bgp_proto *p = (struct bgp_proto *) pc->proto;
780
        if (ipa_equal(p->cf->remote_ip, sk->daddr) &&
781
            (!ipa_is_link_local(sk->daddr) || (p->cf->iface == sk->iface)))
782
          return p;
783
      }
784

    
785
  return NULL;
786
}
787

    
788
/**
789
 * bgp_incoming_connection - handle an incoming connection
790
 * @sk: TCP socket
791
 * @dummy: unused
792
 *
793
 * This function serves as a socket hook for accepting of new BGP
794
 * connections. It searches a BGP instance corresponding to the peer
795
 * which has connected and if such an instance exists, it creates a
796
 * &bgp_conn structure, attaches it to the instance and either sends
797
 * an Open message or (if there already is an active connection) it
798
 * closes the new connection by sending a Notification message.
799
 */
800
static int
801
bgp_incoming_connection(sock *sk, int dummy UNUSED)
802
{
803
  struct bgp_proto *p;
804
  int acc, hops;
805

    
806
  DBG("BGP: Incoming connection from %I port %d\n", sk->daddr, sk->dport);
807
  p = bgp_find_proto(sk);
808
  if (!p)
809
    {
810
      log(L_WARN "BGP: Unexpected connect from unknown address %I%J (port %d)",
811
          sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL, sk->dport);
812
      rfree(sk);
813
      return 0;
814
    }
815

    
816
  /* We are in proper state and there is no other incoming connection */
817
  acc = (p->p.proto_state == PS_START || p->p.proto_state == PS_UP) &&
818
    (p->start_state >= BSS_CONNECT) && (!p->incoming_conn.sk);
819

    
820
  if (p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready)
821
    {
822
      bgp_store_error(p, NULL, BE_MISC, BEM_GRACEFUL_RESTART);
823
      bgp_handle_graceful_restart(p);
824
      bgp_conn_enter_idle_state(p->conn);
825
      acc = 1;
826
    }
827

    
828
  BGP_TRACE(D_EVENTS, "Incoming connection from %I%J (port %d) %s",
829
            sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL,
830
            sk->dport, acc ? "accepted" : "rejected");
831

    
832
  if (!acc)
833
    {
834
      rfree(sk);
835
      return 0;
836
    }
837

    
838
  hops = p->cf->multihop ? : 1;
839

    
840
  if (sk_set_ttl(sk, p->cf->ttl_security ? 255 : hops) < 0)
841
    goto err;
842

    
843
  if (p->cf->ttl_security)
844
    if (sk_set_min_ttl(sk, 256 - hops) < 0)
845
      goto err;
846

    
847
  if (p->cf->enable_extended_messages)
848
    {
849
      sk->rbsize = BGP_RX_BUFFER_EXT_SIZE;
850
      sk->tbsize = BGP_TX_BUFFER_EXT_SIZE;
851
      sk_reallocate(sk);
852
    }
853

    
854
  bgp_setup_conn(p, &p->incoming_conn);
855
  bgp_setup_sk(&p->incoming_conn, sk);
856
  bgp_send_open(&p->incoming_conn);
857
  return 0;
858

    
859
err:
860
  sk_log_error(sk, p->p.name);
861
  log(L_ERR "%s: Incoming connection aborted", p->p.name);
862
  rfree(sk);
863
  return 0;
864
}
865

    
866
static void
867
bgp_listen_sock_err(sock *sk UNUSED, int err)
868
{
869
  if (err == ECONNABORTED)
870
    log(L_WARN "BGP: Incoming connection aborted");
871
  else
872
    log(L_ERR "BGP: Error on listening socket: %M", err);
873
}
874

    
875
static sock *
876
bgp_setup_listen_sk(ip_addr addr, unsigned port, u32 flags)
877
{
878
  sock *s = sk_new(&root_pool);
879
  DBG("BGP: Creating listening socket\n");
880
  s->type = SK_TCP_PASSIVE;
881
  s->ttl = 255;
882
  s->saddr = addr;
883
  s->sport = port ? port : BGP_PORT;
884
  s->flags = flags ? 0 : SKF_V6ONLY;
885
  s->tos = IP_PREC_INTERNET_CONTROL;
886
  s->rbsize = BGP_RX_BUFFER_SIZE;
887
  s->tbsize = BGP_TX_BUFFER_SIZE;
888
  s->rx_hook = bgp_incoming_connection;
889
  s->err_hook = bgp_listen_sock_err;
890

    
891
  if (sk_open(s) < 0)
892
    goto err;
893

    
894
  return s;
895

    
896
 err:
897
  sk_log_error(s, "BGP");
898
  log(L_ERR "BGP: Cannot open listening socket");
899
  rfree(s);
900
  return NULL;
901
}
902

    
903
static void
904
bgp_start_neighbor(struct bgp_proto *p)
905
{
906
  /* Called only for single-hop BGP sessions */
907

    
908
  if (ipa_zero(p->source_addr))
909
    p->source_addr = p->neigh->ifa->ip;
910

    
911
#ifdef IPV6
912
  {
913
    struct ifa *a;
914
    p->local_link = IPA_NONE;
915
    WALK_LIST(a, p->neigh->iface->addrs)
916
      if (a->scope == SCOPE_LINK)
917
        {
918
          p->local_link = a->ip;
919
          break;
920
        }
921

    
922
    if (! ipa_nonzero(p->local_link))
923
      log(L_WARN "%s: Missing link local address on interface %s", p->p.name,  p->neigh->iface->name);
924

    
925
    DBG("BGP: Selected link-level address %I\n", p->local_link);
926
  }
927
#endif
928

    
929
  bgp_initiate(p);
930
}
931

    
932
static void
933
bgp_neigh_notify(neighbor *n)
934
{
935
  struct bgp_proto *p = (struct bgp_proto *) n->proto;
936
  int ps = p->p.proto_state;
937

    
938
  if (n != p->neigh)
939
    return;
940

    
941
  if ((ps == PS_DOWN) || (ps == PS_STOP))
942
    return;
943

    
944
  int prepare = (ps == PS_START) && (p->start_state == BSS_PREPARE);
945

    
946
  if (n->scope <= 0)
947
    {
948
      if (!prepare)
949
        {
950
          BGP_TRACE(D_EVENTS, "Neighbor lost");
951
          bgp_store_error(p, NULL, BE_MISC, BEM_NEIGHBOR_LOST);
952
          /* Perhaps also run bgp_update_startup_delay(p)? */
953
          bgp_stop(p, 0);
954
        }
955
    }
956
  else if (p->cf->check_link && !(n->iface->flags & IF_LINK_UP))
957
    {
958
      if (!prepare)
959
        {
960
          BGP_TRACE(D_EVENTS, "Link down");
961
          bgp_store_error(p, NULL, BE_MISC, BEM_LINK_DOWN);
962
          if (ps == PS_UP)
963
            bgp_update_startup_delay(p);
964
          bgp_stop(p, 0);
965
        }
966
    }
967
  else
968
    {
969
      if (prepare)
970
        {
971
          BGP_TRACE(D_EVENTS, "Neighbor ready");
972
          bgp_start_neighbor(p);
973
        }
974
    }
975
}
976

    
977
static void
978
bgp_bfd_notify(struct bfd_request *req)
979
{
980
  struct bgp_proto *p = req->data;
981
  int ps = p->p.proto_state;
982

    
983
  if (req->down && ((ps == PS_START) || (ps == PS_UP)))
984
    {
985
      BGP_TRACE(D_EVENTS, "BFD session down");
986
      bgp_store_error(p, NULL, BE_MISC, BEM_BFD_DOWN);
987
      if (ps == PS_UP)
988
        bgp_update_startup_delay(p);
989
      bgp_stop(p, 0);
990
    }
991
}
992

    
993
static void
994
bgp_update_bfd(struct bgp_proto *p, int use_bfd)
995
{
996
  if (use_bfd && !p->bfd_req)
997
    p->bfd_req = bfd_request_session(p->p.pool, p->cf->remote_ip, p->source_addr,
998
                                     p->cf->multihop ? NULL : p->neigh->iface,
999
                                     bgp_bfd_notify, p);
1000

    
1001
  if (!use_bfd && p->bfd_req)
1002
    {
1003
      rfree(p->bfd_req);
1004
      p->bfd_req = NULL;
1005
    }
1006
}
1007

    
1008
static int
1009
bgp_reload_routes(struct proto *P)
1010
{
1011
  struct bgp_proto *p = (struct bgp_proto *) P;
1012
  if (!p->conn || !p->conn->peer_refresh_support)
1013
    return 0;
1014

    
1015
  bgp_schedule_packet(p->conn, PKT_ROUTE_REFRESH);
1016
  return 1;
1017
}
1018

    
1019
static void
1020
bgp_feed_begin(struct proto *P, int initial)
1021
{
1022
  struct bgp_proto *p = (struct bgp_proto *) P;
1023

    
1024
  /* This should not happen */
1025
  if (!p->conn)
1026
    return;
1027

    
1028
  if (initial && p->cf->gr_mode)
1029
    p->feed_state = BFS_LOADING;
1030

    
1031
  /* It is refeed and both sides support enhanced route refresh */
1032
  if (!initial && p->cf->enable_refresh &&
1033
      p->conn->peer_enhanced_refresh_support)
1034
    {
1035
      /* BoRR must not be sent before End-of-RIB */
1036
      if (p->feed_state == BFS_LOADING || p->feed_state == BFS_LOADED)
1037
        return;
1038

    
1039
      p->feed_state = BFS_REFRESHING;
1040
      bgp_schedule_packet(p->conn, PKT_BEGIN_REFRESH);
1041
    }
1042
}
1043

    
1044
static void
1045
bgp_feed_end(struct proto *P)
1046
{
1047
  struct bgp_proto *p = (struct bgp_proto *) P;
1048

    
1049
  /* This should not happen */
1050
  if (!p->conn)
1051
    return;
1052

    
1053
  /* Non-demarcated feed ended, nothing to do */
1054
  if (p->feed_state == BFS_NONE)
1055
    return;
1056

    
1057
  /* Schedule End-of-RIB packet */
1058
  if (p->feed_state == BFS_LOADING)
1059
    p->feed_state = BFS_LOADED;
1060

    
1061
  /* Schedule EoRR packet */
1062
  if (p->feed_state == BFS_REFRESHING)
1063
    p->feed_state = BFS_REFRESHED;
1064

    
1065
  /* Kick TX hook */
1066
  bgp_schedule_packet(p->conn, PKT_UPDATE);
1067
}
1068

    
1069

    
1070
static void
1071
bgp_start_locked(struct object_lock *lock)
1072
{
1073
  struct bgp_proto *p = lock->data;
1074
  struct bgp_config *cf = p->cf;
1075

    
1076
  if (p->p.proto_state != PS_START)
1077
    {
1078
      DBG("BGP: Got lock in different state %d\n", p->p.proto_state);
1079
      return;
1080
    }
1081

    
1082
  DBG("BGP: Got lock\n");
1083

    
1084
  if (cf->multihop)
1085
    {
1086
      /* Multi-hop sessions do not use neighbor entries */
1087
      bgp_initiate(p);
1088
      return;
1089
    }
1090

    
1091
  neighbor *n = neigh_find2(&p->p, &cf->remote_ip, cf->iface, NEF_STICKY);
1092
  if (!n)
1093
    {
1094
      log(L_ERR "%s: Invalid remote address %I%J", p->p.name, cf->remote_ip, cf->iface);
1095
      /* As we do not start yet, we can just disable protocol */
1096
      p->p.disabled = 1;
1097
      bgp_store_error(p, NULL, BE_MISC, BEM_INVALID_NEXT_HOP);
1098
      proto_notify_state(&p->p, PS_DOWN);
1099
      return;
1100
    }
1101

    
1102
  p->neigh = n;
1103

    
1104
  if (n->scope <= 0)
1105
    BGP_TRACE(D_EVENTS, "Waiting for %I%J to become my neighbor", cf->remote_ip, cf->iface);
1106
  else if (p->cf->check_link && !(n->iface->flags & IF_LINK_UP))
1107
    BGP_TRACE(D_EVENTS, "Waiting for link on %s", n->iface->name);
1108
  else
1109
    bgp_start_neighbor(p);
1110
}
1111

    
1112
static int
1113
bgp_start(struct proto *P)
1114
{
1115
  struct bgp_proto *p = (struct bgp_proto *) P;
1116
  struct object_lock *lock;
1117

    
1118
  DBG("BGP: Startup.\n");
1119
  p->start_state = BSS_PREPARE;
1120
  p->outgoing_conn.state = BS_IDLE;
1121
  p->incoming_conn.state = BS_IDLE;
1122
  p->neigh = NULL;
1123
  p->bfd_req = NULL;
1124
  p->gr_ready = 0;
1125
  p->gr_active = 0;
1126

    
1127
  rt_lock_table(p->igp_table);
1128

    
1129
  p->event = ev_new(p->p.pool);
1130
  p->event->hook = bgp_decision;
1131
  p->event->data = p;
1132

    
1133
  p->startup_timer = tm_new(p->p.pool);
1134
  p->startup_timer->hook = bgp_startup_timeout;
1135
  p->startup_timer->data = p;
1136

    
1137
  p->gr_timer = tm_new(p->p.pool);
1138
  p->gr_timer->hook = bgp_graceful_restart_timeout;
1139
  p->gr_timer->data = p;
1140

    
1141
  p->local_id = proto_get_router_id(P->cf);
1142
  if (p->rr_client)
1143
    p->rr_cluster_id = p->cf->rr_cluster_id ? p->cf->rr_cluster_id : p->local_id;
1144

    
1145
  p->remote_id = 0;
1146
  p->source_addr = p->cf->source_addr;
1147

    
1148
  if (p->p.gr_recovery && p->cf->gr_mode)
1149
    proto_graceful_restart_lock(P);
1150

    
1151
  /*
1152
   *  Before attempting to create the connection, we need to lock the
1153
   *  port, so that are sure we're the only instance attempting to talk
1154
   *  with that neighbor.
1155
   */
1156

    
1157
  lock = p->lock = olock_new(P->pool);
1158
  lock->addr = p->cf->remote_ip;
1159
  lock->port = p->cf->remote_port;
1160
  lock->iface = p->cf->iface;
1161
  lock->type = OBJLOCK_TCP;
1162
  lock->hook = bgp_start_locked;
1163
  lock->data = p;
1164
  olock_acquire(lock);
1165

    
1166
  return PS_START;
1167
}
1168

    
1169
extern int proto_restart;
1170

    
1171
static int
1172
bgp_shutdown(struct proto *P)
1173
{
1174
  struct bgp_proto *p = (struct bgp_proto *) P;
1175
  unsigned subcode = 0;
1176

    
1177
  BGP_TRACE(D_EVENTS, "Shutdown requested");
1178

    
1179
  switch (P->down_code)
1180
    {
1181
    case PDC_CF_REMOVE:
1182
    case PDC_CF_DISABLE:
1183
      subcode = 3; // Errcode 6, 3 - peer de-configured
1184
      break;
1185

    
1186
    case PDC_CF_RESTART:
1187
      subcode = 6; // Errcode 6, 6 - other configuration change
1188
      break;
1189

    
1190
    case PDC_CMD_DISABLE:
1191
    case PDC_CMD_SHUTDOWN:
1192
      subcode = 2; // Errcode 6, 2 - administrative shutdown
1193
      break;
1194

    
1195
    case PDC_CMD_RESTART:
1196
      subcode = 4; // Errcode 6, 4 - administrative reset
1197
      break;
1198

    
1199
    case PDC_RX_LIMIT_HIT:
1200
    case PDC_IN_LIMIT_HIT:
1201
      subcode = 1; // Errcode 6, 1 - max number of prefixes reached
1202
      /* log message for compatibility */
1203
      log(L_WARN "%s: Route limit exceeded, shutting down", p->p.name);
1204
      goto limit;
1205

    
1206
    case PDC_OUT_LIMIT_HIT:
1207
      subcode = proto_restart ? 4 : 2; // Administrative reset or shutdown
1208

    
1209
    limit:
1210
      bgp_store_error(p, NULL, BE_AUTO_DOWN, BEA_ROUTE_LIMIT_EXCEEDED);
1211
      if (proto_restart)
1212
        bgp_update_startup_delay(p);
1213
      else
1214
        p->startup_delay = 0;
1215
      goto done;
1216
    }
1217

    
1218
  bgp_store_error(p, NULL, BE_MAN_DOWN, 0);
1219
  p->startup_delay = 0;
1220

    
1221
 done:
1222
  bgp_stop(p, subcode);
1223
  return p->p.proto_state;
1224
}
1225

    
1226
static void
1227
bgp_cleanup(struct proto *P)
1228
{
1229
  struct bgp_proto *p = (struct bgp_proto *) P;
1230
  rt_unlock_table(p->igp_table);
1231
}
1232

    
1233
static rtable *
1234
get_igp_table(struct bgp_config *cf)
1235
{
1236
  return cf->igp_table ? cf->igp_table->table : cf->c.table->table;
1237
}
1238

    
1239
static struct proto *
1240
bgp_init(struct proto_config *C)
1241
{
1242
  struct proto *P = proto_new(C, sizeof(struct bgp_proto));
1243
  struct bgp_config *c = (struct bgp_config *) C;
1244
  struct bgp_proto *p = (struct bgp_proto *) P;
1245

    
1246
  P->accept_ra_types = c->secondary ? RA_ACCEPTED : RA_OPTIMAL;
1247
  P->rt_notify = bgp_rt_notify;
1248
  P->import_control = bgp_import_control;
1249
  P->neigh_notify = bgp_neigh_notify;
1250
  P->reload_routes = bgp_reload_routes;
1251
  P->feed_begin = bgp_feed_begin;
1252
  P->feed_end = bgp_feed_end;
1253
  P->rte_better = bgp_rte_better;
1254
  P->rte_mergable = bgp_rte_mergable;
1255
  P->rte_recalculate = c->deterministic_med ? bgp_rte_recalculate : NULL;
1256

    
1257
  p->cf = c;
1258
  p->local_as = c->local_as;
1259
  p->remote_as = c->remote_as;
1260
  p->is_internal = (c->local_as == c->remote_as);
1261
  p->rs_client = c->rs_client;
1262
  p->rr_client = c->rr_client;
1263
  p->igp_table = get_igp_table(c);
1264

    
1265
  return P;
1266
}
1267

    
1268

    
1269
void
1270
bgp_check_config(struct bgp_config *c)
1271
{
1272
  int internal = (c->local_as == c->remote_as);
1273

    
1274
  /* Do not check templates at all */
1275
  if (c->c.class == SYM_TEMPLATE)
1276
    return;
1277

    
1278

    
1279
  /* EBGP direct by default, IBGP multihop by default */
1280
  if (c->multihop < 0)
1281
    c->multihop = internal ? 64 : 0;
1282

    
1283
  /* Different default for gw_mode */
1284
  if (!c->gw_mode)
1285
    c->gw_mode = c->multihop ? GW_RECURSIVE : GW_DIRECT;
1286

    
1287
  /* Different default based on rs_client */
1288
  if (!c->missing_lladdr)
1289
    c->missing_lladdr = c->rs_client ? MLL_IGNORE : MLL_SELF;
1290

    
1291
  /* Disable after error incompatible with restart limit action */
1292
  if (c->c.in_limit && (c->c.in_limit->action == PLA_RESTART) && c->disable_after_error)
1293
    c->c.in_limit->action = PLA_DISABLE;
1294

    
1295

    
1296
  if (!c->local_as)
1297
    cf_error("Local AS number must be set");
1298

    
1299
  if (ipa_zero(c->remote_ip))
1300
    cf_error("Neighbor must be configured");
1301

    
1302
  if (!c->remote_as)
1303
    cf_error("Remote AS number must be set");
1304

    
1305
  // if (ipa_is_link_local(c->remote_ip) && !c->iface)
1306
  //   cf_error("Link-local neighbor address requires specified interface");
1307

    
1308
  if (!ipa_is_link_local(c->remote_ip) != !c->iface)
1309
    cf_error("Link-local address and interface scope must be used together");
1310

    
1311
  if (!(c->capabilities && c->enable_as4) && (c->remote_as > 0xFFFF))
1312
    cf_error("Neighbor AS number out of range (AS4 not available)");
1313

    
1314
  if (!internal && c->rr_client)
1315
    cf_error("Only internal neighbor can be RR client");
1316

    
1317
  if (internal && c->rs_client)
1318
    cf_error("Only external neighbor can be RS client");
1319

    
1320
  if (c->multihop && (c->gw_mode == GW_DIRECT))
1321
    cf_error("Multihop BGP cannot use direct gateway mode");
1322

    
1323
  if (c->multihop && (ipa_is_link_local(c->remote_ip) ||
1324
                      ipa_is_link_local(c->source_addr)))
1325
    cf_error("Multihop BGP cannot be used with link-local addresses");
1326

    
1327
  if (c->multihop && c->check_link)
1328
    cf_error("Multihop BGP cannot depend on link state");
1329

    
1330
  if (c->multihop && c->bfd && ipa_zero(c->source_addr))
1331
    cf_error("Multihop BGP with BFD requires specified source address");
1332

    
1333
  if ((c->gw_mode == GW_RECURSIVE) && c->c.table->sorted)
1334
    cf_error("BGP in recursive mode prohibits sorted table");
1335

    
1336
  if (c->deterministic_med && c->c.table->sorted)
1337
    cf_error("BGP with deterministic MED prohibits sorted table");
1338

    
1339
  if (c->secondary && !c->c.table->sorted)
1340
    cf_error("BGP with secondary option requires sorted table");
1341
}
1342

    
1343
static int
1344
bgp_reconfigure(struct proto *P, struct proto_config *C)
1345
{
1346
  struct bgp_config *new = (struct bgp_config *) C;
1347
  struct bgp_proto *p = (struct bgp_proto *) P;
1348
  struct bgp_config *old = p->cf;
1349

    
1350
  if (proto_get_router_id(C) != p->local_id)
1351
    return 0;
1352

    
1353
  int same = !memcmp(((byte *) old) + sizeof(struct proto_config),
1354
                     ((byte *) new) + sizeof(struct proto_config),
1355
                     // password item is last and must be checked separately
1356
                     OFFSETOF(struct bgp_config, password) - sizeof(struct proto_config))
1357
    && ((!old->password && !new->password)
1358
        || (old->password && new->password && !strcmp(old->password, new->password)))
1359
    && (get_igp_table(old) == get_igp_table(new));
1360

    
1361
  if (same && (p->start_state > BSS_PREPARE))
1362
    bgp_update_bfd(p, new->bfd);
1363

    
1364
  /* We should update our copy of configuration ptr as old configuration will be freed */
1365
  if (same)
1366
    p->cf = new;
1367

    
1368
  return same;
1369
}
1370

    
1371
/*
 * Config copy hook (proto_bgp.copy_config). Intentionally empty: BGP does
 * no copying of its own here ("just a shallow copy").
 */
static void
bgp_copy_config(struct proto_config *dest, struct proto_config *src)
{
  (void) dest;
  (void) src;
}
1376

    
1377

    
1378
/**
1379
 * bgp_error - report a protocol error
1380
 * @c: connection
1381
 * @code: error code (according to the RFC)
1382
 * @subcode: error sub-code
1383
 * @data: data to be passed in the Notification message
1384
 * @len: length of the data
1385
 *
1386
 * bgp_error() sends a notification packet to tell the other side that a protocol
1387
 * error has occurred (including the data considered erroneous if possible) and
1388
 * closes the connection.
1389
 */
1390
void
1391
bgp_error(struct bgp_conn *c, unsigned code, unsigned subcode, byte *data, int len)
1392
{
1393
  struct bgp_proto *p = c->bgp;
1394

    
1395
  if (c->state == BS_CLOSE)
1396
    return;
1397

    
1398
  bgp_log_error(p, BE_BGP_TX, "Error", code, subcode, data, (len > 0) ? len : -len);
1399
  bgp_store_error(p, c, BE_BGP_TX, (code << 16) | subcode);
1400
  bgp_conn_enter_close_state(c);
1401

    
1402
  c->notify_code = code;
1403
  c->notify_subcode = subcode;
1404
  c->notify_data = data;
1405
  c->notify_size = (len > 0) ? len : 0;
1406
  bgp_schedule_packet(c, PKT_NOTIFICATION);
1407

    
1408
  if (code != 6)
1409
    {
1410
      bgp_update_startup_delay(p);
1411
      bgp_stop(p, 0);
1412
    }
1413
}
1414

    
1415
/**
1416
 * bgp_store_error - store last error for status report
1417
 * @p: BGP instance
1418
 * @c: connection
1419
 * @class: error class (BE_xxx constants)
1420
 * @code: error code (class specific)
1421
 *
1422
 * bgp_store_error() decides whether given error is interesting enough
1423
 * and store that error to last_error variables of @p
1424
 */
1425
void
1426
bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code)
1427
{
1428
  /* During PS_UP, we ignore errors on secondary connection */
1429
  if ((p->p.proto_state == PS_UP) && c && (c != p->conn))
1430
    return;
1431

    
1432
  /* During PS_STOP, we ignore any errors, as we want to report
1433
   * the error that caused transition to PS_STOP
1434
   */
1435
  if (p->p.proto_state == PS_STOP)
1436
    return;
1437

    
1438
  p->last_error_class = class;
1439
  p->last_error_code = code;
1440
}
1441

    
1442
/* Connection state names, indexed by connection state (see bgp_state_dsc()) */
static char *bgp_state_names[] = {
  "Idle",
  "Connect",
  "Active",
  "OpenSent",
  "OpenConfirm",
  "Established",
  "Close"
};

/* Prefixes of error messages, indexed by last_error_class */
static char *bgp_err_classes[] = {
  "",
  "Error: ",
  "Socket: ",
  "Received: ",
  "BGP Error: ",
  "Automatic shutdown: ",
  ""
};

/* Messages for class BE_MISC, indexed by last_error_code */
static char *bgp_misc_errors[] = {
  "",
  "Neighbor lost",
  "Invalid next hop",
  "Kernel MD5 auth failed",
  "No listening socket",
  "Link down",
  "BFD session down",
  "Graceful restart"
};

/* Messages for class BE_AUTO_DOWN, indexed by last_error_code */
static char *bgp_auto_errors[] = {
  "",
  "Route limit exceeded"
};
1446

    
1447
static const char *
1448
bgp_last_errmsg(struct bgp_proto *p)
1449
{
1450
  switch (p->last_error_class)
1451
    {
1452
    case BE_MISC:
1453
      return bgp_misc_errors[p->last_error_code];
1454
    case BE_SOCKET:
1455
      return (p->last_error_code == 0) ? "Connection closed" : strerror(p->last_error_code);
1456
    case BE_BGP_RX:
1457
    case BE_BGP_TX:
1458
      return bgp_error_dsc(p->last_error_code >> 16, p->last_error_code & 0xFF);
1459
    case BE_AUTO_DOWN:
1460
      return bgp_auto_errors[p->last_error_code];
1461
    default:
1462
      return "";
1463
    }
1464
}
1465

    
1466
static const char *
1467
bgp_state_dsc(struct bgp_proto *p)
1468
{
1469
  if (p->p.proto_state == PS_DOWN)
1470
    return "Down";
1471

    
1472
  int state = MAX(p->incoming_conn.state, p->outgoing_conn.state);
1473
  if ((state == BS_IDLE) && (p->start_state >= BSS_CONNECT) && p->cf->passive)
1474
    return "Passive";
1475

    
1476
  return bgp_state_names[state];
1477
}
1478

    
1479
static void
1480
bgp_get_status(struct proto *P, byte *buf)
1481
{
1482
  struct bgp_proto *p = (struct bgp_proto *) P;
1483

    
1484
  const char *err1 = bgp_err_classes[p->last_error_class];
1485
  const char *err2 = bgp_last_errmsg(p);
1486

    
1487
  if (P->proto_state == PS_DOWN)
1488
    bsprintf(buf, "%s%s", err1, err2);
1489
  else
1490
    bsprintf(buf, "%-14s%s%s", bgp_state_dsc(p), err1, err2);
1491
}
1492

    
1493
static void
1494
bgp_show_proto_info(struct proto *P)
1495
{
1496
  struct bgp_proto *p = (struct bgp_proto *) P;
1497
  struct bgp_conn *c = p->conn;
1498

    
1499
  proto_show_basic_info(P);
1500

    
1501
  cli_msg(-1006, "  BGP state:          %s", bgp_state_dsc(p));
1502
  cli_msg(-1006, "    Neighbor address: %I%J", p->cf->remote_ip, p->cf->iface);
1503
  cli_msg(-1006, "    Neighbor AS:      %u", p->remote_as);
1504

    
1505
  if (p->gr_active)
1506
    cli_msg(-1006, "    Neighbor graceful restart active");
1507

    
1508
  if (P->proto_state == PS_START)
1509
    {
1510
      struct bgp_conn *oc = &p->outgoing_conn;
1511

    
1512
      if ((p->start_state < BSS_CONNECT) &&
1513
          (p->startup_timer->expires))
1514
        cli_msg(-1006, "    Error wait:       %d/%d",
1515
                p->startup_timer->expires - now, p->startup_delay);
1516

    
1517
      if ((oc->state == BS_ACTIVE) &&
1518
          (oc->connect_retry_timer->expires))
1519
        cli_msg(-1006, "    Connect delay:    %d/%d",
1520
                oc->connect_retry_timer->expires - now, p->cf->connect_delay_time);
1521

    
1522
      if (p->gr_active && p->gr_timer->expires)
1523
        cli_msg(-1006, "    Restart timer:    %d/-", p->gr_timer->expires - now);
1524
    }
1525
  else if (P->proto_state == PS_UP)
1526
    {
1527
      cli_msg(-1006, "    Neighbor ID:      %R", p->remote_id);
1528
      cli_msg(-1006, "    Neighbor caps:   %s%s%s%s%s%s%s",
1529
              c->peer_refresh_support ? " refresh" : "",
1530
              c->peer_enhanced_refresh_support ? " enhanced-refresh" : "",
1531
              c->peer_gr_able ? " restart-able" : (c->peer_gr_aware ? " restart-aware" : ""),
1532
              c->peer_as4_support ? " AS4" : "",
1533
              (c->peer_add_path & ADD_PATH_RX) ? " add-path-rx" : "",
1534
              (c->peer_add_path & ADD_PATH_TX) ? " add-path-tx" : "",
1535
              c->peer_ext_messages_support ? " ext-messages" : "");
1536
      cli_msg(-1006, "    Session:          %s%s%s%s%s%s%s%s",
1537
              p->is_internal ? "internal" : "external",
1538
              p->cf->multihop ? " multihop" : "",
1539
              p->rr_client ? " route-reflector" : "",
1540
              p->rs_client ? " route-server" : "",
1541
              p->as4_session ? " AS4" : "",
1542
              p->add_path_rx ? " add-path-rx" : "",
1543
              p->add_path_tx ? " add-path-tx" : "",
1544
              p->ext_messages ? " ext-messages" : "");
1545
      cli_msg(-1006, "    Source address:   %I", p->source_addr);
1546
      if (P->cf->in_limit)
1547
        cli_msg(-1006, "    Route limit:      %d/%d",
1548
                p->p.stats.imp_routes + p->p.stats.filt_routes, P->cf->in_limit->limit);
1549
      cli_msg(-1006, "    Hold timer:       %d/%d",
1550
              tm_remains(c->hold_timer), c->hold_time);
1551
      cli_msg(-1006, "    Keepalive timer:  %d/%d",
1552
              tm_remains(c->keepalive_timer), c->keepalive_time);
1553
    }
1554

    
1555
  if ((p->last_error_class != BE_NONE) &&
1556
      (p->last_error_class != BE_MAN_DOWN))
1557
    {
1558
      const char *err1 = bgp_err_classes[p->last_error_class];
1559
      const char *err2 = bgp_last_errmsg(p);
1560
      cli_msg(-1006, "    Last error:       %s%s", err1, err2);
1561
    }
1562
}
1563

    
1564
struct protocol proto_bgp = {
1565
  .name =                 "BGP",
1566
  .template =                 "bgp%d",
1567
  .attr_class =         EAP_BGP,
1568
  .preference =         DEF_PREF_BGP,
1569
  .config_size =        sizeof(struct bgp_config),
1570
  .init =                 bgp_init,
1571
  .start =                 bgp_start,
1572
  .shutdown =                 bgp_shutdown,
1573
  .cleanup =                 bgp_cleanup,
1574
  .reconfigure =         bgp_reconfigure,
1575
  .copy_config =         bgp_copy_config,
1576
  .get_status =         bgp_get_status,
1577
  .get_attr =                 bgp_get_attr,
1578
  .get_route_info =         bgp_get_route_info,
1579
  .show_proto_info =         bgp_show_proto_info
1580
};