Statistics
| Branch: | Revision:

iof-bird-daemon / proto / bgp / bgp.h @ 3213273d

History | View | Annotate | Download (16.8 KB)

1
/*
2
 *        BIRD -- The Border Gateway Protocol
3
 *
4
 *        (c) 2000 Martin Mares <mj@ucw.cz>
5
 *
6
 *        Can be freely distributed and used under the terms of the GNU GPL.
7
 */
8

    
9
#ifndef _BIRD_BGP_H_
10
#define _BIRD_BGP_H_
11

    
12
#include <stdint.h>
13
#include "nest/route.h"
14
#include "nest/bfd.h"
15
#include "lib/hash.h"
16

    
17
struct linpool;
18
struct eattr;
19

    
20
struct bgp_config {
21
  struct proto_config c;
22
  u32 local_as, remote_as;
23
  ip_addr remote_ip;
24
  ip_addr source_addr;                        /* Source address to use */
25
  struct iface *iface;                        /* Interface for link-local addresses */
26
  u16 remote_port;                         /* Neighbor destination port */
27
  int multihop;                                /* Number of hops if multihop */
28
  int ttl_security;                        /* Enable TTL security [RFC5082] */
29
  int next_hop_self;                        /* Always set next hop to local IP address */
30
  int next_hop_keep;                        /* Do not touch next hop attribute */
31
  int missing_lladdr;                        /* What we will do when we don' know link-local addr, see MLL_* */
32
  int gw_mode;                                /* How we compute route gateway from next_hop attr, see GW_* */
33
  int compare_path_lengths;                /* Use path lengths when selecting best route */
34
  int med_metric;                        /* Compare MULTI_EXIT_DISC even between routes from differen ASes */
35
  int igp_metric;                        /* Use IGP metrics when selecting best route */
36
  int prefer_older;                        /* Prefer older routes according to RFC 5004 */
37
  int deterministic_med;                /* Use more complicated algo to have strict RFC 4271 MED comparison */
38
  u32 default_local_pref;                /* Default value for LOCAL_PREF attribute */
39
  u32 default_med;                        /* Default value for MULTI_EXIT_DISC attribute */
40
  int capabilities;                        /* Enable capability handshake [RFC3392] */
41
  int enable_refresh;                        /* Enable local support for route refresh [RFC2918] */
42
  int enable_as4;                        /* Enable local support for 4B AS numbers [RFC4893] */
43
  int enable_extended_messages;                /* Enable local support for extended messages [draft] */
44
  u32 rr_cluster_id;                        /* Route reflector cluster ID, if different from local ID */
45
  int rr_client;                        /* Whether neighbor is RR client of me */
46
  int rs_client;                        /* Whether neighbor is RS client of me */
47
  int advertise_ipv4;                        /* Whether we should add IPv4 capability advertisement to OPEN message */
48
  int passive;                                /* Do not initiate outgoing connection */
49
  int interpret_communities;                /* Hardwired handling of well-known communities */
50
  int secondary;                        /* Accept also non-best routes (i.e. RA_ACCEPTED) */
51
  int add_path;                                /* Use ADD-PATH extension [draft] */
52
  int allow_local_as;                        /* Allow that number of local ASNs in incoming AS_PATHs */
53
  int gr_mode;                                /* Graceful restart mode (BGP_GR_*) */
54
  int setkey;                                /* Set MD5 password to system SA/SP database */
55
  unsigned gr_time;                        /* Graceful restart timeout */
56
  unsigned connect_delay_time;                /* Minimum delay between connect attempts */
57
  unsigned connect_retry_time;                /* Timeout for connect attempts */
58
  unsigned hold_time, initial_hold_time;
59
  unsigned keepalive_time;
60
  unsigned error_amnesia_time;                /* Errors are forgotten after */
61
  unsigned error_delay_time_min;        /* Time to wait after an error is detected */
62
  unsigned error_delay_time_max;
63
  unsigned disable_after_error;                /* Disable the protocol when error is detected */
64

    
65
  char *password;                        /* Password used for MD5 authentication */
66
  struct rtable_config *igp_table;        /* Table used for recursive next hop lookups */
67
  int check_link;                        /* Use iface link state for liveness detection */
68
  int bfd;                                /* Use BFD for liveness detection */
69
};
70

    
71
/* Values for bgp_config.missing_lladdr */
#define MLL_SELF 1
#define MLL_DROP 2
#define MLL_IGNORE 3

/* Values for bgp_config.gw_mode */
#define GW_DIRECT 1
#define GW_RECURSIVE 2

/* ADD-PATH directions; ADD_PATH_FULL equals ADD_PATH_RX | ADD_PATH_TX */
#define ADD_PATH_RX 1
#define ADD_PATH_TX 2
#define ADD_PATH_FULL 3

/* Values for bgp_config.gr_mode */
#define BGP_GR_ABLE 1
#define BGP_GR_AWARE 2

/* For peer_gr_flags */
#define BGP_GRF_RESTART 0x80

/* For peer_gr_aflags */
#define BGP_GRF_FORWARDING 0x80
90

    
91

    
92
struct bgp_conn {
93
  struct bgp_proto *bgp;
94
  struct birdsock *sk;
95
  uint state;                                /* State of connection state machine */
96
  struct timer *connect_retry_timer;
97
  struct timer *hold_timer;
98
  struct timer *keepalive_timer;
99
  struct event *tx_ev;
100
  int packets_to_send;                        /* Bitmap of packet types to be sent */
101
  int notify_code, notify_subcode, notify_size;
102
  byte *notify_data;
103
  u32 advertised_as;                        /* Temporary value for AS number received */
104
  int start_state;                        /* protocol start_state snapshot when connection established */
105
  u8 peer_refresh_support;                /* Peer supports route refresh [RFC2918] */
106
  u8 peer_as4_support;                        /* Peer supports 4B AS numbers [RFC4893] */
107
  u8 peer_add_path;                        /* Peer supports ADD-PATH [draft] */
108
  u8 peer_enhanced_refresh_support;        /* Peer supports enhanced refresh [RFC7313] */
109
  u8 peer_gr_aware;
110
  u8 peer_gr_able;
111
  u16 peer_gr_time;
112
  u8 peer_gr_flags;
113
  u8 peer_gr_aflags;
114
  u8 peer_ext_messages_support;                /* Peer supports extended message length [draft] */
115
  unsigned hold_time, keepalive_time;        /* Times calculated from my and neighbor's requirements */
116
};
117

    
118
struct bgp_proto {
119
  struct proto p;
120
  struct bgp_config *cf;                /* Shortcut to BGP configuration */
121
  u32 local_as, remote_as;
122
  int start_state;                        /* Substates that partitions BS_START */
123
  u8 is_internal;                        /* Internal BGP connection (local_as == remote_as) */
124
  u8 as4_session;                        /* Session uses 4B AS numbers in AS_PATH (both sides support it) */
125
  u8 add_path_rx;                        /* Session expects receive of ADD-PATH extended NLRI */
126
  u8 add_path_tx;                        /* Session expects transmit of ADD-PATH extended NLRI */
127
  u8 ext_messages;                        /* Session allows to use extended messages (both sides support it) */
128
  u32 local_id;                                /* BGP identifier of this router */
129
  u32 remote_id;                        /* BGP identifier of the neighbor */
130
  u32 rr_cluster_id;                        /* Route reflector cluster ID */
131
  int rr_client;                        /* Whether neighbor is RR client of me */
132
  int rs_client;                        /* Whether neighbor is RS client of me */
133
  u8 gr_ready;                                /* Neighbor could do graceful restart */
134
  u8 gr_active;                                /* Neighbor is doing graceful restart */
135
  u8 feed_state;                        /* Feed state (TX) for EoR, RR packets, see BFS_* */
136
  u8 load_state;                        /* Load state (RX) for EoR, RR packets, see BFS_* */
137
  struct bgp_conn *conn;                /* Connection we have established */
138
  struct bgp_conn outgoing_conn;        /* Outgoing connection we're working with */
139
  struct bgp_conn incoming_conn;        /* Incoming connection we have neither accepted nor rejected yet */
140
  struct object_lock *lock;                /* Lock for neighbor connection */
141
  struct neighbor *neigh;                /* Neighbor entry corresponding to remote ip, NULL if multihop */
142
  struct bfd_request *bfd_req;                /* BFD request, if BFD is used */
143
  ip_addr source_addr;                        /* Local address used as an advertised next hop */
144
  rtable *igp_table;                        /* Table used for recursive next hop lookups */
145
  struct event *event;                        /* Event for respawning and shutting process */
146
  struct timer *startup_timer;                /* Timer used to delay protocol startup due to previous errors (startup_delay) */
147
  struct timer *gr_timer;                /* Timer waiting for reestablishment after graceful restart */
148
  struct bgp_bucket **bucket_hash;        /* Hash table of attribute buckets */
149
  uint hash_size, hash_count, hash_limit;
150
  HASH(struct bgp_prefix) prefix_hash;        /* Prefixes to be sent */
151
  slab *prefix_slab;                        /* Slab holding prefix nodes */
152
  list bucket_queue;                        /* Queue of buckets to send */
153
  struct bgp_bucket *withdraw_bucket;        /* Withdrawn routes */
154
  unsigned startup_delay;                /* Time to delay protocol startup by due to errors */
155
  bird_clock_t last_proto_error;        /* Time of last error that leads to protocol stop */
156
  u8 last_error_class;                         /* Error class of last error */
157
  u32 last_error_code;                        /* Error code of last error. BGP protocol errors
158
                                           are encoded as (bgp_err_code << 16 | bgp_err_subcode) */
159
#ifdef IPV6
160
  byte *mp_reach_start, *mp_unreach_start; /* Multiprotocol BGP attribute notes */
161
  unsigned mp_reach_len, mp_unreach_len;
162
  ip_addr local_link;                        /* Link-level version of source_addr */
163
#endif
164
};
165

    
166
struct bgp_prefix {
167
  struct {
168
    ip_addr prefix;
169
    int pxlen;
170
  } n;
171
  u32 path_id;
172
  struct bgp_prefix *next;
173
  node bucket_node;                        /* Node in per-bucket list */
174
};
175

    
176
struct bgp_bucket {
177
  node send_node;                        /* Node in send queue */
178
  struct bgp_bucket *hash_next, *hash_prev;        /* Node in bucket hash table */
179
  unsigned hash;                        /* Hash over extended attributes */
180
  list prefixes;                        /* Prefixes in this buckets */
181
  ea_list eattrs[0];                        /* Per-bucket extended attributes */
182
};
183

    
184
/* Protocol constants */
#define BGP_PORT		179
#define BGP_VERSION		4
#define BGP_HEADER_LENGTH	19

/* Message length limits: standard and extended-message variants */
#define BGP_MAX_MESSAGE_LENGTH	4096
#define BGP_MAX_EXT_MSG_LENGTH	65535

/* Buffer sizes: standard and extended-message variants */
#define BGP_RX_BUFFER_SIZE	4096
#define BGP_TX_BUFFER_SIZE	4096
#define BGP_RX_BUFFER_EXT_SIZE	65535
#define BGP_TX_BUFFER_EXT_SIZE	65535
193

    
194
static inline int bgp_max_packet_length(struct bgp_proto *p)
195
{ return p->ext_messages ? BGP_MAX_EXT_MSG_LENGTH : BGP_MAX_MESSAGE_LENGTH; }
196

    
197
extern struct linpool *bgp_linpool;
198

    
199

    
200
void bgp_start_timer(struct timer *t, int value);
201
void bgp_check_config(struct bgp_config *c);
202
void bgp_error(struct bgp_conn *c, unsigned code, unsigned subcode, byte *data, int len);
203
void bgp_close_conn(struct bgp_conn *c);
204
void bgp_update_startup_delay(struct bgp_proto *p);
205
void bgp_conn_enter_openconfirm_state(struct bgp_conn *conn);
206
void bgp_conn_enter_established_state(struct bgp_conn *conn);
207
void bgp_conn_enter_close_state(struct bgp_conn *conn);
208
void bgp_conn_enter_idle_state(struct bgp_conn *conn);
209
void bgp_handle_graceful_restart(struct bgp_proto *p);
210
void bgp_graceful_restart_done(struct bgp_proto *p);
211
void bgp_refresh_begin(struct bgp_proto *p);
212
void bgp_refresh_end(struct bgp_proto *p);
213
void bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code);
214
void bgp_stop(struct bgp_proto *p, unsigned subcode);
215

    
216
struct rte_source *bgp_find_source(struct bgp_proto *p, u32 path_id);
217
struct rte_source *bgp_get_source(struct bgp_proto *p, u32 path_id);
218

    
219

    
220

    
221
/*
 * Trace logging helpers.
 *
 * Both macros read `p->p.debug` and `p->p.name`, so a suitable pointer named
 * `p` must be in scope where they are expanded. A message is logged when any
 * bit of `flags` is set in p->p.debug, or unconditionally when the file is
 * compiled with LOCAL_DEBUG.
 *
 * `flags` is parenthesized in the expansion so that compound arguments such
 * as `A | B` are masked as a whole; without the parentheses `debug & A | B`
 * would be true whenever B is nonzero.
 */
#ifdef LOCAL_DEBUG
#define BGP_FORCE_DEBUG 1
#else
#define BGP_FORCE_DEBUG 0
#endif
#define BGP_TRACE(flags, msg, args...) do { if ((p->p.debug & (flags)) || BGP_FORCE_DEBUG) \
        log(L_TRACE "%s: " msg, p->p.name , ## args ); } while(0)

/* Same as BGP_TRACE, but goes through log_rl() with the rate limiter `rl`. */
#define BGP_TRACE_RL(rl, flags, msg, args...) do { if ((p->p.debug & (flags)) || BGP_FORCE_DEBUG) \
        log_rl(rl, L_TRACE "%s: " msg, p->p.name , ## args ); } while(0)
231

    
232

    
233
/* attrs.c */
234

    
235
/* Hack: although BA_NEXT_HOP attribute has type EAF_TYPE_IP_ADDRESS, in IPv6
236
 * we store two addesses in it - a global address and a link local address.
237
 */
238
#ifdef IPV6
239
#define NEXT_HOP_LENGTH (2*sizeof(ip_addr))
240
static inline void set_next_hop(byte *b, ip_addr addr) { ((ip_addr *) b)[0] = addr; ((ip_addr *) b)[1] = IPA_NONE; }
241
#else
242
#define NEXT_HOP_LENGTH sizeof(ip_addr)
243
static inline void set_next_hop(byte *b, ip_addr addr) { ((ip_addr *) b)[0] = addr; }
244
#endif
245

    
246
void bgp_attach_attr(struct ea_list **to, struct linpool *pool, unsigned attr, uintptr_t val);
247
byte *bgp_attach_attr_wa(struct ea_list **to, struct linpool *pool, unsigned attr, unsigned len);
248
struct rta *bgp_decode_attrs(struct bgp_conn *conn, byte *a, uint len, struct linpool *pool, int mandatory);
249
int bgp_get_attr(struct eattr *e, byte *buf, int buflen);
250
int bgp_rte_better(struct rte *, struct rte *);
251
int bgp_rte_mergable(rte *pri, rte *sec);
252
int bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best);
253
void bgp_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old UNUSED, ea_list *attrs);
254
int bgp_import_control(struct proto *, struct rte **, struct ea_list **, struct linpool *);
255
void bgp_init_bucket_table(struct bgp_proto *);
256
void bgp_free_bucket(struct bgp_proto *p, struct bgp_bucket *buck);
257
void bgp_init_prefix_table(struct bgp_proto *p, u32 order);
258
void bgp_free_prefix(struct bgp_proto *p, struct bgp_prefix *bp);
259
uint bgp_encode_attrs(struct bgp_proto *p, byte *w, ea_list *attrs, int remains);
260
void bgp_get_route_info(struct rte *, byte *buf, struct ea_list *attrs);
261

    
262
/*
 * Attach attribute @attr holding the IP address @a: obtains a
 * sizeof(ip_addr)-byte data area from bgp_attach_attr_wa() and stores @a
 * there.
 *
 * NOTE(review): the returned area is written through an ip_addr pointer, so
 * it is assumed to be non-NULL and suitably aligned -- confirm against
 * bgp_attach_attr_wa().
 */
static inline void bgp_attach_attr_ip(struct ea_list **to, struct linpool *pool, unsigned attr, ip_addr a)
{
  byte *data = bgp_attach_attr_wa(to, pool, attr, sizeof(ip_addr));

  *(ip_addr *) data = a;
}
264

    
265
/* packets.c */
266

    
267
void mrt_dump_bgp_state_change(struct bgp_conn *conn, unsigned old, unsigned new);
268
void bgp_schedule_packet(struct bgp_conn *conn, int type);
269
void bgp_kick_tx(void *vconn);
270
void bgp_tx(struct birdsock *sk);
271
int bgp_rx(struct birdsock *sk, int size);
272
const char * bgp_error_dsc(unsigned code, unsigned subcode);
273
void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsigned subcode, byte *data, unsigned len);
274

    
275
/* Packet types */

#define PKT_OPEN		0x01
#define PKT_UPDATE		0x02
#define PKT_NOTIFICATION	0x03
#define PKT_KEEPALIVE		0x04
#define PKT_ROUTE_REFRESH	0x05	/* [RFC2918] */
#define PKT_BEGIN_REFRESH	0x1e	/* Dummy type for BoRR packet [RFC7313] */
#define PKT_SCHEDULE_CLOSE	0x1f	/* Used internally to schedule socket close */
284

    
285
/* Attributes */

/* Attribute flag bits */
#define BAF_OPTIONAL		0x80
#define BAF_TRANSITIVE		0x40
#define BAF_PARTIAL		0x20
#define BAF_EXT_LEN		0x10

/*
 * Attribute type codes.
 *
 * The two-letter tags on the right are presumably the attribute category
 * (WM = well-known mandatory, WD = well-known discretionary, OT = optional
 * transitive, ON = optional non-transitive) -- confirm against RFC 4271.
 */
#define BA_ORIGIN		0x01	/* [RFC1771] */		/* WM */
#define BA_AS_PATH		0x02				/* WM */
#define BA_NEXT_HOP		0x03				/* WM */
#define BA_MULTI_EXIT_DISC	0x04				/* ON */
#define BA_LOCAL_PREF		0x05				/* WD */
#define BA_ATOMIC_AGGR		0x06				/* WD */
#define BA_AGGREGATOR		0x07				/* OT */
#define BA_COMMUNITY		0x08	/* [RFC1997] */		/* OT */
#define BA_ORIGINATOR_ID	0x09	/* [RFC1966] */		/* ON */
#define BA_CLUSTER_LIST		0x0a				/* ON */
/* We don't support these: */
#define BA_DPA			0x0b	/* ??? */
#define BA_ADVERTISER		0x0c	/* [RFC1863] */
#define BA_RCID_PATH		0x0d
#define BA_MP_REACH_NLRI	0x0e	/* [RFC2283] */
#define BA_MP_UNREACH_NLRI	0x0f
#define BA_EXT_COMMUNITY	0x10	/* [RFC4360] */
#define BA_AS4_PATH		0x11	/* [RFC4893] */
#define BA_AS4_AGGREGATOR	0x12
#define BA_LARGE_COMMUNITY	0x20	/* [draft-ietf-idr-large-community] */
312

    
313
/* BGP connection states */

#define BS_IDLE			0
#define BS_CONNECT		1	/* Attempting to connect */
#define BS_ACTIVE		2	/* Waiting for connection retry & listening */
#define BS_OPENSENT		3
#define BS_OPENCONFIRM		4
#define BS_ESTABLISHED		5
#define BS_CLOSE		6	/* Used during transition to BS_IDLE */

#define BS_MAX			7	/* One past the highest state value */
324

    
325
/* BGP start states
 *
 * Used in PS_START for fine-grained specification of starting state.
 *
 * When the BGP protocol is started by the core, it goes to BSS_PREPARE. When
 * the BGP protocol has done what is necessary to start itself (like acquiring
 * the lock), it goes to BSS_CONNECT.  When some connection attempt failed
 * because of an option or capability error, it goes to BSS_CONNECT_NOCAP.
 */

#define BSS_PREPARE		0	/* Used before ordinary BGP started, i. e. waiting for lock */
#define BSS_DELAY		1	/* Startup delay due to previous errors */
#define BSS_CONNECT		2	/* Ordinary BGP connecting */
#define BSS_CONNECT_NOCAP	3	/* Legacy BGP connecting (without capabilities) */
339

    
340

    
341
/* BGP feed states (TX)
 *
 * RFC 4724 specifies that an initial feed should end with End-of-RIB mark.
 *
 * RFC 7313 specifies that a route refresh should be demarcated by BoRR and EoRR packets.
 *
 * These states (stored in p->feed_state) are used to keep track of these
 * requirements. When such a feed is started, BFS_LOADING / BFS_REFRESHING is
 * set. When it has ended, BFS_LOADED / BFS_REFRESHED is set to schedule the
 * End-of-RIB or EoRR packet. When the packet is sent, the state returns to
 * BFS_NONE.
 *
 * Note that when a non-demarcated feed (e.g. plain RFC 4271 initial load
 * without End-of-RIB or plain RFC 2918 route refresh without BoRR/EoRR
 * demarcation) is active, BFS_NONE is set.
 *
 * BFS_NONE, BFS_LOADING and BFS_REFRESHING are also used as load states (RX)
 * with corresponding semantics (-, expecting End-of-RIB, expecting EoRR).
 */

#define BFS_NONE		0	/* No feed or original non-demarcated feed */
#define BFS_LOADING		1	/* Initial feed active, End-of-RIB planned */
#define BFS_LOADED		2	/* Loading done, End-of-RIB marker scheduled */
#define BFS_REFRESHING		3	/* Route refresh (introduced by BoRR) active */
#define BFS_REFRESHED		4	/* Refresh done, EoRR packet scheduled */
365

    
366

    
367
/* Error classes */

#define BE_NONE			0
#define BE_MISC			1	/* Miscellaneous error */
#define BE_SOCKET		2	/* Socket error */
#define BE_BGP_RX		3	/* BGP protocol error notification received */
#define BE_BGP_TX		4	/* BGP protocol error notification sent */
#define BE_AUTO_DOWN		5	/* Automatic shutdown */
#define BE_MAN_DOWN		6	/* Manual shutdown */

/* Misc error codes */

#define BEM_NEIGHBOR_LOST	1
#define BEM_INVALID_NEXT_HOP	2
#define BEM_INVALID_MD5		3	/* MD5 authentication kernel request failed (possibly not supported) */
#define BEM_NO_SOCKET		4
#define BEM_LINK_DOWN		5
#define BEM_BFD_DOWN		6
#define BEM_GRACEFUL_RESTART	7

/* Automatic shutdown error codes */

#define BEA_ROUTE_LIMIT_EXCEEDED 1
390

    
391
/* Well-known communities */

#define BGP_COMM_NO_EXPORT		0xffffff01	/* Don't export outside local AS / confed. */
#define BGP_COMM_NO_ADVERTISE		0xffffff02	/* Don't export at all */
#define BGP_COMM_NO_EXPORT_SUBCONFED	0xffffff03	/* NO_EXPORT even in local confederation */

/* Origins */

#define ORIGIN_IGP		0
#define ORIGIN_EGP		1
#define ORIGIN_INCOMPLETE	2
402

    
403
/* Address families */

#define BGP_AF_IPV4		1
#define BGP_AF_IPV6		2

/* BGP_AF is the address family of this build, chosen by the IPV6 macro */
#ifdef IPV6
#define BGP_AF BGP_AF_IPV6
#else
#define BGP_AF BGP_AF_IPV4
#endif
413

    
414
#endif