Statistics
| Branch: | Revision:

iof-bird-daemon / proto / bgp / bgp.h @ ae80a2de

History | View | Annotate | Download (16.1 KB)

1
/*
2
 *        BIRD -- The Border Gateway Protocol
3
 *
4
 *        (c) 2000 Martin Mares <mj@ucw.cz>
5
 *
6
 *        Can be freely distributed and used under the terms of the GNU GPL.
7
 */
8

    
9
#ifndef _BIRD_BGP_H_
10
#define _BIRD_BGP_H_
11

    
12
#include <stdint.h>
13
#include "nest/route.h"
14
#include "nest/bfd.h"
15
#include "lib/hash.h"
16

    
17
struct linpool;
18
struct eattr;
19

    
20
struct bgp_config {
21
  struct proto_config c;
22
  u32 local_as, remote_as;
23
  ip_addr remote_ip;
24
  ip_addr source_addr;                        /* Source address to use */
25
  struct iface *iface;                        /* Interface for link-local addresses */
26
  u16 remote_port;                         /* Neighbor destination port */
27
  int multihop;                                /* Number of hops if multihop */
28
  int ttl_security;                        /* Enable TTL security [RFC5082] */
29
  int next_hop_self;                        /* Always set next hop to local IP address */
30
  int next_hop_keep;                        /* Do not touch next hop attribute */
31
  int missing_lladdr;                        /* What we will do when we don' know link-local addr, see MLL_* */
32
  int gw_mode;                                /* How we compute route gateway from next_hop attr, see GW_* */
33
  int compare_path_lengths;                /* Use path lengths when selecting best route */
34
  int med_metric;                        /* Compare MULTI_EXIT_DISC even between routes from differen ASes */
35
  int igp_metric;                        /* Use IGP metrics when selecting best route */
36
  int prefer_older;                        /* Prefer older routes according to RFC 5004 */
37
  int deterministic_med;                /* Use more complicated algo to have strict RFC 4271 MED comparison */
38
  u32 default_local_pref;                /* Default value for LOCAL_PREF attribute */
39
  u32 default_med;                        /* Default value for MULTI_EXIT_DISC attribute */
40
  int capabilities;                        /* Enable capability handshake [RFC3392] */
41
  int enable_refresh;                        /* Enable local support for route refresh [RFC2918] */
42
  int enable_as4;                        /* Enable local support for 4B AS numbers [RFC4893] */
43
  u32 rr_cluster_id;                        /* Route reflector cluster ID, if different from local ID */
44
  int rr_client;                        /* Whether neighbor is RR client of me */
45
  int rs_client;                        /* Whether neighbor is RS client of me */
46
  int advertise_ipv4;                        /* Whether we should add IPv4 capability advertisement to OPEN message */
47
  int passive;                                /* Do not initiate outgoing connection */
48
  int interpret_communities;                /* Hardwired handling of well-known communities */
49
  int secondary;                        /* Accept also non-best routes (i.e. RA_ACCEPTED) */
50
  int add_path;                                /* Use ADD-PATH extension [draft] */
51
  int allow_local_as;                        /* Allow that number of local ASNs in incoming AS_PATHs */
52
  int gr_mode;                                /* Graceful restart mode (BGP_GR_*) */
53
  unsigned gr_time;                        /* Graceful restart timeout */
54
  unsigned connect_delay_time;                /* Minimum delay between connect attempts */
55
  unsigned connect_retry_time;                /* Timeout for connect attempts */
56
  unsigned hold_time, initial_hold_time;
57
  unsigned keepalive_time;
58
  unsigned error_amnesia_time;                /* Errors are forgotten after */
59
  unsigned error_delay_time_min;        /* Time to wait after an error is detected */
60
  unsigned error_delay_time_max;
61
  unsigned disable_after_error;                /* Disable the protocol when error is detected */
62

    
63
  char *password;                        /* Password used for MD5 authentication */
64
  struct rtable_config *igp_table;        /* Table used for recursive next hop lookups */
65
  int check_link;                        /* Use iface link state for liveness detection */
66
  int bfd;                                /* Use BFD for liveness detection */
67
};
68

    
69
#define MLL_SELF 1
70
#define MLL_DROP 2
71
#define MLL_IGNORE 3
72

    
73
#define GW_DIRECT 1
74
#define GW_RECURSIVE 2
75

    
76
#define ADD_PATH_RX 1
77
#define ADD_PATH_TX 2
78
#define ADD_PATH_FULL 3
79

    
80
#define BGP_GR_ABLE 1
81
#define BGP_GR_AWARE 2
82

    
83
/* For peer_gr_flags */
84
#define BGP_GRF_RESTART 0x80
85

    
86
/* For peer_gr_aflags */
87
#define BGP_GRF_FORWARDING 0x80
88

    
89

    
90
struct bgp_conn {
91
  struct bgp_proto *bgp;
92
  struct birdsock *sk;
93
  uint state;                                /* State of connection state machine */
94
  struct timer *connect_retry_timer;
95
  struct timer *hold_timer;
96
  struct timer *keepalive_timer;
97
  struct event *tx_ev;
98
  int packets_to_send;                        /* Bitmap of packet types to be sent */
99
  int notify_code, notify_subcode, notify_size;
100
  byte *notify_data;
101
  u32 advertised_as;                        /* Temporary value for AS number received */
102
  int start_state;                        /* protocol start_state snapshot when connection established */
103
  u8 peer_refresh_support;                /* Peer supports route refresh [RFC2918] */
104
  u8 peer_as4_support;                        /* Peer supports 4B AS numbers [RFC4893] */
105
  u8 peer_add_path;                        /* Peer supports ADD-PATH [draft] */
106
  u8 peer_enhanced_refresh_support;        /* Peer supports enhanced refresh [RFC7313] */
107
  u8 peer_gr_aware;
108
  u8 peer_gr_able;
109
  u16 peer_gr_time;
110
  u8 peer_gr_flags;
111
  u8 peer_gr_aflags;
112
  unsigned hold_time, keepalive_time;        /* Times calculated from my and neighbor's requirements */
113
};
114

    
115
struct bgp_proto {
116
  struct proto p;
117
  struct bgp_config *cf;                /* Shortcut to BGP configuration */
118
  u32 local_as, remote_as;
119
  int start_state;                        /* Substates that partitions BS_START */
120
  u8 is_internal;                        /* Internal BGP connection (local_as == remote_as) */
121
  u8 as4_session;                        /* Session uses 4B AS numbers in AS_PATH (both sides support it) */
122
  u8 add_path_rx;                        /* Session expects receive of ADD-PATH extended NLRI */
123
  u8 add_path_tx;                        /* Session expects transmit of ADD-PATH extended NLRI */
124
  u32 local_id;                                /* BGP identifier of this router */
125
  u32 remote_id;                        /* BGP identifier of the neighbor */
126
  u32 rr_cluster_id;                        /* Route reflector cluster ID */
127
  int rr_client;                        /* Whether neighbor is RR client of me */
128
  int rs_client;                        /* Whether neighbor is RS client of me */
129
  u8 gr_ready;                                /* Neighbor could do graceful restart */
130
  u8 gr_active;                                /* Neighbor is doing graceful restart */
131
  u8 feed_state;                        /* Feed state (TX) for EoR, RR packets, see BFS_* */
132
  u8 load_state;                        /* Load state (RX) for EoR, RR packets, see BFS_* */
133
  struct bgp_conn *conn;                /* Connection we have established */
134
  struct bgp_conn outgoing_conn;        /* Outgoing connection we're working with */
135
  struct bgp_conn incoming_conn;        /* Incoming connection we have neither accepted nor rejected yet */
136
  struct object_lock *lock;                /* Lock for neighbor connection */
137
  struct neighbor *neigh;                /* Neighbor entry corresponding to remote ip, NULL if multihop */
138
  struct bfd_request *bfd_req;                /* BFD request, if BFD is used */
139
  ip_addr source_addr;                        /* Local address used as an advertised next hop */
140
  rtable *igp_table;                        /* Table used for recursive next hop lookups */
141
  struct event *event;                        /* Event for respawning and shutting process */
142
  struct timer *startup_timer;                /* Timer used to delay protocol startup due to previous errors (startup_delay) */
143
  struct timer *gr_timer;                /* Timer waiting for reestablishment after graceful restart */
144
  struct bgp_bucket **bucket_hash;        /* Hash table of attribute buckets */
145
  uint hash_size, hash_count, hash_limit;
146
  HASH(struct bgp_prefix) prefix_hash;        /* Prefixes to be sent */
147
  slab *prefix_slab;                        /* Slab holding prefix nodes */
148
  list bucket_queue;                        /* Queue of buckets to send */
149
  struct bgp_bucket *withdraw_bucket;        /* Withdrawn routes */
150
  unsigned startup_delay;                /* Time to delay protocol startup by due to errors */
151
  bird_clock_t last_proto_error;        /* Time of last error that leads to protocol stop */
152
  u8 last_error_class;                         /* Error class of last error */
153
  u32 last_error_code;                        /* Error code of last error. BGP protocol errors
154
                                           are encoded as (bgp_err_code << 16 | bgp_err_subcode) */
155
#ifdef IPV6
156
  byte *mp_reach_start, *mp_unreach_start; /* Multiprotocol BGP attribute notes */
157
  unsigned mp_reach_len, mp_unreach_len;
158
  ip_addr local_link;                        /* Link-level version of source_addr */
159
#endif
160
};
161

    
162
struct bgp_prefix {
163
  struct {
164
    ip_addr prefix;
165
    int pxlen;
166
  } n;
167
  u32 path_id;
168
  struct bgp_prefix *next;
169
  node bucket_node;                        /* Node in per-bucket list */
170
};
171

    
172
struct bgp_bucket {
173
  node send_node;                        /* Node in send queue */
174
  struct bgp_bucket *hash_next, *hash_prev;        /* Node in bucket hash table */
175
  unsigned hash;                        /* Hash over extended attributes */
176
  list prefixes;                        /* Prefixes in this buckets */
177
  ea_list eattrs[0];                        /* Per-bucket extended attributes */
178
};
179

    
180
#define BGP_PORT                179
181
#define BGP_VERSION                4
182
#define BGP_HEADER_LENGTH        19
183
#define BGP_MAX_PACKET_LENGTH        4096
184
#define BGP_RX_BUFFER_SIZE        4096
185
#define BGP_TX_BUFFER_SIZE        BGP_MAX_PACKET_LENGTH
186

    
187
extern struct linpool *bgp_linpool;
188

    
189

    
190
void bgp_start_timer(struct timer *t, int value);
191
void bgp_check_config(struct bgp_config *c);
192
void bgp_error(struct bgp_conn *c, unsigned code, unsigned subcode, byte *data, int len);
193
void bgp_close_conn(struct bgp_conn *c);
194
void bgp_update_startup_delay(struct bgp_proto *p);
195
void bgp_conn_enter_openconfirm_state(struct bgp_conn *conn);
196
void bgp_conn_enter_established_state(struct bgp_conn *conn);
197
void bgp_conn_enter_close_state(struct bgp_conn *conn);
198
void bgp_conn_enter_idle_state(struct bgp_conn *conn);
199
void bgp_handle_graceful_restart(struct bgp_proto *p);
200
void bgp_graceful_restart_done(struct bgp_proto *p);
201
void bgp_refresh_begin(struct bgp_proto *p);
202
void bgp_refresh_end(struct bgp_proto *p);
203
void bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code);
204
void bgp_stop(struct bgp_proto *p, unsigned subcode);
205

    
206
struct rte_source *bgp_find_source(struct bgp_proto *p, u32 path_id);
207
struct rte_source *bgp_get_source(struct bgp_proto *p, u32 path_id);
208

    
209

    
210

    
211
/*
 * Tracing helpers.
 *
 * BGP_FORCE_DEBUG is 1 when LOCAL_DEBUG is defined and 0 otherwise; when it
 * is 1 the trace macros log unconditionally.
 *
 * BGP_TRACE(flags, msg, args...) and BGP_TRACE_RL(rl, flags, msg, args...)
 * expect a variable named `p` to be in scope at the call site, with
 * p->p.debug and p->p.name accessible. A message is emitted through log()
 * (resp. log_rl() with the extra `rl` argument) when any bit of `flags` is
 * set in p->p.debug. `msg` must be a string literal, as it is concatenated
 * with the "%s: " prefix that prints p->p.name.
 *
 * The `flags` argument is parenthesized in the expansion so that an
 * expression such as (A | B) is masked as a whole; without the parentheses
 * `debug & A | B` would be parsed as `(debug & A) | B`.
 */
#ifdef LOCAL_DEBUG
#define BGP_FORCE_DEBUG 1
#else
#define BGP_FORCE_DEBUG 0
#endif

#define BGP_TRACE(flags, msg, args...) do { if ((p->p.debug & (flags)) || BGP_FORCE_DEBUG) \
        log(L_TRACE "%s: " msg, p->p.name , ## args ); } while(0)

#define BGP_TRACE_RL(rl, flags, msg, args...) do { if ((p->p.debug & (flags)) || BGP_FORCE_DEBUG) \
        log_rl(rl, L_TRACE "%s: " msg, p->p.name , ## args ); } while(0)
221

    
222

    
223
/* attrs.c */
224

    
225
/* Hack: although BA_NEXT_HOP attribute has type EAF_TYPE_IP_ADDRESS, in IPv6
226
 * we store two addresses in it - a global address and a link local address.
227
 */
228
#ifdef IPV6
229
#define NEXT_HOP_LENGTH (2*sizeof(ip_addr))
230
static inline void set_next_hop(byte *b, ip_addr addr) { ((ip_addr *) b)[0] = addr; ((ip_addr *) b)[1] = IPA_NONE; }
231
#else
232
#define NEXT_HOP_LENGTH sizeof(ip_addr)
233
static inline void set_next_hop(byte *b, ip_addr addr) { ((ip_addr *) b)[0] = addr; }
234
#endif
235

    
236
void bgp_attach_attr(struct ea_list **to, struct linpool *pool, unsigned attr, uintptr_t val);
237
byte *bgp_attach_attr_wa(struct ea_list **to, struct linpool *pool, unsigned attr, unsigned len);
238
struct rta *bgp_decode_attrs(struct bgp_conn *conn, byte *a, uint len, struct linpool *pool, int mandatory);
239
int bgp_get_attr(struct eattr *e, byte *buf, int buflen);
240
int bgp_rte_better(struct rte *, struct rte *);
241
int bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best);
242
void bgp_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old UNUSED, ea_list *attrs);
243
int bgp_import_control(struct proto *, struct rte **, struct ea_list **, struct linpool *);
244
void bgp_init_bucket_table(struct bgp_proto *);
245
void bgp_free_bucket(struct bgp_proto *p, struct bgp_bucket *buck);
246
void bgp_init_prefix_table(struct bgp_proto *p, u32 order);
247
void bgp_free_prefix(struct bgp_proto *p, struct bgp_prefix *bp);
248
uint bgp_encode_attrs(struct bgp_proto *p, byte *w, ea_list *attrs, int remains);
249
void bgp_get_route_info(struct rte *, byte *buf, struct ea_list *attrs);
250

    
251
/*
 * Attach attribute attr holding the IP address a: obtain a buffer of
 * sizeof(ip_addr) bytes from bgp_attach_attr_wa() and store the address
 * into it.
 */
static inline void
bgp_attach_attr_ip(struct ea_list **to, struct linpool *pool, unsigned attr, ip_addr a)
{
  ip_addr *dst = (ip_addr *) bgp_attach_attr_wa(to, pool, attr, sizeof(ip_addr));

  *dst = a;
}
253

    
254
/* packets.c */
255

    
256
void mrt_dump_bgp_state_change(struct bgp_conn *conn, unsigned old, unsigned new);
257
void bgp_schedule_packet(struct bgp_conn *conn, int type);
258
void bgp_kick_tx(void *vconn);
259
void bgp_tx(struct birdsock *sk);
260
int bgp_rx(struct birdsock *sk, int size);
261
const char * bgp_error_dsc(unsigned code, unsigned subcode);
262
void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsigned subcode, byte *data, unsigned len);
263

    
264
/* Packet types */
265

    
266
#define PKT_OPEN                0x01
267
#define PKT_UPDATE                0x02
268
#define PKT_NOTIFICATION        0x03
269
#define PKT_KEEPALIVE                0x04
270
#define PKT_ROUTE_REFRESH        0x05        /* [RFC2918] */
271
#define PKT_BEGIN_REFRESH        0x1e        /* Dummy type for BoRR packet [RFC7313] */
272
#define PKT_SCHEDULE_CLOSE        0x1f        /* Used internally to schedule socket close */
273

    
274
/* Attributes */
275

    
276
#define BAF_OPTIONAL                0x80
277
#define BAF_TRANSITIVE                0x40
278
#define BAF_PARTIAL                0x20
279
#define BAF_EXT_LEN                0x10
280

    
281
#define BA_ORIGIN                0x01        /* [RFC1771] */                /* WM */
282
#define BA_AS_PATH                0x02                                /* WM */
283
#define BA_NEXT_HOP                0x03                                /* WM */
284
#define BA_MULTI_EXIT_DISC        0x04                                /* ON */
285
#define BA_LOCAL_PREF                0x05                                /* WD */
286
#define BA_ATOMIC_AGGR                0x06                                /* WD */
287
#define BA_AGGREGATOR                0x07                                /* OT */
288
#define BA_COMMUNITY                0x08        /* [RFC1997] */                /* OT */
289
#define BA_ORIGINATOR_ID        0x09        /* [RFC1966] */                /* ON */
290
#define BA_CLUSTER_LIST                0x0a                                /* ON */
291
/* We don't support these: */
292
#define BA_DPA                        0x0b        /* ??? */
293
#define BA_ADVERTISER                0x0c        /* [RFC1863] */
294
#define BA_RCID_PATH                0x0d
295
#define BA_MP_REACH_NLRI        0x0e        /* [RFC2283] */
296
#define BA_MP_UNREACH_NLRI        0x0f
297
#define BA_EXT_COMMUNITY        0x10        /* [RFC4360] */
298
#define BA_AS4_PATH             0x11    /* [RFC4893] */
299
#define BA_AS4_AGGREGATOR       0x12
300

    
301
/* BGP connection states */
302

    
303
#define BS_IDLE                        0
304
#define BS_CONNECT                1        /* Attempting to connect */
305
#define BS_ACTIVE                2        /* Waiting for connection retry & listening */
306
#define BS_OPENSENT                3
307
#define BS_OPENCONFIRM                4
308
#define BS_ESTABLISHED                5
309
#define BS_CLOSE                6        /* Used during transition to BS_IDLE */
310

    
311
#define BS_MAX                        7
312

    
313
/* BGP start states
314
 *
315
 * Used in PS_START for fine-grained specification of starting state.
316
 *
317
 * When BGP protocol is started by core, it goes to BSS_PREPARE. When BGP
318
 * protocol has done what is necessary to start itself (like acquiring the lock),
319
 * it goes to BSS_CONNECT.  When some connection attempt failed because of
320
 * option or capability error, it goes to BSS_CONNECT_NOCAP.
321
 */
322

    
323
#define BSS_PREPARE                0        /* Used before ordinary BGP started, i. e. waiting for lock */
324
#define BSS_DELAY                1        /* Startup delay due to previous errors */
325
#define BSS_CONNECT                2        /* Ordinary BGP connecting */
326
#define BSS_CONNECT_NOCAP        3        /* Legacy BGP connecting (without capabilities) */
327

    
328

    
329
/* BGP feed states (TX)
330
 *
331
 * RFC 4724 specifies that an initial feed should end with End-of-RIB mark.
332
 *
333
 * RFC 7313 specifies that a route refresh should be demarcated by BoRR and EoRR packets.
334
 *
335
 * These states (stored in p->feed_state) are used to keep track of these
336
 * requirements. When such feed is started, BFS_LOADING / BFS_REFRESHING is
337
 * set. When it ended, BFS_LOADED / BFS_REFRESHED is set to schedule End-of-RIB
338
 * or EoRR packet. When the packet is sent, the state returns to BFS_NONE.
339
 *
340
 * Note that when a non-demarcated feed (e.g. plain RFC 4271 initial load
341
 * without End-of-RIB or plain RFC 2918 route refresh without BoRR/EoRR
342
 * demarcation) is active, BFS_NONE is set.
343
 *
344
 * BFS_NONE, BFS_LOADING and BFS_REFRESHING are also used as load states (RX)
345
 * with corresponding semantics (-, expecting End-of-RIB, expecting EoRR).
346
 */
347

    
348
#define BFS_NONE                0        /* No feed or original non-demarcated feed */
349
#define BFS_LOADING                1        /* Initial feed active, End-of-RIB planned */
350
#define BFS_LOADED                2        /* Loading done, End-of-RIB marker scheduled */
351
#define BFS_REFRESHING                3        /* Route refresh (introduced by BoRR) active */
352
#define BFS_REFRESHED                4        /* Refresh done, EoRR packet scheduled */
353

    
354

    
355
/* Error classes */
356

    
357
#define BE_NONE                        0
358
#define BE_MISC                        1        /* Miscellaneous error */
359
#define BE_SOCKET                2        /* Socket error */
360
#define BE_BGP_RX                3        /* BGP protocol error notification received */
361
#define BE_BGP_TX                4        /* BGP protocol error notification sent */
362
#define BE_AUTO_DOWN                5        /* Automatic shutdown */
363
#define BE_MAN_DOWN                6        /* Manual shutdown */
364

    
365
/* Misc error codes */
366

    
367
#define BEM_NEIGHBOR_LOST        1
368
#define BEM_INVALID_NEXT_HOP        2
369
#define BEM_INVALID_MD5                3        /* MD5 authentication kernel request failed (possibly not supported) */
370
#define BEM_NO_SOCKET                4
371
#define BEM_LINK_DOWN                5
372
#define BEM_BFD_DOWN                6
373
#define BEM_GRACEFUL_RESTART        7
374

    
375
/* Automatic shutdown error codes */
376

    
377
#define BEA_ROUTE_LIMIT_EXCEEDED 1
378

    
379
/* Well-known communities */
380

    
381
#define BGP_COMM_NO_EXPORT                0xffffff01        /* Don't export outside local AS / confed. */
382
#define BGP_COMM_NO_ADVERTISE                0xffffff02        /* Don't export at all */
383
#define BGP_COMM_NO_EXPORT_SUBCONFED        0xffffff03        /* NO_EXPORT even in local confederation */
384

    
385
/* Origins */
386

    
387
#define ORIGIN_IGP                0
388
#define ORIGIN_EGP                1
389
#define ORIGIN_INCOMPLETE        2
390

    
391
/* Address families */
392

    
393
#define BGP_AF_IPV4                1
394
#define BGP_AF_IPV6                2
395

    
396
#ifdef IPV6
397
#define BGP_AF BGP_AF_IPV6
398
#else
399
#define BGP_AF BGP_AF_IPV4
400
#endif
401

    
402
#endif