iofbirddaemon / nest / rtfib.c @ 62e64905
History  View  Annotate  Download (15.4 KB)
1 
/*


2 
* BIRD  Forwarding Information Base  Data Structures

3 
*

4 
* (c) 19982000 Martin Mares <mj@ucw.cz>

5 
*

6 
* Can be freely distributed and used under the terms of the GNU GPL.

7 
*/

8  
9 
/**

10 
* DOC: Forwarding Information Base

11 
*

12 
* FIB is a data structure designed for storage of routes indexed by their

13 
* network prefixes. It supports insertion, deletion, searching by prefix,

14 
* `routing' (in CIDR sense, that is searching for a longest prefix matching

15 
* a given IP address) and (which makes the structure very tricky to implement)

16 
* asynchronous reading, that is enumerating the contents of a FIB while other

17 
* modules add, modify or remove entries.

18 
*

19 
* Internally, each FIB is represented as a collection of nodes of type &fib_node

20 
* indexed using a sophisticated hashing mechanism.

21 
* We use twostage hashing where we calculate a 16bit primary hash key independent

22 
* on hash table size and then we just divide the primary keys modulo table size

23 
* to get a real hash key used for determining the bucket containing the node.

24 
* The lists of nodes in each bucket are sorted according to the primary hash

25 
* key, hence if we keep the total number of buckets to be a power of two,

26 
* rehashing of the structure keeps the relative order of the nodes.

27 
*

28 
* To get the asynchronous reading consistent over node deletions, we need to

29 
* keep a list of readers for each node. When a node gets deleted, its readers

30 
* are automatically moved to the next node in the table.

31 
*

32 
* Basic FIB operations are performed by functions defined by this module,

33 
* enumerating of FIB contents is accomplished by using the FIB_WALK() macro

34 
* or FIB_ITERATE_START() if you want to do it asynchronously.

35 
*/

36  
37 
#undef LOCAL_DEBUG

38  
39 
#include "nest/bird.h" 
40 
#include "nest/route.h" 
41 
#include "lib/string.h" 
42  
43 
#define HASH_DEF_ORDER 10 
44 
#define HASH_HI_MARK *4 
45 
#define HASH_HI_STEP 2 
46 
#define HASH_HI_MAX 16 
47 
#define HASH_LO_MARK /5 
48 
#define HASH_LO_STEP 2 
49 
#define HASH_LO_MIN 10 
50  
51  
52 
static void 
53 
fib_ht_alloc(struct fib *f)

54 
{ 
55 
f>hash_size = 1 << f>hash_order;

56 
f>hash_shift = 32  f>hash_order;

57 
if (f>hash_order > HASH_HI_MAX  HASH_HI_STEP)

58 
f>entries_max = ~0;

59 
else

60 
f>entries_max = f>hash_size HASH_HI_MARK; 
61 
if (f>hash_order < HASH_LO_MIN + HASH_LO_STEP)

62 
f>entries_min = 0;

63 
else

64 
f>entries_min = f>hash_size HASH_LO_MARK; 
65 
DBG("Allocating FIB hash of order %d: %d entries, %d low, %d high\n",

66 
f>hash_order, f>hash_size, f>entries_min, f>entries_max); 
67 
f>hash_table = mb_alloc(f>fib_pool, f>hash_size * sizeof(struct fib_node *)); 
68 
} 
69  
70 
static inline void 
71 
fib_ht_free(struct fib_node **h)

72 
{ 
73 
mb_free(h); 
74 
} 
75  
76  
77 
static u32

78 
fib_hash(struct fib *f, const net_addr *a); 
79  
80 
/**

81 
* fib_init  initialize a new FIB

82 
* @f: the FIB to be initialized (the structure itself being allocated by the caller)

83 
* @p: pool to allocate the nodes in

84 
* @node_size: node size to be used (each node consists of a standard header &fib_node

85 
* followed by user data)

86 
* @hash_order: initial hash order (a binary logarithm of hash table size), 0 to use default order

87 
* (recommended)

88 
* @init: pointer a function to be called to initialize a newly created node

89 
*

90 
* This function initializes a newly allocated FIB and prepares it for use.

91 
*/

92 
void

93 
fib_init(struct fib *f, pool *p, uint addr_type, uint node_size, uint node_offset, uint hash_order, fib_init_fn init)

94 
{ 
95 
uint addr_length = net_addr_length[addr_type]; 
96  
97 
if (!hash_order)

98 
hash_order = HASH_DEF_ORDER; 
99 
f>fib_pool = p; 
100 
f>fib_slab = addr_length ? sl_new(p, node_size + addr_length) : NULL;

101 
f>addr_type = addr_type; 
102 
f>node_size = node_size; 
103 
f>node_offset = node_offset; 
104 
f>hash_order = hash_order; 
105 
fib_ht_alloc(f); 
106 
bzero(f>hash_table, f>hash_size * sizeof(struct fib_node *)); 
107 
f>entries = 0;

108 
f>entries_min = 0;

109 
f>init = init; 
110 
} 
111  
112 
static void 
113 
fib_rehash(struct fib *f, int step) 
114 
{ 
115 
unsigned old, new, oldn, newn, ni, nh;

116 
struct fib_node **n, *e, *x, **t, **m, **h;

117  
118 
old = f>hash_order; 
119 
oldn = f>hash_size; 
120 
new = old + step; 
121 
m = h = f>hash_table; 
122 
DBG("Rehashing FIB from order %d to %d\n", old, new);

123 
f>hash_order = new; 
124 
fib_ht_alloc(f); 
125 
t = n = f>hash_table; 
126 
newn = f>hash_size; 
127 
ni = 0;

128  
129 
while (oldn)

130 
{ 
131 
x = *h++; 
132 
while (e = x)

133 
{ 
134 
x = e>next; 
135 
nh = fib_hash(f, e>addr); 
136 
while (nh > ni)

137 
{ 
138 
*t = NULL;

139 
ni++; 
140 
t = ++n; 
141 
} 
142 
*t = e; 
143 
t = &e>next; 
144 
} 
145 
} 
146 
while (ni < newn)

147 
{ 
148 
*t = NULL;

149 
ni++; 
150 
t = ++n; 
151 
} 
152 
fib_ht_free(m); 
153 
} 
154  
155 
#define CAST(t) (const net_addr_##t *) 
156 
#define CAST2(t) (net_addr_##t *) 
157  
158 
#define FIB_HASH(f,a,t) (net_hash_##t(CAST(t) a) >> f>hash_shift) 
159  
160 
#define FIB_FIND(f,a,t) \

161 
({ \ 
162 
struct fib_node *e = f>hash_table[FIB_HASH(f, a, t)]; \

163 
while (e && !net_equal_##t(CAST(t) e>addr, CAST(t) a)) \ 
164 
e = e>next; \ 
165 
fib_node_to_user(f, e); \ 
166 
}) 
167  
168 
#define FIB_INSERT(f,a,e,t) \

169 
({ \ 
170 
u32 h = net_hash_##t(CAST(t) a); \ 
171 
struct fib_node **ee = f>hash_table + (h >> f>hash_shift); \

172 
struct fib_node *g; \

173 
\ 
174 
while ((g = *ee) && (net_hash_##t(CAST(t) g>addr) < h)) \ 
175 
ee = &g>next; \ 
176 
\ 
177 
net_copy_##t(CAST2(t) e>addr, CAST(t) a); \ 
178 
e>next = *ee; \ 
179 
*ee = e; \ 
180 
}) 
181  
182  
183 
static u32

184 
fib_hash(struct fib *f, const net_addr *a) 
185 
{ 
186 
ASSERT(f>addr_type == a>type); 
187  
188 
switch (f>addr_type)

189 
{ 
190 
case NET_IP4: return FIB_HASH(f, a, ip4); 
191 
case NET_IP6: return FIB_HASH(f, a, ip6); 
192 
case NET_VPN4: return FIB_HASH(f, a, vpn4); 
193 
case NET_VPN6: return FIB_HASH(f, a, vpn6); 
194 
case NET_ROA4: return FIB_HASH(f, a, roa4); 
195 
case NET_ROA6: return FIB_HASH(f, a, roa6); 
196 
case NET_FLOW4: return FIB_HASH(f, a, flow4); 
197 
case NET_FLOW6: return FIB_HASH(f, a, flow6); 
198 
default: bug("invalid type"); 
199 
} 
200 
} 
201  
202 
void *

203 
fib_get_chain(struct fib *f, const net_addr *a) 
204 
{ 
205 
ASSERT(f>addr_type == a>type); 
206  
207 
struct fib_node *e = f>hash_table[fib_hash(f, a)];

208 
return e;

209 
} 
210  
211 
/**

212 
* fib_find  search for FIB node by prefix

213 
* @f: FIB to search in

214 
* @n: network address

215 
*

216 
* Search for a FIB node corresponding to the given prefix, return

217 
* a pointer to it or %NULL if no such node exists.

218 
*/

219 
void *

220 
fib_find(struct fib *f, const net_addr *a) 
221 
{ 
222 
ASSERT(f>addr_type == a>type); 
223  
224 
switch (f>addr_type)

225 
{ 
226 
case NET_IP4: return FIB_FIND(f, a, ip4); 
227 
case NET_IP6: return FIB_FIND(f, a, ip6); 
228 
case NET_VPN4: return FIB_FIND(f, a, vpn4); 
229 
case NET_VPN6: return FIB_FIND(f, a, vpn6); 
230 
case NET_ROA4: return FIB_FIND(f, a, roa4); 
231 
case NET_ROA6: return FIB_FIND(f, a, roa6); 
232 
case NET_FLOW4: return FIB_FIND(f, a, flow4); 
233 
case NET_FLOW6: return FIB_FIND(f, a, flow6); 
234 
default: bug("invalid type"); 
235 
} 
236 
} 
237  
238 
static void 
239 
fib_insert(struct fib *f, const net_addr *a, struct fib_node *e) 
240 
{ 
241 
ASSERT(f>addr_type == a>type); 
242  
243 
switch (f>addr_type)

244 
{ 
245 
case NET_IP4: FIB_INSERT(f, a, e, ip4); return; 
246 
case NET_IP6: FIB_INSERT(f, a, e, ip6); return; 
247 
case NET_VPN4: FIB_INSERT(f, a, e, vpn4); return; 
248 
case NET_VPN6: FIB_INSERT(f, a, e, vpn6); return; 
249 
case NET_ROA4: FIB_INSERT(f, a, e, roa4); return; 
250 
case NET_ROA6: FIB_INSERT(f, a, e, roa6); return; 
251 
case NET_FLOW4: FIB_INSERT(f, a, e, flow4); return; 
252 
case NET_FLOW6: FIB_INSERT(f, a, e, flow6); return; 
253 
default: bug("invalid type"); 
254 
} 
255 
} 
256  
257  
258 
/**

259 
* fib_get  find or create a FIB node

260 
* @f: FIB to work with

261 
* @n: network address

262 
*

263 
* Search for a FIB node corresponding to the given prefix and

264 
* return a pointer to it. If no such node exists, create it.

265 
*/

266 
void *

267 
fib_get(struct fib *f, const net_addr *a) 
268 
{ 
269 
void *b = fib_find(f, a);

270 
if (b)

271 
return b;

272  
273 
if (f>fib_slab)

274 
b = sl_alloc(f>fib_slab); 
275 
else

276 
b = mb_alloc(f>fib_pool, f>node_size + a>length); 
277  
278 
struct fib_node *e = fib_user_to_node(f, b);

279 
e>readers = NULL;

280 
e>flags = 0;

281 
fib_insert(f, a, e); 
282  
283 
memset(b, 0, f>node_offset);

284 
if (f>init)

285 
f>init(b); 
286  
287 
if (f>entries++ > f>entries_max)

288 
fib_rehash(f, HASH_HI_STEP); 
289  
290 
return b;

291 
} 
292  
293 
static inline void * 
294 
fib_route_ip4(struct fib *f, net_addr_ip4 *n)

295 
{ 
296 
void *r;

297  
298 
while (!(r = fib_find(f, (net_addr *) n)) && (n>pxlen > 0)) 
299 
{ 
300 
n>pxlen; 
301 
ip4_clrbit(&n>prefix, n>pxlen); 
302 
} 
303  
304 
return r;

305 
} 
306  
307 
static inline void * 
308 
fib_route_ip6(struct fib *f, net_addr_ip6 *n)

309 
{ 
310 
void *r;

311  
312 
while (!(r = fib_find(f, (net_addr *) n)) && (n>pxlen > 0)) 
313 
{ 
314 
n>pxlen; 
315 
ip6_clrbit(&n>prefix, n>pxlen); 
316 
} 
317  
318 
return r;

319 
} 
320  
321 
/**

322 
* fib_route  CIDR routing lookup

323 
* @f: FIB to search in

324 
* @n: network address

325 
*

326 
* Search for a FIB node with longest prefix matching the given

327 
* network, that is a node which a CIDR router would use for routing

328 
* that network.

329 
*/

330 
void *

331 
fib_route(struct fib *f, const net_addr *n) 
332 
{ 
333 
ASSERT(f>addr_type == n>type); 
334  
335 
net_addr *n0 = alloca(n>length); 
336 
net_copy(n0, n); 
337  
338 
switch (n>type)

339 
{ 
340 
case NET_IP4:

341 
case NET_VPN4:

342 
case NET_ROA4:

343 
case NET_FLOW4:

344 
return fib_route_ip4(f, (net_addr_ip4 *) n0);

345  
346 
case NET_IP6:

347 
case NET_VPN6:

348 
case NET_ROA6:

349 
case NET_FLOW6:

350 
return fib_route_ip6(f, (net_addr_ip6 *) n0);

351  
352 
default:

353 
return NULL; 
354 
} 
355 
} 
356  
357  
358 
static inline void 
359 
fib_merge_readers(struct fib_iterator *i, struct fib_node *to) 
360 
{ 
361 
if (to)

362 
{ 
363 
struct fib_iterator *j = to>readers;

364 
if (!j)

365 
{ 
366 
/* Fast path */

367 
to>readers = i; 
368 
i>prev = (struct fib_iterator *) to;

369 
} 
370 
else

371 
{ 
372 
/* Really merging */

373 
while (j>next)

374 
j = j>next; 
375 
j>next = i; 
376 
i>prev = j; 
377 
} 
378 
while (i && i>node)

379 
{ 
380 
i>node = NULL;

381 
i = i>next; 
382 
} 
383 
} 
384 
else /* No more nodes */ 
385 
while (i)

386 
{ 
387 
i>prev = NULL;

388 
i = i>next; 
389 
} 
390 
} 
391  
392 
/**

393 
* fib_delete  delete a FIB node

394 
* @f: FIB to delete from

395 
* @E: entry to delete

396 
*

397 
* This function removes the given entry from the FIB,

398 
* taking care of all the asynchronous readers by shifting

399 
* them to the next node in the canonical reading order.

400 
*/

401 
void

402 
fib_delete(struct fib *f, void *E) 
403 
{ 
404 
struct fib_node *e = fib_user_to_node(f, E);

405 
uint h = fib_hash(f, e>addr); 
406 
struct fib_node **ee = f>hash_table + h;

407 
struct fib_iterator *it;

408  
409 
while (*ee)

410 
{ 
411 
if (*ee == e)

412 
{ 
413 
*ee = e>next; 
414 
if (it = e>readers)

415 
{ 
416 
struct fib_node *l = e>next;

417 
while (!l)

418 
{ 
419 
h++; 
420 
if (h >= f>hash_size)

421 
break;

422 
else

423 
l = f>hash_table[h]; 
424 
} 
425 
fib_merge_readers(it, l); 
426 
} 
427  
428 
if (f>fib_slab)

429 
sl_free(f>fib_slab, E); 
430 
else

431 
mb_free(E); 
432  
433 
if (f>entries < f>entries_min)

434 
fib_rehash(f, HASH_LO_STEP); 
435 
return;

436 
} 
437 
ee = &((*ee)>next); 
438 
} 
439 
bug("fib_delete() called for invalid node");

440 
} 
441  
442 
/**

443 
* fib_free  delete a FIB

444 
* @f: FIB to be deleted

445 
*

446 
* This function deletes a FIB  it frees all memory associated

447 
* with it and all its entries.

448 
*/

449 
void

450 
fib_free(struct fib *f)

451 
{ 
452 
fib_ht_free(f>hash_table); 
453 
rfree(f>fib_slab); 
454 
} 
455  
456 
void

457 
fit_init(struct fib_iterator *i, struct fib *f) 
458 
{ 
459 
unsigned h;

460 
struct fib_node *n;

461  
462 
i>efef = 0xff;

463 
for(h=0; h<f>hash_size; h++) 
464 
if (n = f>hash_table[h])

465 
{ 
466 
i>prev = (struct fib_iterator *) n;

467 
if (i>next = n>readers)

468 
i>next>prev = i; 
469 
n>readers = i; 
470 
i>node = n; 
471 
return;

472 
} 
473 
/* The fib is empty, nothing to do */

474 
i>prev = i>next = NULL;

475 
i>node = NULL;

476 
} 
477  
478 
struct fib_node *

479 
fit_get(struct fib *f, struct fib_iterator *i) 
480 
{ 
481 
struct fib_node *n;

482 
struct fib_iterator *j, *k;

483  
484 
if (!i>prev)

485 
{ 
486 
/* We are at the end */

487 
i>hash = ~0  1; 
488 
return NULL; 
489 
} 
490 
if (!(n = i>node))

491 
{ 
492 
/* No node info available, we are a victim of merging. Try harder. */

493 
j = i; 
494 
while (j>efef == 0xff) 
495 
j = j>prev; 
496 
n = (struct fib_node *) j;

497 
} 
498 
j = i>prev; 
499 
if (k = i>next)

500 
k>prev = j; 
501 
j>next = k; 
502 
i>hash = fib_hash(f, n>addr); 
503 
return n;

504 
} 
505  
506 
void

507 
fit_put(struct fib_iterator *i, struct fib_node *n) 
508 
{ 
509 
struct fib_iterator *j;

510  
511 
i>node = n; 
512 
if (j = n>readers)

513 
j>prev = i; 
514 
i>next = j; 
515 
n>readers = i; 
516 
i>prev = (struct fib_iterator *) n;

517 
} 
518  
519 
void

520 
fit_put_next(struct fib *f, struct fib_iterator *i, struct fib_node *n, uint hpos) 
521 
{ 
522 
if (n = n>next)

523 
goto found;

524  
525 
while (++hpos < f>hash_size)

526 
if (n = f>hash_table[hpos])

527 
goto found;

528  
529 
/* We are at the end */

530 
i>prev = i>next = NULL;

531 
i>node = NULL;

532 
return;

533  
534 
found:

535 
fit_put(i, n); 
536 
} 
537  
538 
#ifdef DEBUGGING

539  
540 
/**

541 
* fib_check  audit a FIB

542 
* @f: FIB to be checked

543 
*

544 
* This debugging function audits a FIB by checking its internal consistency.

545 
* Use when you suspect somebody of corrupting innocent data structures.

546 
*/

547 
void

548 
fib_check(struct fib *f)

549 
{ 
550 
#if 0

551 
uint i, ec, lo, nulls;

552 

553 
ec = 0;

554 
for(i=0; i<f>hash_size; i++)

555 
{

556 
struct fib_node *n;

557 
lo = 0;

558 
for(n=f>hash_table[i]; n; n=n>next)

559 
{

560 
struct fib_iterator *j, *j0;

561 
uint h0 = ipa_hash(n>prefix);

562 
if (h0 < lo)

563 
bug("fib_check: discord in hash chains");

564 
lo = h0;

565 
if ((h0 >> f>hash_shift) != i)

566 
bug("fib_check: mishashed %x>%x (order %d)", h0, i, f>hash_order);

567 
j0 = (struct fib_iterator *) n;

568 
nulls = 0;

569 
for(j=n>readers; j; j=j>next)

570 
{

571 
if (j>prev != j0)

572 
bug("fib_check: iterator>prev mismatch");

573 
j0 = j;

574 
if (!j>node)

575 
nulls++;

576 
else if (nulls)

577 
bug("fib_check: iterator nullified");

578 
else if (j>node != n)

579 
bug("fib_check: iterator>node mismatch");

580 
}

581 
ec++;

582 
}

583 
}

584 
if (ec != f>entries)

585 
bug("fib_check: invalid entry count (%d != %d)", ec, f>entries);

586 
#endif

587 
return;

588 
} 
589  
590 
/*

591 
int

592 
fib_histogram(struct fib *f)

593 
{

594 
log(L_WARN "Histogram dump start %d %d", f>hash_size, f>entries);

595 

596 
int i, j;

597 
struct fib_node *e;

598 

599 
for (i = 0; i < f>hash_size; i++)

600 
{

601 
j = 0;

602 
for (e = f>hash_table[i]; e != NULL; e = e>next)

603 
j++;

604 
if (j > 0)

605 
log(L_WARN "Histogram line %d: %d", i, j);

606 
}

607 

608 
log(L_WARN "Histogram dump end");

609 
}

610 
*/

611  
612 
#endif

613  
614 
#ifdef TEST

615  
616 
#include "lib/resource.h" 
617  
618 
struct fib f;

619  
620 
void dump(char *m) 
621 
{ 
622 
uint i; 
623  
624 
debug("%s ... order=%d, size=%d, entries=%d\n", m, f.hash_order, f.hash_size, f.hash_size);

625 
for(i=0; i<f.hash_size; i++) 
626 
{ 
627 
struct fib_node *n;

628 
struct fib_iterator *j;

629 
for(n=f.hash_table[i]; n; n=n>next)

630 
{ 
631 
debug("%04x %08x %p %N", i, ipa_hash(n>prefix), n, n>addr);

632 
for(j=n>readers; j; j=j>next)

633 
debug(" %p[%p]", j, j>node);

634 
debug("\n");

635 
} 
636 
} 
637 
fib_check(&f); 
638 
debug("\n");

639 
} 
640  
641 
void init(struct fib_node *n) 
642 
{ 
643 
} 
644  
645 
int main(void) 
646 
{ 
647 
struct fib_node *n;

648 
struct fib_iterator i, j;

649 
ip_addr a; 
650 
int c;

651  
652 
log_init_debug(NULL);

653 
resource_init(); 
654 
fib_init(&f, &root_pool, sizeof(struct fib_node), 4, init); 
655 
dump("init");

656  
657 
a = ipa_from_u32(0x01020304); n = fib_get(&f, &a, 32); 
658 
a = ipa_from_u32(0x02030405); n = fib_get(&f, &a, 32); 
659 
a = ipa_from_u32(0x03040506); n = fib_get(&f, &a, 32); 
660 
a = ipa_from_u32(0x00000000); n = fib_get(&f, &a, 32); 
661 
a = ipa_from_u32(0x00000c01); n = fib_get(&f, &a, 32); 
662 
a = ipa_from_u32(0xffffffff); n = fib_get(&f, &a, 32); 
663 
dump("fill");

664  
665 
fit_init(&i, &f); 
666 
dump("iter init");

667  
668 
fib_rehash(&f, 1);

669 
dump("rehash up");

670  
671 
fib_rehash(&f, 1);

672 
dump("rehash down");

673  
674 
next:

675 
c = 0;

676 
FIB_ITERATE_START(&f, &i, z) 
677 
{ 
678 
if (c)

679 
{ 
680 
FIB_ITERATE_PUT(&i, z); 
681 
dump("iter");

682 
goto next;

683 
} 
684 
c = 1;

685 
debug("got %p\n", z);

686 
} 
687 
FIB_ITERATE_END(z); 
688 
dump("iter end");

689  
690 
fit_init(&i, &f); 
691 
fit_init(&j, &f); 
692 
dump("iter init 2");

693  
694 
n = fit_get(&f, &i); 
695 
dump("iter step 2");

696  
697 
fit_put(&i, n>next); 
698 
dump("iter step 3");

699  
700 
a = ipa_from_u32(0xffffffff); n = fib_get(&f, &a, 32); 
701 
fib_delete(&f, n); 
702 
dump("iter step 3");

703  
704 
return 0; 
705 
} 
706  
707 
#endif
