Statistics
| Branch: | Revision:

iof-bird-daemon / nest / rt-table.c @ 04632fd7

History | View | Annotate | Download (64.8 KB)

1
/*
2
 *        BIRD -- Routing Tables
3
 *
4
 *        (c) 1998--2000 Martin Mares <mj@ucw.cz>
5
 *
6
 *        Can be freely distributed and used under the terms of the GNU GPL.
7
 */
8

    
9
/**
10
 * DOC: Routing tables
11
 *
12
 * Routing tables are probably the most important structures BIRD uses. They
13
 * hold all the information about known networks, the associated routes and
14
 * their attributes.
15
 *
16
 * There are multiple routing tables (a primary one together with any
17
 * number of secondary ones if requested by the configuration). Each table
18
 * is basically a FIB containing entries describing the individual
19
 * destination networks. For each network (represented by structure &net),
20
 * there is a one-way linked list of route entries (&rte), the first entry
21
 * on the list being the best one (i.e., the one we currently use
22
 * for routing), the order of the other ones is undetermined.
23
 *
24
 * The &rte contains information specific to the route (preference, protocol
25
 * metrics, time of last modification etc.) and a pointer to a &rta structure
26
 * (see the route attribute module for a precise explanation) holding the
27
 * remaining route attributes which are expected to be shared by multiple
28
 * routes in order to conserve memory.
29
 */
30

    
31
#undef LOCAL_DEBUG
32

    
33
#include "nest/bird.h"
34
#include "nest/route.h"
35
#include "nest/protocol.h"
36
#include "nest/cli.h"
37
#include "nest/iface.h"
38
#include "lib/resource.h"
39
#include "lib/event.h"
40
#include "lib/string.h"
41
#include "conf/conf.h"
42
#include "filter/filter.h"
43
#include "lib/string.h"
44
#include "lib/alloca.h"
45

    
46
pool *rt_table_pool;
47

    
48
static slab *rte_slab;
49
static linpool *rte_update_pool;
50

    
51
static list routing_tables;
52

    
53
static void rt_format_via(rte *e, byte *via);
54
static void rt_free_hostcache(rtable *tab);
55
static void rt_notify_hostcache(rtable *tab, net *net);
56
static void rt_update_hostcache(rtable *tab);
57
static void rt_next_hop_update(rtable *tab);
58
static inline int rt_prune_table(rtable *tab);
59
static inline void rt_schedule_gc(rtable *tab);
60
static inline void rt_schedule_prune(rtable *tab);
61

    
62

    
63
static inline struct ea_list *
64
make_tmp_attrs(struct rte *rt, struct linpool *pool)
65
{
66
  struct ea_list *(*mta)(struct rte *rt, struct linpool *pool);
67
  mta = rt->attrs->src->proto->make_tmp_attrs;
68
  return mta ? mta(rt, rte_update_pool) : NULL;
69
}
70

    
71

    
72
/* Like fib_route(), but skips empty net entries */
73
static inline void *
74
net_route_ip4(struct fib *f, net_addr_ip4 *n)
75
{
76
  net *r;
77

    
78
  while (r = fib_find(f, (net_addr *) n),
79
         !(r && rte_is_valid(r->routes)) && (n->pxlen > 0))
80
  {
81
    n->pxlen--;
82
    ip4_clrbit(&n->prefix, n->pxlen);
83
  }
84

    
85
  return r;
86
}
87

    
88
static inline void *
89
net_route_ip6(struct fib *f, net_addr_ip6 *n)
90
{
91
  net *r;
92

    
93
  while (r = fib_find(f, (net_addr *) n),
94
         !(r && rte_is_valid(r->routes)) && (n->pxlen > 0))
95
  {
96
    n->pxlen--;
97
    ip6_clrbit(&n->prefix, n->pxlen);
98
  }
99

    
100
  return r;
101
}
102

    
103
void *
104
net_route(rtable *tab, const net_addr *n)
105
{
106
  ASSERT(f->addr_type == n->type);
107

    
108
  net_addr *n0 = alloca(n->length);
109
  net_copy(n0, n);
110

    
111
  switch (n->type)
112
  {
113
  case NET_IP4:
114
  case NET_VPN4:
115
    return net_route_ip4(&tab->fib, (net_addr_ip4 *) n0);
116

    
117
  case NET_IP6:
118
  case NET_VPN6:
119
    return net_route_ip6(&tab->fib, (net_addr_ip6 *) n0);
120

    
121
  default:
122
    return NULL;
123
  }
124
}
125

    
126
/**
127
 * rte_find - find a route
128
 * @net: network node
129
 * @src: route source
130
 *
131
 * The rte_find() function returns a route for destination @net
132
 * which is from route source @src.
133
 */
134
rte *
135
rte_find(net *net, struct rte_src *src)
136
{
137
  rte *e = net->routes;
138

    
139
  while (e && e->attrs->src != src)
140
    e = e->next;
141
  return e;
142
}
143

    
144
/**
145
 * rte_get_temp - get a temporary &rte
146
 * @a: attributes to assign to the new route (a &rta; in case it's
147
 * un-cached, rte_update() will create a cached copy automatically)
148
 *
149
 * Create a temporary &rte and bind it with the attributes @a.
150
 * Also set route preference to the default preference set for
151
 * the protocol.
152
 */
153
rte *
154
rte_get_temp(rta *a)
155
{
156
  rte *e = sl_alloc(rte_slab);
157

    
158
  e->attrs = a;
159
  e->flags = 0;
160
  e->pref = a->src->proto->preference;
161
  return e;
162
}
163

    
164
rte *
rte_do_cow(rte *r)
{
  /* Duplicate the rte itself and take a new reference on its rta */
  rte *copy = sl_alloc(rte_slab);

  memcpy(copy, r, sizeof(rte));
  copy->attrs = rta_clone(r->attrs);
  copy->flags = 0;
  return copy;
}
174

    
175
/**
176
 * rte_cow_rta - get a private writable copy of &rte with writable &rta
177
 * @r: a route entry to be copied
178
 * @lp: a linpool from which to allocate &rta
179
 *
180
 * rte_cow_rta() takes a &rte and prepares it and associated &rta for
181
 * modification. There are three possibilities: First, both &rte and &rta are
182
 * private copies, in that case they are returned unchanged.  Second, &rte is
183
 * private copy, but &rta is cached, in that case &rta is duplicated using
184
 * rta_do_cow(). Third, both &rte is shared and &rta is cached, in that case
185
 * both structures are duplicated by rte_do_cow() and rta_do_cow().
186
 *
187
 * Note that in the second case, cached &rta loses one reference, while private
188
 * copy created by rta_do_cow() is a shallow copy sharing indirect data (eattrs,
189
 * nexthops, ...) with it. To work properly, original shared &rta should have
190
 * another reference during the life of created private copy.
191
 *
192
 * Result: a pointer to the new writable &rte with writable &rta.
193
 */
194
rte *
rte_cow_rta(rte *r, linpool *lp)
{
  /* First case from the doc comment: both rte and rta already private */
  if (!rta_is_cached(r->attrs))
    return r;

  /* Get a private rte (no-op if already private), then a shallow private
     copy of the rta. Order matters: the shallow copy must be made before
     the copy's reference to the cached rta is dropped. */
  rte *e = rte_cow(r);
  rta *a = rta_do_cow(r->attrs, lp);
  rta_free(e->attrs);
  e->attrs = a;
  return e;
}
206

    
207
static int                                /* Actually better or at least as good as */
208
rte_better(rte *new, rte *old)
209
{
210
  int (*better)(rte *, rte *);
211

    
212
  if (!rte_is_valid(old))
213
    return 1;
214
  if (!rte_is_valid(new))
215
    return 0;
216

    
217
  if (new->pref > old->pref)
218
    return 1;
219
  if (new->pref < old->pref)
220
    return 0;
221
  if (new->attrs->src->proto->proto != old->attrs->src->proto->proto)
222
    {
223
      /*
224
       *  If the user has configured protocol preferences, so that two different protocols
225
       *  have the same preference, try to break the tie by comparing addresses. Not too
226
       *  useful, but keeps the ordering of routes unambiguous.
227
       */
228
      return new->attrs->src->proto->proto > old->attrs->src->proto->proto;
229
    }
230
  if (better = new->attrs->src->proto->rte_better)
231
    return better(new, old);
232
  return 0;
233
}
234

    
235
static int
236
rte_mergable(rte *pri, rte *sec)
237
{
238
  int (*mergable)(rte *, rte *);
239

    
240
  if (!rte_is_valid(pri) || !rte_is_valid(sec))
241
    return 0;
242

    
243
  if (pri->pref != sec->pref)
244
    return 0;
245

    
246
  if (pri->attrs->src->proto->proto != sec->attrs->src->proto->proto)
247
    return 0;
248

    
249
  if (mergable = pri->attrs->src->proto->rte_mergable)
250
    return mergable(pri, sec);
251

    
252
  return 0;
253
}
254

    
255
/* Log one trace line for route @e: protocol name, direction character
   ('>' import, '<' export), message, prefix and formatted next hop. */
static void
rte_trace(struct proto *p, rte *e, int dir, char *msg)
{
  byte via[IPA_MAX_TEXT_LENGTH+32];

  rt_format_via(e, via);
  log(L_TRACE "%s %c %s %N %s", p->name, dir, msg, e->net->n.addr, via);
}
263

    
264
/* Trace a route in the import direction, if debug @flag is enabled */
static inline void
rte_trace_in(uint flag, struct proto *p, rte *e, char *msg)
{
  if (p->debug & flag)
    rte_trace(p, e, '>', msg);
}
270

    
271
/* Trace a route in the export direction, if debug @flag is enabled */
static inline void
rte_trace_out(uint flag, struct proto *p, rte *e, char *msg)
{
  if (p->debug & flag)
    rte_trace(p, e, '<', msg);
}
277

    
278
/*
 * export_filter - run the export path (import_control hook + export
 * filter) on route @rt0 for announce hook @ah.
 *
 * Returns the route to export (possibly a modified private copy), or
 * NULL if it was rejected. A private copy, if one was made, is stored
 * in *@rt_free so the caller can free it afterwards. @tmpa carries the
 * temporary attribute list; if NULL, a local one is used. With @silent
 * set, no statistics are updated and nothing is logged (used when only
 * probing whether a route would be exported).
 */
static rte *
export_filter(struct announce_hook *ah, rte *rt0, rte **rt_free, ea_list **tmpa, int silent)
{
  struct proto *p = ah->proto;
  struct filter *filter = ah->out_filter;
  struct proto_stats *stats = ah->stats;
  ea_list *tmpb = NULL;
  rte *rt;
  int v;

  rt = rt0;
  *rt_free = NULL;

  if (!tmpa)
    tmpa = &tmpb;

  *tmpa = make_tmp_attrs(rt, rte_update_pool);

  /* The protocol may veto (v < 0) or force-accept (v > 0) before filtering */
  v = p->import_control ? p->import_control(p, &rt, tmpa, rte_update_pool) : 0;
  if (v < 0)
    {
      if (silent)
        goto reject;

      stats->exp_updates_rejected++;
      if (v == RIC_REJECT)
        rte_trace_out(D_FILTERS, p, rt, "rejected by protocol");
      goto reject;
    }
  if (v > 0)
    {
      if (!silent)
        rte_trace_out(D_FILTERS, p, rt, "forced accept by protocol");
      goto accept;
    }

  /* Run the configured export filter; FILTER_REJECT blocks everything */
  v = filter && ((filter == FILTER_REJECT) ||
                 (f_run(filter, &rt, tmpa, rte_update_pool, FF_FORCE_TMPATTR) > F_ACCEPT));
  if (v)
    {
      if (silent)
        goto reject;

      stats->exp_updates_filtered++;
      rte_trace_out(D_FILTERS, p, rt, "filtered out");
      goto reject;
    }

 accept:
  /* Hand any private copy made by the hooks/filter to the caller */
  if (rt != rt0)
    *rt_free = rt;
  return rt;

 reject:
  /* Discard temporary rte */
  if (rt != rt0)
    rte_free(rt);
  return NULL;
}
337

    
338
/*
 * do_rt_notify - deliver one (already filtered) route change to a single
 * protocol hook @ah. @new and/or @old may be NULL (pure add/withdraw).
 * @tmpa is the temporary attribute list from export filtering; @refeed
 * is set when this announcement is part of refeeding the protocol.
 * Applies the export route limit, updates export statistics and finally
 * calls the protocol's rt_notify() hook.
 */
static void
do_rt_notify(struct announce_hook *ah, net *net, rte *new, rte *old, ea_list *tmpa, int refeed)
{
  struct proto *p = ah->proto;
  struct proto_stats *stats = ah->stats;


  /*
   * First, apply export limit.
   *
   * Export route limits has several problems. Because exp_routes
   * counter is reset before refeed, we don't really know whether
   * limit is breached and whether the update is new or not. Therefore
   * the number of really exported routes may exceed the limit
   * temporarily (routes exported before and new routes in refeed).
   *
   * Minor advantage is that if the limit is decreased and refeed is
   * requested, the number of exported routes really decrease.
   *
   * Second problem is that with export limits, we don't know whether
   * old was really exported (it might be blocked by limit). When a
   * withdraw is exported, we announce it even when the previous
   * update was blocked. This is not a big issue, but the same problem
   * is in updating exp_routes counter. Therefore, to be consistent in
   * increases and decreases of exp_routes, we count exported routes
   * regardless of blocking by limits.
   *
   * Similar problem is in handling updates - when a new route is
   * received and blocking is active, the route would be blocked, but
   * when an update for the route will be received later, the update
   * would be propagated (as old != NULL). Therefore, we have to block
   * also non-new updates (contrary to import blocking).
   */

  struct proto_limit *l = ah->out_limit;
  if (l && new)
    {
      if ((!old || refeed) && (stats->exp_routes >= l->limit))
        proto_notify_limit(ah, l, PLD_OUT, stats->exp_routes);

      if (l->state == PLS_BLOCKED)
        {
          stats->exp_routes++;        /* see note above */
          stats->exp_updates_rejected++;
          rte_trace_out(D_FILTERS, p, new, "rejected [limit]");
          new = NULL;

          /* Nothing left to announce at all */
          if (!old)
            return;
        }
    }


  if (new)
    stats->exp_updates_accepted++;
  else
    stats->exp_withdraws_accepted++;

  /* Hack: We do not decrease exp_routes during refeed, we instead
     reset exp_routes at the start of refeed. */
  if (new)
    stats->exp_routes++;
  if (old && !refeed)
    stats->exp_routes--;

  if (p->debug & D_ROUTES)
    {
      if (new && old)
        rte_trace_out(D_ROUTES, p, new, "replaced");
      else if (new)
        rte_trace_out(D_ROUTES, p, new, "added");
      else if (old)
        rte_trace_out(D_ROUTES, p, old, "removed");
    }
  if (!new)
    p->rt_notify(p, ah->table, net, NULL, old, NULL);
  else if (tmpa)
    {
      /* Temporarily splice tmpa in front of the route's eattrs for the
         duration of the rt_notify() call, then unlink it again. */
      ea_list *t = tmpa;
      while (t->next)
        t = t->next;
      t->next = new->attrs->eattrs;
      p->rt_notify(p, ah->table, net, new, old, tmpa);
      t->next = NULL;
    }
  else
    p->rt_notify(p, ah->table, net, new, old, new->attrs->eattrs);
}
426

    
427
/*
 * rt_notify_basic - announce a single route change (@new0 replacing
 * @old0) to protocol hook @ah, running both routes through its export
 * filter first. @refeed is set during protocol refeed.
 */
static void
rt_notify_basic(struct announce_hook *ah, net *net, rte *new0, rte *old0, int refeed)
{
  struct proto *p = ah->proto;
  struct proto_stats *stats = ah->stats;

  rte *new = new0;
  rte *old = old0;
  rte *new_free = NULL;
  rte *old_free = NULL;
  ea_list *tmpa = NULL;

  if (new)
    stats->exp_updates_received++;
  else
    stats->exp_withdraws_received++;

  /*
   * This is a tricky part - we don't know whether route 'old' was
   * exported to protocol 'p' or was filtered by the export filter.
   * We try to run the export filter to know this to have a correct
   * value in 'old' argument of rte_update (and proper filter value)
   *
   * FIXME - this is broken because 'configure soft' may change
   * filters but keep routes. Refeed is expected to be called after
   * change of the filters and with old == new, therefore we do not
   * even try to run the filter on an old route, This may lead to
   * 'spurious withdraws' but ensure that there are no 'missing
   * withdraws'.
   *
   * This is not completely safe as there is a window between
   * reconfiguration and the end of refeed - if a newly filtered
   * route disappears during this period, proper withdraw is not
   * sent (because old would be also filtered) and the route is
   * not refeeded (because it disappeared before that).
   */

  if (new)
    new = export_filter(ah, new, &new_free, &tmpa, 0);

  /* Probe the old route silently - only to learn whether it was exported */
  if (old && !refeed)
    old = export_filter(ah, old, &old_free, NULL, 1);

  if (!new && !old)
  {
    /*
     * As mentioned above, 'old' value may be incorrect in some race conditions.
     * We generally ignore it with the exception of withdraw to pipe protocol.
     * In that case we rather propagate unfiltered withdraws regardless of
     * export filters to ensure that when a protocol is flushed, its routes are
     * removed from all tables. Possible spurious unfiltered withdraws are not
     * problem here as they are ignored if there is no corresponding route at
     * the other end of the pipe. We directly call rt_notify() hook instead of
     * do_rt_notify() to avoid logging and stat counters.
     */

#ifdef CONFIG_PIPE
    if ((p->proto == &proto_pipe) && !new0 && (p != old0->sender->proto))
      p->rt_notify(p, ah->table, net, NULL, old0, NULL);
#endif

    return;
  }

  do_rt_notify(ah, net, new, old, tmpa, refeed);

  /* Discard temporary rte's */
  if (new_free)
    rte_free(new_free);
  if (old_free)
    rte_free(old_free);
}
499

    
500
/*
 * rt_notify_accepted - announce a change to a protocol in RA_ACCEPTED
 * mode, where the exported route is the first one accepted by the
 * export filters rather than the table's best route. @feed is 0 for a
 * regular update, 1 for initial feed, 2 for refeed.
 */
static void
rt_notify_accepted(struct announce_hook *ah, net *net, rte *new_changed, rte *old_changed, rte *before_old, int feed)
{
  // struct proto *p = ah->proto;
  struct proto_stats *stats = ah->stats;

  rte *r;
  rte *new_best = NULL;
  rte *old_best = NULL;
  rte *new_free = NULL;
  rte *old_free = NULL;
  ea_list *tmpa = NULL;

  /* Used to track whether we met old_changed position. If before_old is NULL
     old_changed was the first and we met it implicitly before current best route. */
  int old_meet = old_changed && !before_old;

  /* Note that before_old is either NULL or valid (not rejected) route.
     If old_changed is valid, before_old have to be too. If old changed route
     was not valid, caller must use NULL for both old_changed and before_old. */

  if (new_changed)
    stats->exp_updates_received++;
  else
    stats->exp_withdraws_received++;

  /* First, find the new_best route - first accepted by filters */
  for (r=net->routes; rte_is_valid(r); r=r->next)
    {
      if (new_best = export_filter(ah, r, &new_free, &tmpa, 0))
        break;

      /* Note if we walked around the position of old_changed route */
      if (r == before_old)
        old_meet = 1;
    }

  /* 
   * Second, handle the feed case. That means we do not care for
   * old_best. It is NULL for feed, and the new_best for refeed. 
   * For refeed, there is a hack similar to one in rt_notify_basic()
   * to ensure withdraws in case of changed filters
   */
  if (feed)
    {
      if (feed == 2)        /* refeed */
        old_best = new_best ? new_best :
          (rte_is_valid(net->routes) ? net->routes : NULL);
      else
        old_best = NULL;

      if (!new_best && !old_best)
        return;

      goto found;
    }

  /*
   * Now, we find the old_best route. Generally, it is the same as the
   * new_best, unless new_best is the same as new_changed or
   * old_changed is accepted before new_best.
   *
   * There are four cases:
   *
   * - We would find and accept old_changed before new_best, therefore
   *   old_changed is old_best. In remaining cases we suppose this
   *   is not true.
   *
   * - We found no new_best, therefore there is also no old_best and
   *   we ignore this withdraw.
   *
   * - We found new_best different than new_changed, therefore
   *   old_best is the same as new_best and we ignore this update.
   *
   * - We found new_best the same as new_changed, therefore it cannot
   *   be old_best and we have to continue search for old_best.
   */

  /* First case */
  if (old_meet)
    if (old_best = export_filter(ah, old_changed, &old_free, NULL, 1))
      goto found;

  /* Second case */
  if (!new_best)
    return;

  /* Third case, we use r instead of new_best, because export_filter() could change it */
  if (r != new_changed)
    {
      if (new_free)
        rte_free(new_free);
      return;
    }

  /* Fourth case */
  for (r=r->next; rte_is_valid(r); r=r->next)
    {
      if (old_best = export_filter(ah, r, &old_free, NULL, 1))
        goto found;

      if (r == before_old)
        if (old_best = export_filter(ah, old_changed, &old_free, NULL, 1))
          goto found;
    }

  /* Implicitly, old_best is NULL and new_best is non-NULL */

 found:
  do_rt_notify(ah, net, new_best, old_best, tmpa, (feed == 2));

  /* Discard temporary rte's */
  if (new_free)
    rte_free(new_free);
  if (old_free)
    rte_free(old_free);
}
617

    
618

    
619
static struct mpnh *
620
mpnh_merge_rta(struct mpnh *nhs, rta *a, int max)
621
{
622
  struct mpnh nh = { .gw = a->gw, .iface = a->iface };
623
  struct mpnh *nh2 = (a->dest == RTD_MULTIPATH) ? a->nexthops : &nh;
624
  return mpnh_merge(nhs, nh2, 1, 0, max, rte_update_pool);
625
}
626

    
627
/*
 * rt_export_merged - build the merged (multipath) route to export for
 * @net through hook @ah: the filtered best route with next hops merged
 * from all other mergable, filter-accepted routes. A private copy, if
 * made, is stored in *@rt_free. Returns NULL if the best route is
 * rejected, @silent suppresses logging/stats in the filter run.
 */
rte *
rt_export_merged(struct announce_hook *ah, net *net, rte **rt_free, ea_list **tmpa, int silent)
{
  // struct proto *p = ah->proto;
  struct mpnh *nhs = NULL;
  rte *best0, *best, *rt0, *rt, *tmp;

  best0 = net->routes;
  *rt_free = NULL;

  if (!rte_is_valid(best0))
    return NULL;

  best = export_filter(ah, best0, rt_free, tmpa, silent);

  /* Rejected or unreachable best route - nothing to merge into */
  if (!best || !rte_is_reachable(best))
    return best;

  /* Collect next hops from the remaining mergable, accepted routes */
  for (rt0 = best0->next; rt0; rt0 = rt0->next)
  {
    if (!rte_mergable(best0, rt0))
      continue;

    rt = export_filter(ah, rt0, &tmp, NULL, 1);

    if (!rt)
      continue;

    if (rte_is_reachable(rt))
      nhs = mpnh_merge_rta(nhs, rt->attrs, ah->proto->merge_limit);

    if (tmp)
      rte_free(tmp);
  }

  if (nhs)
  {
    nhs = mpnh_merge_rta(nhs, best->attrs, ah->proto->merge_limit);

    /* More than one next hop - rewrite the (COWed) rta as multipath */
    if (nhs->next)
    {
      best = rte_cow_rta(best, rte_update_pool);
      best->attrs->dest = RTD_MULTIPATH;
      best->attrs->nexthops = nhs;
    }
  }

  if (best != best0)
    *rt_free = best;

  return best;
}
679

    
680

    
681
static void
682
rt_notify_merged(struct announce_hook *ah, net *net, rte *new_changed, rte *old_changed,
683
                 rte *new_best, rte*old_best, int refeed)
684
{
685
  // struct proto *p = ah->proto;
686

    
687
  rte *new_best_free = NULL;
688
  rte *old_best_free = NULL;
689
  rte *new_changed_free = NULL;
690
  rte *old_changed_free = NULL;
691
  ea_list *tmpa = NULL;
692

    
693
  /* We assume that all rte arguments are either NULL or rte_is_valid() */
694

    
695
  /* This check should be done by the caller */
696
  if (!new_best && !old_best)
697
    return;
698

    
699
  /* Check whether the change is relevant to the merged route */
700
  if ((new_best == old_best) && !refeed)
701
  {
702
    new_changed = rte_mergable(new_best, new_changed) ?
703
      export_filter(ah, new_changed, &new_changed_free, NULL, 1) : NULL;
704

    
705
    old_changed = rte_mergable(old_best, old_changed) ?
706
      export_filter(ah, old_changed, &old_changed_free, NULL, 1) : NULL;
707

    
708
    if (!new_changed && !old_changed)
709
      return;
710
  }
711

    
712
  if (new_best)
713
    ah->stats->exp_updates_received++;
714
  else
715
    ah->stats->exp_withdraws_received++;
716

    
717
  /* Prepare new merged route */
718
  if (new_best)
719
    new_best = rt_export_merged(ah, net, &new_best_free, &tmpa, 0);
720

    
721
  /* Prepare old merged route (without proper merged next hops) */
722
  /* There are some issues with running filter on old route - see rt_notify_basic() */
723
  if (old_best && !refeed)
724
    old_best = export_filter(ah, old_best, &old_best_free, NULL, 1);
725

    
726
  if (new_best || old_best)
727
    do_rt_notify(ah, net, new_best, old_best, tmpa, refeed);
728

    
729
  /* Discard temporary rte's */
730
  if (new_best_free)
731
    rte_free(new_best_free);
732
  if (old_best_free)
733
    rte_free(old_best_free);
734
  if (new_changed_free)
735
    rte_free(new_changed_free);
736
  if (old_changed_free)
737
    rte_free(old_changed_free);
738
}
739

    
740

    
741
/**
742
 * rte_announce - announce a routing table change
743
 * @tab: table the route has been added to
744
 * @type: type of route announcement (RA_OPTIMAL or RA_ANY)
745
 * @net: network in question
746
 * @new: the new route to be announced
747
 * @old: the previous route for the same network
748
 *
749
 * This function gets a routing table update and announces it
750
 * to all protocols that acccepts given type of route announcement
751
 * and are connected to the same table by their announcement hooks.
752
 *
753
 * Route announcement of type RA_OPTIMAL si generated when optimal
754
 * route (in routing table @tab) changes. In that case @old stores the
755
 * old optimal route.
756
 *
757
 * Route announcement of type RA_ANY si generated when any route (in
758
 * routing table @tab) changes In that case @old stores the old route
759
 * from the same protocol.
760
 *
761
 * For each appropriate protocol, we first call its import_control()
762
 * hook which performs basic checks on the route (each protocol has a
763
 * right to veto or force accept of the route before any filter is
764
 * asked) and adds default values of attributes specific to the new
765
 * protocol (metrics, tags etc.).  Then it consults the protocol's
766
 * export filter and if it accepts the route, the rt_notify() hook of
767
 * the protocol gets called.
768
 */
769
static void
rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old,
             rte *new_best, rte *old_best, rte *before_old)
{
  /* Normalize arguments: treat invalid routes as absent */
  if (!rte_is_valid(new))
    new = NULL;

  if (!rte_is_valid(old))
    old = before_old = NULL;

  if (!rte_is_valid(new_best))
    new_best = NULL;

  if (!rte_is_valid(old_best))
    old_best = NULL;

  if (!old && !new)
    return;

  if (type == RA_OPTIMAL)
    {
      /* Maintain per-protocol counters of preferred (optimal) routes */
      if (new)
        new->attrs->src->proto->stats.pref_routes++;
      if (old)
        old->attrs->src->proto->stats.pref_routes--;

      if (tab->hostcache)
        rt_notify_hostcache(tab, net);
    }

  /* Dispatch to every hook that accepts this announcement type */
  struct announce_hook *a;
  WALK_LIST(a, tab->hooks)
    {
      ASSERT(a->proto->export_state != ES_DOWN);
      if (a->proto->accept_ra_types == type)
        if (type == RA_ACCEPTED)
          rt_notify_accepted(a, net, new, old, before_old, 0);
        else if (type == RA_MERGED)
          rt_notify_merged(a, net, new, old, new_best, old_best, 0);
        else
          rt_notify_basic(a, net, new, old, 0);
    }
}
812

    
813
/*
 * rte_validate - sanity-check a route received from a protocol.
 * Returns 1 if the network prefix is valid and routable; bogus
 * prefixes/routes are logged and rejected (returns 0).
 */
static inline int
rte_validate(rte *e)
{
  int c;
  net *n = e->net;

  // (n->n.pxlen > BITS_PER_IP_ADDRESS) || !ip_is_prefix(n->n.prefix,n->n.pxlen))
  if (!net_validate(n->n.addr))
  {
    log(L_WARN "Ignoring bogus prefix %N received via %s",
        n->n.addr, e->sender->proto->name);
    return 0;
  }

  /* Reject non-host addresses and destinations of link scope or worse */
  c = net_classify(n->n.addr);
  if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
  {
    log(L_WARN "Ignoring bogus route %N received via %s",
        n->n.addr, e->sender->proto->name);
    return 0;
  }

  return 1;
}
837

    
838
/**
839
 * rte_free - delete a &rte
840
 * @e: &rte to be deleted
841
 *
842
 * rte_free() deletes the given &rte from the routing table it's linked to.
843
 */
844
void
rte_free(rte *e)
{
  /* Only cached rtas hold a reference that must be released */
  if (rta_is_cached(e->attrs))
    rta_free(e->attrs);
  sl_free(rte_slab, e);
}
851

    
852
/* Free a rte whose attributes are known to be cached (skips the check) */
static inline void
rte_free_quick(rte *e)
{
  rta_free(e->attrs);
  sl_free(rte_slab, e);
}
858

    
859
static int
860
rte_same(rte *x, rte *y)
861
{
862
  return
863
    x->attrs == y->attrs &&
864
    x->flags == y->flags &&
865
    x->pflags == y->pflags &&
866
    x->pref == y->pref &&
867
    (!x->attrs->src->proto->rte_same || x->attrs->src->proto->rte_same(x, y));
868
}
869

    
870
/* A route is "ok" if it exists and was not rejected by import filters */
static inline int rte_is_ok(rte *e) { return e && !rte_is_filtered(e); }
871

    
872
/*
 * rte_recalculate - replace a route from @src in @net and re-elect the best one
 *
 * Core of route insertion/withdrawal: unlinks the old route originated by
 * @src (if any), enforces receive/import limits, links @new (which may be
 * NULL for a withdraw), re-establishes the best-route invariant (net->routes
 * is the best route), announces all changes and finally frees the old route.
 * Takes ownership of @new; it is freed here on every rejection path.
 */
static void
rte_recalculate(struct announce_hook *ah, net *net, rte *new, struct rte_src *src)
{
  struct proto *p = ah->proto;
  struct rtable *table = ah->table;
  struct proto_stats *stats = ah->stats;
  static struct tbf rl_pipe = TBF_DEFAULT_LOG_LIMITS;
  rte *before_old = NULL;        /* Predecessor of old in the list (for sorted insert) */
  rte *old_best = net->routes;
  rte *old = NULL;
  rte **k;

  k = &net->routes;			/* Find and remove original route from the same protocol */
  while (old = *k)
    {
      if (old->attrs->src == src)
	{
	  /* If there is the same route in the routing table but from
	   * a different sender, then there are two paths from the
	   * source protocol to this routing table through transparent
	   * pipes, which is not allowed.
	   *
	   * We log that and ignore the route. If it is a withdraw, we
	   * ignore it completely (there might be 'spurious withdraws',
	   * see FIXME in do_rte_announce())
	   */
	  if (old->sender->proto != p)
	    {
	      if (new)
		{
		  log_rl(&rl_pipe, L_ERR "Pipe collision detected when sending %N to table %s",
		      net->n.addr, table->name);
		  rte_free_quick(new);
		}
	      return;
	    }

	  if (new && rte_same(old, new))
	    {
	      /* No changes, ignore the new route */

	      if (!rte_is_filtered(new))
		{
		  stats->imp_updates_ignored++;
		  rte_trace_in(D_ROUTES, p, new, "ignored");
		}

	      rte_free_quick(new);
	      return;
	    }
	  *k = old->next;
	  break;
	}
      k = &old->next;
      before_old = old;
    }

  /* before_old is meaningful only when old was actually found */
  if (!old)
    before_old = NULL;

  if (!old && !new)
    {
      stats->imp_withdraws_ignored++;
      return;
    }

  int new_ok = rte_is_ok(new);
  int old_ok = rte_is_ok(old);

  /* Receive limit counts all routes (including filtered ones) */
  struct proto_limit *l = ah->rx_limit;
  if (l && !old && new)
    {
      u32 all_routes = stats->imp_routes + stats->filt_routes;

      if (all_routes >= l->limit)
	proto_notify_limit(ah, l, PLD_RX, all_routes);

      if (l->state == PLS_BLOCKED)
	{
	  /* In receive limit the situation is simple, old is NULL so
	     we just free new and exit like nothing happened */

	  stats->imp_updates_ignored++;
	  rte_trace_in(D_FILTERS, p, new, "ignored [limit]");
	  rte_free_quick(new);
	  return;
	}
    }

  /* Import limit counts only accepted (non-filtered) routes */
  l = ah->in_limit;
  if (l && !old_ok && new_ok)
    {
      if (stats->imp_routes >= l->limit)
	proto_notify_limit(ah, l, PLD_IN, stats->imp_routes);

      if (l->state == PLS_BLOCKED)
	{
	  /* In import limit the situation is more complicated. We
	     shouldn't just drop the route, we should handle it like
	     it was filtered. We also have to continue the route
	     processing if old or new is non-NULL, but we should exit
	     if both are NULL as this case is probably assumed to be
	     already handled. */

	  stats->imp_updates_ignored++;
	  rte_trace_in(D_FILTERS, p, new, "ignored [limit]");

	  if (ah->in_keep_filtered)
	    new->flags |= REF_FILTERED;
	  else
	    { rte_free_quick(new); new = NULL; }

	  /* Note that old && !new could be possible when
	     ah->in_keep_filtered changed in the recent past. */

	  if (!old && !new)
	    return;

	  new_ok = 0;
	  goto skip_stats1;
	}
    }

  if (new_ok)
    stats->imp_updates_accepted++;
  else if (old_ok)
    stats->imp_withdraws_accepted++;
  else
    stats->imp_withdraws_ignored++;

 skip_stats1:

  /* Keep the imported/filtered route counters in sync */
  if (new)
    rte_is_filtered(new) ? stats->filt_routes++ : stats->imp_routes++;
  if (old)
    rte_is_filtered(old) ? stats->filt_routes-- : stats->imp_routes--;

  if (table->config->sorted)
    {
      /* If routes are sorted, just insert new route to appropriate position */
      if (new)
	{
	  if (before_old && !rte_better(new, before_old))
	    k = &before_old->next;
	  else
	    k = &net->routes;

	  for (; *k; k=&(*k)->next)
	    if (rte_better(new, *k))
	      break;

	  new->next = *k;
	  *k = new;
	}
    }
  else
    {
      /* If routes are not sorted, find the best route and move it on
	 the first position. There are several optimized cases. */

      if (src->proto->rte_recalculate && src->proto->rte_recalculate(table, net, new, old, old_best))
	goto do_recalculate;

      if (new && rte_better(new, old_best))
	{
	  /* The first case - the new route is clearly optimal,
	     we link it at the first position */

	  new->next = net->routes;
	  net->routes = new;
	}
      else if (old == old_best)
	{
	  /* The second case - the old best route disappeared, we add the
	     new route (if we have any) to the list (we don't care about
	     position) and then we elect the new optimal route and relink
	     that route at the first position and announce it. New optimal
	     route might be NULL if there is no more routes */

	do_recalculate:
	  /* Add the new route to the list */
	  if (new)
	    {
	      new->next = net->routes;
	      net->routes = new;
	    }

	  /* Find a new optimal route (if there is any) */
	  if (net->routes)
	    {
	      rte **bp = &net->routes;
	      for (k=&(*bp)->next; *k; k=&(*k)->next)
		if (rte_better(*k, *bp))
		  bp = k;

	      /* And relink it */
	      rte *best = *bp;
	      *bp = best->next;
	      best->next = net->routes;
	      net->routes = best;
	    }
	}
      else if (new)
	{
	  /* The third case - the new route is not better than the old
	     best route (therefore old_best != NULL) and the old best
	     route was not removed (therefore old_best == net->routes).
	     We just link the new route after the old best route. */

	  ASSERT(net->routes != NULL);
	  new->next = net->routes->next;
	  net->routes->next = new;
	}
      /* The fourth (empty) case - suboptimal route was removed, nothing to do */
    }

  if (new)
    new->lastmod = now;

  /* Log the route change */
  if (p->debug & D_ROUTES)
    {
      if (new_ok)
	rte_trace(p, new, '>', new == net->routes ? "added [best]" : "added");
      else if (old_ok)
	{
	  if (old != old_best)
	    rte_trace(p, old, '>', "removed");
	  else if (rte_is_ok(net->routes))
	    rte_trace(p, old, '>', "removed [replaced]");
	  else
	    rte_trace(p, old, '>', "removed [sole]");
	}
    }

  /* Propagate the route change */
  rte_announce(table, RA_ANY, net, new, old, NULL, NULL, NULL);
  if (net->routes != old_best)
    rte_announce(table, RA_OPTIMAL, net, net->routes, old_best, NULL, NULL, NULL);
  if (table->config->sorted)
    rte_announce(table, RA_ACCEPTED, net, new, old, NULL, NULL, before_old);
  rte_announce(table, RA_MERGED, net, new, old, net->routes, old_best, NULL);

  /* If the net became empty, consider scheduling a garbage collection run */
  if (!net->routes &&
      (table->gc_counter++ >= table->config->gc_max_ops) &&
      (table->gc_time + table->config->gc_min_time <= now))
    rt_schedule_gc(table);

  if (old_ok && p->rte_remove)
    p->rte_remove(net, old);
  if (new_ok && p->rte_insert)
    p->rte_insert(net, new);

  if (old)
    rte_free_quick(old);
}
1128

    
1129
static int rte_update_nest_cnt;		/* Nesting counter to allow recursive updates */

/* Enter an update section; may be nested (e.g. pipes re-entering rte_update) */
static inline void
rte_update_lock(void)
{
  rte_update_nest_cnt++;
}

/* Leave an update section; the shared linpool with temporary attributes
   is flushed only when the outermost section ends. */
static inline void
rte_update_unlock(void)
{
  if (!--rte_update_nest_cnt)
    lp_flush(rte_update_pool);
}
1143

    
1144
static inline void
1145
rte_hide_dummy_routes(net *net, rte **dummy)
1146
{
1147
  if (net->routes && net->routes->attrs->source == RTS_DUMMY)
1148
  {
1149
    *dummy = net->routes;
1150
    net->routes = (*dummy)->next;
1151
  }
1152
}
1153

    
1154
static inline void
1155
rte_unhide_dummy_routes(net *net, rte **dummy)
1156
{
1157
  if (*dummy)
1158
  {
1159
    (*dummy)->next = net->routes;
1160
    net->routes = *dummy;
1161
  }
1162
}
1163

    
1164
/**
1165
 * rte_update - enter a new update to a routing table
1166
 * @table: table to be updated
1167
 * @ah: pointer to table announce hook
1168
 * @net: network node
1169
 * @p: protocol submitting the update
1170
 * @src: protocol originating the update
1171
 * @new: a &rte representing the new route or %NULL for route removal.
1172
 *
1173
 * This function is called by the routing protocols whenever they discover
1174
 * a new route or wish to update/remove an existing route. The right announcement
1175
 * sequence is to build route attributes first (either un-cached with @aflags set
1176
 * to zero or a cached one using rta_lookup(); in this case please note that
1177
 * you need to increase the use count of the attributes yourself by calling
1178
 * rta_clone()), call rte_get_temp() to obtain a temporary &rte, fill in all
1179
 * the appropriate data and finally submit the new &rte by calling rte_update().
1180
 *
1181
 * @src specifies the protocol that originally created the route and the meaning
1182
 * of protocol-dependent data of @new. If @new is not %NULL, @src have to be the
1183
 * same value as @new->attrs->proto. @p specifies the protocol that called
1184
 * rte_update(). In most cases it is the same protocol as @src. rte_update()
1185
 * stores @p in @new->sender;
1186
 *
1187
 * When rte_update() gets any route, it automatically validates it (checks,
1188
 * whether the network and next hop address are valid IP addresses and also
1189
 * whether a normal routing protocol doesn't try to smuggle a host or link
1190
 * scope route to the table), converts all protocol dependent attributes stored
1191
 * in the &rte to temporary extended attributes, consults import filters of the
1192
 * protocol to see if the route should be accepted and/or its attributes modified,
1193
 * stores the temporary attributes back to the &rte.
1194
 *
1195
 * Now, having a "public" version of the route, we
1196
 * automatically find any old route defined by the protocol @src
1197
 * for network @n, replace it by the new one (or removing it if @new is %NULL),
1198
 * recalculate the optimal route for this destination and finally broadcast
1199
 * the change (if any) to all routing protocols by calling rte_announce().
1200
 *
1201
 * All memory used for attribute lists and other temporary allocations is taken
1202
 * from a special linear pool @rte_update_pool and freed when rte_update()
1203
 * finishes.
1204
 */
1205

    
1206
void
rte_update2(struct announce_hook *ah, net *net, rte *new, struct rte_src *src)
{
  struct proto *p = ah->proto;
  struct proto_stats *stats = ah->stats;
  struct filter *filter = ah->in_filter;
  ea_list *tmpa = NULL;
  rte *dummy = NULL;

  rte_update_lock();
  if (new)
    {
      new->sender = ah;

      stats->imp_updates_received++;
      if (!rte_validate(new))
	{
	  rte_trace_in(D_FILTERS, p, new, "invalid");
	  stats->imp_updates_invalid++;
	  goto drop;
	}

      if (filter == FILTER_REJECT)
	{
	  stats->imp_updates_filtered++;
	  rte_trace_in(D_FILTERS, p, new, "filtered out");

	  /* Rejected routes may still be kept in the table (marked filtered)
	     when 'import keep filtered' is configured */
	  if (! ah->in_keep_filtered)
	    goto drop;

	  /* new is a private copy, we can modify it */
	  new->flags |= REF_FILTERED;
	}
      else
	{
	  /* Expand protocol-private attributes so filters can see them */
	  tmpa = make_tmp_attrs(new, rte_update_pool);
	  if (filter && (filter != FILTER_REJECT))
	    {
	      ea_list *old_tmpa = tmpa;
	      int fr = f_run(filter, &new, &tmpa, rte_update_pool, 0);
	      if (fr > F_ACCEPT)
		{
		  stats->imp_updates_filtered++;
		  rte_trace_in(D_FILTERS, p, new, "filtered out");

		  if (! ah->in_keep_filtered)
		    goto drop;

		  new->flags |= REF_FILTERED;
		}
	      /* If the filter modified temporary attributes, store them back */
	      if (tmpa != old_tmpa && src->proto->store_tmp_attrs)
		src->proto->store_tmp_attrs(new, tmpa);
	    }
	}
      if (!rta_is_cached(new->attrs)) /* Need to copy attributes */
	new->attrs = rta_lookup(new->attrs);
      new->flags |= REF_COW;
    }
  else
    {
      stats->imp_withdraws_received++;

      /* A withdraw without a known net or source cannot match anything */
      if (!net || !src)
	{
	  stats->imp_withdraws_ignored++;
	  rte_update_unlock();
	  return;
	}
    }

 recalc:
  rte_hide_dummy_routes(net, &dummy);
  rte_recalculate(ah, net, new, src);
  rte_unhide_dummy_routes(net, &dummy);
  rte_update_unlock();
  return;

 drop:
  /* Dropped updates still fall through to recalculation as a withdraw */
  rte_free(new);
  new = NULL;
  goto recalc;
}
1288

    
1289
/* Independent call to rte_announce(), used from next hop
   recalculation, outside of rte_update(). new must be non-NULL.
   Wraps the announcement in its own update lock so the shared
   linpool is flushed afterwards. */
static inline void 
rte_announce_i(rtable *tab, unsigned type, net *net, rte *new, rte *old,
	       rte *new_best, rte *old_best)
{
  rte_update_lock();
  rte_announce(tab, type, net, new, old, new_best, old_best, NULL);
  rte_update_unlock();
}
1299

    
1300
/* Note: parameter @t is not used in the body; the table is reached
   through old->sender. */
void
rte_discard(rtable *t, rte *old)	/* Non-filtered route deletion, used during garbage collection */
{
  rte_update_lock();
  rte_recalculate(old->sender, old->net, NULL, old->attrs->src);
  rte_update_unlock();
}
1307

    
1308
/* Check rtable for best route to given net whether it would be exported do p */
1309
int
1310
rt_examine(rtable *t, net_addr *a, struct proto *p, struct filter *filter)
1311
{
1312
  net *n = net_find(t, a);
1313
  rte *rt = n ? n->routes : NULL;
1314

    
1315
  if (!rte_is_valid(rt))
1316
    return 0;
1317

    
1318
  rte_update_lock();
1319

    
1320
  /* Rest is stripped down export_filter() */
1321
  ea_list *tmpa = make_tmp_attrs(rt, rte_update_pool);
1322
  int v = p->import_control ? p->import_control(p, &rt, &tmpa, rte_update_pool) : 0;
1323
  if (v == RIC_PROCESS)
1324
    v = (f_run(filter, &rt, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) <= F_ACCEPT);
1325

    
1326
   /* Discard temporary rte */
1327
  if (rt != n->routes)
1328
    rte_free(rt);
1329

    
1330
  rte_update_unlock();
1331

    
1332
  return v > 0;
1333
}
1334

    
1335

    
1336
/**
1337
 * rt_refresh_begin - start a refresh cycle
1338
 * @t: related routing table
1339
 * @ah: related announce hook 
1340
 *
1341
 * This function starts a refresh cycle for given routing table and announce
1342
 * hook. The refresh cycle is a sequence where the protocol sends all its valid
1343
 * routes to the routing table (by rte_update()). After that, all protocol
1344
 * routes (more precisely routes with @ah as @sender) not sent during the
1345
 * refresh cycle but still in the table from the past are pruned. This is
1346
 * implemented by marking all related routes as stale by REF_STALE flag in
1347
 * rt_refresh_begin(), then marking all related stale routes with REF_DISCARD
1348
 * flag in rt_refresh_end() and then removing such routes in the prune loop.
1349
 */
1350
void
1351
rt_refresh_begin(rtable *t, struct announce_hook *ah)
1352
{
1353
  FIB_WALK(&t->fib, net, n)
1354
    {
1355
      rte *e;
1356
      for (e = n->routes; e; e = e->next)
1357
        if (e->sender == ah)
1358
          e->flags |= REF_STALE;
1359
    }
1360
  FIB_WALK_END;
1361
}
1362

    
1363
/**
 * rt_refresh_end - end a refresh cycle
 * @t: related routing table
 * @ah: related announce hook 
 *
 * This function ends a refresh cycle for given routing table and announce
 * hook. See rt_refresh_begin() for description of refresh cycles.
 */
void
rt_refresh_end(rtable *t, struct announce_hook *ah)
{
  int prune = 0;

  /* Routes from @ah still marked REF_STALE were not re-sent during the
     refresh cycle; mark them for removal by the prune loop. */
  FIB_WALK(&t->fib, net, n)
    {
      rte *e;
      for (e = n->routes; e; e = e->next)
	if ((e->sender == ah) && (e->flags & REF_STALE))
	  {
	    e->flags |= REF_DISCARD;
	    prune = 1;
	  }
    }
  FIB_WALK_END;

  if (prune)
    rt_schedule_prune(t);
}
1391

    
1392

    
1393
/**
1394
 * rte_dump - dump a route
1395
 * @e: &rte to be dumped
1396
 *
1397
 * This functions dumps contents of a &rte to debug output.
1398
 */
1399
void
1400
rte_dump(rte *e)
1401
{
1402
  net *n = e->net;
1403
  debug("%-1N ", n->n.addr);
1404
  debug("KF=%02x PF=%02x pref=%d lm=%d ", n->n.flags, e->pflags, e->pref, now-e->lastmod);
1405
  rta_dump(e->attrs);
1406
  if (e->attrs->src->proto->proto->dump_attrs)
1407
    e->attrs->src->proto->proto->dump_attrs(e);
1408
  debug("\n");
1409
}
1410

    
1411
/**
1412
 * rt_dump - dump a routing table
1413
 * @t: routing table to be dumped
1414
 *
1415
 * This function dumps contents of a given routing table to debug output.
1416
 */
1417
void
1418
rt_dump(rtable *t)
1419
{
1420
  debug("Dump of routing table <%s>\n", t->name);
1421
#ifdef DEBUGGING
1422
  fib_check(&t->fib);
1423
#endif
1424
  FIB_WALK(&t->fib, net, n)
1425
    {
1426
      rte *e;
1427
      for(e=n->routes; e; e=e->next)
1428
        rte_dump(e);
1429
    }
1430
  FIB_WALK_END;
1431

    
1432
  struct announce_hook *a;
1433
  WALK_LIST(a, t->hooks)
1434
    debug("\tAnnounces routes to protocol %s\n", a->proto->name);
1435
  debug("\n");
1436
}
1437

    
1438
/**
1439
 * rt_dump_all - dump all routing tables
1440
 *
1441
 * This function dumps contents of all routing tables to debug output.
1442
 */
1443
void
1444
rt_dump_all(void)
1445
{
1446
  rtable *t;
1447

    
1448
  WALK_LIST(t, routing_tables)
1449
    rt_dump(t);
1450
}
1451

    
1452
/* Mark the table for pruning and make sure its maintenance event runs */
static inline void
rt_schedule_prune(rtable *tab)
{
  rt_mark_for_prune(tab);
  ev_schedule(tab->rt_event);
}
1458

    
1459
static inline void
1460
rt_schedule_gc(rtable *tab)
1461
{
1462
  if (tab->gc_scheduled)
1463
    return;
1464

    
1465
  tab->gc_scheduled = 1;
1466
  ev_schedule(tab->rt_event);
1467
}
1468

    
1469
static inline void
1470
rt_schedule_hcu(rtable *tab)
1471
{
1472
  if (tab->hcu_scheduled)
1473
    return;
1474

    
1475
  tab->hcu_scheduled = 1;
1476
  ev_schedule(tab->rt_event);
1477
}
1478

    
1479
/* Request a next hop update. nhu_state is a two-bit state machine
   (see rt_next_hop_update(): bit 1 = update running, bit 0 = update
   requested); a request arriving while an update runs schedules
   another pass. The event is scheduled only from the idle state. */
static inline void
rt_schedule_nhu(rtable *tab)
{
  if (tab->nhu_state == 0)
    ev_schedule(tab->rt_event);

  /* state change 0->1, 2->3 */
  tab->nhu_state |= 1;
}
1488

    
1489

    
1490
/*
 * rt_prune_nets - remove orphaned (route-less) network entries from @tab
 *
 * Walks the whole FIB and deletes every net with an empty route list.
 * Deleting invalidates the iterator position, hence the FIB_ITERATE_PUT /
 * goto again restart after each removal. Resets the GC bookkeeping when
 * done.
 */
static void
rt_prune_nets(rtable *tab)
{
  struct fib_iterator fit;
  int ncnt = 0, ndel = 0;	/* visited / deleted entry counters */

#ifdef DEBUGGING
  fib_check(&tab->fib);
#endif

  FIB_ITERATE_INIT(&fit, &tab->fib);
again:
  FIB_ITERATE_START(&tab->fib, &fit, net, n)
    {
      ncnt++;
      if (!n->routes)		/* Orphaned FIB entry */
	{
	  /* Park the iterator before mutating the FIB, then restart */
	  FIB_ITERATE_PUT(&fit);
	  fib_delete(&tab->fib, n);
	  ndel++;
	  goto again;
	}
    }
  FIB_ITERATE_END;
  DBG("Pruned %d of %d networks\n", ndel, ncnt);

  tab->gc_counter = 0;
  tab->gc_time = now;
  tab->gc_scheduled = 0;
}
1520

    
1521
/*
 * rt_event - deferred maintenance event of a routing table
 *
 * Dispatches all pending table work in a fixed order: hostcache update,
 * next hop update, table pruning and finally garbage collection of empty
 * nets. If pruning cannot finish within its step limit, the event is
 * re-scheduled and the GC is postponed to the next run.
 */
static void
rt_event(void *ptr)
{
  rtable *tab = ptr;

  if (tab->hcu_scheduled)
    rt_update_hostcache(tab);

  if (tab->nhu_state)
    rt_next_hop_update(tab);

  if (tab->prune_state)
    if (!rt_prune_table(tab))
      {
	/* Table prune unfinished */
	ev_schedule(tab->rt_event);
	return;
      }

  if (tab->gc_scheduled)
    {
      rt_prune_nets(tab);
      rt_prune_sources(); // FIXME this should be moved to independent event
    }
}
1546

    
1547
void
1548
rt_setup(pool *p, rtable *t, char *name, struct rtable_config *cf)
1549
{
1550
  bzero(t, sizeof(*t));
1551
  t->name = name;
1552
  t->config = cf;
1553
  t->addr_type = cf ? cf->addr_type : NET_IP4;
1554
  fib_init(&t->fib, p, t->addr_type, sizeof(net), OFFSETOF(net, n), 0, NULL);
1555
  init_list(&t->hooks);
1556
  if (cf)
1557
    {
1558
      t->rt_event = ev_new(p);
1559
      t->rt_event->hook = rt_event;
1560
      t->rt_event->data = t;
1561
      t->gc_time = now;
1562
    }
1563
}
1564

    
1565
/**
 * rt_init - initialize routing tables
 *
 * This function is called during BIRD startup. It initializes the
 * routing table module: route attributes, the table resource pool,
 * the shared linpool for temporary attributes, the slab for &rte
 * structures and the global list of tables.
 */
void
rt_init(void)
{
  rta_init();
  rt_table_pool = rp_new(&root_pool, "Routing tables");
  rte_update_pool = lp_new(rt_table_pool, 4080);
  rte_slab = sl_new(rt_table_pool, sizeof(rte));
  init_list(&routing_tables);
}
1580

    
1581

    
1582
/*
 * rt_prune_step - one bounded step of table pruning
 * @tab: table to prune
 * @limit: in/out budget of route removals remaining for this step
 *
 * Removes routes belonging to flushing protocols or marked REF_DISCARD,
 * plus the nets left empty afterwards. Returns 1 when pruning is complete,
 * 0 when the budget ran out (the iterator position is saved in
 * tab->prune_fit for the next step).
 */
static int
rt_prune_step(rtable *tab, int *limit)
{
  struct fib_iterator *fit = &tab->prune_fit;

  DBG("Pruning route table %s\n", tab->name);
#ifdef DEBUGGING
  fib_check(&tab->fib);
#endif

  if (tab->prune_state == RPS_NONE)
    return 1;

  if (tab->prune_state == RPS_SCHEDULED)
    {
      FIB_ITERATE_INIT(fit, &tab->fib);
      tab->prune_state = RPS_RUNNING;
    }

again:
  FIB_ITERATE_START(&tab->fib, fit, net, n)
    {
      rte *e;

    rescan:
      for (e=n->routes; e; e=e->next)
	if (e->sender->proto->flushing || (e->flags & REF_DISCARD))
	  {
	    if (*limit <= 0)
	      {
		/* Budget exhausted: save position, resume next time */
		FIB_ITERATE_PUT(fit);
		return 0;
	      }

	    rte_discard(tab, e);
	    (*limit)--;

	    /* The list was modified by rte_discard(); restart the scan */
	    goto rescan;
	  }
      if (!n->routes)		/* Orphaned FIB entry */
	{
	  /* Park the iterator before deleting the net, then restart */
	  FIB_ITERATE_PUT(fit);
	  fib_delete(&tab->fib, n);
	  goto again;
	}
    }
  FIB_ITERATE_END;

#ifdef DEBUGGING
  fib_check(&tab->fib);
#endif

  tab->prune_state = RPS_NONE;
  return 1;
}
1637

    
1638
/**
1639
 * rt_prune_table - prune a routing table
1640
 *
1641
 * This function scans the routing table @tab and removes routes belonging to
1642
 * flushing protocols, discarded routes and also stale network entries, in a
1643
 * similar fashion like rt_prune_loop(). Returns 1 when all such routes are
1644
 * pruned. Contrary to rt_prune_loop(), this function is not a part of the
1645
 * protocol flushing loop, but it is called from rt_event() for just one routing
1646
 * table.
1647
 *
1648
 * Note that rt_prune_table() and rt_prune_loop() share (for each table) the
1649
 * prune state (@prune_state) and also the pruning iterator (@prune_fit).
1650
 */
1651
static inline int
1652
rt_prune_table(rtable *tab)
1653
{
1654
  int limit = 512;
1655
  return rt_prune_step(tab, &limit);
1656
}
1657

    
1658
/**
1659
 * rt_prune_loop - prune routing tables
1660
 *
1661
 * The prune loop scans routing tables and removes routes belonging to flushing
1662
 * protocols, discarded routes and also stale network entries. Returns 1 when
1663
 * all such routes are pruned. It is a part of the protocol flushing loop.
1664
 */
1665
int
1666
rt_prune_loop(void)
1667
{
1668
  int limit = 512;
1669
  rtable *t;
1670

    
1671
  WALK_LIST(t, routing_tables)
1672
    if (! rt_prune_step(t, &limit))
1673
      return 0;
1674

    
1675
  return 1;
1676
}
1677

    
1678
void
1679
rt_preconfig(struct config *c)
1680
{
1681
  struct symbol *s = cf_get_symbol("master");
1682

    
1683
  init_list(&c->tables);
1684
  c->master_rtc = rt_new_table(s, NET_IP4);
1685
}
1686

    
1687

    
1688
/* 
 * Some functions for handling internal next hop updates
 * triggered by rt_schedule_nhu().
 */
1692

    
1693
static inline int
1694
rta_next_hop_outdated(rta *a)
1695
{
1696
  struct hostentry *he = a->hostentry;
1697

    
1698
  if (!he)
1699
    return 0;
1700

    
1701
  if (!he->src)
1702
    return a->dest != RTD_UNREACHABLE;
1703

    
1704
  return (a->iface != he->src->iface) || !ipa_equal(a->gw, he->gw) ||
1705
    (a->dest != he->dest) || (a->igp_metric != he->igp_metric) ||
1706
    !mpnh_same(a->nexthops, he->src->nexthops);
1707
}
1708

    
1709
static inline void
1710
rta_apply_hostentry(rta *a, struct hostentry *he)
1711
{
1712
  a->hostentry = he;
1713
  a->iface = he->src ? he->src->iface : NULL;
1714
  a->gw = he->gw;
1715
  a->dest = he->dest;
1716
  a->igp_metric = he->igp_metric;
1717
  a->nexthops = he->src ? he->src->nexthops : NULL;
1718
}
1719

    
1720
static inline rte *
1721
rt_next_hop_update_rte(rtable *tab, rte *old)
1722
{
1723
  rta a;
1724
  memcpy(&a, old->attrs, sizeof(rta));
1725
  rta_apply_hostentry(&a, old->attrs->hostentry);
1726
  a.aflags = 0;
1727

    
1728
  rte *e = sl_alloc(rte_slab);
1729
  memcpy(e, old, sizeof(rte));
1730
  e->attrs = rta_lookup(&a);
1731

    
1732
  return e;
1733
}
1734

    
1735
/*
 * rt_next_hop_update_net - refresh next hops of all routes in one net
 *
 * Replaces in place every route whose next-hop-derived attributes are
 * outdated (see rta_next_hop_outdated()), announces each replacement,
 * then re-elects and announces the best route. Returns the number of
 * routes updated (0 means nothing changed).
 */
static inline int
rt_next_hop_update_net(rtable *tab, net *n)
{
  rte **k, *e, *new, *old_best, **new_best;
  int count = 0;
  int free_old_best = 0;

  old_best = n->routes;
  if (!old_best)
    return 0;

  /* First pass: replace outdated routes in place */
  for (k = &n->routes; e = *k; k = &e->next)
    if (rta_next_hop_outdated(e->attrs))
      {
	new = rt_next_hop_update_rte(tab, e);
	*k = new;

	rte_announce_i(tab, RA_ANY, n, new, e, NULL, NULL);
	rte_trace_in(D_ROUTES, new->sender->proto, new, "updated");

	/* Call a pre-comparison hook */
	/* Not really an efficient way to compute this */
	if (e->attrs->src->proto->rte_recalculate)
	  e->attrs->src->proto->rte_recalculate(tab, n, new, e, NULL);

	if (e != old_best)
	  rte_free_quick(e);
	else /* Freeing of the old best rte is postponed */
	  free_old_best = 1;

	/* Continue iteration from the replacement */
	e = new;
	count++;
      }

  if (!count)
    return 0;

  /* Find the new best route */
  new_best = NULL;
  for (k = &n->routes; e = *k; k = &e->next)
    {
      if (!new_best || rte_better(e, *new_best))
	new_best = k;
    }

  /* Relink the new best route to the first position */
  new = *new_best;
  if (new != n->routes)
    {
      *new_best = new->next;
      new->next = n->routes;
      n->routes = new;
    }

  /* Announce the new best route */
  if (new != old_best)
    {
      rte_announce_i(tab, RA_OPTIMAL, n, new, old_best, NULL, NULL);
      rte_trace_in(D_ROUTES, new->sender->proto, new, "updated [best]");
    }

  /* FIXME: Better announcement of merged routes */
  rte_announce_i(tab, RA_MERGED, n, new, old_best, new, old_best);

  /* old_best could only be freed after all announcements referenced it */
   if (free_old_best)
    rte_free_quick(old_best);

  return count;
}
1804

    
1805
/*
 * rt_next_hop_update - incremental next hop update of a whole table
 *
 * Driven by tab->nhu_state: 0 idle, 1 scheduled, 2 running,
 * 3 running with another pass requested (see rt_schedule_nhu()).
 * Processes nets in bounded batches (max_feed), suspending via the
 * FIB iterator and re-scheduling the table event when the budget
 * runs out.
 */
static void
rt_next_hop_update(rtable *tab)
{
  struct fib_iterator *fit = &tab->nhu_fit;
  int max_feed = 32;	/* per-invocation budget of route updates */

  if (tab->nhu_state == 0)
    return;

  if (tab->nhu_state == 1)
    {
      FIB_ITERATE_INIT(fit, &tab->fib);
      tab->nhu_state = 2;
    }

  FIB_ITERATE_START(&tab->fib, fit, net, n)
    {
      if (max_feed <= 0)
	{
	  /* Budget exhausted: save position and continue later */
	  FIB_ITERATE_PUT(fit);
	  ev_schedule(tab->rt_event);
	  return;
	}
      max_feed -= rt_next_hop_update_net(tab, n);
    }
  FIB_ITERATE_END;

  /* state change 2->0, 3->1 */
  tab->nhu_state &= 1;

  /* A new pass was requested while we were running */
  if (tab->nhu_state > 0)
    ev_schedule(tab->rt_event);
}
1838

    
1839

    
1840
/*
 * rt_new_table - allocate and register a new table configuration for
 * symbol @s with address type @addr_type, filling in GC defaults.
 * Returns the (possibly pre-existing) &rtable_config.
 */
struct rtable_config *
rt_new_table(struct symbol *s, uint addr_type)
{
  /* Hack that allows to 'redefine' the master table */
  if ((s->class == SYM_TABLE) && (s->def == new_config->master_rtc))
    return s->def;

  struct rtable_config *c = cfg_allocz(sizeof(struct rtable_config));

  cf_define_symbol(s, SYM_TABLE, c);
  c->name = s->name;
  c->addr_type = addr_type;
  add_tail(&new_config->tables, &c->n);
  /* Default garbage-collection thresholds */
  c->gc_max_ops = 1000;
  c->gc_min_time = 5;
  return c;
}
1857

    
1858
/**
1859
 * rt_lock_table - lock a routing table
1860
 * @r: routing table to be locked
1861
 *
1862
 * Lock a routing table, because it's in use by a protocol,
1863
 * preventing it from being freed when it gets undefined in a new
1864
 * configuration.
1865
 */
1866
void
1867
rt_lock_table(rtable *r)
1868
{
1869
  r->use_count++;
1870
}
1871

    
1872
/**
 * rt_unlock_table - unlock a routing table
 * @r: routing table to be unlocked
 *
 * Unlock a routing table formerly locked by rt_lock_table(),
 * that is decrease its use count and delete it if it's scheduled
 * for deletion by configuration changes.
 */
void
rt_unlock_table(rtable *r)
{
  if (!--r->use_count && r->deleted)
    {
      /* Last user is gone and the table was removed from the config:
         tear it down and release the obstacle blocking config removal */
      struct config *conf = r->deleted;
      DBG("Deleting routing table %s\n", r->name);
      r->config->table = NULL;
      if (r->hostcache)
        rt_free_hostcache(r);
      rem_node(&r->n);
      fib_free(&r->fib);
      rfree(r->rt_event);
      mb_free(r);
      config_del_obstacle(conf);
    }
}
1897

    
1898
/**
 * rt_commit - commit new routing table configuration
 * @new: new configuration
 * @old: original configuration or %NULL if it's boot time config
 *
 * Scan differences between @old and @new configuration and modify
 * the routing tables according to these changes. If @new defines a
 * previously unknown table, create it, if it omits a table existing
 * in @old, schedule it for deletion (it gets deleted when all protocols
 * disconnect from it by calling rt_unlock_table()), if it exists
 * in both configurations, leave it unchanged.
 */
void
rt_commit(struct config *new, struct config *old)
{
  struct rtable_config *o, *r;

  DBG("rt_commit:\n");
  if (old)
    {
      WALK_LIST(o, old->tables)
        {
          rtable *ot = o->table;
          if (!ot->deleted)
            {
              struct symbol *sym = cf_find_symbol(new, o->name);
              if (sym && sym->class == SYM_TABLE && !new->shutdown)
                {
                  /* Table survives: re-bind the live rtable to the new config */
                  DBG("\t%s: same\n", o->name);
                  r = sym->def;
                  r->table = ot;
                  ot->name = r->name;
                  ot->config = r;
                  if (o->sorted != r->sorted)
                    log(L_WARN "Reconfiguration of rtable sorted flag not implemented");
                }
              else
                {
                  /* Table disappeared (or we are shutting down): mark it
                     deleted; the lock/unlock pair triggers immediate
                     teardown if no protocol holds a reference */
                  DBG("\t%s: deleted\n", o->name);
                  ot->deleted = old;
                  config_add_obstacle(old);
                  rt_lock_table(ot);
                  rt_unlock_table(ot);
                }
            }
        }
    }

  /* Create tables that exist in @new but have no live counterpart yet */
  WALK_LIST(r, new->tables)
    if (!r->table)
      {
        rtable *t = mb_alloc(rt_table_pool, sizeof(struct rtable));
        DBG("\t%s: created\n", r->name);
        rt_setup(rt_table_pool, t, r->name, r);
        add_tail(&routing_tables, &t->n);
        r->table = t;
      }
  DBG("\tdone\n");
}
1957

    
1958
static inline void
1959
do_feed_baby(struct proto *p, int type, struct announce_hook *h, net *n, rte *e)
1960
{
1961
  rte_update_lock();
1962
  if (type == RA_ACCEPTED)
1963
    rt_notify_accepted(h, n, e, NULL, NULL, p->refeeding ? 2 : 1);
1964
  else if (type == RA_MERGED)
1965
    rt_notify_merged(h, n, NULL, NULL, e, p->refeeding ? e : NULL, p->refeeding);
1966
  else
1967
    rt_notify_basic(h, n, e, p->refeeding ? e : NULL, p->refeeding);
1968
  rte_update_unlock();
1969
}
1970

    
1971
/**
 * rt_feed_baby - advertise routes to a new protocol
 * @p: protocol to be fed
 *
 * This function performs one pass of advertisement of routes to a newly
 * initialized protocol. It's called by the protocol code as long as it
 * has something to do. (We avoid transferring all the routes in single
 * pass in order not to monopolize CPU time.)
 *
 * Returns 1 when feeding is finished (or aborted), 0 when more work
 * remains and the caller should invoke it again.
 */
int
rt_feed_baby(struct proto *p)
{
  struct announce_hook *h;
  struct fib_iterator *fit;
  int max_feed = 256;        /* Per-pass work limit */

  if (!p->feed_ahook)                        /* Need to initialize first */
    {
      if (!p->ahooks)
        return 1;
      DBG("Announcing routes to new protocol %s\n", p->name);
      p->feed_ahook = p->ahooks;
      fit = p->feed_iterator = mb_alloc(p->pool, sizeof(struct fib_iterator));
      goto next_hook;
    }
  fit = p->feed_iterator;

again:
  h = p->feed_ahook;
  FIB_ITERATE_START(&h->table->fib, fit, net, n)
    {
      rte *e = n->routes;
      if (max_feed <= 0)
        {
          /* Budget exhausted - park the iterator, resume on next call */
          FIB_ITERATE_PUT(fit);
          return 0;
        }

      /* XXXX perhaps we should change feed for RA_ACCEPTED to not use 'new' */

      /* Single-route modes: feed only the best route of the net */
      if ((p->accept_ra_types == RA_OPTIMAL) ||
          (p->accept_ra_types == RA_ACCEPTED) ||
          (p->accept_ra_types == RA_MERGED))
        if (rte_is_valid(e))
          {
            if (p->export_state != ES_FEEDING)
              return 1;  /* In the meantime, the protocol fell down. */

            do_feed_baby(p, p->accept_ra_types, h, n, e);
            max_feed--;
          }

      /* RA_ANY: feed every valid route on the net's list */
      if (p->accept_ra_types == RA_ANY)
        for(e = n->routes; e; e = e->next)
          {
            if (p->export_state != ES_FEEDING)
              return 1;  /* In the meantime, the protocol fell down. */

            if (!rte_is_valid(e))
              continue;

            do_feed_baby(p, RA_ANY, h, n, e);
            max_feed--;
          }
    }
  FIB_ITERATE_END;
  /* Current table done - move to the next announce hook, if any */
  p->feed_ahook = h->next;
  if (!p->feed_ahook)
    {
      mb_free(p->feed_iterator);
      p->feed_iterator = NULL;
      return 1;
    }

next_hook:
  h = p->feed_ahook;
  FIB_ITERATE_INIT(fit, &h->table->fib);
  goto again;
}
2050

    
2051
/**
2052
 * rt_feed_baby_abort - abort protocol feeding
2053
 * @p: protocol
2054
 *
2055
 * This function is called by the protocol code when the protocol
2056
 * stops or ceases to exist before the last iteration of rt_feed_baby()
2057
 * has finished.
2058
 */
2059
void
2060
rt_feed_baby_abort(struct proto *p)
2061
{
2062
  if (p->feed_ahook)
2063
    {
2064
      /* Unlink the iterator and exit */
2065
      fit_get(&p->feed_ahook->table->fib, p->feed_iterator);
2066
      p->feed_ahook = NULL;
2067
    }
2068
}
2069

    
2070

    
2071
/*
 * ptr_hash - mix a pointer value into a small hash by XOR-ing shifted
 * copies of its integer representation.
 */
static inline unsigned
ptr_hash(void *ptr)
{
  uintptr_t v = (uintptr_t) ptr;

  return (unsigned) (v ^ (v << 8) ^ (v >> 16));
}
2077

    
2078
static inline u32
2079
hc_hash(ip_addr a, rtable *dep)
2080
{
2081
  return ipa_hash(a) ^ ptr_hash(dep);
2082
}
2083

    
2084
static inline void
2085
hc_insert(struct hostcache *hc, struct hostentry *he)
2086
{
2087
  uint k = he->hash_key >> hc->hash_shift;
2088
  he->next = hc->hash_table[k];
2089
  hc->hash_table[k] = he;
2090
}
2091

    
2092
static inline void
2093
hc_remove(struct hostcache *hc, struct hostentry *he)
2094
{
2095
  struct hostentry **hep;
2096
  uint k = he->hash_key >> hc->hash_shift;
2097

    
2098
  for (hep = &hc->hash_table[k]; *hep != he; hep = &(*hep)->next);
2099
  *hep = he->next;
2100
}
2101

    
2102
/* Hostcache hash table sizing parameters.  Note that HC_HI_MARK and
 * HC_LO_MARK are deliberately operator+operand token sequences: they are
 * spliced after a size expression, e.g. (hsize HC_HI_MARK) expands to
 * (hsize *4) in hc_alloc_table(). */
#define HC_DEF_ORDER 10                       /* Initial table order (1 << order buckets) */
#define HC_HI_MARK *4                         /* Grow when items exceed size*4 */
#define HC_HI_STEP 2                          /* Grow by 4x (order += 2) */
#define HC_HI_ORDER 16                        /* Must be at most 16 */
#define HC_LO_MARK /5                         /* Shrink when items drop below size/5 */
#define HC_LO_STEP 2                          /* Shrink by 4x (order -= 2) */
#define HC_LO_ORDER 10                        /* Never shrink below this order */
2109

    
2110
/*
 * hc_alloc_table - allocate a zeroed hostcache hash table of 2^@order
 * buckets and set the derived resize thresholds.
 */
static void
hc_alloc_table(struct hostcache *hc, unsigned order)
{
  unsigned hsize = 1 << order;
  hc->hash_order = order;
  /* Keys are hashed into the top bits; shift selects a bucket index */
  hc->hash_shift = 32 - order;
  /* At HC_HI_ORDER growth stops (hash_max = ~0); HC_*_MARK macros splice
     an operator into the expression, e.g. (hsize *4) */
  hc->hash_max = (order >= HC_HI_ORDER) ? ~0 : (hsize HC_HI_MARK);
  hc->hash_min = (order <= HC_LO_ORDER) ?  0 : (hsize HC_LO_MARK);

  hc->hash_table = mb_allocz(rt_table_pool, hsize * sizeof(struct hostentry *));
}
2121

    
2122
static void
2123
hc_resize(struct hostcache *hc, unsigned new_order)
2124
{
2125
  unsigned old_size = 1 << hc->hash_order;
2126
  struct hostentry **old_table = hc->hash_table;
2127
  struct hostentry *he, *hen;
2128
  int i;
2129

    
2130
  hc_alloc_table(hc, new_order);
2131
  for (i = 0; i < old_size; i++)
2132
    for (he = old_table[i]; he != NULL; he=hen)
2133
      {
2134
        hen = he->next;
2135
        hc_insert(hc, he);
2136
      }
2137
  mb_free(old_table);
2138
}
2139

    
2140
/*
 * hc_new_hostentry - allocate and register a new hostentry for address @a
 * (link-local companion @ll) depending on table @dep, with precomputed
 * hash key @k. Grows the hash table when it gets too dense.
 */
static struct hostentry *
hc_new_hostentry(struct hostcache *hc, ip_addr a, ip_addr ll, rtable *dep, unsigned k)
{
  struct hostentry *he = sl_alloc(hc->slab);

  he->addr = a;
  he->link = ll;
  he->tab = dep;
  he->hash_key = k;
  he->uc = 0;                /* No users yet */
  he->src = NULL;            /* Resolved later by rt_update_hostentry() */

  add_tail(&hc->hostentries, &he->ln);
  hc_insert(hc, he);

  hc->hash_items++;
  if (hc->hash_items > hc->hash_max)
    hc_resize(hc, hc->hash_order + HC_HI_STEP);

  return he;
}
2161

    
2162
/*
 * hc_delete_hostentry - remove @he from the hostcache list and hash,
 * release its cached rta and slab storage, and shrink the hash table
 * when it becomes too sparse.
 */
static void
hc_delete_hostentry(struct hostcache *hc, struct hostentry *he)
{
  rta_free(he->src);

  rem_node(&he->ln);
  hc_remove(hc, he);
  sl_free(hc->slab, he);

  hc->hash_items--;
  if (hc->hash_items < hc->hash_min)
    hc_resize(hc, hc->hash_order - HC_LO_STEP);
}
2175

    
2176
/*
 * rt_init_hostcache - lazily create the hostcache of table @tab:
 * the entry list, the hash table, a slab for entries, and a linpool
 * backing the prefix trie used to filter relevant net changes.
 */
static void
rt_init_hostcache(rtable *tab)
{
  struct hostcache *hc = mb_allocz(rt_table_pool, sizeof(struct hostcache));
  init_list(&hc->hostentries);

  hc->hash_items = 0;
  hc_alloc_table(hc, HC_DEF_ORDER);
  hc->slab = sl_new(rt_table_pool, sizeof(struct hostentry));

  hc->lp = lp_new(rt_table_pool, 1008);
  hc->trie = f_new_trie(hc->lp, sizeof(struct f_trie_node));

  tab->hostcache = hc;
}
2191

    
2192
/*
 * rt_free_hostcache - release the hostcache of table @tab.
 * Entries still having users indicate a bug and are logged; all memory
 * (slab, linpool, hash table, the cache itself) is freed regardless.
 */
static void
rt_free_hostcache(rtable *tab)
{
  struct hostcache *hc = tab->hostcache;

  node *n;
  WALK_LIST(n, hc->hostentries)
    {
      struct hostentry *he = SKIP_BACK(struct hostentry, ln, n);
      rta_free(he->src);

      /* A referenced entry at teardown means someone forgot to drop it */
      if (he->uc)
        log(L_ERR "Hostcache is not empty in table %s", tab->name);
    }

  /* Freeing the slab releases all hostentries at once */
  rfree(hc->slab);
  rfree(hc->lp);
  mb_free(hc->hash_table);
  mb_free(hc);
}
2212

    
2213
static void
2214
rt_notify_hostcache(rtable *tab, net *net)
2215
{
2216
  if (tab->hcu_scheduled)
2217
    return;
2218

    
2219
  if (trie_match_net(tab->hostcache->trie, net->n.addr))
2220
    rt_schedule_hcu(tab);
2221
}
2222

    
2223
static int
2224
if_local_addr(ip_addr a, struct iface *i)
2225
{
2226
  struct ifa *b;
2227

    
2228
  WALK_LIST(b, i->addrs)
2229
    if (ipa_equal(a, b->ip))
2230
      return 1;
2231

    
2232
  return 0;
2233
}
2234

    
2235
/*
 * rt_get_igp_metric - extract an IGP metric from route @rt for recursive
 * next-hop resolution. Prefers an explicit EA_GEN_IGP_METRIC attribute,
 * then protocol-specific metrics, then 0 for device routes; returns
 * IGP_METRIC_UNKNOWN when no metric can be determined.
 */
static u32 
rt_get_igp_metric(rte *rt)
{
  eattr *ea = ea_find(rt->attrs->eattrs, EA_GEN_IGP_METRIC);

  if (ea)
    return ea->u.data;

  rta *a = rt->attrs;

#ifdef CONFIG_OSPF
  if ((a->source == RTS_OSPF) ||
      (a->source == RTS_OSPF_IA) ||
      (a->source == RTS_OSPF_EXT1))
    return rt->u.ospf.metric1;
#endif

#ifdef CONFIG_RIP
  if (a->source == RTS_RIP)
    return rt->u.rip.metric;
#endif

  /* Device routes */
  if ((a->dest != RTD_ROUTER) && (a->dest != RTD_MULTIPATH))
    return 0;

  return IGP_METRIC_UNKNOWN;
}
2263

    
2264
/*
 * rt_update_hostentry - re-resolve hostentry @he against table @tab.
 *
 * Looks up the best route covering the host address and fills in the
 * gateway, destination type, cached rta and IGP metric. Also records
 * the covering prefix range in the hostcache trie so that future net
 * changes can be filtered. Returns nonzero iff the resolution changed
 * (detected by comparing the cached rta pointer).
 */
static int
rt_update_hostentry(rtable *tab, struct hostentry *he)
{
  rta *old_src = he->src;
  int pxlen = 0;

  /* Reset the hostentry */
  he->src = NULL;
  he->gw = IPA_NONE;
  he->dest = RTD_UNREACHABLE;
  he->igp_metric = 0;

  net_addr he_addr;
  net_fill_ip_host(&he_addr, he->addr);
  net *n = net_route(tab, &he_addr);
  if (n)
    {
      rte *e = n->routes;        /* Best route for the host */
      rta *a = e->attrs;
      pxlen = n->n.addr->pxlen;

      if (a->hostentry)
        {
          /* Recursive route should not depend on another recursive route */
          log(L_WARN "Next hop address %I resolvable through recursive route for %N",
              he->addr, n->n.addr);
          goto done;
        }

      if (a->dest == RTD_DEVICE)
        {
          if (if_local_addr(he->addr, a->iface))
            {
              /* The host address is a local address, this is not valid */
              log(L_WARN "Next hop address %I is a local address of iface %s",
                  he->addr, a->iface->name);
              goto done;
            }

          /* The host is directly reachable, use link as a gateway */
          he->gw = he->link;
          he->dest = RTD_ROUTER;
        }
      else
        {
          /* The host is reachable through some route entry */
          he->gw = a->gw;
          he->dest = a->dest;
        }

      he->src = rta_clone(a);
      he->igp_metric = rt_get_igp_metric(e);
    }

 done:
  /* Add a prefix range to the trie */
  trie_add_prefix(tab->hostcache->trie, &he_addr, pxlen, he_addr.pxlen);

  rta_free(old_src);
  return old_src != he->src;
}
2325

    
2326
/*
 * rt_update_hostcache - refresh every hostentry of table @tab.
 *
 * Rebuilds the prefix trie from scratch, drops entries nobody uses any
 * more, re-resolves the rest and schedules a next-hop update in each
 * dependent table whose entry changed. Clears the hcu_scheduled flag.
 */
static void
rt_update_hostcache(rtable *tab)
{
  struct hostcache *hc = tab->hostcache;
  struct hostentry *he;
  node *n, *x;

  /* Reset the trie */
  lp_flush(hc->lp);
  hc->trie = f_new_trie(hc->lp, sizeof(struct f_trie_node));

  /* DELSAFE: hc_delete_hostentry() removes nodes during the walk */
  WALK_LIST_DELSAFE(n, x, hc->hostentries)
    {
      he = SKIP_BACK(struct hostentry, ln, n);
      if (!he->uc)
        {
          /* Unused entry - garbage-collect it */
          hc_delete_hostentry(hc, he);
          continue;
        }

      if (rt_update_hostentry(tab, he))
        rt_schedule_nhu(he->tab);
    }

  tab->hcu_scheduled = 0;
}
2352

    
2353
/*
 * rt_get_hostentry - find or create the hostentry tracking address @a
 * (link-local @ll) in table @tab on behalf of dependent table @dep.
 * Initializes the hostcache on first use; new entries are resolved
 * immediately.
 */
static struct hostentry *
rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep)
{
  struct hostentry *he;

  if (!tab->hostcache)
    rt_init_hostcache(tab);

  u32 k = hc_hash(a, dep);
  struct hostcache *hc = tab->hostcache;
  for (he = hc->hash_table[k >> hc->hash_shift]; he != NULL; he = he->next)
    if (ipa_equal(he->addr, a) && (he->tab == dep))
      return he;

  he = hc_new_hostentry(hc, a, ll, dep, k);
  rt_update_hostentry(tab, he);
  return he;
}
2371

    
2372
void
2373
rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr *gw, ip_addr *ll)
2374
{
2375
  rta_apply_hostentry(a, rt_get_hostentry(tab, *gw, *ll, dep));
2376
}
2377

    
2378

    
2379
/*
2380
 *  CLI commands
2381
 */
2382

    
2383
/*
 * rt_format_via - render the next-hop of route @e into buffer @via
 * for CLI output; caller must provide a sufficiently large buffer.
 */
static void
rt_format_via(rte *e, byte *via)
{
  rta *a = e->attrs;

  switch (a->dest)
    {
    case RTD_ROUTER:        bsprintf(via, "via %I on %s", a->gw, a->iface->name); break;
    case RTD_DEVICE:        bsprintf(via, "dev %s", a->iface->name); break;
    case RTD_BLACKHOLE:        bsprintf(via, "blackhole"); break;
    case RTD_UNREACHABLE:        bsprintf(via, "unreachable"); break;
    case RTD_PROHIBIT:        bsprintf(via, "prohibited"); break;
    case RTD_MULTIPATH:        bsprintf(via, "multipath"); break;
    default:                bsprintf(via, "???");
    }
}
2399

    
2400
/*
 * rt_show_rte - print one route @e (with prefix text @ia, already
 * formatted by the caller) to CLI @c, honoring the show options in @d.
 * @tmpa carries the protocol's temporary attributes for this route.
 */
static void
rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, ea_list *tmpa)
{
  byte via[IPA_MAX_TEXT_LENGTH+32];
  byte from[IPA_MAX_TEXT_LENGTH+8];
  byte tm[TM_DATETIME_BUFFER_SIZE], info[256];
  rta *a = e->attrs;
  int primary = (e->net->routes == e);        /* First on list = best route */
  int sync_error = (e->net->n.flags & KRF_SYNC_ERROR);
  void (*get_route_info)(struct rte *, byte *buf, struct ea_list *attrs);
  struct mpnh *nh;

  rt_format_via(e, via);
  tm_format_datetime(tm, &config->tf_route, e->lastmod);
  /* Show the learnt-from address only when it differs from the gateway */
  if (ipa_nonzero(a->from) && !ipa_equal(a->from, a->gw))
    bsprintf(from, " from %I", a->from);
  else
    from[0] = 0;

  get_route_info = a->src->proto->proto->get_route_info;
  if (get_route_info || d->verbose)
    {
      /* Need to normalize the extended attributes */
      ea_list *t = tmpa;
      t = ea_append(t, a->eattrs);
      tmpa = alloca(ea_scan(t));
      ea_merge(t, tmpa);
      ea_sort(tmpa);
    }
  if (get_route_info)
    get_route_info(e, info, tmpa);
  else
    bsprintf(info, " (%d)", e->pref);
  /* '*' marks the primary route, '!' a primary route with kernel sync error */
  cli_printf(c, -1007, "%-18s %s [%s %s%s]%s%s", ia, via, a->src->proto->name,
             tm, from, primary ? (sync_error ? " !" : " *") : "", info);
  for (nh = a->nexthops; nh; nh = nh->next)
    cli_printf(c, -1007, "\tvia %I on %s weight %d", nh->gw, nh->iface->name, nh->weight + 1);
  if (d->verbose)
    rta_show(c, a, tmpa);
}
2440

    
2441
/*
 * rt_show_net - show all routes of net @n on CLI @c according to the
 * options in @d (export simulation, filtering, protocol restriction).
 *
 * For export modes the route may be replaced by a filtered/merged copy;
 * 'ee' keeps the original so the copy can be freed afterwards. 'pass'
 * is set once the per-net decision has been made for single-route
 * announcement modes, so remaining routes are only counted, not shown.
 */
static void
rt_show_net(struct cli *c, net *n, struct rt_show_data *d)
{
  rte *e, *ee;
  byte ia[NET_MAX_TEXT_LENGTH+1];
  struct ea_list *tmpa;
  struct announce_hook *a = NULL;
  int first = 1;
  int pass = 0;

  bsprintf(ia, "%N", n->n.addr);

  if (d->export_mode)
    {
      if (! d->export_protocol->rt_notify)
        return;

      a = proto_find_announce_hook(d->export_protocol, d->table);
      if (!a)
        return;
    }

  for (e = n->routes; e; e = e->next)
    {
      if (rte_is_filtered(e) != d->filtered)
        continue;

      d->rt_counter++;
      d->net_counter += first;
      first = 0;

      if (pass)
        continue;

      ee = e;
      rte_update_lock();                /* We use the update buffer for filtering */
      tmpa = make_tmp_attrs(e, rte_update_pool);

      /* Special case for merged export */
      if ((d->export_mode == RSEM_EXPORT) && (d->export_protocol->accept_ra_types == RA_MERGED))
        {
          rte *rt_free;
          e = rt_export_merged(a, n, &rt_free, &tmpa, 1);
          pass = 1;

          if (!e)
          { e = ee; goto skip; }
        }
      else if (d->export_mode)
        {
          struct proto *ep = d->export_protocol;
          int ic = ep->import_control ? ep->import_control(ep, &e, &tmpa, rte_update_pool) : 0;

          /* Single-route modes decide on the best route only */
          if (ep->accept_ra_types == RA_OPTIMAL || ep->accept_ra_types == RA_MERGED)
            pass = 1;

          if (ic < 0)
            goto skip;

          if (d->export_mode > RSEM_PREEXPORT)
            {
              /*
               * FIXME - This shows what should be exported according to current
               * filters, but not what was really exported. 'configure soft'
               * command may change the export filter and do not update routes.
               */
              int do_export = (ic > 0) ||
                (f_run(a->out_filter, &e, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) <= F_ACCEPT);

              if (do_export != (d->export_mode == RSEM_EXPORT))
                goto skip;

              if ((d->export_mode == RSEM_EXPORT) && (ep->accept_ra_types == RA_ACCEPTED))
                pass = 1;
            }
        }

      if (d->show_protocol && (d->show_protocol != e->attrs->src->proto))
        goto skip;

      if (f_run(d->filter, &e, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) > F_ACCEPT)
        goto skip;

      d->show_counter++;
      if (d->stats < 2)
        rt_show_rte(c, ia, e, d, tmpa);
      ia[0] = 0;        /* Print the prefix only once per net */

    skip:
      /* Free a filtered/merged copy and restore the stored route */
      if (e != ee)
      {
        rte_free(e);
        e = ee;
      }
      rte_update_unlock();

      if (d->primary_only)
        break;
    }
}
2541

    
2542
/*
 * rt_show_cont - CLI continuation hook for 'show route'.
 *
 * Shows a bounded batch of nets per call (smaller in debug builds to
 * exercise the continuation path), parking the FIB iterator between
 * calls. Aborts cleanly on reconfiguration or when the inspected
 * export protocol goes down.
 */
static void
rt_show_cont(struct cli *c)
{
  struct rt_show_data *d = c->rover;
#ifdef DEBUGGING
  unsigned max = 4;
#else
  unsigned max = 64;
#endif
  struct fib *fib = &d->table->fib;
  struct fib_iterator *it = &d->fit;

  FIB_ITERATE_START(fib, it, net, n)
    {
      if (d->running_on_config && d->running_on_config != config)
        {
          cli_printf(c, 8004, "Stopped due to reconfiguration");
          goto done;
        }
      if (d->export_protocol && (d->export_protocol->export_state == ES_DOWN))
        {
          cli_printf(c, 8005, "Protocol is down");
          goto done;
        }
      if (!max--)
        {
          /* Batch done - park the iterator, keep cont/cleanup installed */
          FIB_ITERATE_PUT(it);
          return;
        }
      rt_show_net(c, n, d);
    }
  FIB_ITERATE_END;
  if (d->stats)
    cli_printf(c, 14, "%d of %d routes for %d networks", d->show_counter, d->rt_counter, d->net_counter);
  else
    cli_printf(c, 0, "");
done:
  /* Walk finished or aborted - detach the continuation hooks */
  c->cont = c->cleanup = NULL;
}
2581

    
2582
static void
2583
rt_show_cleanup(struct cli *c)
2584
{
2585
  struct rt_show_data *d = c->rover;
2586

    
2587
  /* Unlink the iterator */
2588
  fit_get(&d->table->fib, &d->fit);
2589
}
2590

    
2591
/*
 * rt_show - entry point of the 'show route' CLI command, with options
 * parsed into @d. Without an address it starts an iterated walk over
 * the whole table (continued by rt_show_cont()); with an address it
 * shows the single matching net immediately.
 */
void
rt_show(struct rt_show_data *d)
{
  net *n;

  /* Default is either a master table or a table related to a respective protocol */
  if (!d->table && d->export_protocol) d->table = d->export_protocol->table;
  if (!d->table && d->show_protocol) d->table = d->show_protocol->table;
  if (!d->table) d->table = config->master_rtc->table;

  /* Filtered routes are neither exported nor have sensible ordering */
  /* NOTE(review): this emits the final CLI message but does not return,
     so the walk below still starts - verify this is intended */
  if (d->filtered && (d->export_mode || d->primary_only))
    cli_msg(0, "");

  if (!d->addr)
    {
      FIB_ITERATE_INIT(&d->fit, &d->table->fib);
      this_cli->cont = rt_show_cont;
      this_cli->cleanup = rt_show_cleanup;
      this_cli->rover = d;
    }
  else
    {
      /* 'show route for <addr>' does longest-prefix match, plain lookup otherwise */
      if (d->show_for)
        n = net_route(d->table, d->addr);
      else
        n = net_find(d->table, d->addr);

      if (n)
        rt_show_net(this_cli, n, d);

      if (d->rt_counter)
        cli_msg(0, "");
      else
        cli_msg(8001, "Network not in table");
    }
}
2628

    
2629
/*
2630
 *  Documentation for functions declared inline in route.h
2631
 */
2632
#if 0
2633

2634
/**
2635
 * net_find - find a network entry
2636
 * @tab: a routing table
2637
 * @addr: address of the network
2638
 *
2639
 * net_find() looks up the given network in routing table @tab and
2640
 * returns a pointer to its &net entry or %NULL if no such network
2641
 * exists.
2642
 */
2643
static inline net *net_find(rtable *tab, net_addr *addr)
2644
{ DUMMY; }
2645

2646
/**
2647
 * net_get - obtain a network entry
2648
 * @tab: a routing table
2649
 * @addr: address of the network
2650
 *
2651
 * net_get() looks up the given network in routing table @tab and
2652
 * returns a pointer to its &net entry. If no such entry exists, it's
2653
 * created.
2654
 */
2655
static inline net *net_get(rtable *tab, net_addr *addr)
2656
{ DUMMY; }
2657

2658
/**
2659
 * rte_cow - copy a route for writing
2660
 * @r: a route entry to be copied
2661
 *
2662
 * rte_cow() takes a &rte and prepares it for modification. The exact action
2663
 * taken depends on the flags of the &rte -- if it's a temporary entry, it's
2664
 * just returned unchanged, else a new temporary entry with the same contents
2665
 * is created.
2666
 *
2667
 * The primary use of this function is inside the filter machinery -- when
2668
 * a filter wants to modify &rte contents (to change the preference or to
2669
 * attach another set of attributes), it must ensure that the &rte is not
2670
 * shared with anyone else (and especially that it isn't stored in any routing
2671
 * table).
2672
 *
2673
 * Result: a pointer to the new writable &rte.
2674
 */
2675
static inline rte * rte_cow(rte *r)
2676
{ DUMMY; }
2677

2678
#endif