iof-bird-daemon / nest / rt-table.c @ 8e433d6a

1
/*
2
 *        BIRD -- Routing Tables
3
 *
4
 *        (c) 1998--2000 Martin Mares <mj@ucw.cz>
5
 *
6
 *        Can be freely distributed and used under the terms of the GNU GPL.
7
 */
8

    
9
/**
10
 * DOC: Routing tables
11
 *
12
 * Routing tables are probably the most important structures BIRD uses. They
13
 * hold all the information about known networks, the associated routes and
14
 * their attributes.
15
 *
16
 * There are multiple routing tables (a primary one together with any
17
 * number of secondary ones if requested by the configuration). Each table
18
 * is basically a FIB containing entries describing the individual
19
 * destination networks. For each network (represented by structure &net),
20
 * there is a one-way linked list of route entries (&rte), the first entry
21
 * on the list being the best one (i.e., the one we currently use
22
 * for routing); the order of the other ones is undetermined.
23
 *
24
 * The &rte contains information specific to the route (preference, protocol
25
 * metrics, time of last modification etc.) and a pointer to a &rta structure
26
 * (see the route attribute module for a precise explanation) holding the
27
 * remaining route attributes which are expected to be shared by multiple
28
 * routes in order to conserve memory.
29
 */
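/*
 * Illustrative sketch (not part of the original source): given a net *n,
 * the list described above is typically walked like this; the field names
 * follow their use elsewhere in this file:
 *
 *   rte *best = n->routes;                     // first entry is the best one
 *   for (rte *e = n->routes; e; e = e->next)
 *     log(L_DEBUG "%I/%d pref %d from %s",
 *         n->n.prefix, n->n.pxlen, e->pref, e->attrs->src->proto->name);
 */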
30

    
31
#undef LOCAL_DEBUG
32

    
33
#include "nest/bird.h"
34
#include "nest/route.h"
35
#include "nest/protocol.h"
36
#include "nest/cli.h"
37
#include "nest/iface.h"
38
#include "lib/resource.h"
39
#include "lib/event.h"
40
#include "lib/string.h"
41
#include "conf/conf.h"
42
#include "filter/filter.h"
44
#include "lib/alloca.h"
45

    
46
pool *rt_table_pool;
47

    
48
static slab *rte_slab;
49
static linpool *rte_update_pool;
50

    
51
static list routing_tables;
52

    
53
static void rt_format_via(rte *e, byte *via);
54
static void rt_free_hostcache(rtable *tab);
55
static void rt_notify_hostcache(rtable *tab, net *net);
56
static void rt_update_hostcache(rtable *tab);
57
static void rt_next_hop_update(rtable *tab);
58
static inline int rt_prune_table(rtable *tab);
59
static inline void rt_schedule_gc(rtable *tab);
60
static inline void rt_schedule_prune(rtable *tab);
61

    
62

    
63
static inline struct ea_list *
64
make_tmp_attrs(struct rte *rt, struct linpool *pool)
65
{
66
  struct ea_list *(*mta)(struct rte *rt, struct linpool *pool);
67
  mta = rt->attrs->src->proto->make_tmp_attrs;
68
  return mta ? mta(rt, pool) : NULL;
69
}
70

    
71
/* Like fib_route(), but skips empty net entries */
72
static net *
73
net_route(rtable *tab, ip_addr a, int len)
74
{
75
  ip_addr a0;
76
  net *n;
77

    
78
  while (len >= 0)
79
    {
80
      a0 = ipa_and(a, ipa_mkmask(len));
81
      n = fib_find(&tab->fib, &a0, len);
82
      if (n && rte_is_valid(n->routes))
83
        return n;
84
      len--;
85
    }
86
  return NULL;
87
}
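/*
 * Example (illustrative only): for a host address a, net_route() looks up
 * successively shorter prefixes, down to the default route, and returns the
 * first &net that exists and holds a valid route:
 *
 *   net *n = net_route(tab, a, BITS_PER_IP_ADDRESS);
 */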
88

    
89
static void
90
rte_init(struct fib_node *N)
91
{
92
  net *n = (net *) N;
93

    
94
  N->flags = 0;
95
  n->routes = NULL;
96
}
97

    
98
/**
99
 * rte_find - find a route
100
 * @net: network node
101
 * @src: route source
102
 *
103
 * The rte_find() function returns a route for destination @net
104
 * which is from route source @src.
105
 */
106
rte *
107
rte_find(net *net, struct rte_src *src)
108
{
109
  rte *e = net->routes;
110

    
111
  while (e && e->attrs->src != src)
112
    e = e->next;
113
  return e;
114
}
115

    
116
/**
117
 * rte_get_temp - get a temporary &rte
118
 * @a: attributes to assign to the new route (a &rta; in case it's
119
 * un-cached, rte_update() will create a cached copy automatically)
120
 *
121
 * Create a temporary &rte and bind it with the attributes @a.
122
 * Also set route preference to the default preference set for
123
 * the protocol.
124
 */
125
rte *
126
rte_get_temp(rta *a)
127
{
128
  rte *e = sl_alloc(rte_slab);
129

    
130
  e->attrs = a;
131
  e->flags = 0;
132
  e->pref = a->src->proto->preference;
133
  return e;
134
}
135

    
136
rte *
137
rte_do_cow(rte *r)
138
{
139
  rte *e = sl_alloc(rte_slab);
140

    
141
  memcpy(e, r, sizeof(rte));
142
  e->attrs = rta_clone(r->attrs);
143
  e->flags = 0;
144
  return e;
145
}
146

    
147
/**
148
 * rte_cow_rta - get a private writable copy of &rte with writable &rta
149
 * @r: a route entry to be copied
150
 * @lp: a linpool from which to allocate &rta
151
 *
152
 * rte_cow_rta() takes a &rte and prepares it and associated &rta for
153
 * modification. There are three possibilities: First, both &rte and &rta are
154
 * private copies, in that case they are returned unchanged.  Second, &rte is
155
 * private copy, but &rta is cached, in that case &rta is duplicated using
156
 * rta_do_cow(). Third, both &rte is shared and &rta is cached, in that case
157
 * both structures are duplicated by rte_do_cow() and rta_do_cow().
158
 *
159
 * Note that in the second case, cached &rta loses one reference, while private
160
 * copy created by rta_do_cow() is a shallow copy sharing indirect data (eattrs,
161
 * nexthops, ...) with it. To work properly, original shared &rta should have
162
 * another reference during the life of created private copy.
163
 *
164
 * Result: a pointer to the new writable &rte with writable &rta.
165
 */
166
rte *
167
rte_cow_rta(rte *r, linpool *lp)
168
{
169
  if (!rta_is_cached(r->attrs))
170
    return r;
171

    
172
  rte *e = rte_cow(r);
173
  rta *a = rta_do_cow(r->attrs, lp);
174
  rta_free(e->attrs);
175
  e->attrs = a;
176
  return e;
177
}
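/*
 * Usage sketch (illustrative, mirroring rt_export_merged() below): the
 * caller keeps the original route, and thus a reference to the shared
 * &rta, alive while the private copy is being modified:
 *
 *   rte *e = rte_cow_rta(best, rte_update_pool);
 *   e->attrs->dest = RTD_MULTIPATH;       // attributes are now writable
 *   e->attrs->nexthops = nhs;
 */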
178

    
179
static int                                /* Actually better or at least as good as */
180
rte_better(rte *new, rte *old)
181
{
182
  int (*better)(rte *, rte *);
183

    
184
  if (!rte_is_valid(old))
185
    return 1;
186
  if (!rte_is_valid(new))
187
    return 0;
188

    
189
  if (new->pref > old->pref)
190
    return 1;
191
  if (new->pref < old->pref)
192
    return 0;
193
  if (new->attrs->src->proto->proto != old->attrs->src->proto->proto)
194
    {
195
      /*
196
       *  If the user has configured protocol preferences, so that two different protocols
197
       *  have the same preference, try to break the tie by comparing addresses. Not too
198
       *  useful, but keeps the ordering of routes unambiguous.
199
       */
200
      return new->attrs->src->proto->proto > old->attrs->src->proto->proto;
201
    }
202
  if (better = new->attrs->src->proto->rte_better)
203
    return better(new, old);
204
  return 0;
205
}
206

    
207
static int
208
rte_mergable(rte *pri, rte *sec)
209
{
210
  int (*mergable)(rte *, rte *);
211

    
212
  if (!rte_is_valid(pri) || !rte_is_valid(sec))
213
    return 0;
214

    
215
  if (pri->pref != sec->pref)
216
    return 0;
217

    
218
  if (pri->attrs->src->proto->proto != sec->attrs->src->proto->proto)
219
    return 0;
220

    
221
  if (mergable = pri->attrs->src->proto->rte_mergable)
222
    return mergable(pri, sec);
223

    
224
  return 0;
225
}
226

    
227
static void
228
rte_trace(struct proto *p, rte *e, int dir, char *msg)
229
{
230
  byte via[STD_ADDRESS_P_LENGTH+32];
231

    
232
  rt_format_via(e, via);
233
  log(L_TRACE "%s %c %s %I/%d %s", p->name, dir, msg, e->net->n.prefix, e->net->n.pxlen, via);
234
}
235

    
236
static inline void
237
rte_trace_in(uint flag, struct proto *p, rte *e, char *msg)
238
{
239
  if (p->debug & flag)
240
    rte_trace(p, e, '>', msg);
241
}
242

    
243
static inline void
244
rte_trace_out(uint flag, struct proto *p, rte *e, char *msg)
245
{
246
  if (p->debug & flag)
247
    rte_trace(p, e, '<', msg);
248
}
249

    
250
static rte *
251
export_filter(struct announce_hook *ah, rte *rt0, rte **rt_free, ea_list **tmpa, int silent)
252
{
253
  struct proto *p = ah->proto;
254
  struct filter *filter = ah->out_filter;
255
  struct proto_stats *stats = ah->stats;
256
  ea_list *tmpb = NULL;
257
  rte *rt;
258
  int v;
259

    
260
  rt = rt0;
261
  *rt_free = NULL;
262

    
263
  if (!tmpa)
264
    tmpa = &tmpb;
265

    
266
  *tmpa = make_tmp_attrs(rt, rte_update_pool);
267

    
268
  v = p->import_control ? p->import_control(p, &rt, tmpa, rte_update_pool) : 0;
269
  if (v < 0)
270
    {
271
      if (silent)
272
        goto reject;
273

    
274
      stats->exp_updates_rejected++;
275
      if (v == RIC_REJECT)
276
        rte_trace_out(D_FILTERS, p, rt, "rejected by protocol");
277
      goto reject;
278
    }
279
  if (v > 0)
280
    {
281
      if (!silent)
282
        rte_trace_out(D_FILTERS, p, rt, "forced accept by protocol");
283
      goto accept;
284
    }
285

    
286
  v = filter && ((filter == FILTER_REJECT) ||
287
                 (f_run(filter, &rt, tmpa, rte_update_pool, FF_FORCE_TMPATTR) > F_ACCEPT));
288
  if (v)
289
    {
290
      if (silent)
291
        goto reject;
292

    
293
      stats->exp_updates_filtered++;
294
      rte_trace_out(D_FILTERS, p, rt, "filtered out");
295
      goto reject;
296
    }
297

    
298
 accept:
299
  if (rt != rt0)
300
    *rt_free = rt;
301
  return rt;
302

    
303
 reject:
304
  /* Discard temporary rte */
305
  if (rt != rt0)
306
    rte_free(rt);
307
  return NULL;
308
}
309

    
310
static void
311
do_rt_notify(struct announce_hook *ah, net *net, rte *new, rte *old, ea_list *tmpa, int refeed)
312
{
313
  struct proto *p = ah->proto;
314
  struct proto_stats *stats = ah->stats;
315

    
316

    
317
  /*
318
   * First, apply export limit.
319
   *
320
   * Export route limits have several problems. Because exp_routes
321
   * counter is reset before refeed, we don't really know whether
322
   * limit is breached and whether the update is new or not. Therefore
323
   * the number of really exported routes may exceed the limit
324
   * temporarily (routes exported before and new routes in refeed).
325
   *
326
   * Minor advantage is that if the limit is decreased and refeed is
327
   * requested, the number of exported routes really decreases.
328
   *
329
   * Second problem is that with export limits, we don't know whether
330
   * old was really exported (it might be blocked by limit). When a
331
   * withdraw is exported, we announce it even when the previous
332
   * update was blocked. This is not a big issue, but the same problem
333
   * is in updating exp_routes counter. Therefore, to be consistent in
334
   * increases and decreases of exp_routes, we count exported routes
335
   * regardless of blocking by limits.
336
   *
337
   * A similar problem is in handling updates - when a new route is
338
   * received and blocking is active, the route would be blocked, but
339
   * when an update for the route is received later, the update
340
   * would be propagated (as old != NULL). Therefore, we have to block
341
   * also non-new updates (contrary to import blocking).
342
   */
343

    
344
  struct proto_limit *l = ah->out_limit;
345
  if (l && new)
346
    {
347
      if ((!old || refeed) && (stats->exp_routes >= l->limit))
348
        proto_notify_limit(ah, l, PLD_OUT, stats->exp_routes);
349

    
350
      if (l->state == PLS_BLOCKED)
351
        {
352
          stats->exp_routes++;        /* see note above */
353
          stats->exp_updates_rejected++;
354
          rte_trace_out(D_FILTERS, p, new, "rejected [limit]");
355
          new = NULL;
356

    
357
          if (!old)
358
            return;
359
        }
360
    }
361

    
362

    
363
  if (new)
364
    stats->exp_updates_accepted++;
365
  else
366
    stats->exp_withdraws_accepted++;
367

    
368
  /* Hack: We do not decrease exp_routes during refeed, we instead
369
     reset exp_routes at the start of refeed. */
370
  if (new)
371
    stats->exp_routes++;
372
  if (old && !refeed)
373
    stats->exp_routes--;
374

    
375
  if (p->debug & D_ROUTES)
376
    {
377
      if (new && old)
378
        rte_trace_out(D_ROUTES, p, new, "replaced");
379
      else if (new)
380
        rte_trace_out(D_ROUTES, p, new, "added");
381
      else if (old)
382
        rte_trace_out(D_ROUTES, p, old, "removed");
383
    }
384
  if (!new)
385
    p->rt_notify(p, ah->table, net, NULL, old, NULL);
386
  else if (tmpa)
387
    {
388
      ea_list *t = tmpa;
389
      while (t->next)
390
        t = t->next;
391
      t->next = new->attrs->eattrs;
392
      p->rt_notify(p, ah->table, net, new, old, tmpa);
393
      t->next = NULL;
394
    }
395
  else
396
    p->rt_notify(p, ah->table, net, new, old, new->attrs->eattrs);
397
}
398

    
399
static void
400
rt_notify_basic(struct announce_hook *ah, net *net, rte *new0, rte *old0, int refeed)
401
{
402
  struct proto *p = ah->proto;
403
  struct proto_stats *stats = ah->stats;
404

    
405
  rte *new = new0;
406
  rte *old = old0;
407
  rte *new_free = NULL;
408
  rte *old_free = NULL;
409
  ea_list *tmpa = NULL;
410

    
411
  if (new)
412
    stats->exp_updates_received++;
413
  else
414
    stats->exp_withdraws_received++;
415

    
416
  /*
417
   * This is a tricky part - we don't know whether route 'old' was
418
   * exported to protocol 'p' or was filtered by the export filter.
419
   * We try to run the export filter to know this to have a correct
420
   * value in 'old' argument of rte_update (and proper filter value)
421
   *
422
   * FIXME - this is broken because 'configure soft' may change
423
   * filters but keep routes. Refeed is expected to be called after
424
   * change of the filters and with old == new, therefore we do not
425
   * even try to run the filter on an old route, This may lead to
426
   * 'spurious withdraws' but ensure that there are no 'missing
427
   * withdraws'.
428
   *
429
   * This is not completely safe as there is a window between
430
   * reconfiguration and the end of refeed - if a newly filtered
431
   * route disappears during this period, proper withdraw is not
432
   * sent (because old would be also filtered) and the route is
433
   * not refeeded (because it disappeared before that).
434
   */
435

    
436
  if (new)
437
    new = export_filter(ah, new, &new_free, &tmpa, 0);
438

    
439
  if (old && !refeed)
440
    old = export_filter(ah, old, &old_free, NULL, 1);
441

    
442
  if (!new && !old)
443
  {
444
    /*
445
     * As mentioned above, 'old' value may be incorrect in some race conditions.
446
     * We generally ignore it with the exception of withdraw to pipe protocol.
447
     * In that case we rather propagate unfiltered withdraws regardless of
448
     * export filters to ensure that when a protocol is flushed, its routes are
449
     * removed from all tables. Possible spurious unfiltered withdraws are not
450
     * problem here as they are ignored if there is no corresponding route at
451
     * the other end of the pipe. We directly call rt_notify() hook instead of
452
     * do_rt_notify() to avoid logging and stat counters.
453
     */
454

    
455
#ifdef CONFIG_PIPE
456
    if ((p->proto == &proto_pipe) && !new0 && (p != old0->sender->proto))
457
      p->rt_notify(p, ah->table, net, NULL, old0, NULL);
458
#endif
459

    
460
    return;
461
  }
462

    
463
  do_rt_notify(ah, net, new, old, tmpa, refeed);
464

    
465
  /* Discard temporary rte's */
466
  if (new_free)
467
    rte_free(new_free);
468
  if (old_free)
469
    rte_free(old_free);
470
}
471

    
472
static void
473
rt_notify_accepted(struct announce_hook *ah, net *net, rte *new_changed, rte *old_changed, rte *before_old, int feed)
474
{
475
  // struct proto *p = ah->proto;
476
  struct proto_stats *stats = ah->stats;
477

    
478
  rte *r;
479
  rte *new_best = NULL;
480
  rte *old_best = NULL;
481
  rte *new_free = NULL;
482
  rte *old_free = NULL;
483
  ea_list *tmpa = NULL;
484

    
485
  /* Used to track whether we met old_changed position. If before_old is NULL
486
     old_changed was the first and we met it implicitly before current best route. */
487
  int old_meet = old_changed && !before_old;
488

    
489
  /* Note that before_old is either NULL or a valid (not rejected) route.
490
     If old_changed is valid, before_old has to be too. If the old changed route
491
     was not valid, caller must use NULL for both old_changed and before_old. */
492

    
493
  if (new_changed)
494
    stats->exp_updates_received++;
495
  else
496
    stats->exp_withdraws_received++;
497

    
498
  /* First, find the new_best route - first accepted by filters */
499
  for (r=net->routes; rte_is_valid(r); r=r->next)
500
    {
501
      if (new_best = export_filter(ah, r, &new_free, &tmpa, 0))
502
        break;
503

    
504
      /* Note if we walked around the position of old_changed route */
505
      if (r == before_old)
506
        old_meet = 1;
507
    }
508

    
509
  /* 
510
   * Second, handle the feed case. That means we do not care for
511
   * old_best. It is NULL for feed, and the new_best for refeed. 
512
   * For refeed, there is a hack similar to the one in rt_notify_basic()
513
   * to ensure withdraws in case of changed filters
514
   */
515
  if (feed)
516
    {
517
      if (feed == 2)        /* refeed */
518
        old_best = new_best ? new_best :
519
          (rte_is_valid(net->routes) ? net->routes : NULL);
520
      else
521
        old_best = NULL;
522

    
523
      if (!new_best && !old_best)
524
        return;
525

    
526
      goto found;
527
    }
528

    
529
  /*
530
   * Now, we find the old_best route. Generally, it is the same as the
531
   * new_best, unless new_best is the same as new_changed or
532
   * old_changed is accepted before new_best.
533
   *
534
   * There are four cases:
535
   *
536
   * - We would find and accept old_changed before new_best, therefore
537
   *   old_changed is old_best. In remaining cases we suppose this
538
   *   is not true.
539
   *
540
   * - We found no new_best, therefore there is also no old_best and
541
   *   we ignore this withdraw.
542
   *
543
   * - We found new_best different than new_changed, therefore
544
   *   old_best is the same as new_best and we ignore this update.
545
   *
546
   * - We found new_best the same as new_changed, therefore it cannot
547
   *   be old_best and we have to continue search for old_best.
548
   */
549

    
550
  /* First case */
551
  if (old_meet)
552
    if (old_best = export_filter(ah, old_changed, &old_free, NULL, 1))
553
      goto found;
554

    
555
  /* Second case */
556
  if (!new_best)
557
    return;
558

    
559
  /* Third case, we use r instead of new_best, because export_filter() could change it */
560
  if (r != new_changed)
561
    {
562
      if (new_free)
563
        rte_free(new_free);
564
      return;
565
    }
566

    
567
  /* Fourth case */
568
  for (r=r->next; rte_is_valid(r); r=r->next)
569
    {
570
      if (old_best = export_filter(ah, r, &old_free, NULL, 1))
571
        goto found;
572

    
573
      if (r == before_old)
574
        if (old_best = export_filter(ah, old_changed, &old_free, NULL, 1))
575
          goto found;
576
    }
577

    
578
  /* Implicitly, old_best is NULL and new_best is non-NULL */
579

    
580
 found:
581
  do_rt_notify(ah, net, new_best, old_best, tmpa, (feed == 2));
582

    
583
  /* Discard temporary rte's */
584
  if (new_free)
585
    rte_free(new_free);
586
  if (old_free)
587
    rte_free(old_free);
588
}
589

    
590

    
591
static struct mpnh *
592
mpnh_merge_rta(struct mpnh *nhs, rta *a, int max)
593
{
594
  struct mpnh nh = { .gw = a->gw, .iface = a->iface };
595
  struct mpnh *nh2 = (a->dest == RTD_MULTIPATH) ? a->nexthops : &nh;
596
  return mpnh_merge(nhs, nh2, 1, 0, max, rte_update_pool);
597
}
598

    
599
rte *
600
rt_export_merged(struct announce_hook *ah, net *net, rte **rt_free, ea_list **tmpa, int silent)
601
{
602
  // struct proto *p = ah->proto;
603
  struct mpnh *nhs = NULL;
604
  rte *best0, *best, *rt0, *rt, *tmp;
605

    
606
  best0 = net->routes;
607
  *rt_free = NULL;
608

    
609
  if (!rte_is_valid(best0))
610
    return NULL;
611

    
612
  best = export_filter(ah, best0, rt_free, tmpa, silent);
613

    
614
  if (!best || !rte_is_reachable(best))
615
    return best;
616

    
617
  for (rt0 = best0->next; rt0; rt0 = rt0->next)
618
  {
619
    if (!rte_mergable(best0, rt0))
620
      continue;
621

    
622
    rt = export_filter(ah, rt0, &tmp, NULL, 1);
623

    
624
    if (!rt)
625
      continue;
626

    
627
    if (rte_is_reachable(rt))
628
      nhs = mpnh_merge_rta(nhs, rt->attrs, ah->proto->merge_limit);
629

    
630
    if (tmp)
631
      rte_free(tmp);
632
  }
633

    
634
  if (nhs)
635
  {
636
    nhs = mpnh_merge_rta(nhs, best->attrs, ah->proto->merge_limit);
637

    
638
    if (nhs->next)
639
    {
640
      best = rte_cow_rta(best, rte_update_pool);
641
      best->attrs->dest = RTD_MULTIPATH;
642
      best->attrs->nexthops = nhs;
643
    }
644
  }
645

    
646
  if (best != best0)
647
    *rt_free = best;
648

    
649
  return best;
650
}
651

    
652

    
653
static void
654
rt_notify_merged(struct announce_hook *ah, net *net, rte *new_changed, rte *old_changed,
655
                 rte *new_best, rte*old_best, int refeed)
656
{
657
  // struct proto *p = ah->proto;
658

    
659
  rte *new_best_free = NULL;
660
  rte *old_best_free = NULL;
661
  rte *new_changed_free = NULL;
662
  rte *old_changed_free = NULL;
663
  ea_list *tmpa = NULL;
664

    
665
  /* We assume that all rte arguments are either NULL or rte_is_valid() */
666

    
667
  /* This check should be done by the caller */
668
  if (!new_best && !old_best)
669
    return;
670

    
671
  /* Check whether the change is relevant to the merged route */
672
  if ((new_best == old_best) && !refeed)
673
  {
674
    new_changed = rte_mergable(new_best, new_changed) ?
675
      export_filter(ah, new_changed, &new_changed_free, NULL, 1) : NULL;
676

    
677
    old_changed = rte_mergable(old_best, old_changed) ?
678
      export_filter(ah, old_changed, &old_changed_free, NULL, 1) : NULL;
679

    
680
    if (!new_changed && !old_changed)
681
      return;
682
  }
683

    
684
  if (new_best)
685
    ah->stats->exp_updates_received++;
686
  else
687
    ah->stats->exp_withdraws_received++;
688

    
689
  /* Prepare new merged route */
690
  if (new_best)
691
    new_best = rt_export_merged(ah, net, &new_best_free, &tmpa, 0);
692

    
693
  /* Prepare old merged route (without proper merged next hops) */
694
  /* There are some issues with running filter on old route - see rt_notify_basic() */
695
  if (old_best && !refeed)
696
    old_best = export_filter(ah, old_best, &old_best_free, NULL, 1);
697

    
698
  if (new_best || old_best)
699
    do_rt_notify(ah, net, new_best, old_best, tmpa, refeed);
700

    
701
  /* Discard temporary rte's */
702
  if (new_best_free)
703
    rte_free(new_best_free);
704
  if (old_best_free)
705
    rte_free(old_best_free);
706
  if (new_changed_free)
707
    rte_free(new_changed_free);
708
  if (old_changed_free)
709
    rte_free(old_changed_free);
710
}
711

    
712

    
713
/**
714
 * rte_announce - announce a routing table change
715
 * @tab: table the route has been added to
716
 * @type: type of route announcement (RA_OPTIMAL, RA_ACCEPTED, RA_MERGED or RA_ANY)
717
 * @net: network in question
718
 * @new: the new route to be announced
719
 * @old: the previous route for the same network
720
 * @new_best: the new best route for the same network
721
 * @old_best: the previous best route for the same network
722
 * @before_old: The previous route before @old for the same network.
723
 *                 If @before_old is NULL @old was the first.
724
 *
725
 * This function gets a routing table update and announces it
726
 * to all protocols that accept the given type of route announcement
727
 * and are connected to the same table by their announcement hooks.
728
 *
729
 * Route announcement of type %RA_OPTIMAL is generated when the optimal
730
 * route (in routing table @tab) changes. In that case @old stores the
731
 * old optimal route.
732
 *
733
 * Route announcement of type %RA_ANY is generated when any route (in
734
 * routing table @tab) changes. In that case @old stores the old route
735
 * from the same protocol.
736
 *
737
 * For each appropriate protocol, we first call its import_control()
738
 * hook which performs basic checks on the route (each protocol has a
739
 * right to veto or force accept of the route before any filter is
740
 * asked) and adds default values of attributes specific to the new
741
 * protocol (metrics, tags etc.).  Then it consults the protocol's
742
 * export filter and if it accepts the route, the rt_notify() hook of
743
 * the protocol gets called.
744
 */
745
static void
746
rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old,
747
             rte *new_best, rte *old_best, rte *before_old)
748
{
749
  if (!rte_is_valid(new))
750
    new = NULL;
751

    
752
  if (!rte_is_valid(old))
753
    old = before_old = NULL;
754

    
755
  if (!rte_is_valid(new_best))
756
    new_best = NULL;
757

    
758
  if (!rte_is_valid(old_best))
759
    old_best = NULL;
760

    
761
  if (!old && !new)
762
    return;
763

    
764
  if (type == RA_OPTIMAL)
765
    {
766
      if (new)
767
        new->attrs->src->proto->stats.pref_routes++;
768
      if (old)
769
        old->attrs->src->proto->stats.pref_routes--;
770

    
771
      if (tab->hostcache)
772
        rt_notify_hostcache(tab, net);
773
    }
774

    
775
  struct announce_hook *a;
776
  WALK_LIST(a, tab->hooks)
777
    {
778
      ASSERT(a->proto->export_state != ES_DOWN);
779
      if (a->proto->accept_ra_types == type)
780
        if (type == RA_ACCEPTED)
781
          rt_notify_accepted(a, net, new, old, before_old, 0);
782
        else if (type == RA_MERGED)
783
          rt_notify_merged(a, net, new, old, new_best, old_best, 0);
784
        else
785
          rt_notify_basic(a, net, new, old, 0);
786
    }
787
}
788

    
789
static inline int
790
rte_validate(rte *e)
791
{
792
  int c;
793
  net *n = e->net;
794

    
795
  if ((n->n.pxlen > BITS_PER_IP_ADDRESS) || !ip_is_prefix(n->n.prefix,n->n.pxlen))
796
    {
797
      log(L_WARN "Ignoring bogus prefix %I/%d received via %s",
798
          n->n.prefix, n->n.pxlen, e->sender->proto->name);
799
      return 0;
800
    }
801

    
802
  c = ipa_classify_net(n->n.prefix);
803
  if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
804
    {
805
      log(L_WARN "Ignoring bogus route %I/%d received via %s",
806
          n->n.prefix, n->n.pxlen, e->sender->proto->name);
807
      return 0;
808
    }
809

    
810
  return 1;
811
}
812

    
813
/**
814
 * rte_free - delete a &rte
815
 * @e: &rte to be deleted
816
 *
817
 * rte_free() deletes the given &rte from the routing table it's linked to.
818
 */
819
void
820
rte_free(rte *e)
821
{
822
  if (rta_is_cached(e->attrs))
823
    rta_free(e->attrs);
824
  sl_free(rte_slab, e);
825
}
826

    
827
static inline void
828
rte_free_quick(rte *e)
829
{
830
  rta_free(e->attrs);
831
  sl_free(rte_slab, e);
832
}
833

    
834
static int
835
rte_same(rte *x, rte *y)
836
{
837
  return
838
    x->attrs == y->attrs &&
839
    x->flags == y->flags &&
840
    x->pflags == y->pflags &&
841
    x->pref == y->pref &&
842
    (!x->attrs->src->proto->rte_same || x->attrs->src->proto->rte_same(x, y));
843
}
844

    
845
static inline int rte_is_ok(rte *e) { return e && !rte_is_filtered(e); }
846

    
847
static void
848
rte_recalculate(struct announce_hook *ah, net *net, rte *new, struct rte_src *src)
849
{
850
  struct proto *p = ah->proto;
851
  struct rtable *table = ah->table;
852
  struct proto_stats *stats = ah->stats;
853
  static struct tbf rl_pipe = TBF_DEFAULT_LOG_LIMITS;
854
  rte *before_old = NULL;
855
  rte *old_best = net->routes;
856
  rte *old = NULL;
857
  rte **k;
858

    
859
  k = &net->routes;                        /* Find and remove original route from the same protocol */
860
  while (old = *k)
861
    {
862
      if (old->attrs->src == src)
863
        {
864
          /* If there is the same route in the routing table but from
865
           * a different sender, then there are two paths from the
866
           * source protocol to this routing table through transparent
867
           * pipes, which is not allowed.
868
           *
869
           * We log that and ignore the route. If it is a withdraw, we
870
           * ignore it completely (there might be 'spurious withdraws',
871
           * see FIXME in do_rte_announce())
872
           */
873
          if (old->sender->proto != p)
874
            {
875
              if (new)
876
                {
877
                  log_rl(&rl_pipe, L_ERR "Pipe collision detected when sending %I/%d to table %s",
878
                      net->n.prefix, net->n.pxlen, table->name);
879
                  rte_free_quick(new);
880
                }
881
              return;
882
            }
883

    
884
          if (new && rte_same(old, new))
885
            {
886
              /* No changes, ignore the new route */
887

    
888
              if (!rte_is_filtered(new))
889
                {
890
                  stats->imp_updates_ignored++;
891
                  rte_trace_in(D_ROUTES, p, new, "ignored");
892
                }
893

    
894
              rte_free_quick(new);
895
              return;
896
            }
897
          *k = old->next;
898
          break;
899
        }
900
      k = &old->next;
901
      before_old = old;
902
    }
903

    
904
  if (!old)
905
    before_old = NULL;
906

    
907
  if (!old && !new)
908
    {
909
      stats->imp_withdraws_ignored++;
910
      return;
911
    }
912

    
913
  int new_ok = rte_is_ok(new);
914
  int old_ok = rte_is_ok(old);
915

    
916
  struct proto_limit *l = ah->rx_limit;
917
  if (l && !old && new)
918
    {
919
      u32 all_routes = stats->imp_routes + stats->filt_routes;
920

    
921
      if (all_routes >= l->limit)
922
        proto_notify_limit(ah, l, PLD_RX, all_routes);
923

    
924
      if (l->state == PLS_BLOCKED)
925
        {
926
          /* In receive limit the situation is simple, old is NULL so
927
             we just free new and exit like nothing happened */
928

    
929
          stats->imp_updates_ignored++;
930
          rte_trace_in(D_FILTERS, p, new, "ignored [limit]");
931
          rte_free_quick(new);
932
          return;
933
        }
934
    }
935

    
936
  l = ah->in_limit;
937
  if (l && !old_ok && new_ok)
938
    {
939
      if (stats->imp_routes >= l->limit)
940
        proto_notify_limit(ah, l, PLD_IN, stats->imp_routes);
941

    
942
      if (l->state == PLS_BLOCKED)
943
        {
944
          /* In import limit the situation is more complicated. We
945
             shouldn't just drop the route, we should handle it like
946
             it was filtered. We also have to continue the route
947
             processing if old or new is non-NULL, but we should exit
948
             if both are NULL as this case is probably assumed to be
949
             already handled. */
950

    
951
          stats->imp_updates_ignored++;
952
          rte_trace_in(D_FILTERS, p, new, "ignored [limit]");
953

    
954
          if (ah->in_keep_filtered)
955
            new->flags |= REF_FILTERED;
956
          else
957
            { rte_free_quick(new); new = NULL; }
958

    
959
          /* Note that old && !new could be possible when
960
             ah->in_keep_filtered changed in the recent past. */
961

    
962
          if (!old && !new)
963
            return;
964

    
965
          new_ok = 0;
966
          goto skip_stats1;
967
        }
968
    }
969

    
970
  if (new_ok)
971
    stats->imp_updates_accepted++;
972
  else if (old_ok)
973
    stats->imp_withdraws_accepted++;
974
  else
975
    stats->imp_withdraws_ignored++;
976

    
977
 skip_stats1:
978

    
979
  if (new)
980
    rte_is_filtered(new) ? stats->filt_routes++ : stats->imp_routes++;
981
  if (old)
982
    rte_is_filtered(old) ? stats->filt_routes-- : stats->imp_routes--;
983

    
984
  if (table->config->sorted)
985
    {
986
      /* If routes are sorted, just insert the new route at the appropriate position */
987
      if (new)
988
        {
989
          if (before_old && !rte_better(new, before_old))
990
            k = &before_old->next;
991
          else
992
            k = &net->routes;
993

    
994
          for (; *k; k=&(*k)->next)
995
            if (rte_better(new, *k))
996
              break;
997

    
998
          new->next = *k;
999
          *k = new;
1000
        }
1001
    }
1002
  else
1003
    {
1004
      /* If routes are not sorted, find the best route and move it to
1005
         the first position. There are several optimized cases. */
1006

    
1007
      if (src->proto->rte_recalculate && src->proto->rte_recalculate(table, net, new, old, old_best))
1008
        goto do_recalculate;
1009

    
1010
      if (new && rte_better(new, old_best))
1011
        {
1012
          /* The first case - the new route is clearly optimal,
1013
             we link it at the first position */
1014

    
1015
          new->next = net->routes;
1016
          net->routes = new;
1017
        }
1018
      else if (old == old_best)
1019
        {
1020
          /* The second case - the old best route disappeared, we add the
1021
             new route (if we have any) to the list (we don't care about
1022
             position) and then we elect the new optimal route and relink
1023
             that route at the first position and announce it. New optimal
1024
             route might be NULL if there is no more routes */
1025

    
1026
        do_recalculate:
1027
          /* Add the new route to the list */
1028
          if (new)
1029
            {
1030
              new->next = net->routes;
1031
              net->routes = new;
1032
            }
1033

    
1034
          /* Find a new optimal route (if there is any) */
1035
          if (net->routes)
1036
            {
1037
              rte **bp = &net->routes;
1038
              for (k=&(*bp)->next; *k; k=&(*k)->next)
1039
                if (rte_better(*k, *bp))
1040
                  bp = k;
1041

    
1042
              /* And relink it */
1043
              rte *best = *bp;
1044
              *bp = best->next;
1045
              best->next = net->routes;
1046
              net->routes = best;
1047
            }
1048
        }
1049
      else if (new)
1050
        {
1051
          /* The third case - the new route is not better than the old
1052
             best route (therefore old_best != NULL) and the old best
1053
             route was not removed (therefore old_best == net->routes).
1054
             We just link the new route after the old best route. */
1055

    
1056
          ASSERT(net->routes != NULL);
1057
          new->next = net->routes->next;
1058
          net->routes->next = new;
1059
        }
1060
      /* The fourth (empty) case - suboptimal route was removed, nothing to do */
1061
    }
1062

    
1063
  if (new)
1064
    new->lastmod = now;
1065

    
1066
  /* Log the route change */
1067
  if (p->debug & D_ROUTES)
1068
    {
1069
      if (new_ok)
1070
        rte_trace(p, new, '>', new == net->routes ? "added [best]" : "added");
1071
      else if (old_ok)
1072
        {
1073
          if (old != old_best)
1074
            rte_trace(p, old, '>', "removed");
1075
          else if (rte_is_ok(net->routes))
1076
            rte_trace(p, old, '>', "removed [replaced]");
1077
          else
1078
            rte_trace(p, old, '>', "removed [sole]");
1079
        }
1080
    }
1081

    
1082
  /* Propagate the route change */
1083
  rte_announce(table, RA_ANY, net, new, old, NULL, NULL, NULL);
1084
  if (net->routes != old_best)
1085
    rte_announce(table, RA_OPTIMAL, net, net->routes, old_best, NULL, NULL, NULL);
1086
  if (table->config->sorted)
1087
    rte_announce(table, RA_ACCEPTED, net, new, old, NULL, NULL, before_old);
1088
  rte_announce(table, RA_MERGED, net, new, old, net->routes, old_best, NULL);
1089

    
1090
  if (!net->routes &&
1091
      (table->gc_counter++ >= table->config->gc_max_ops) &&
1092
      (table->gc_time + table->config->gc_min_time <= now))
1093
    rt_schedule_gc(table);
1094

    
1095
  if (old_ok && p->rte_remove)
1096
    p->rte_remove(net, old);
1097
  if (new_ok && p->rte_insert)
1098
    p->rte_insert(net, new);
1099

    
1100
  if (old)
1101
    rte_free_quick(old);
1102
}
1103

    
1104
static int rte_update_nest_cnt;                /* Nesting counter to allow recursive updates */
1105

    
1106
static inline void
1107
rte_update_lock(void)
1108
{
1109
  rte_update_nest_cnt++;
1110
}
1111

    
1112
static inline void
1113
rte_update_unlock(void)
1114
{
1115
  if (!--rte_update_nest_cnt)
1116
    lp_flush(rte_update_pool);
1117
}
1118

    
1119
static inline void
1120
rte_hide_dummy_routes(net *net, rte **dummy)
1121
{
1122
  if (net->routes && net->routes->attrs->source == RTS_DUMMY)
1123
  {
1124
    *dummy = net->routes;
1125
    net->routes = (*dummy)->next;
1126
  }
1127
}
1128

    
1129
static inline void
1130
rte_unhide_dummy_routes(net *net, rte **dummy)
1131
{
1132
  if (*dummy)
1133
  {
1134
    (*dummy)->next = net->routes;
1135
    net->routes = *dummy;
1136
  }
1137
}
1138

    
1139
/**
1140
 * rte_update - enter a new update to a routing table
1141
 * @table: table to be updated
1142
 * @ah: pointer to table announce hook
1143
 * @net: network node
1144
 * @p: protocol submitting the update
1145
 * @src: protocol originating the update
1146
 * @new: a &rte representing the new route or %NULL for route removal.
1147
 *
1148
 * This function is called by the routing protocols whenever they discover
1149
 * a new route or wish to update/remove an existing route. The right announcement
1150
 * sequence is to build route attributes first (either un-cached with @aflags set
1151
 * to zero or a cached one using rta_lookup(); in this case please note that
1152
 * you need to increase the use count of the attributes yourself by calling
1153
 * rta_clone()), call rte_get_temp() to obtain a temporary &rte, fill in all
1154
 * the appropriate data and finally submit the new &rte by calling rte_update().
1155
 *
1156
 * @src specifies the protocol that originally created the route and the meaning
1157
 * of protocol-dependent data of @new. If @new is not %NULL, @src have to be the
1158
 * same value as @new->attrs->proto. @p specifies the protocol that called
1159
 * rte_update(). In most cases it is the same protocol as @src. rte_update()
1160
 * stores @p in @new->sender;
1161
 *
1162
 * When rte_update() gets any route, it automatically validates it (checks,
1163
 * whether the network and next hop address are valid IP addresses and also
1164
 * whether a normal routing protocol doesn't try to smuggle a host or link
1165
 * scope route to the table), converts all protocol dependent attributes stored
1166
 * in the &rte to temporary extended attributes, consults import filters of the
1167
 * protocol to see if the route should be accepted and/or its attributes modified,
1168
 * stores the temporary attributes back to the &rte.
1169
 *
1170
 * Now, having a "public" version of the route, we
1171
 * automatically find any old route defined by the protocol @src
1172
 * for network @n, replace it by the new one (or removing it if @new is %NULL),
1173
 * recalculate the optimal route for this destination and finally broadcast
1174
 * the change (if any) to all routing protocols by calling rte_announce().
1175
 *
1176
 * All memory used for attribute lists and other temporary allocations is taken
1177
 * from a special linear pool @rte_update_pool and freed when rte_update()
1178
 * finishes.
1179
 */
1180

    
1181
void
1182
rte_update2(struct announce_hook *ah, net *net, rte *new, struct rte_src *src)
1183
{
1184
  struct proto *p = ah->proto;
1185
  struct proto_stats *stats = ah->stats;
1186
  struct filter *filter = ah->in_filter;
1187
  ea_list *tmpa = NULL;
1188
  rte *dummy = NULL;
1189

    
1190
  rte_update_lock();
1191
  if (new)
1192
    {
1193
      new->sender = ah;
1194

    
1195
      stats->imp_updates_received++;
1196
      if (!rte_validate(new))
1197
        {
1198
          rte_trace_in(D_FILTERS, p, new, "invalid");
1199
          stats->imp_updates_invalid++;
1200
          goto drop;
1201
        }
1202

    
1203
      if (filter == FILTER_REJECT)
1204
        {
1205
          stats->imp_updates_filtered++;
1206
          rte_trace_in(D_FILTERS, p, new, "filtered out");
1207

    
1208
          if (! ah->in_keep_filtered)
1209
            goto drop;
1210

    
1211
          /* new is a private copy, we could modify it */
1212
          new->flags |= REF_FILTERED;
1213
        }
1214
      else
1215
        {
1216
          tmpa = make_tmp_attrs(new, rte_update_pool);
1217
          if (filter && (filter != FILTER_REJECT))
1218
            {
1219
              ea_list *old_tmpa = tmpa;
1220
              int fr = f_run(filter, &new, &tmpa, rte_update_pool, 0);
1221
              if (fr > F_ACCEPT)
1222
                {
1223
                  stats->imp_updates_filtered++;
1224
                  rte_trace_in(D_FILTERS, p, new, "filtered out");
1225

    
1226
                  if (! ah->in_keep_filtered)
1227
                    goto drop;
1228

    
1229
                  new->flags |= REF_FILTERED;
1230
                }
1231
              if (tmpa != old_tmpa && src->proto->store_tmp_attrs)
1232
                src->proto->store_tmp_attrs(new, tmpa);
1233
            }
1234
        }
1235
      if (!rta_is_cached(new->attrs)) /* Need to copy attributes */
1236
        new->attrs = rta_lookup(new->attrs);
1237
      new->flags |= REF_COW;
1238
    }
1239
  else
1240
    {
1241
      stats->imp_withdraws_received++;
1242

    
1243
      if (!net || !src)
1244
        {
1245
          stats->imp_withdraws_ignored++;
1246
          rte_update_unlock();
1247
          return;
1248
        }
1249
    }
1250

    
1251
 recalc:
1252
  rte_hide_dummy_routes(net, &dummy);
1253
  rte_recalculate(ah, net, new, src);
1254
  rte_unhide_dummy_routes(net, &dummy);
1255
  rte_update_unlock();
1256
  return;
1257

    
1258
 drop:
1259
  rte_free(new);
1260
  new = NULL;
1261
  goto recalc;
1262
}
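/*
 * A hedged sketch of the announcement sequence described in the comment
 * above (the protocol-side variables p, n and A are assumptions, not taken
 * from this file):
 *
 *   rta *a = rta_lookup(&A);           // or an un-cached &rta with aflags 0
 *   rte *e = rte_get_temp(a);
 *   e->net = n;
 *   rte_update2(p->main_ahook, n, e, a->src);
 *
 * A withdraw for the same network is submitted by passing NULL instead of e.
 */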
1263

    
1264
/* Independent call to rte_announce(), used from next hop
1265
   recalculation, outside of rte_update(). new must be non-NULL */
1266
static inline void 
1267
rte_announce_i(rtable *tab, unsigned type, net *net, rte *new, rte *old,
1268
               rte *new_best, rte *old_best)
1269
{
1270
  rte_update_lock();
1271
  rte_announce(tab, type, net, new, old, new_best, old_best, NULL);
1272
  rte_update_unlock();
1273
}
1274

    
1275
void
1276
rte_discard(rtable *t, rte *old)        /* Non-filtered route deletion, used during garbage collection */
1277
{
1278
  rte_update_lock();
1279
  rte_recalculate(old->sender, old->net, NULL, old->attrs->src);
1280
  rte_update_unlock();
1281
}
1282

    
1283
/* Check rtable for best route to given net whether it would be exported to p */
1284
int
1285
rt_examine(rtable *t, ip_addr prefix, int pxlen, struct proto *p, struct filter *filter)
1286
{
1287
  net *n = net_find(t, prefix, pxlen);
1288
  rte *rt = n ? n->routes : NULL;
1289

    
1290
  if (!rte_is_valid(rt))
1291
    return 0;
1292

    
1293
  rte_update_lock();
1294

    
1295
  /* Rest is stripped down export_filter() */
1296
  ea_list *tmpa = make_tmp_attrs(rt, rte_update_pool);
1297
  int v = p->import_control ? p->import_control(p, &rt, &tmpa, rte_update_pool) : 0;
1298
  if (v == RIC_PROCESS)
1299
    v = (f_run(filter, &rt, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) <= F_ACCEPT);
1300

    
1301
   /* Discard temporary rte */
1302
  if (rt != n->routes)
1303
    rte_free(rt);
1304

    
1305
  rte_update_unlock();
1306

    
1307
  return v > 0;
1308
}
1309

    
1310

    
1311
/**
1312
 * rt_refresh_begin - start a refresh cycle
1313
 * @t: related routing table
1314
 * @ah: related announce hook 
1315
 *
1316
 * This function starts a refresh cycle for the given routing table and announce
1317
 * hook. The refresh cycle is a sequence where the protocol sends all its valid
1318
 * routes to the routing table (by rte_update()). After that, all protocol
1319
 * routes (more precisely routes with @ah as @sender) not sent during the
1320
 * refresh cycle but still in the table from the past are pruned. This is
1321
 * implemented by marking all related routes as stale by REF_STALE flag in
1322
 * rt_refresh_begin(), then marking all related stale routes with REF_DISCARD
1323
 * flag in rt_refresh_end() and then removing such routes in the prune loop.
1324
 */
1325
void
1326
rt_refresh_begin(rtable *t, struct announce_hook *ah)
1327
{
1328
  net *n;
1329
  rte *e;
1330

    
1331
  FIB_WALK(&t->fib, fn)
1332
    {
1333
      n = (net *) fn;
1334
      for (e = n->routes; e; e = e->next)
1335
        if (e->sender == ah)
1336
          e->flags |= REF_STALE;
1337
    }
1338
  FIB_WALK_END;
1339
}
1340

    
1341
/**
1342
 * rt_refresh_end - end a refresh cycle
1343
 * @t: related routing table
1344
 * @ah: related announce hook 
1345
 *
1346
 * This function ends a refresh cycle for the given routing table and announce
1347
 * hook. See rt_refresh_begin() for description of refresh cycles.
1348
 */
1349
void
1350
rt_refresh_end(rtable *t, struct announce_hook *ah)
1351
{
1352
  int prune = 0;
1353
  net *n;
1354
  rte *e;
1355

    
1356
  FIB_WALK(&t->fib, fn)
1357
    {
1358
      n = (net *) fn;
1359
      for (e = n->routes; e; e = e->next)
1360
        if ((e->sender == ah) && (e->flags & REF_STALE))
1361
          {
1362
            e->flags |= REF_DISCARD;
1363
            prune = 1;
1364
          }
1365
    }
1366
  FIB_WALK_END;
1367

    
1368
  if (prune)
1369
    rt_schedule_prune(t);
1370
}
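/*
 * Sketch of a refresh cycle as described above rt_refresh_begin() (the
 * surrounding protocol code is assumed):
 *
 *   rt_refresh_begin(tab, ah);   // mark this hook's routes with REF_STALE
 *   ...                          // re-announce all valid routes via rte_update()
 *   rt_refresh_end(tab, ah);     // mark still-stale routes with REF_DISCARD
 *                                // and schedule the prune that removes them
 */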
1371

    
1372

    
1373
/**
1374
 * rte_dump - dump a route
1375
 * @e: &rte to be dumped
1376
 *
1377
 * This function dumps the contents of a &rte to debug output.
1378
 */
1379
void
1380
rte_dump(rte *e)
1381
{
1382
  net *n = e->net;
1383
  debug("%-1I/%2d ", n->n.prefix, n->n.pxlen);
1384
  debug("KF=%02x PF=%02x pref=%d lm=%d ", n->n.flags, e->pflags, e->pref, now-e->lastmod);
1385
  rta_dump(e->attrs);
1386
  if (e->attrs->src->proto->proto->dump_attrs)
1387
    e->attrs->src->proto->proto->dump_attrs(e);
1388
  debug("\n");
1389
}
1390

    
1391
/**
1392
 * rt_dump - dump a routing table
1393
 * @t: routing table to be dumped
1394
 *
1395
 * This function dumps contents of a given routing table to debug output.
1396
 */
1397
void
1398
rt_dump(rtable *t)
1399
{
1400
  rte *e;
1401
  net *n;
1402
  struct announce_hook *a;
1403

    
1404
  debug("Dump of routing table <%s>\n", t->name);
1405
#ifdef DEBUGGING
1406
  fib_check(&t->fib);
1407
#endif
1408
  FIB_WALK(&t->fib, fn)
1409
    {
1410
      n = (net *) fn;
1411
      for(e=n->routes; e; e=e->next)
1412
        rte_dump(e);
1413
    }
1414
  FIB_WALK_END;
1415
  WALK_LIST(a, t->hooks)
1416
    debug("\tAnnounces routes to protocol %s\n", a->proto->name);
1417
  debug("\n");
1418
}
1419

    
1420
/**
1421
 * rt_dump_all - dump all routing tables
1422
 *
1423
 * This function dumps contents of all routing tables to debug output.
1424
 */
1425
void
1426
rt_dump_all(void)
1427
{
1428
  rtable *t;
1429

    
1430
  WALK_LIST(t, routing_tables)
1431
    rt_dump(t);
1432
}
1433

    
1434
static inline void
1435
rt_schedule_prune(rtable *tab)
1436
{
1437
  rt_mark_for_prune(tab);
1438
  ev_schedule(tab->rt_event);
1439
}
1440

    
1441
static inline void
1442
rt_schedule_gc(rtable *tab)
1443
{
1444
  if (tab->gc_scheduled)
1445
    return;
1446

    
1447
  tab->gc_scheduled = 1;
1448
  ev_schedule(tab->rt_event);
1449
}
1450

    
1451
static inline void
1452
rt_schedule_hcu(rtable *tab)
1453
{
1454
  if (tab->hcu_scheduled)
1455
    return;
1456

    
1457
  tab->hcu_scheduled = 1;
1458
  ev_schedule(tab->rt_event);
1459
}
1460

    
1461
static inline void
1462
rt_schedule_nhu(rtable *tab)
1463
{
1464
  if (tab->nhu_state == 0)
1465
    ev_schedule(tab->rt_event);
1466

    
1467
  /* state change 0->1, 2->3 */
1468
  tab->nhu_state |= 1;
1469
}
1470

    
1471

    
1472
static void
1473
rt_prune_nets(rtable *tab)
1474
{
1475
  struct fib_iterator fit;
1476
  int ncnt = 0, ndel = 0;
1477

    
1478
#ifdef DEBUGGING
1479
  fib_check(&tab->fib);
1480
#endif
1481

    
1482
  FIB_ITERATE_INIT(&fit, &tab->fib);
1483
again:
1484
  FIB_ITERATE_START(&tab->fib, &fit, f)
1485
    {
1486
      net *n = (net *) f;
1487
      ncnt++;
1488
      if (!n->routes)                /* Orphaned FIB entry */
1489
        {
1490
          FIB_ITERATE_PUT(&fit, f);
1491
          fib_delete(&tab->fib, f);
1492
          ndel++;
1493
          goto again;
1494
        }
1495
    }
1496
  FIB_ITERATE_END(f);
1497
  DBG("Pruned %d of %d networks\n", ndel, ncnt);
1498

    
1499
  tab->gc_counter = 0;
1500
  tab->gc_time = now;
1501
  tab->gc_scheduled = 0;
1502
}
1503

    
1504
static void
1505
rt_event(void *ptr)
1506
{
1507
  rtable *tab = ptr;
1508

    
1509
  if (tab->hcu_scheduled)
1510
    rt_update_hostcache(tab);
1511

    
1512
  if (tab->nhu_state)
1513
    rt_next_hop_update(tab);
1514

    
1515
  if (tab->prune_state)
1516
    if (!rt_prune_table(tab))
1517
      {
1518
        /* Table prune unfinished */
1519
        ev_schedule(tab->rt_event);
1520
        return;
1521
      }
1522

    
1523
  if (tab->gc_scheduled)
1524
    {
1525
      rt_prune_nets(tab);
1526
      rt_prune_sources(); // FIXME this should be moved to an independent event
1527
    }
1528
}
1529

    
1530
void
1531
rt_setup(pool *p, rtable *t, char *name, struct rtable_config *cf)
1532
{
1533
  bzero(t, sizeof(*t));
1534
  fib_init(&t->fib, p, sizeof(net), 0, rte_init);
1535
  t->name = name;
1536
  t->config = cf;
1537
  init_list(&t->hooks);
1538
  if (cf)
1539
    {
1540
      t->rt_event = ev_new(p);
1541
      t->rt_event->hook = rt_event;
1542
      t->rt_event->data = t;
1543
      t->gc_time = now;
1544
    }
1545
}
1546

    
1547
/**
1548
 * rt_init - initialize routing tables
1549
 *
1550
 * This function is called during BIRD startup. It initializes the
1551
 * routing table module.
1552
 */
1553
void
1554
rt_init(void)
1555
{
1556
  rta_init();
1557
  rt_table_pool = rp_new(&root_pool, "Routing tables");
1558
  rte_update_pool = lp_new(rt_table_pool, 4080);
1559
  rte_slab = sl_new(rt_table_pool, sizeof(rte));
1560
  init_list(&routing_tables);
1561
}
1562

    
1563

    
1564
static int
1565
rt_prune_step(rtable *tab, int *limit)
1566
{
1567
  struct fib_iterator *fit = &tab->prune_fit;
1568

    
1569
  DBG("Pruning route table %s\n", tab->name);
1570
#ifdef DEBUGGING
1571
  fib_check(&tab->fib);
1572
#endif
1573

    
1574
  if (tab->prune_state == RPS_NONE)
1575
    return 1;
1576

    
1577
  if (tab->prune_state == RPS_SCHEDULED)
1578
    {
1579
      FIB_ITERATE_INIT(fit, &tab->fib);
1580
      tab->prune_state = RPS_RUNNING;
1581
    }
1582

    
1583
again:
1584
  FIB_ITERATE_START(&tab->fib, fit, fn)
1585
    {
1586
      net *n = (net *) fn;
1587
      rte *e;
1588

    
1589
    rescan:
1590
      for (e=n->routes; e; e=e->next)
1591
        if (e->sender->proto->flushing || (e->flags & REF_DISCARD))
1592
          {
1593
            if (*limit <= 0)
1594
              {
1595
                FIB_ITERATE_PUT(fit, fn);
1596
                return 0;
1597
              }
1598

    
1599
            rte_discard(tab, e);
1600
            (*limit)--;
1601

    
1602
            goto rescan;
1603
          }
1604
      if (!n->routes)                /* Orphaned FIB entry */
1605
        {
1606
          FIB_ITERATE_PUT(fit, fn);
1607
          fib_delete(&tab->fib, fn);
1608
          goto again;
1609
        }
1610
    }
1611
  FIB_ITERATE_END(fn);
1612

    
1613
#ifdef DEBUGGING
1614
  fib_check(&tab->fib);
1615
#endif
1616

    
1617
  tab->prune_state = RPS_NONE;
1618
  return 1;
1619
}
1620

    
1621
/**
1622
 * rt_prune_table - prune a routing table
1623
 * @tab: a routing table for pruning
1624
 *
1625
 * This function scans the routing table @tab and removes routes belonging to
1626
 * flushing protocols, discarded routes and also stale network entries, in a
1627
 * similar fashion to rt_prune_loop(). Returns 1 when all such routes are
1628
 * pruned. Contrary to rt_prune_loop(), this function is not a part of the
1629
 * protocol flushing loop, but it is called from rt_event() for just one routing
1630
 * table.
1631
 *
1632
 * Note that rt_prune_table() and rt_prune_loop() share (for each table) the
1633
 * prune state (@prune_state) and also the pruning iterator (@prune_fit).
1634
 */
1635
static inline int
1636
rt_prune_table(rtable *tab)
1637
{
1638
  int limit = 512;
1639
  return rt_prune_step(tab, &limit);
1640
}
1641

    
1642
/**
1643
 * rt_prune_loop - prune routing tables
1644
 *
1645
 * The prune loop scans routing tables and removes routes belonging to flushing
1646
 * protocols, discarded routes and also stale network entries. Returns 1 when
1647
 * all such routes are pruned. It is a part of the protocol flushing loop.
1648
 */
1649
int
1650
rt_prune_loop(void)
1651
{
1652
  int limit = 512;
1653
  rtable *t;
1654

    
1655
  WALK_LIST(t, routing_tables)
1656
    if (! rt_prune_step(t, &limit))
1657
      return 0;
1658

    
1659
  return 1;
1660
}
1661

    
1662
void
1663
rt_preconfig(struct config *c)
1664
{
1665
  struct symbol *s = cf_get_symbol("master");
1666

    
1667
  init_list(&c->tables);
1668
  c->master_rtc = rt_new_table(s);
1669
}
1670

    
1671

    
1672
/* 
1673
 * Some functions for handling internal next hop updates
1674
 * triggered by rt_schedule_nhu().
1675
 */
1676

    
1677
static inline int
1678
rta_next_hop_outdated(rta *a)
1679
{
1680
  struct hostentry *he = a->hostentry;
1681

    
1682
  if (!he)
1683
    return 0;
1684

    
1685
  if (!he->src)
1686
    return a->dest != RTD_UNREACHABLE;
1687

    
1688
  return (a->iface != he->src->iface) || !ipa_equal(a->gw, he->gw) ||
1689
    (a->dest != he->dest) || (a->igp_metric != he->igp_metric) ||
1690
    !mpnh_same(a->nexthops, he->src->nexthops);
1691
}
1692

    
1693
static inline void
1694
rta_apply_hostentry(rta *a, struct hostentry *he)
1695
{
1696
  a->hostentry = he;
1697
  a->iface = he->src ? he->src->iface : NULL;
1698
  a->gw = he->gw;
1699
  a->dest = he->dest;
1700
  a->igp_metric = he->igp_metric;
1701
  a->nexthops = he->src ? he->src->nexthops : NULL;
1702
}
1703

    
1704
static inline rte *
1705
rt_next_hop_update_rte(rtable *tab, rte *old)
1706
{
1707
  rta a;
1708
  memcpy(&a, old->attrs, sizeof(rta));
1709
  rta_apply_hostentry(&a, old->attrs->hostentry);
1710
  a.aflags = 0;
1711

    
1712
  rte *e = sl_alloc(rte_slab);
1713
  memcpy(e, old, sizeof(rte));
1714
  e->attrs = rta_lookup(&a);
1715

    
1716
  return e;
1717
}

static inline int
rt_next_hop_update_net(rtable *tab, net *n)
{
  rte **k, *e, *new, *old_best, **new_best;
  int count = 0;
  int free_old_best = 0;

  old_best = n->routes;
  if (!old_best)
    return 0;

  for (k = &n->routes; e = *k; k = &e->next)
    if (rta_next_hop_outdated(e->attrs))
      {
        new = rt_next_hop_update_rte(tab, e);
        *k = new;

        rte_announce_i(tab, RA_ANY, n, new, e, NULL, NULL);
        rte_trace_in(D_ROUTES, new->sender->proto, new, "updated");

        /* Call a pre-comparison hook */
        /* Not really an efficient way to compute this */
        if (e->attrs->src->proto->rte_recalculate)
          e->attrs->src->proto->rte_recalculate(tab, n, new, e, NULL);

        if (e != old_best)
          rte_free_quick(e);
        else /* Freeing of the old best rte is postponed */
          free_old_best = 1;

        e = new;
        count++;
      }

  if (!count)
    return 0;

  /* Find the new best route */
  new_best = NULL;
  for (k = &n->routes; e = *k; k = &e->next)
    {
      if (!new_best || rte_better(e, *new_best))
        new_best = k;
    }

  /* Relink the new best route to the first position */
  new = *new_best;
  if (new != n->routes)
    {
      *new_best = new->next;
      new->next = n->routes;
      n->routes = new;
    }

  /* Announce the new best route */
  if (new != old_best)
    {
      rte_announce_i(tab, RA_OPTIMAL, n, new, old_best, NULL, NULL);
      rte_trace_in(D_ROUTES, new->sender->proto, new, "updated [best]");
    }

  /* FIXME: Better announcement of merged routes */
  rte_announce_i(tab, RA_MERGED, n, new, old_best, new, old_best);

  if (free_old_best)
    rte_free_quick(old_best);

  return count;
}

static void
rt_next_hop_update(rtable *tab)
{
  struct fib_iterator *fit = &tab->nhu_fit;
  int max_feed = 32;

  if (tab->nhu_state == 0)
    return;

  if (tab->nhu_state == 1)
    {
      FIB_ITERATE_INIT(fit, &tab->fib);
      tab->nhu_state = 2;
    }

  FIB_ITERATE_START(&tab->fib, fit, fn)
    {
      if (max_feed <= 0)
        {
          FIB_ITERATE_PUT(fit, fn);
          ev_schedule(tab->rt_event);
          return;
        }
      max_feed -= rt_next_hop_update_net(tab, (net *) fn);
    }
  FIB_ITERATE_END(fn);

  /* state change 2->0, 3->1 */
  tab->nhu_state &= 1;

  if (tab->nhu_state > 0)
    ev_schedule(tab->rt_event);
}
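
/*
 * Note (sketch of the assumed encoding, not spelled out in the original
 * source): tab->nhu_state is a small state machine,
 *
 *   0 .. idle, no next hop update pending
 *   1 .. update scheduled, not started yet
 *   2 .. update in progress
 *   3 .. update in progress and another one was requested meanwhile
 *
 * Hence the '&= 1' above: 2 (running) drops back to 0, while 3 becomes 1,
 * which re-schedules one more pass over the table.
 */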


struct rtable_config *
rt_new_table(struct symbol *s)
{
  /* Hack that allows one to 'redefine' the master table */
  if ((s->class == SYM_TABLE) && (s->def == new_config->master_rtc))
    return s->def;

  struct rtable_config *c = cfg_allocz(sizeof(struct rtable_config));

  cf_define_symbol(s, SYM_TABLE, c);
  c->name = s->name;
  add_tail(&new_config->tables, &c->n);
  c->gc_max_ops = 1000;
  c->gc_min_time = 5;
  return c;
}
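
/*
 * Example (sketch): each 'table' statement in the configuration ends up here,
 * so a config line like
 *
 *   table aux;
 *
 * defines a new SYM_TABLE symbol and a &rtable_config with the defaults set
 * above (gc_max_ops 1000, gc_min_time 5); the actual &rtable is created later
 * in rt_commit().
 */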

/**
 * rt_lock_table - lock a routing table
 * @r: routing table to be locked
 *
 * Lock a routing table, because it's in use by a protocol,
 * preventing it from being freed when it gets undefined in a new
 * configuration.
 */
void
rt_lock_table(rtable *r)
{
  r->use_count++;
}
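
/*
 * Example (sketch, assuming a protocol that keeps its table pointer in
 * p->table): a protocol referencing a table brackets its use with the lock
 * calls, typically
 *
 *   rt_lock_table(p->table);     // in the start/init path
 *   ...
 *   rt_unlock_table(p->table);   // in the cleanup path
 *
 * so that a reconfiguration removing the table only frees it after the last
 * user has called rt_unlock_table().
 */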

/**
 * rt_unlock_table - unlock a routing table
 * @r: routing table to be unlocked
 *
 * Unlock a routing table formerly locked by rt_lock_table(),
 * that is, decrease its use count and delete it if it's scheduled
 * for deletion by configuration changes.
 */
void
rt_unlock_table(rtable *r)
{
  if (!--r->use_count && r->deleted)
    {
      struct config *conf = r->deleted;
      DBG("Deleting routing table %s\n", r->name);
      r->config->table = NULL;
      if (r->hostcache)
        rt_free_hostcache(r);
      rem_node(&r->n);
      fib_free(&r->fib);
      rfree(r->rt_event);
      mb_free(r);
      config_del_obstacle(conf);
    }
}

/**
 * rt_commit - commit new routing table configuration
 * @new: new configuration
 * @old: original configuration or %NULL if it's boot time config
 *
 * Scan differences between @old and @new configuration and modify
 * the routing tables according to these changes. If @new defines a
 * previously unknown table, create it; if it omits a table existing
 * in @old, schedule it for deletion (it gets deleted when all protocols
 * disconnect from it by calling rt_unlock_table()); if it exists
 * in both configurations, leave it unchanged.
 */
void
rt_commit(struct config *new, struct config *old)
{
  struct rtable_config *o, *r;

  DBG("rt_commit:\n");
  if (old)
    {
      WALK_LIST(o, old->tables)
        {
          rtable *ot = o->table;
          if (!ot->deleted)
            {
              struct symbol *sym = cf_find_symbol(new, o->name);
              if (sym && sym->class == SYM_TABLE && !new->shutdown)
                {
                  DBG("\t%s: same\n", o->name);
                  r = sym->def;
                  r->table = ot;
                  ot->name = r->name;
                  ot->config = r;
                  if (o->sorted != r->sorted)
                    log(L_WARN "Reconfiguration of rtable sorted flag not implemented");
                }
              else
                {
                  DBG("\t%s: deleted\n", o->name);
                  ot->deleted = old;
                  config_add_obstacle(old);
                  rt_lock_table(ot);
                  rt_unlock_table(ot);
                }
            }
        }
    }

  WALK_LIST(r, new->tables)
    if (!r->table)
      {
        rtable *t = mb_alloc(rt_table_pool, sizeof(struct rtable));
        DBG("\t%s: created\n", r->name);
        rt_setup(rt_table_pool, t, r->name, r);
        add_tail(&routing_tables, &t->n);
        r->table = t;
      }
  DBG("\tdone\n");
}

static inline void
do_feed_baby(struct proto *p, int type, struct announce_hook *h, net *n, rte *e)
{
  rte_update_lock();
  if (type == RA_ACCEPTED)
    rt_notify_accepted(h, n, e, NULL, NULL, p->refeeding ? 2 : 1);
  else if (type == RA_MERGED)
    rt_notify_merged(h, n, NULL, NULL, e, p->refeeding ? e : NULL, p->refeeding);
  else
    rt_notify_basic(h, n, e, p->refeeding ? e : NULL, p->refeeding);
  rte_update_unlock();
}

/**
 * rt_feed_baby - advertise routes to a new protocol
 * @p: protocol to be fed
 *
 * This function performs one pass of advertisement of routes to a newly
 * initialized protocol. It's called by the protocol code as long as it
 * has something to do. (We avoid transferring all the routes in a single
 * pass in order not to monopolize CPU time.)
 */
int
rt_feed_baby(struct proto *p)
{
  struct announce_hook *h;
  struct fib_iterator *fit;
  int max_feed = 256;

  if (!p->feed_ahook)                        /* Need to initialize first */
    {
      if (!p->ahooks)
        return 1;
      DBG("Announcing routes to new protocol %s\n", p->name);
      p->feed_ahook = p->ahooks;
      fit = p->feed_iterator = mb_alloc(p->pool, sizeof(struct fib_iterator));
      goto next_hook;
    }
  fit = p->feed_iterator;

again:
  h = p->feed_ahook;
  FIB_ITERATE_START(&h->table->fib, fit, fn)
    {
      net *n = (net *) fn;
      rte *e = n->routes;
      if (max_feed <= 0)
        {
          FIB_ITERATE_PUT(fit, fn);
          return 0;
        }

      /* XXXX perhaps we should change feed for RA_ACCEPTED to not use 'new' */

      if ((p->accept_ra_types == RA_OPTIMAL) ||
          (p->accept_ra_types == RA_ACCEPTED) ||
          (p->accept_ra_types == RA_MERGED))
        if (rte_is_valid(e))
          {
            if (p->export_state != ES_FEEDING)
              return 1;  /* In the meantime, the protocol fell down. */

            do_feed_baby(p, p->accept_ra_types, h, n, e);
            max_feed--;
          }

      if (p->accept_ra_types == RA_ANY)
        for(e = n->routes; e; e = e->next)
          {
            if (p->export_state != ES_FEEDING)
              return 1;  /* In the meantime, the protocol fell down. */

            if (!rte_is_valid(e))
              continue;

            do_feed_baby(p, RA_ANY, h, n, e);
            max_feed--;
          }
    }
  FIB_ITERATE_END(fn);
  p->feed_ahook = h->next;
  if (!p->feed_ahook)
    {
      mb_free(p->feed_iterator);
      p->feed_iterator = NULL;
      return 1;
    }

next_hook:
  h = p->feed_ahook;
  FIB_ITERATE_INIT(fit, &h->table->fib);
  goto again;
}
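
/*
 * Example (sketch): the caller is expected to keep invoking rt_feed_baby()
 * until it reports completion, e.g. from a feed loop or a re-scheduled event:
 *
 *   while (!rt_feed_baby(p))
 *     ;   // or re-schedule the feed event and continue on the next tick
 *
 * A return value of 0 means "more work left" (the FIB iterator was parked
 * with FIB_ITERATE_PUT()), 1 means the feed is finished or was cut short
 * because the protocol left the ES_FEEDING state.
 */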

/**
 * rt_feed_baby_abort - abort protocol feeding
 * @p: protocol
 *
 * This function is called by the protocol code when the protocol
 * stops or ceases to exist before the last iteration of rt_feed_baby()
 * has finished.
 */
void
rt_feed_baby_abort(struct proto *p)
{
  if (p->feed_ahook)
    {
      /* Unlink the iterator and exit */
      fit_get(&p->feed_ahook->table->fib, p->feed_iterator);
      p->feed_ahook = NULL;
    }
}


static inline unsigned
ptr_hash(void *ptr)
{
  uintptr_t p = (uintptr_t) ptr;
  return p ^ (p << 8) ^ (p >> 16);
}

static inline unsigned
hc_hash(ip_addr a, rtable *dep)
{
  return (ipa_hash(a) ^ ptr_hash(dep)) & 0xffff;
}

static inline void
hc_insert(struct hostcache *hc, struct hostentry *he)
{
  uint k = he->hash_key >> hc->hash_shift;
  he->next = hc->hash_table[k];
  hc->hash_table[k] = he;
}

static inline void
hc_remove(struct hostcache *hc, struct hostentry *he)
{
  struct hostentry **hep;
  uint k = he->hash_key >> hc->hash_shift;

  for (hep = &hc->hash_table[k]; *hep != he; hep = &(*hep)->next);
  *hep = he->next;
}
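
/*
 * Note (sketch): hc_hash() produces a 16-bit key and hash_shift is set to
 * 16 - hash_order, so the bucket index keeps only the top hash_order bits of
 * the key. E.g. with the default order 10 (shift 6), key 0xabcd maps to
 * bucket 0xabcd >> 6 = 0x2af. This is why the table can be resized simply by
 * changing hash_shift and re-inserting the entries (see hc_resize() below).
 */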

#define HC_DEF_ORDER 10
#define HC_HI_MARK *4
#define HC_HI_STEP 2
#define HC_HI_ORDER 16                        /* Must be at most 16 */
#define HC_LO_MARK /5
#define HC_LO_STEP 2
#define HC_LO_ORDER 10

static void
hc_alloc_table(struct hostcache *hc, unsigned order)
{
  unsigned hsize = 1 << order;
  hc->hash_order = order;
  hc->hash_shift = 16 - order;
  hc->hash_max = (order >= HC_HI_ORDER) ? ~0 : (hsize HC_HI_MARK);
  hc->hash_min = (order <= HC_LO_ORDER) ?  0 : (hsize HC_LO_MARK);

  hc->hash_table = mb_allocz(rt_table_pool, hsize * sizeof(struct hostentry *));
}

static void
hc_resize(struct hostcache *hc, unsigned new_order)
{
  unsigned old_size = 1 << hc->hash_order;
  struct hostentry **old_table = hc->hash_table;
  struct hostentry *he, *hen;
  int i;

  hc_alloc_table(hc, new_order);
  for (i = 0; i < old_size; i++)
    for (he = old_table[i]; he != NULL; he=hen)
      {
        hen = he->next;
        hc_insert(hc, he);
      }
  mb_free(old_table);
}
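
/*
 * Example (derived from the macros above): with the default HC_DEF_ORDER 10
 * the table has 1024 buckets, hash_max = 1024 * 4 = 4096 and hash_min = 0
 * (order <= HC_LO_ORDER). Once a 4097th entry is added, hc_new_hostentry()
 * below grows the table by HC_HI_STEP to order 12 (4096 buckets,
 * hash_max 16384, hash_min 4096 / 5 = 819); dropping below hash_min shrinks
 * it again by HC_LO_STEP.
 */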

static struct hostentry *
hc_new_hostentry(struct hostcache *hc, ip_addr a, ip_addr ll, rtable *dep, unsigned k)
{
  struct hostentry *he = sl_alloc(hc->slab);

  he->addr = a;
  he->link = ll;
  he->tab = dep;
  he->hash_key = k;
  he->uc = 0;
  he->src = NULL;

  add_tail(&hc->hostentries, &he->ln);
  hc_insert(hc, he);

  hc->hash_items++;
  if (hc->hash_items > hc->hash_max)
    hc_resize(hc, hc->hash_order + HC_HI_STEP);

  return he;
}

static void
hc_delete_hostentry(struct hostcache *hc, struct hostentry *he)
{
  rta_free(he->src);

  rem_node(&he->ln);
  hc_remove(hc, he);
  sl_free(hc->slab, he);

  hc->hash_items--;
  if (hc->hash_items < hc->hash_min)
    hc_resize(hc, hc->hash_order - HC_LO_STEP);
}

static void
rt_init_hostcache(rtable *tab)
{
  struct hostcache *hc = mb_allocz(rt_table_pool, sizeof(struct hostcache));
  init_list(&hc->hostentries);

  hc->hash_items = 0;
  hc_alloc_table(hc, HC_DEF_ORDER);
  hc->slab = sl_new(rt_table_pool, sizeof(struct hostentry));

  hc->lp = lp_new(rt_table_pool, 1008);
  hc->trie = f_new_trie(hc->lp, sizeof(struct f_trie_node));

  tab->hostcache = hc;
}

static void
rt_free_hostcache(rtable *tab)
{
  struct hostcache *hc = tab->hostcache;

  node *n;
  WALK_LIST(n, hc->hostentries)
    {
      struct hostentry *he = SKIP_BACK(struct hostentry, ln, n);
      rta_free(he->src);

      if (he->uc)
        log(L_ERR "Hostcache is not empty in table %s", tab->name);
    }

  rfree(hc->slab);
  rfree(hc->lp);
  mb_free(hc->hash_table);
  mb_free(hc);
}

static void
rt_notify_hostcache(rtable *tab, net *net)
{
  struct hostcache *hc = tab->hostcache;

  if (tab->hcu_scheduled)
    return;

  if (trie_match_prefix(hc->trie, net->n.prefix, net->n.pxlen))
    rt_schedule_hcu(tab);
}

static int
if_local_addr(ip_addr a, struct iface *i)
{
  struct ifa *b;

  WALK_LIST(b, i->addrs)
    if (ipa_equal(a, b->ip))
      return 1;

  return 0;
}

static u32
rt_get_igp_metric(rte *rt)
{
  eattr *ea = ea_find(rt->attrs->eattrs, EA_GEN_IGP_METRIC);

  if (ea)
    return ea->u.data;

  rta *a = rt->attrs;

#ifdef CONFIG_OSPF
  if ((a->source == RTS_OSPF) ||
      (a->source == RTS_OSPF_IA) ||
      (a->source == RTS_OSPF_EXT1))
    return rt->u.ospf.metric1;
#endif

#ifdef CONFIG_RIP
  if (a->source == RTS_RIP)
    return rt->u.rip.metric;
#endif

  /* Device routes */
  if ((a->dest != RTD_ROUTER) && (a->dest != RTD_MULTIPATH))
    return 0;

  return IGP_METRIC_UNKNOWN;
}

static int
rt_update_hostentry(rtable *tab, struct hostentry *he)
{
  rta *old_src = he->src;
  int pxlen = 0;

  /* Reset the hostentry */
  he->src = NULL;
  he->gw = IPA_NONE;
  he->dest = RTD_UNREACHABLE;
  he->igp_metric = 0;

  net *n = net_route(tab, he->addr, MAX_PREFIX_LENGTH);
  if (n)
    {
      rte *e = n->routes;
      rta *a = e->attrs;
      pxlen = n->n.pxlen;

      if (a->hostentry)
        {
          /* Recursive route should not depend on another recursive route */
          log(L_WARN "Next hop address %I resolvable through recursive route for %I/%d",
              he->addr, n->n.prefix, pxlen);
          goto done;
        }

      if (a->dest == RTD_DEVICE)
        {
          if (if_local_addr(he->addr, a->iface))
            {
              /* The host address is a local address, this is not valid */
              log(L_WARN "Next hop address %I is a local address of iface %s",
                  he->addr, a->iface->name);
              goto done;
            }

          /* The host is directly reachable, use link as a gateway */
          he->gw = he->link;
          he->dest = RTD_ROUTER;
        }
      else
        {
          /* The host is reachable through some route entry */
          he->gw = a->gw;
          he->dest = a->dest;
        }

      he->src = rta_clone(a);
      he->igp_metric = rt_get_igp_metric(e);
    }

 done:
  /* Add a prefix range to the trie */
  trie_add_prefix(tab->hostcache->trie, he->addr, MAX_PREFIX_LENGTH, pxlen, MAX_PREFIX_LENGTH);

  rta_free(old_src);
  return old_src != he->src;
}
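
/*
 * Example (illustrative values, not from the original source): for a
 * hostentry with addr 10.1.1.1, net_route() returns the most specific
 * covering net with a valid route, say 10.1.0.0/16 via 192.168.0.1. The
 * hostentry then caches gw 192.168.0.1, dest RTD_ROUTER and that route's IGP
 * metric, and the range 10.1.1.1, pxlen 16..32 is added to the trie so that
 * later changes under that prefix trigger rt_notify_hostcache().
 */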

static void
rt_update_hostcache(rtable *tab)
{
  struct hostcache *hc = tab->hostcache;
  struct hostentry *he;
  node *n, *x;

  /* Reset the trie */
  lp_flush(hc->lp);
  hc->trie = f_new_trie(hc->lp, sizeof(struct f_trie_node));

  WALK_LIST_DELSAFE(n, x, hc->hostentries)
    {
      he = SKIP_BACK(struct hostentry, ln, n);
      if (!he->uc)
        {
          hc_delete_hostentry(hc, he);
          continue;
        }

      if (rt_update_hostentry(tab, he))
        rt_schedule_nhu(he->tab);
    }

  tab->hcu_scheduled = 0;
}

static struct hostentry *
rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep)
{
  struct hostentry *he;

  if (!tab->hostcache)
    rt_init_hostcache(tab);

  uint k = hc_hash(a, dep);
  struct hostcache *hc = tab->hostcache;
  for (he = hc->hash_table[k >> hc->hash_shift]; he != NULL; he = he->next)
    if (ipa_equal(he->addr, a) && (he->tab == dep))
      return he;

  he = hc_new_hostentry(hc, a, ll, dep, k);
  rt_update_hostentry(tab, he);
  return he;
}

void
rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr *gw, ip_addr *ll)
{
  rta_apply_hostentry(a, rt_get_hostentry(tab, *gw, *ll, dep));
}
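
/*
 * Example (sketch, names illustrative rather than the exact protocol code): a
 * protocol with recursive next hops, typically BGP, resolves them through an
 * IGP table roughly like
 *
 *   rta_set_recursive_next_hop(p->table, a, igp_table, &bgp_next_hop, &ll);
 *
 * where 'igp_table' is the table used for resolution and 'p->table' is the
 * dependent table that gets re-announced (via rt_schedule_nhu()) whenever the
 * resolving route changes.
 */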


/*
 *  CLI commands
 */

static void
rt_format_via(rte *e, byte *via)
{
  rta *a = e->attrs;

  switch (a->dest)
    {
    case RTD_ROUTER:        bsprintf(via, "via %I on %s", a->gw, a->iface->name); break;
    case RTD_DEVICE:        bsprintf(via, "dev %s", a->iface->name); break;
    case RTD_BLACKHOLE:        bsprintf(via, "blackhole"); break;
    case RTD_UNREACHABLE:        bsprintf(via, "unreachable"); break;
    case RTD_PROHIBIT:        bsprintf(via, "prohibited"); break;
    case RTD_MULTIPATH:        bsprintf(via, "multipath"); break;
    default:                bsprintf(via, "???");
    }
}

static void
rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, ea_list *tmpa)
{
  byte via[STD_ADDRESS_P_LENGTH+32], from[STD_ADDRESS_P_LENGTH+8];
  byte tm[TM_DATETIME_BUFFER_SIZE], info[256];
  rta *a = e->attrs;
  int primary = (e->net->routes == e);
  int sync_error = (e->net->n.flags & KRF_SYNC_ERROR);
  void (*get_route_info)(struct rte *, byte *buf, struct ea_list *attrs);
  struct mpnh *nh;

  rt_format_via(e, via);
  tm_format_datetime(tm, &config->tf_route, e->lastmod);
  if (ipa_nonzero(a->from) && !ipa_equal(a->from, a->gw))
    bsprintf(from, " from %I", a->from);
  else
    from[0] = 0;

  get_route_info = a->src->proto->proto->get_route_info;
  if (get_route_info || d->verbose)
    {
      /* Need to normalize the extended attributes */
      ea_list *t = tmpa;
      t = ea_append(t, a->eattrs);
      tmpa = alloca(ea_scan(t));
      ea_merge(t, tmpa);
      ea_sort(tmpa);
    }
  if (get_route_info)
    get_route_info(e, info, tmpa);
  else
    bsprintf(info, " (%d)", e->pref);
  cli_printf(c, -1007, "%-18s %s [%s %s%s]%s%s", ia, via, a->src->proto->name,
             tm, from, primary ? (sync_error ? " !" : " *") : "", info);
  for (nh = a->nexthops; nh; nh = nh->next)
    cli_printf(c, -1007, "\tvia %I on %s weight %d", nh->gw, nh->iface->name, nh->weight + 1);
  if (d->verbose)
    rta_show(c, a, tmpa);
}
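
/*
 * Example (illustrative values only): the cli_printf() format above yields
 * 'show route' lines of the familiar shape
 *
 *   10.0.0.0/8         via 192.168.1.1 on eth0 [bgp1 12:05:01 from 192.168.1.2] * (100) [AS65000i]
 *
 * where '*' marks the primary route, '!' a primary route with a kernel sync
 * error, and the trailing part comes from the protocol's get_route_info()
 * hook (or just the preference when the hook is missing).
 */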

static void
rt_show_net(struct cli *c, net *n, struct rt_show_data *d)
{
  rte *e, *ee;
  byte ia[STD_ADDRESS_P_LENGTH+8];
  struct ea_list *tmpa;
  struct announce_hook *a = NULL;
  int first = 1;
  int pass = 0;

  bsprintf(ia, "%I/%d", n->n.prefix, n->n.pxlen);

  if (d->export_mode)
    {
      if (! d->export_protocol->rt_notify)
        return;

      a = proto_find_announce_hook(d->export_protocol, d->table);
      if (!a)
        return;
    }

  for (e = n->routes; e; e = e->next)
    {
      if (rte_is_filtered(e) != d->filtered)
        continue;

      d->rt_counter++;
      d->net_counter += first;
      first = 0;

      if (pass)
        continue;

      ee = e;
      rte_update_lock();                /* We use the update buffer for filtering */
      tmpa = make_tmp_attrs(e, rte_update_pool);

      /* Special case for merged export */
      if ((d->export_mode == RSEM_EXPORT) && (d->export_protocol->accept_ra_types == RA_MERGED))
        {
          rte *rt_free;
          e = rt_export_merged(a, n, &rt_free, &tmpa, 1);
          pass = 1;

          if (!e)
          { e = ee; goto skip; }
        }
      else if (d->export_mode)
        {
          struct proto *ep = d->export_protocol;
          int ic = ep->import_control ? ep->import_control(ep, &e, &tmpa, rte_update_pool) : 0;

          if (ep->accept_ra_types == RA_OPTIMAL || ep->accept_ra_types == RA_MERGED)
            pass = 1;

          if (ic < 0)
            goto skip;

          if (d->export_mode > RSEM_PREEXPORT)
            {
              /*
               * FIXME - This shows what should be exported according to current
               * filters, but not what was really exported. The 'configure soft'
               * command may change the export filter without updating the routes.
               */
              int do_export = (ic > 0) ||
                (f_run(a->out_filter, &e, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) <= F_ACCEPT);

              if (do_export != (d->export_mode == RSEM_EXPORT))
                goto skip;

              if ((d->export_mode == RSEM_EXPORT) && (ep->accept_ra_types == RA_ACCEPTED))
                pass = 1;
            }
        }

      if (d->show_protocol && (d->show_protocol != e->attrs->src->proto))
        goto skip;

      if (f_run(d->filter, &e, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) > F_ACCEPT)
        goto skip;

      d->show_counter++;
      if (d->stats < 2)
        rt_show_rte(c, ia, e, d, tmpa);
      ia[0] = 0;

    skip:
      if (e != ee)
      {
        rte_free(e);
        e = ee;
      }
      rte_update_unlock();

      if (d->primary_only)
        break;
    }
}

static void
rt_show_cont(struct cli *c)
{
  struct rt_show_data *d = c->rover;
#ifdef DEBUGGING
  unsigned max = 4;
#else
  unsigned max = 64;
#endif
  struct fib *fib = &d->table->fib;
  struct fib_iterator *it = &d->fit;

  FIB_ITERATE_START(fib, it, f)
    {
      net *n = (net *) f;
      if (d->running_on_config && d->running_on_config != config)
        {
          cli_printf(c, 8004, "Stopped due to reconfiguration");
          goto done;
        }
      if (d->export_protocol && (d->export_protocol->export_state == ES_DOWN))
        {
          cli_printf(c, 8005, "Protocol is down");
          goto done;
        }
      if (!max--)
        {
          FIB_ITERATE_PUT(it, f);
          return;
        }
      rt_show_net(c, n, d);
    }
  FIB_ITERATE_END(f);
  if (d->stats)
    cli_printf(c, 14, "%d of %d routes for %d networks", d->show_counter, d->rt_counter, d->net_counter);
  else
    cli_printf(c, 0, "");
done:
  c->cont = c->cleanup = NULL;
}

static void
rt_show_cleanup(struct cli *c)
{
  struct rt_show_data *d = c->rover;

  /* Unlink the iterator */
  fit_get(&d->table->fib, &d->fit);
}

void
rt_show(struct rt_show_data *d)
{
  net *n;

  /* Default is either a master table or a table related to a respective protocol */
  if (!d->table && d->export_protocol) d->table = d->export_protocol->table;
  if (!d->table && d->show_protocol) d->table = d->show_protocol->table;
  if (!d->table) d->table = config->master_rtc->table;

  /* Filtered routes are neither exported nor have sensible ordering */
  if (d->filtered && (d->export_mode || d->primary_only))
    cli_msg(0, "");

  if (d->pxlen == 256)
    {
      FIB_ITERATE_INIT(&d->fit, &d->table->fib);
      this_cli->cont = rt_show_cont;
      this_cli->cleanup = rt_show_cleanup;
      this_cli->rover = d;
    }
  else
    {
      if (d->show_for)
        n = net_route(d->table, d->prefix, d->pxlen);
      else
        n = net_find(d->table, d->prefix, d->pxlen);

      if (n)
        rt_show_net(this_cli, n, d);

      if (d->rt_counter)
        cli_msg(0, "");
      else
        cli_msg(8001, "Network not in table");
    }
}
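
/*
 * Note (sketch): d->pxlen == 256 is used as a sentinel meaning "no prefix was
 * given on the command line", so the whole table is walked asynchronously via
 * rt_show_cont(); any real prefix length (0-32 or 0-128) goes through the
 * single-net branch above.
 */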

/*
 *  Documentation for functions declared inline in route.h
 */
#if 0

/**
 * net_find - find a network entry
 * @tab: a routing table
 * @addr: address of the network
 * @len: length of the network prefix
 *
 * net_find() looks up the given network in routing table @tab and
 * returns a pointer to its &net entry or %NULL if no such network
 * exists.
 */
static inline net *net_find(rtable *tab, ip_addr addr, unsigned len)
{ DUMMY; }

/**
 * net_get - obtain a network entry
 * @tab: a routing table
 * @addr: address of the network
 * @len: length of the network prefix
 *
 * net_get() looks up the given network in routing table @tab and
 * returns a pointer to its &net entry. If no such entry exists, it's
 * created.
 */
static inline net *net_get(rtable *tab, ip_addr addr, unsigned len)
{ DUMMY; }

/**
 * rte_cow - copy a route for writing
 * @r: a route entry to be copied
 *
 * rte_cow() takes a &rte and prepares it for modification. The exact action
 * taken depends on the flags of the &rte -- if it's a temporary entry, it's
 * just returned unchanged, else a new temporary entry with the same contents
 * is created.
 *
 * The primary use of this function is inside the filter machinery -- when
 * a filter wants to modify &rte contents (to change the preference or to
 * attach another set of attributes), it must ensure that the &rte is not
 * shared with anyone else (and especially that it isn't stored in any routing
 * table).
 *
 * Result: a pointer to the new writable &rte.
 */
static inline rte * rte_cow(rte *r)
{ DUMMY; }

#endif