Statistics
| Branch: | Revision:

iof-bird-daemon / nest / rt-table.c @ 9b9a7143

History | View | Annotate | Download (64.9 KB)

1
/*
2
 *        BIRD -- Routing Tables
3
 *
4
 *        (c) 1998--2000 Martin Mares <mj@ucw.cz>
5
 *
6
 *        Can be freely distributed and used under the terms of the GNU GPL.
7
 */
8

    
9
/**
10
 * DOC: Routing tables
11
 *
12
 * Routing tables are probably the most important structures BIRD uses. They
13
 * hold all the information about known networks, the associated routes and
14
 * their attributes.
15
 *
16
 * There are multiple routing tables (a primary one together with any
17
 * number of secondary ones if requested by the configuration). Each table
18
 * is basically a FIB containing entries describing the individual
19
 * destination networks. For each network (represented by structure &net),
20
 * there is a one-way linked list of route entries (&rte), the first entry
21
 * on the list being the best one (i.e., the one we currently use
22
 * for routing), the order of the other ones is undetermined.
23
 *
24
 * The &rte contains information specific to the route (preference, protocol
25
 * metrics, time of last modification etc.) and a pointer to a &rta structure
26
 * (see the route attribute module for a precise explanation) holding the
27
 * remaining route attributes which are expected to be shared by multiple
28
 * routes in order to conserve memory.
29
 */
30

    
31
#undef LOCAL_DEBUG
32

    
33
#include "nest/bird.h"
34
#include "nest/route.h"
35
#include "nest/protocol.h"
36
#include "nest/cli.h"
37
#include "nest/iface.h"
38
#include "lib/resource.h"
39
#include "lib/event.h"
40
#include "lib/string.h"
41
#include "conf/conf.h"
42
#include "filter/filter.h"
43
#include "lib/string.h"
44
#include "lib/alloca.h"
45

    
46
pool *rt_table_pool;
47

    
48
static slab *rte_slab;
49
static linpool *rte_update_pool;
50

    
51
static list routing_tables;
52

    
53
static void rt_format_via(rte *e, byte *via);
54
static void rt_free_hostcache(rtable *tab);
55
static void rt_notify_hostcache(rtable *tab, net *net);
56
static void rt_update_hostcache(rtable *tab);
57
static void rt_next_hop_update(rtable *tab);
58
static inline int rt_prune_table(rtable *tab);
59
static inline void rt_schedule_gc(rtable *tab);
60
static inline void rt_schedule_prune(rtable *tab);
61

    
62

    
63
static inline struct ea_list *
64
make_tmp_attrs(struct rte *rt, struct linpool *pool)
65
{
66
  struct ea_list *(*mta)(struct rte *rt, struct linpool *pool);
67
  mta = rt->attrs->src->proto->make_tmp_attrs;
68
  return mta ? mta(rt, rte_update_pool) : NULL;
69
}
70

    
71
/* Like fib_route(), but skips empty net entries */
72
static net *
73
net_route(rtable *tab, ip_addr a, int len)
74
{
75
  ip_addr a0;
76
  net *n;
77

    
78
  while (len >= 0)
79
    {
80
      a0 = ipa_and(a, ipa_mkmask(len));
81
      n = fib_find(&tab->fib, &a0, len);
82
      if (n && rte_is_valid(n->routes))
83
        return n;
84
      len--;
85
    }
86
  return NULL;
87
}
88

    
89
static void
90
rte_init(struct fib_node *N)
91
{
92
  net *n = (net *) N;
93

    
94
  N->flags = 0;
95
  n->routes = NULL;
96
}
97

    
98
/**
99
 * rte_find - find a route
100
 * @net: network node
101
 * @src: route source
102
 *
103
 * The rte_find() function returns a route for destination @net
104
 * which is from route source @src.
105
 */
106
rte *
107
rte_find(net *net, struct rte_src *src)
108
{
109
  rte *e = net->routes;
110

    
111
  while (e && e->attrs->src != src)
112
    e = e->next;
113
  return e;
114
}
115

    
116
/**
117
 * rte_get_temp - get a temporary &rte
118
 * @a: attributes to assign to the new route (a &rta; in case it's
119
 * un-cached, rte_update() will create a cached copy automatically)
120
 *
121
 * Create a temporary &rte and bind it with the attributes @a.
122
 * Also set route preference to the default preference set for
123
 * the protocol.
124
 */
125
rte *
126
rte_get_temp(rta *a)
127
{
128
  rte *e = sl_alloc(rte_slab);
129

    
130
  e->attrs = a;
131
  e->flags = 0;
132
  e->pref = a->src->proto->preference;
133
  return e;
134
}
135

    
136
/* Make a private copy of @r; the shared rta gains one extra reference
   via rta_clone(). The copy starts with cleared flags. */
rte *
rte_do_cow(rte *r)
{
  rte *copy = sl_alloc(rte_slab);

  memcpy(copy, r, sizeof(rte));
  copy->flags = 0;
  copy->attrs = rta_clone(r->attrs);

  return copy;
}
146

    
147
/**
148
 * rte_cow_rta - get a private writable copy of &rte with writable &rta
149
 * @r: a route entry to be copied
150
 * @lp: a linpool from which to allocate &rta
151
 *
152
 * rte_cow_rta() takes a &rte and prepares it and associated &rta for
153
 * modification. There are three possibilities: First, both &rte and &rta are
154
 * private copies, in that case they are returned unchanged.  Second, &rte is
155
 * private copy, but &rta is cached, in that case &rta is duplicated using
156
 * rta_do_cow(). Third, both &rte is shared and &rta is cached, in that case
157
 * both structures are duplicated by rte_do_cow() and rta_do_cow().
158
 *
159
 * Note that in the second case, cached &rta loses one reference, while private
160
 * copy created by rta_do_cow() is a shallow copy sharing indirect data (eattrs,
161
 * nexthops, ...) with it. To work properly, original shared &rta should have
162
 * another reference during the life of created private copy.
163
 *
164
 * Result: a pointer to the new writable &rte with writable &rta.
165
 */
166
rte *
167
rte_cow_rta(rte *r, linpool *lp)
168
{
169
  if (!rta_is_cached(r->attrs))
170
    return r;
171

    
172
  rte *e = rte_cow(r);
173
  rta *a = rta_do_cow(r->attrs, lp);
174
  rta_free(e->attrs);
175
  e->attrs = a;
176
  return e;
177
}
178

    
179
static int                                /* Actually better or at least as good as */
180
rte_better(rte *new, rte *old)
181
{
182
  int (*better)(rte *, rte *);
183

    
184
  if (!rte_is_valid(old))
185
    return 1;
186
  if (!rte_is_valid(new))
187
    return 0;
188

    
189
  if (new->pref > old->pref)
190
    return 1;
191
  if (new->pref < old->pref)
192
    return 0;
193
  if (new->attrs->src->proto->proto != old->attrs->src->proto->proto)
194
    {
195
      /*
196
       *  If the user has configured protocol preferences, so that two different protocols
197
       *  have the same preference, try to break the tie by comparing addresses. Not too
198
       *  useful, but keeps the ordering of routes unambiguous.
199
       */
200
      return new->attrs->src->proto->proto > old->attrs->src->proto->proto;
201
    }
202
  if (better = new->attrs->src->proto->rte_better)
203
    return better(new, old);
204
  return 0;
205
}
206

    
207
static int
208
rte_mergable(rte *pri, rte *sec)
209
{
210
  int (*mergable)(rte *, rte *);
211

    
212
  if (!rte_is_valid(pri) || !rte_is_valid(sec))
213
    return 0;
214

    
215
  if (pri->pref != sec->pref)
216
    return 0;
217

    
218
  if (pri->attrs->src->proto->proto != sec->attrs->src->proto->proto)
219
    return 0;
220

    
221
  if (mergable = pri->attrs->src->proto->rte_mergable)
222
    return mergable(pri, sec);
223

    
224
  return 0;
225
}
226

    
227
static void
228
rte_trace(struct proto *p, rte *e, int dir, char *msg)
229
{
230
  byte via[STD_ADDRESS_P_LENGTH+32];
231

    
232
  rt_format_via(e, via);
233
  log(L_TRACE "%s %c %s %I/%d %s", p->name, dir, msg, e->net->n.prefix, e->net->n.pxlen, via);
234
}
235

    
236
static inline void
237
rte_trace_in(uint flag, struct proto *p, rte *e, char *msg)
238
{
239
  if (p->debug & flag)
240
    rte_trace(p, e, '>', msg);
241
}
242

    
243
static inline void
244
rte_trace_out(uint flag, struct proto *p, rte *e, char *msg)
245
{
246
  if (p->debug & flag)
247
    rte_trace(p, e, '<', msg);
248
}
249

    
250
/*
 * Run the export path (import_control hook + out_filter) of hook @ah on @rt0.
 *
 * Returns the (possibly modified) route to export, or NULL when rejected.
 * If the filters produced a private copy, it is stored in *@rt_free so the
 * caller can rte_free() it afterwards. @tmpa, if non-NULL, receives the
 * temporary attribute list; with @silent set, no stats or traces are
 * produced (used when probing whether an old route would have passed).
 */
static rte *
export_filter(struct announce_hook *ah, rte *rt0, rte **rt_free, ea_list **tmpa, int silent)
{
  struct proto *p = ah->proto;
  struct filter *filter = ah->out_filter;
  struct proto_stats *stats = ah->stats;
  ea_list *tmpb = NULL;
  rte *rt;
  int v;

  rt = rt0;
  *rt_free = NULL;

  /* Caller not interested in tmp attrs — use a local slot so the
     hooks below always have somewhere to write. */
  if (!tmpa)
    tmpa = &tmpb;

  *tmpa = make_tmp_attrs(rt, rte_update_pool);

  /* Protocol gets first say: v < 0 rejects, v > 0 force-accepts,
     v == 0 defers to the export filter. */
  v = p->import_control ? p->import_control(p, &rt, tmpa, rte_update_pool) : 0;
  if (v < 0)
    {
      if (silent)
        goto reject;

      stats->exp_updates_rejected++;
      if (v == RIC_REJECT)
        rte_trace_out(D_FILTERS, p, rt, "rejected by protocol");
      goto reject;
    }
  if (v > 0)
    {
      if (!silent)
        rte_trace_out(D_FILTERS, p, rt, "forced accept by protocol");
      goto accept;
    }

  /* FILTER_REJECT drops everything; otherwise run the filter program */
  v = filter && ((filter == FILTER_REJECT) ||
                 (f_run(filter, &rt, tmpa, rte_update_pool, FF_FORCE_TMPATTR) > F_ACCEPT));
  if (v)
    {
      if (silent)
        goto reject;

      stats->exp_updates_filtered++;
      rte_trace_out(D_FILTERS, p, rt, "filtered out");
      goto reject;
    }

 accept:
  /* Hooks may have COW'd the route — hand ownership of the copy to caller */
  if (rt != rt0)
    *rt_free = rt;
  return rt;

 reject:
  /* Discard temporary rte */
  if (rt != rt0)
    rte_free(rt);
  return NULL;
}
309

    
310
/*
 * Deliver one (new, old) route pair for @net to the protocol behind @ah:
 * apply the export limit, update export statistics, trace, and finally
 * invoke the protocol's rt_notify() hook with the appropriate attribute
 * list. @refeed marks a repeated feed, which changes exp_routes accounting.
 */
static void
do_rt_notify(struct announce_hook *ah, net *net, rte *new, rte *old, ea_list *tmpa, int refeed)
{
  struct proto *p = ah->proto;
  struct proto_stats *stats = ah->stats;


  /*
   * First, apply export limit.
   *
   * Export route limits has several problems. Because exp_routes
   * counter is reset before refeed, we don't really know whether
   * limit is breached and whether the update is new or not. Therefore
   * the number of really exported routes may exceed the limit
   * temporarily (routes exported before and new routes in refeed).
   *
   * Minor advantage is that if the limit is decreased and refeed is
   * requested, the number of exported routes really decrease.
   *
   * Second problem is that with export limits, we don't know whether
   * old was really exported (it might be blocked by limit). When a
   * withdraw is exported, we announce it even when the previous
   * update was blocked. This is not a big issue, but the same problem
   * is in updating exp_routes counter. Therefore, to be consistent in
   * increases and decreases of exp_routes, we count exported routes
   * regardless of blocking by limits.
   *
   * Similar problem is in handling updates - when a new route is
   * received and blocking is active, the route would be blocked, but
   * when an update for the route will be received later, the update
   * would be propagated (as old != NULL). Therefore, we have to block
   * also non-new updates (contrary to import blocking).
   */

  struct proto_limit *l = ah->out_limit;
  if (l && new)
    {
      if ((!old || refeed) && (stats->exp_routes >= l->limit))
        proto_notify_limit(ah, l, PLD_OUT, stats->exp_routes);

      if (l->state == PLS_BLOCKED)
        {
          stats->exp_routes++;        /* see note above */
          stats->exp_updates_rejected++;
          rte_trace_out(D_FILTERS, p, new, "rejected [limit]");
          new = NULL;

          /* Blocked update with no old route — nothing left to announce */
          if (!old)
            return;
        }
    }


  if (new)
    stats->exp_updates_accepted++;
  else
    stats->exp_withdraws_accepted++;

  /* Hack: We do not decrease exp_routes during refeed, we instead
     reset exp_routes at the start of refeed. */
  if (new)
    stats->exp_routes++;
  if (old && !refeed)
    stats->exp_routes--;

  if (p->debug & D_ROUTES)
    {
      if (new && old)
        rte_trace_out(D_ROUTES, p, new, "replaced");
      else if (new)
        rte_trace_out(D_ROUTES, p, new, "added");
      else if (old)
        rte_trace_out(D_ROUTES, p, old, "removed");
    }
  if (!new)
    p->rt_notify(p, ah->table, net, NULL, old, NULL);
  else if (tmpa)
    {
      /* Temporarily splice new's eattrs onto the end of tmpa so the
         protocol sees one combined list; undo the link afterwards. */
      ea_list *t = tmpa;
      while (t->next)
        t = t->next;
      t->next = new->attrs->eattrs;
      p->rt_notify(p, ah->table, net, new, old, tmpa);
      t->next = NULL;
    }
  else
    p->rt_notify(p, ah->table, net, new, old, new->attrs->eattrs);
}
398

    
399
/*
 * RA_ANY-style announcement: run both the new and the old route through
 * the export filters of @ah and forward the result to do_rt_notify().
 * @refeed suppresses filtering of the old route (see comment below).
 */
static void
rt_notify_basic(struct announce_hook *ah, net *net, rte *new0, rte *old0, int refeed)
{
  struct proto *p = ah->proto;
  struct proto_stats *stats = ah->stats;

  rte *new = new0;
  rte *old = old0;
  rte *new_free = NULL;
  rte *old_free = NULL;
  ea_list *tmpa = NULL;

  if (new)
    stats->exp_updates_received++;
  else
    stats->exp_withdraws_received++;

  /*
   * This is a tricky part - we don't know whether route 'old' was
   * exported to protocol 'p' or was filtered by the export filter.
   * We try to run the export filter to know this to have a correct
   * value in 'old' argument of rte_update (and proper filter value)
   *
   * FIXME - this is broken because 'configure soft' may change
   * filters but keep routes. Refeed is expected to be called after
   * change of the filters and with old == new, therefore we do not
   * even try to run the filter on an old route, This may lead to
   * 'spurious withdraws' but ensure that there are no 'missing
   * withdraws'.
   *
   * This is not completely safe as there is a window between
   * reconfiguration and the end of refeed - if a newly filtered
   * route disappears during this period, proper withdraw is not
   * sent (because old would be also filtered) and the route is
   * not refeeded (because it disappeared before that).
   */

  if (new)
    new = export_filter(ah, new, &new_free, &tmpa, 0);

  /* Old route is filtered silently — no stats or traces for the probe */
  if (old && !refeed)
    old = export_filter(ah, old, &old_free, NULL, 1);

  if (!new && !old)
  {
    /*
     * As mentioned above, 'old' value may be incorrect in some race conditions.
     * We generally ignore it with the exception of withdraw to pipe protocol.
     * In that case we rather propagate unfiltered withdraws regardless of
     * export filters to ensure that when a protocol is flushed, its routes are
     * removed from all tables. Possible spurious unfiltered withdraws are not
     * problem here as they are ignored if there is no corresponding route at
     * the other end of the pipe. We directly call rt_notify() hook instead of
     * do_rt_notify() to avoid logging and stat counters.
     */

#ifdef CONFIG_PIPE
    if ((p->proto == &proto_pipe) && !new0 && (p != old0->sender->proto))
      p->rt_notify(p, ah->table, net, NULL, old0, NULL);
#endif

    return;
  }

  do_rt_notify(ah, net, new, old, tmpa, refeed);

  /* Discard temporary rte's */
  if (new_free)
    rte_free(new_free);
  if (old_free)
    rte_free(old_free);
}
471

    
472
/*
 * RA_ACCEPTED-style announcement: the protocol wants only the best route
 * that passes its export filters. Find the new first-accepted route and
 * the previously first-accepted route, and notify only if they differ.
 * @feed: 0 = normal update, 1 = initial feed, 2 = refeed.
 */
static void
rt_notify_accepted(struct announce_hook *ah, net *net, rte *new_changed, rte *old_changed, rte *before_old, int feed)
{
  // struct proto *p = ah->proto;
  struct proto_stats *stats = ah->stats;

  rte *r;
  rte *new_best = NULL;
  rte *old_best = NULL;
  rte *new_free = NULL;
  rte *old_free = NULL;
  ea_list *tmpa = NULL;

  /* Used to track whether we met old_changed position. If before_old is NULL
     old_changed was the first and we met it implicitly before current best route. */
  int old_meet = old_changed && !before_old;

  /* Note that before_old is either NULL or valid (not rejected) route.
     If old_changed is valid, before_old have to be too. If old changed route
     was not valid, caller must use NULL for both old_changed and before_old. */

  if (new_changed)
    stats->exp_updates_received++;
  else
    stats->exp_withdraws_received++;

  /* First, find the new_best route - first accepted by filters */
  for (r=net->routes; rte_is_valid(r); r=r->next)
    {
      if (new_best = export_filter(ah, r, &new_free, &tmpa, 0))
        break;

      /* Note if we walked around the position of old_changed route */
      if (r == before_old)
        old_meet = 1;
    }

  /* 
   * Second, handle the feed case. That means we do not care for
   * old_best. It is NULL for feed, and the new_best for refeed. 
   * For refeed, there is a hack similar to one in rt_notify_basic()
   * to ensure withdraws in case of changed filters
   */
  if (feed)
    {
      if (feed == 2)        /* refeed */
        old_best = new_best ? new_best :
          (rte_is_valid(net->routes) ? net->routes : NULL);
      else
        old_best = NULL;

      if (!new_best && !old_best)
        return;

      goto found;
    }

  /*
   * Now, we find the old_best route. Generally, it is the same as the
   * new_best, unless new_best is the same as new_changed or
   * old_changed is accepted before new_best.
   *
   * There are four cases:
   *
   * - We would find and accept old_changed before new_best, therefore
   *   old_changed is old_best. In remaining cases we suppose this
   *   is not true.
   *
   * - We found no new_best, therefore there is also no old_best and
   *   we ignore this withdraw.
   *
   * - We found new_best different than new_changed, therefore
   *   old_best is the same as new_best and we ignore this update.
   *
   * - We found new_best the same as new_changed, therefore it cannot
   *   be old_best and we have to continue search for old_best.
   */

  /* First case */
  if (old_meet)
    if (old_best = export_filter(ah, old_changed, &old_free, NULL, 1))
      goto found;

  /* Second case */
  if (!new_best)
    return;

  /* Third case, we use r instead of new_best, because export_filter() could change it */
  if (r != new_changed)
    {
      if (new_free)
        rte_free(new_free);
      return;
    }

  /* Fourth case */
  for (r=r->next; rte_is_valid(r); r=r->next)
    {
      if (old_best = export_filter(ah, r, &old_free, NULL, 1))
        goto found;

      if (r == before_old)
        if (old_best = export_filter(ah, old_changed, &old_free, NULL, 1))
          goto found;
    }

  /* Implicitly, old_best is NULL and new_best is non-NULL */

 found:
  do_rt_notify(ah, net, new_best, old_best, tmpa, (feed == 2));

  /* Discard temporary rte's */
  if (new_free)
    rte_free(new_free);
  if (old_free)
    rte_free(old_free);
}
589

    
590

    
591
static struct mpnh *
592
mpnh_merge_rta(struct mpnh *nhs, rta *a, int max)
593
{
594
  struct mpnh nh = { .gw = a->gw, .iface = a->iface };
595
  struct mpnh *nh2 = (a->dest == RTD_MULTIPATH) ? a->nexthops : &nh;
596
  return mpnh_merge(nhs, nh2, 1, 0, max, rte_update_pool);
597
}
598

    
599
/*
 * Build the merged (multipath) export route for @net through hook @ah:
 * run the best route through the export filter, then fold in the next
 * hops of every mergable sibling that also passes the filter. Returns
 * the route to export (possibly a private copy stored in *@rt_free),
 * or NULL when the best route is filtered out.
 */
rte *
rt_export_merged(struct announce_hook *ah, net *net, rte **rt_free, ea_list **tmpa, int silent)
{
  // struct proto *p = ah->proto;
  struct mpnh *nhs = NULL;
  rte *best0, *best, *rt0, *rt, *tmp;

  best0 = net->routes;
  *rt_free = NULL;

  if (!rte_is_valid(best0))
    return NULL;

  best = export_filter(ah, best0, rt_free, tmpa, silent);

  /* Rejected or unreachable best route — no next hops to merge */
  if (!best || !rte_is_reachable(best))
    return best;

  for (rt0 = best0->next; rt0; rt0 = rt0->next)
  {
    if (!rte_mergable(best0, rt0))
      continue;

    /* Silent filter probe — siblings get no stats/traces */
    rt = export_filter(ah, rt0, &tmp, NULL, 1);

    if (!rt)
      continue;

    if (rte_is_reachable(rt))
      nhs = mpnh_merge_rta(nhs, rt->attrs, ah->proto->merge_limit);

    if (tmp)
      rte_free(tmp);
  }

  if (nhs)
  {
    /* Merge the best route's own next hop(s) last */
    nhs = mpnh_merge_rta(nhs, best->attrs, ah->proto->merge_limit);

    /* More than one hop survived — rewrite best as a multipath route */
    if (nhs->next)
    {
      best = rte_cow_rta(best, rte_update_pool);
      best->attrs->dest = RTD_MULTIPATH;
      best->attrs->nexthops = nhs;
    }
  }

  if (best != best0)
    *rt_free = best;

  return best;
}
651

    
652

    
653
/*
 * RA_MERGED-style announcement: notify the protocol behind @ah with a
 * merged multipath route built by rt_export_merged(). Skips the work
 * when the change cannot affect the merged result.
 */
static void
rt_notify_merged(struct announce_hook *ah, net *net, rte *new_changed, rte *old_changed,
                 rte *new_best, rte*old_best, int refeed)
{
  // struct proto *p = ah->proto;

  rte *new_best_free = NULL;
  rte *old_best_free = NULL;
  rte *new_changed_free = NULL;
  rte *old_changed_free = NULL;
  ea_list *tmpa = NULL;

  /* We assume that all rte arguments are either NULL or rte_is_valid() */

  /* This check should be done by the caller */
  if (!new_best && !old_best)
    return;

  /* Check whether the change is relevant to the merged route */
  if ((new_best == old_best) && !refeed)
  {
    new_changed = rte_mergable(new_best, new_changed) ?
      export_filter(ah, new_changed, &new_changed_free, NULL, 1) : NULL;

    old_changed = rte_mergable(old_best, old_changed) ?
      export_filter(ah, old_changed, &old_changed_free, NULL, 1) : NULL;

    /* Neither changed route participates in the merge — nothing to do */
    if (!new_changed && !old_changed)
      return;
  }

  if (new_best)
    ah->stats->exp_updates_received++;
  else
    ah->stats->exp_withdraws_received++;

  /* Prepare new merged route */
  if (new_best)
    new_best = rt_export_merged(ah, net, &new_best_free, &tmpa, 0);

  /* Prepare old merged route (without proper merged next hops) */
  /* There are some issues with running filter on old route - see rt_notify_basic() */
  if (old_best && !refeed)
    old_best = export_filter(ah, old_best, &old_best_free, NULL, 1);

  if (new_best || old_best)
    do_rt_notify(ah, net, new_best, old_best, tmpa, refeed);

  /* Discard temporary rte's */
  if (new_best_free)
    rte_free(new_best_free);
  if (old_best_free)
    rte_free(old_best_free);
  if (new_changed_free)
    rte_free(new_changed_free);
  if (old_changed_free)
    rte_free(old_changed_free);
}
711

    
712

    
713
/**
 * rte_announce - announce a routing table change
 * @tab: table the route has been added to
 * @type: type of route announcement (RA_OPTIMAL or RA_ANY)
 * @net: network in question
 * @new: the new route to be announced
 * @old: the previous route for the same network
 *
 * This function gets a routing table update and announces it
 * to all protocols that accept given type of route announcement
 * and are connected to the same table by their announcement hooks.
 *
 * Route announcement of type RA_OPTIMAL is generated when optimal
 * route (in routing table @tab) changes. In that case @old stores the
 * old optimal route.
 *
 * Route announcement of type RA_ANY is generated when any route (in
 * routing table @tab) changes. In that case @old stores the old route
 * from the same protocol.
 *
 * For each appropriate protocol, we first call its import_control()
 * hook which performs basic checks on the route (each protocol has a
 * right to veto or force accept of the route before any filter is
 * asked) and adds default values of attributes specific to the new
 * protocol (metrics, tags etc.).  Then it consults the protocol's
 * export filter and if it accepts the route, the rt_notify() hook of
 * the protocol gets called.
 */
static void
rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old,
             rte *new_best, rte *old_best, rte *before_old)
{
  /* Normalize all route arguments: invalid routes are treated as absent */
  if (!rte_is_valid(new))
    new = NULL;

  if (!rte_is_valid(old))
    old = before_old = NULL;

  if (!rte_is_valid(new_best))
    new_best = NULL;

  if (!rte_is_valid(old_best))
    old_best = NULL;

  if (!old && !new)
    return;

  if (type == RA_OPTIMAL)
    {
      /* Keep per-protocol preferred-route counters in sync */
      if (new)
        new->attrs->src->proto->stats.pref_routes++;
      if (old)
        old->attrs->src->proto->stats.pref_routes--;

      if (tab->hostcache)
        rt_notify_hostcache(tab, net);
    }

  struct announce_hook *a;
  WALK_LIST(a, tab->hooks)
    {
      ASSERT(a->proto->export_state != ES_DOWN);
      /* Dispatch to the notification style the protocol asked for */
      if (a->proto->accept_ra_types == type)
        if (type == RA_ACCEPTED)
          rt_notify_accepted(a, net, new, old, before_old, 0);
        else if (type == RA_MERGED)
          rt_notify_merged(a, net, new, old, new_best, old_best, 0);
        else
          rt_notify_basic(a, net, new, old, 0);
    }
}
784

    
785
static inline int
786
rte_validate(rte *e)
787
{
788
  int c;
789
  net *n = e->net;
790

    
791
  if ((n->n.pxlen > BITS_PER_IP_ADDRESS) || !ip_is_prefix(n->n.prefix,n->n.pxlen))
792
    {
793
      log(L_WARN "Ignoring bogus prefix %I/%d received via %s",
794
          n->n.prefix, n->n.pxlen, e->sender->proto->name);
795
      return 0;
796
    }
797

    
798
  c = ipa_classify_net(n->n.prefix);
799
  if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
800
    {
801
      log(L_WARN "Ignoring bogus route %I/%d received via %s",
802
          n->n.prefix, n->n.pxlen, e->sender->proto->name);
803
      return 0;
804
    }
805

    
806
  return 1;
807
}
808

    
809
/**
 * rte_free - delete a &rte
 * @e: &rte to be deleted
 *
 * rte_free() deletes the given &rte from the routing table it's linked to.
 */
void
rte_free(rte *e)
{
  /* Drop the attribute reference only when the rta is cached;
     a private rta is owned elsewhere (e.g. a linpool). */
  if (rta_is_cached(e->attrs))
    rta_free(e->attrs);
  sl_free(rte_slab, e);
}
822

    
823
/* Fast-path variant of rte_free() for routes known to carry a cached rta —
   skips the rta_is_cached() test. */
static inline void
rte_free_quick(rte *e)
{
  rta_free(e->attrs);
  sl_free(rte_slab, e);
}
829

    
830
static int
831
rte_same(rte *x, rte *y)
832
{
833
  return
834
    x->attrs == y->attrs &&
835
    x->flags == y->flags &&
836
    x->pflags == y->pflags &&
837
    x->pref == y->pref &&
838
    (!x->attrs->src->proto->rte_same || x->attrs->src->proto->rte_same(x, y));
839
}
840

    
841
/* A route counts as "ok" when it exists and is not kept only as filtered */
static inline int
rte_is_ok(rte *e)
{
  return e && !rte_is_filtered(e);
}
842

    
843
static void
844
rte_recalculate(struct announce_hook *ah, net *net, rte *new, struct rte_src *src)
845
{
846
  struct proto *p = ah->proto;
847
  struct rtable *table = ah->table;
848
  struct proto_stats *stats = ah->stats;
849
  static struct tbf rl_pipe = TBF_DEFAULT_LOG_LIMITS;
850
  rte *before_old = NULL;
851
  rte *old_best = net->routes;
852
  rte *old = NULL;
853
  rte **k;
854

    
855
  k = &net->routes;                        /* Find and remove original route from the same protocol */
856
  while (old = *k)
857
    {
858
      if (old->attrs->src == src)
859
        {
860
          /* If there is the same route in the routing table but from
861
           * a different sender, then there are two paths from the
862
           * source protocol to this routing table through transparent
863
           * pipes, which is not allowed.
864
           *
865
           * We log that and ignore the route. If it is withdraw, we
866
           * ignore it completely (there might be 'spurious withdraws',
867
           * see FIXME in do_rte_announce())
868
           */
869
          if (old->sender->proto != p)
870
            {
871
              if (new)
872
                {
873
                  log_rl(&rl_pipe, L_ERR "Pipe collision detected when sending %I/%d to table %s",
874
                      net->n.prefix, net->n.pxlen, table->name);
875
                  rte_free_quick(new);
876
                }
877
              return;
878
            }
879

    
880
          if (new && rte_same(old, new))
881
            {
882
              /* No changes, ignore the new route */
883

    
884
              if (!rte_is_filtered(new))
885
                {
886
                  stats->imp_updates_ignored++;
887
                  rte_trace_in(D_ROUTES, p, new, "ignored");
888
                }
889

    
890
              rte_free_quick(new);
891
#ifdef CONFIG_RIP
892
              /* lastmod is used internally by RIP as the last time
893
                 when the route was received. */
894
              if (src->proto->proto == &proto_rip)
895
                old->lastmod = now;
896
#endif
897
              return;
898
            }
899
          *k = old->next;
900
          break;
901
        }
902
      k = &old->next;
903
      before_old = old;
904
    }
905

    
906
  if (!old)
907
    before_old = NULL;
908

    
909
  if (!old && !new)
910
    {
911
      stats->imp_withdraws_ignored++;
912
      return;
913
    }
914

    
915
  int new_ok = rte_is_ok(new);
916
  int old_ok = rte_is_ok(old);
917

    
918
  struct proto_limit *l = ah->rx_limit;
919
  if (l && !old && new)
920
    {
921
      u32 all_routes = stats->imp_routes + stats->filt_routes;
922

    
923
      if (all_routes >= l->limit)
924
        proto_notify_limit(ah, l, PLD_RX, all_routes);
925

    
926
      if (l->state == PLS_BLOCKED)
927
        {
928
          /* In receive limit the situation is simple, old is NULL so
929
             we just free new and exit like nothing happened */
930

    
931
          stats->imp_updates_ignored++;
932
          rte_trace_in(D_FILTERS, p, new, "ignored [limit]");
933
          rte_free_quick(new);
934
          return;
935
        }
936
    }
937

    
938
  l = ah->in_limit;
939
  if (l && !old_ok && new_ok)
940
    {
941
      if (stats->imp_routes >= l->limit)
942
        proto_notify_limit(ah, l, PLD_IN, stats->imp_routes);
943

    
944
      if (l->state == PLS_BLOCKED)
945
        {
946
          /* In import limit the situation is more complicated. We
947
             shouldn't just drop the route, we should handle it like
948
             it was filtered. We also have to continue the route
949
             processing if old or new is non-NULL, but we should exit
950
             if both are NULL as this case is probably assumed to be
951
             already handled. */
952

    
953
          stats->imp_updates_ignored++;
954
          rte_trace_in(D_FILTERS, p, new, "ignored [limit]");
955

    
956
          if (ah->in_keep_filtered)
957
            new->flags |= REF_FILTERED;
958
          else
959
            { rte_free_quick(new); new = NULL; }
960

    
961
          /* Note that old && !new could be possible when
962
             ah->in_keep_filtered changed in the recent past. */
963

    
964
          if (!old && !new)
965
            return;
966

    
967
          new_ok = 0;
968
          goto skip_stats1;
969
        }
970
    }
971

    
972
  if (new_ok)
973
    stats->imp_updates_accepted++;
974
  else if (old_ok)
975
    stats->imp_withdraws_accepted++;
976
  else
977
    stats->imp_withdraws_ignored++;
978

    
979
 skip_stats1:
980

    
981
  if (new)
982
    rte_is_filtered(new) ? stats->filt_routes++ : stats->imp_routes++;
983
  if (old)
984
    rte_is_filtered(old) ? stats->filt_routes-- : stats->imp_routes--;
985

    
986
  if (table->config->sorted)
987
    {
988
      /* If routes are sorted, just insert new route to appropriate position */
989
      if (new)
990
        {
991
          if (before_old && !rte_better(new, before_old))
992
            k = &before_old->next;
993
          else
994
            k = &net->routes;
995

    
996
          for (; *k; k=&(*k)->next)
997
            if (rte_better(new, *k))
998
              break;
999

    
1000
          new->next = *k;
1001
          *k = new;
1002
        }
1003
    }
1004
  else
1005
    {
1006
      /* If routes are not sorted, find the best route and move it on
1007
         the first position. There are several optimized cases. */
1008

    
1009
      if (src->proto->rte_recalculate && src->proto->rte_recalculate(table, net, new, old, old_best))
1010
        goto do_recalculate;
1011

    
1012
      if (new && rte_better(new, old_best))
1013
        {
1014
          /* The first case - the new route is clearly optimal,
1015
             we link it at the first position */
1016

    
1017
          new->next = net->routes;
1018
          net->routes = new;
1019
        }
1020
      else if (old == old_best)
1021
        {
1022
          /* The second case - the old best route disappeared, we add the
1023
             new route (if we have any) to the list (we don't care about
1024
             position) and then we elect the new optimal route and relink
1025
             that route at the first position and announce it. New optimal
1026
             route might be NULL if there is no more routes */
1027

    
1028
        do_recalculate:
1029
          /* Add the new route to the list */
1030
          if (new)
1031
            {
1032
              new->next = net->routes;
1033
              net->routes = new;
1034
            }
1035

    
1036
          /* Find a new optimal route (if there is any) */
1037
          if (net->routes)
1038
            {
1039
              rte **bp = &net->routes;
1040
              for (k=&(*bp)->next; *k; k=&(*k)->next)
1041
                if (rte_better(*k, *bp))
1042
                  bp = k;
1043

    
1044
              /* And relink it */
1045
              rte *best = *bp;
1046
              *bp = best->next;
1047
              best->next = net->routes;
1048
              net->routes = best;
1049
            }
1050
        }
1051
      else if (new)
1052
        {
1053
          /* The third case - the new route is not better than the old
1054
             best route (therefore old_best != NULL) and the old best
1055
             route was not removed (therefore old_best == net->routes).
1056
             We just link the new route after the old best route. */
1057

    
1058
          ASSERT(net->routes != NULL);
1059
          new->next = net->routes->next;
1060
          net->routes->next = new;
1061
        }
1062
      /* The fourth (empty) case - suboptimal route was removed, nothing to do */
1063
    }
1064

    
1065
  if (new)
1066
    new->lastmod = now;
1067

    
1068
  /* Log the route change */
1069
  if (p->debug & D_ROUTES)
1070
    {
1071
      if (new_ok)
1072
        rte_trace(p, new, '>', new == net->routes ? "added [best]" : "added");
1073
      else if (old_ok)
1074
        {
1075
          if (old != old_best)
1076
            rte_trace(p, old, '>', "removed");
1077
          else if (rte_is_ok(net->routes))
1078
            rte_trace(p, old, '>', "removed [replaced]");
1079
          else
1080
            rte_trace(p, old, '>', "removed [sole]");
1081
        }
1082
    }
1083

    
1084
  /* Propagate the route change */
1085
  rte_announce(table, RA_ANY, net, new, old, NULL, NULL, NULL);
1086
  if (net->routes != old_best)
1087
    rte_announce(table, RA_OPTIMAL, net, net->routes, old_best, NULL, NULL, NULL);
1088
  if (table->config->sorted)
1089
    rte_announce(table, RA_ACCEPTED, net, new, old, NULL, NULL, before_old);
1090
  rte_announce(table, RA_MERGED, net, new, old, net->routes, old_best, NULL);
1091

    
1092
  if (!net->routes &&
1093
      (table->gc_counter++ >= table->config->gc_max_ops) &&
1094
      (table->gc_time + table->config->gc_min_time <= now))
1095
    rt_schedule_gc(table);
1096

    
1097
  if (old_ok && p->rte_remove)
1098
    p->rte_remove(net, old);
1099
  if (new_ok && p->rte_insert)
1100
    p->rte_insert(net, new);
1101

    
1102
  if (old)
1103
    rte_free_quick(old);
1104
}
1105

    
1106
/* Depth of nested route-update sections; rte_update_pool is flushed
   only when the outermost section is left (see rte_update_unlock()) */
static int rte_update_nest_cnt;                /* Nesting counter to allow recursive updates */
1107

    
1108
/*
 * Enter a route-update section. Sections may nest (recursive updates);
 * each call must be balanced by a call to rte_update_unlock().
 */
static inline void
rte_update_lock(void)
{
  rte_update_nest_cnt++;
}
1113

    
1114
/*
 * Leave a route-update section. When the outermost section is left,
 * all temporary allocations made from rte_update_pool are freed.
 */
static inline void
rte_update_unlock(void)
{
  if (!--rte_update_nest_cnt)
    lp_flush(rte_update_pool);
}
1120

    
1121
static inline void
1122
rte_hide_dummy_routes(net *net, rte **dummy)
1123
{
1124
  if (net->routes && net->routes->attrs->source == RTS_DUMMY)
1125
  {
1126
    *dummy = net->routes;
1127
    net->routes = (*dummy)->next;
1128
  }
1129
}
1130

    
1131
static inline void
1132
rte_unhide_dummy_routes(net *net, rte **dummy)
1133
{
1134
  if (*dummy)
1135
  {
1136
    (*dummy)->next = net->routes;
1137
    net->routes = *dummy;
1138
  }
1139
}
1140

    
1141
/**
1142
 * rte_update - enter a new update to a routing table
1143
 * @table: table to be updated
1144
 * @ah: pointer to table announce hook
1145
 * @net: network node
1146
 * @p: protocol submitting the update
1147
 * @src: protocol originating the update
1148
 * @new: a &rte representing the new route or %NULL for route removal.
1149
 *
1150
 * This function is called by the routing protocols whenever they discover
1151
 * a new route or wish to update/remove an existing route. The right announcement
1152
 * sequence is to build route attributes first (either un-cached with @aflags set
1153
 * to zero or a cached one using rta_lookup(); in this case please note that
1154
 * you need to increase the use count of the attributes yourself by calling
1155
 * rta_clone()), call rte_get_temp() to obtain a temporary &rte, fill in all
1156
 * the appropriate data and finally submit the new &rte by calling rte_update().
1157
 *
1158
 * @src specifies the protocol that originally created the route and the meaning
1159
 * of protocol-dependent data of @new. If @new is not %NULL, @src has to be the
1160
 * same value as @new->attrs->proto. @p specifies the protocol that called
1161
 * rte_update(). In most cases it is the same protocol as @src. rte_update()
1162
 * stores @p in @new->sender;
1163
 *
1164
 * When rte_update() gets any route, it automatically validates it (checks,
1165
 * whether the network and next hop address are valid IP addresses and also
1166
 * whether a normal routing protocol doesn't try to smuggle a host or link
1167
 * scope route to the table), converts all protocol dependent attributes stored
1168
 * in the &rte to temporary extended attributes, consults import filters of the
1169
 * protocol to see if the route should be accepted and/or its attributes modified,
1170
 * stores the temporary attributes back to the &rte.
1171
 *
1172
 * Now, having a "public" version of the route, we
1173
 * automatically find any old route defined by the protocol @src
1174
 * for network @n, replace it by the new one (or removing it if @new is %NULL),
1175
 * recalculate the optimal route for this destination and finally broadcast
1176
 * the change (if any) to all routing protocols by calling rte_announce().
1177
 *
1178
 * All memory used for attribute lists and other temporary allocations is taken
1179
 * from a special linear pool @rte_update_pool and freed when rte_update()
1180
 * finishes.
1181
 */
1182

    
1183
/*
 * Process one route update/withdraw submitted through announce hook @ah:
 * validate and filter @new (NULL means withdraw of @src's route for @net),
 * then hand the result over to rte_recalculate(). See the rte_update()
 * doc comment above for the full contract.
 */
void
rte_update2(struct announce_hook *ah, net *net, rte *new, struct rte_src *src)
{
  struct proto *p = ah->proto;
  struct proto_stats *stats = ah->stats;
  struct filter *filter = ah->in_filter;
  ea_list *tmpa = NULL;
  rte *dummy = NULL;

  rte_update_lock();
  if (new)
    {
      new->sender = ah;

      stats->imp_updates_received++;
      if (!rte_validate(new))
        {
          rte_trace_in(D_FILTERS, p, new, "invalid");
          stats->imp_updates_invalid++;
          goto drop;
        }

      if (filter == FILTER_REJECT)
        {
          stats->imp_updates_filtered++;
          rte_trace_in(D_FILTERS, p, new, "filtered out");

          /* A rejected route may still be kept in the table (marked
             REF_FILTERED) when 'import keep filtered' is enabled */
          if (! ah->in_keep_filtered)
            goto drop;

          /* new is a private copy, we can modify it */
          new->flags |= REF_FILTERED;
        }
      else
        {
          tmpa = make_tmp_attrs(new, rte_update_pool);
          if (filter && (filter != FILTER_REJECT))
            {
              ea_list *old_tmpa = tmpa;
              int fr = f_run(filter, &new, &tmpa, rte_update_pool, 0);
              if (fr > F_ACCEPT)
                {
                  stats->imp_updates_filtered++;
                  rte_trace_in(D_FILTERS, p, new, "filtered out");

                  if (! ah->in_keep_filtered)
                    goto drop;

                  new->flags |= REF_FILTERED;
                }
              /* The filter may have modified the temporary attributes;
                 let the source protocol store them back into the rte */
              if (tmpa != old_tmpa && src->proto->store_tmp_attrs)
                src->proto->store_tmp_attrs(new, tmpa);
            }
        }
      if (!rta_is_cached(new->attrs)) /* Need to copy attributes */
        new->attrs = rta_lookup(new->attrs);
      new->flags |= REF_COW;
    }
  else
    {
      stats->imp_withdraws_received++;

      /* Withdraw for an unknown network or source is a no-op */
      if (!net || !src)
        {
          stats->imp_withdraws_ignored++;
          rte_update_unlock();
          return;
        }
    }

 recalc:
  rte_hide_dummy_routes(net, &dummy);
  rte_recalculate(ah, net, new, src);
  rte_unhide_dummy_routes(net, &dummy);
  rte_update_unlock();
  return;

 drop:
  rte_free(new);
  new = NULL;
  goto recalc;                        /* A dropped update still goes through recalculation as a withdraw */
}
1265

    
1266
/* Independent call to rte_announce(), used from next hop
   recalculation, outside of rte_update(). new must be non-NULL.
   Takes the update lock so temporary allocations get flushed. */
static inline void 
rte_announce_i(rtable *tab, unsigned type, net *net, rte *new, rte *old,
               rte *new_best, rte *old_best)
{
  rte_update_lock();
  rte_announce(tab, type, net, new, old, new_best, old_best, NULL);
  rte_update_unlock();
}
1276

    
1277
/*
 * Remove @old from its table without filtering. Parameter @t is kept
 * for interface compatibility but is not referenced by the body (the
 * sender hook and the net are taken from @old itself).
 */
void
rte_discard(rtable *t, rte *old)        /* Non-filtered route deletion, used during garbage collection */
{
  rte_update_lock();
  rte_recalculate(old->sender, old->net, NULL, old->attrs->src);
  rte_update_unlock();
}
1284

    
1285
/* Check rtable for best route to given net whether it would be exported to p */
int
rt_examine(rtable *t, ip_addr prefix, int pxlen, struct proto *p, struct filter *filter)
{
  net *n = net_find(t, prefix, pxlen);
  rte *rt = n ? n->routes : NULL;

  /* No (valid) best route -> nothing to export */
  if (!rte_is_valid(rt))
    return 0;

  rte_update_lock();

  /* Rest is stripped down export_filter() */
  ea_list *tmpa = make_tmp_attrs(rt, rte_update_pool);
  int v = p->import_control ? p->import_control(p, &rt, &tmpa, rte_update_pool) : 0;
  if (v == RIC_PROCESS)
    v = (f_run(filter, &rt, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) <= F_ACCEPT);

   /* Discard temporary rte */
  if (rt != n->routes)
    rte_free(rt);

  rte_update_unlock();

  return v > 0;
}
1311

    
1312

    
1313
/**
1314
 * rt_refresh_begin - start a refresh cycle
1315
 * @t: related routing table
1316
 * @ah: related announce hook 
1317
 *
1318
 * This function starts a refresh cycle for given routing table and announce
1319
 * hook. The refresh cycle is a sequence where the protocol sends all its valid
1320
 * routes to the routing table (by rte_update()). After that, all protocol
1321
 * routes (more precisely routes with @ah as @sender) not sent during the
1322
 * refresh cycle but still in the table from the past are pruned. This is
1323
 * implemented by marking all related routes as stale by REF_STALE flag in
1324
 * rt_refresh_begin(), then marking all related stale routes with REF_DISCARD
1325
 * flag in rt_refresh_end() and then removing such routes in the prune loop.
1326
 */
1327
void
rt_refresh_begin(rtable *t, struct announce_hook *ah)
{
  net *n;
  rte *e;

  /* Mark all routes sent by @ah as stale; rt_refresh_end() will later
     discard those that were not re-sent during the refresh cycle */
  FIB_WALK(&t->fib, fn)
    {
      n = (net *) fn;
      for (e = n->routes; e; e = e->next)
        if (e->sender == ah)
          e->flags |= REF_STALE;
    }
  FIB_WALK_END;
}
1342

    
1343
/**
1344
 * rt_refresh_end - end a refresh cycle
1345
 * @t: related routing table
1346
 * @ah: related announce hook 
1347
 *
1348
 * This function ends a refresh cycle for given routing table and announce
1349
 * hook. See rt_refresh_begin() for description of refresh cycles.
1350
 */
1351
void
rt_refresh_end(rtable *t, struct announce_hook *ah)
{
  int prune = 0;
  net *n;
  rte *e;

  /* Routes still marked REF_STALE were not re-sent during the refresh
     cycle; mark them for discard and schedule pruning if any found */
  FIB_WALK(&t->fib, fn)
    {
      n = (net *) fn;
      for (e = n->routes; e; e = e->next)
        if ((e->sender == ah) && (e->flags & REF_STALE))
          {
            e->flags |= REF_DISCARD;
            prune = 1;
          }
    }
  FIB_WALK_END;

  if (prune)
    rt_schedule_prune(t);
}
1373

    
1374

    
1375
/**
1376
 * rte_dump - dump a route
1377
 * @e: &rte to be dumped
1378
 *
1379
 * This functions dumps contents of a &rte to debug output.
1380
 */
1381
void
rte_dump(rte *e)
{
  net *n = e->net;
  debug("%-1I/%2d ", n->n.prefix, n->n.pxlen);
  debug("KF=%02x PF=%02x pref=%d lm=%d ", n->n.flags, e->pflags, e->pref, now-e->lastmod);
  rta_dump(e->attrs);
  /* Let the originating protocol dump its protocol-specific attributes */
  if (e->attrs->src->proto->proto->dump_attrs)
    e->attrs->src->proto->proto->dump_attrs(e);
  debug("\n");
}
1392

    
1393
/**
1394
 * rt_dump - dump a routing table
1395
 * @t: routing table to be dumped
1396
 *
1397
 * This function dumps contents of a given routing table to debug output.
1398
 */
1399
void
rt_dump(rtable *t)
{
  rte *e;
  net *n;
  struct announce_hook *a;

  debug("Dump of routing table <%s>\n", t->name);
#ifdef DEBUGGING
  fib_check(&t->fib);
#endif
  /* Dump every route of every network, then the attached announce hooks */
  FIB_WALK(&t->fib, fn)
    {
      n = (net *) fn;
      for(e=n->routes; e; e=e->next)
        rte_dump(e);
    }
  FIB_WALK_END;
  WALK_LIST(a, t->hooks)
    debug("\tAnnounces routes to protocol %s\n", a->proto->name);
  debug("\n");
}
1421

    
1422
/**
1423
 * rt_dump_all - dump all routing tables
1424
 *
1425
 * This function dumps contents of all routing tables to debug output.
1426
 */
1427
void
rt_dump_all(void)
{
  rtable *t;

  WALK_LIST(t, routing_tables)
    rt_dump(t);
}
1435

    
1436
/* Mark @tab for pruning and make sure its table event gets run */
static inline void
rt_schedule_prune(rtable *tab)
{
  rt_mark_for_prune(tab);
  ev_schedule(tab->rt_event);
}
1442

    
1443
static inline void
1444
rt_schedule_gc(rtable *tab)
1445
{
1446
  if (tab->gc_scheduled)
1447
    return;
1448

    
1449
  tab->gc_scheduled = 1;
1450
  ev_schedule(tab->rt_event);
1451
}
1452

    
1453
static inline void
1454
rt_schedule_hcu(rtable *tab)
1455
{
1456
  if (tab->hcu_scheduled)
1457
    return;
1458

    
1459
  tab->hcu_scheduled = 1;
1460
  ev_schedule(tab->rt_event);
1461
}
1462

    
1463
/*
 * Request a next hop update for @tab. The event is scheduled only when
 * nhu_state is idle (0); setting the low bit either arms a fresh update
 * (0->1) or requests another pass after the running one (2->3) --
 * compare the state transitions in rt_next_hop_update().
 */
static inline void
rt_schedule_nhu(rtable *tab)
{
  if (tab->nhu_state == 0)
    ev_schedule(tab->rt_event);

  /* state change 0->1, 2->3 */
  tab->nhu_state |= 1;
}
1472

    
1473

    
1474
/*
 * Walk the whole FIB of @tab and delete networks that have no routes
 * left (orphaned by withdrawals), then reset the GC bookkeeping.
 * After each deletion the iteration restarts from the saved position.
 */
static void
rt_prune_nets(rtable *tab)
{
  struct fib_iterator fit;
  int ncnt = 0, ndel = 0;

#ifdef DEBUGGING
  fib_check(&tab->fib);
#endif

  FIB_ITERATE_INIT(&fit, &tab->fib);
again:
  FIB_ITERATE_START(&tab->fib, &fit, f)
    {
      net *n = (net *) f;
      ncnt++;
      if (!n->routes)                /* Orphaned FIB entry */
        {
          /* Save the iterator position before modifying the FIB */
          FIB_ITERATE_PUT(&fit, f);
          fib_delete(&tab->fib, f);
          ndel++;
          goto again;
        }
    }
  FIB_ITERATE_END(f);
  DBG("Pruned %d of %d networks\n", ndel, ncnt);

  tab->gc_counter = 0;
  tab->gc_time = now;
  tab->gc_scheduled = 0;
}
1505

    
1506
/*
 * Table event hook: run whatever deferred work (hostcache update, next
 * hop update, pruning, GC) has been scheduled for the table in @ptr.
 */
static void
rt_event(void *ptr)
{
  rtable *tab = ptr;

  if (tab->hcu_scheduled)
    rt_update_hostcache(tab);

  if (tab->nhu_state)
    rt_next_hop_update(tab);

  if (tab->prune_state)
    if (!rt_prune_table(tab))
      {
        /* Table prune unfinished */
        ev_schedule(tab->rt_event);
        return;
      }

  if (tab->gc_scheduled)
    {
      rt_prune_nets(tab);
      rt_prune_sources(); // FIXME this should be moved to independent event
    }
}
1531

    
1532
void
1533
rt_setup(pool *p, rtable *t, char *name, struct rtable_config *cf)
1534
{
1535
  bzero(t, sizeof(*t));
1536
  fib_init(&t->fib, p, sizeof(net), 0, rte_init);
1537
  t->name = name;
1538
  t->config = cf;
1539
  init_list(&t->hooks);
1540
  if (cf)
1541
    {
1542
      t->rt_event = ev_new(p);
1543
      t->rt_event->hook = rt_event;
1544
      t->rt_event->data = t;
1545
      t->gc_time = now;
1546
    }
1547
}
1548

    
1549
/**
1550
 * rt_init - initialize routing tables
1551
 *
1552
 * This function is called during BIRD startup. It initializes the
1553
 * routing table module.
1554
 */
1555
void
rt_init(void)
{
  rta_init();
  rt_table_pool = rp_new(&root_pool, "Routing tables");
  rte_update_pool = lp_new(rt_table_pool, 4080);        /* Linpool for temporary per-update data */
  rte_slab = sl_new(rt_table_pool, sizeof(rte));        /* Slab for rte allocation */
  init_list(&routing_tables);
}
1564

    
1565

    
1566
/*
 * One bounded step of table pruning: remove routes from flushing
 * protocols and routes marked REF_DISCARD, deleting emptied networks.
 * Decrements *@limit per removed route; returns 0 when the budget was
 * exhausted (iterator position saved for the next step), 1 when done.
 */
static int
rt_prune_step(rtable *tab, int *limit)
{
  struct fib_iterator *fit = &tab->prune_fit;

  DBG("Pruning route table %s\n", tab->name);
#ifdef DEBUGGING
  fib_check(&tab->fib);
#endif

  if (tab->prune_state == RPS_NONE)
    return 1;

  if (tab->prune_state == RPS_SCHEDULED)
    {
      FIB_ITERATE_INIT(fit, &tab->fib);
      tab->prune_state = RPS_RUNNING;
    }

again:
  FIB_ITERATE_START(&tab->fib, fit, fn)
    {
      net *n = (net *) fn;
      rte *e;

    rescan:
      for (e=n->routes; e; e=e->next)
        if (e->sender->proto->flushing || (e->flags & REF_DISCARD))
          {
            if (*limit <= 0)
              {
                FIB_ITERATE_PUT(fit, fn);
                return 0;
              }

            rte_discard(tab, e);
            (*limit)--;

            /* Removal modified the list; restart the scan of this net */
            goto rescan;
          }
      if (!n->routes)                /* Orphaned FIB entry */
        {
          FIB_ITERATE_PUT(fit, fn);
          fib_delete(&tab->fib, fn);
          goto again;
        }
    }
  FIB_ITERATE_END(fn);

#ifdef DEBUGGING
  fib_check(&tab->fib);
#endif

  tab->prune_state = RPS_NONE;
  return 1;
}
1622

    
1623
/**
1624
 * rt_prune_table - prune a routing table
1625
 *
1626
 * This function scans the routing table @tab and removes routes belonging to
1627
 * flushing protocols, discarded routes and also stale network entries, in a
1628
 * similar fashion like rt_prune_loop(). Returns 1 when all such routes are
1629
 * pruned. Contrary to rt_prune_loop(), this function is not a part of the
1630
 * protocol flushing loop, but it is called from rt_event() for just one routing
1631
 * table.
1632
 *
1633
 * Note that rt_prune_table() and rt_prune_loop() share (for each table) the
1634
 * prune state (@prune_state) and also the pruning iterator (@prune_fit).
1635
 */
1636
static inline int
1637
rt_prune_table(rtable *tab)
1638
{
1639
  int limit = 512;
1640
  return rt_prune_step(tab, &limit);
1641
}
1642

    
1643
/**
1644
 * rt_prune_loop - prune routing tables
1645
 *
1646
 * The prune loop scans routing tables and removes routes belonging to flushing
1647
 * protocols, discarded routes and also stale network entries. Returns 1 when
1648
 * all such routes are pruned. It is a part of the protocol flushing loop.
1649
 */
1650
int
1651
rt_prune_loop(void)
1652
{
1653
  int limit = 512;
1654
  rtable *t;
1655

    
1656
  WALK_LIST(t, routing_tables)
1657
    if (! rt_prune_step(t, &limit))
1658
      return 0;
1659

    
1660
  return 1;
1661
}
1662

    
1663
/* Create the implicit 'master' table config before the config file is parsed */
void
rt_preconfig(struct config *c)
{
  struct symbol *s = cf_get_symbol("master");

  init_list(&c->tables);
  c->master_rtc = rt_new_table(s);
}
1671

    
1672

    
1673
/* 
1674
 * Some functions for handling internal next hop updates
1675
 * triggered by rt_schedule_nhu().
1676
 */
1677

    
1678
/*
 * Check whether the cached next hop data in @a disagrees with its
 * hostentry, i.e. whether the rta needs to be rebuilt. Returns 0 for
 * rtas without a hostentry (not subject to next hop updates).
 */
static inline int
rta_next_hop_outdated(rta *a)
{
  struct hostentry *he = a->hostentry;

  if (!he)
    return 0;

  /* Unresolvable hostentry: up to date only if already unreachable */
  if (!he->src)
    return a->dest != RTD_UNREACHABLE;

  return (a->iface != he->src->iface) || !ipa_equal(a->gw, he->gw) ||
    (a->dest != he->dest) || (a->igp_metric != he->igp_metric) ||
    !mpnh_same(a->nexthops, he->src->nexthops);
}
1693

    
1694
static inline void
1695
rta_apply_hostentry(rta *a, struct hostentry *he)
1696
{
1697
  a->hostentry = he;
1698
  a->iface = he->src ? he->src->iface : NULL;
1699
  a->gw = he->gw;
1700
  a->dest = he->dest;
1701
  a->igp_metric = he->igp_metric;
1702
  a->nexthops = he->src ? he->src->nexthops : NULL;
1703
}
1704

    
1705
/*
 * Make a copy of @old with next hop data refreshed from its hostentry.
 * The copy gets a freshly cached rta; the caller remains responsible
 * for unlinking and freeing @old.
 */
static inline rte *
rt_next_hop_update_rte(rtable *tab, rte *old)
{
  rta a;
  memcpy(&a, old->attrs, sizeof(rta));
  rta_apply_hostentry(&a, old->attrs->hostentry);
  a.aflags = 0;                        /* Uncached copy; rta_lookup() below re-caches it */

  rte *e = sl_alloc(rte_slab);
  memcpy(e, old, sizeof(rte));
  e->attrs = rta_lookup(&a);

  return e;
}
1719

    
1720
/*
 * Refresh next hop data for all routes of net @n whose rta is outdated
 * w.r.t. its hostentry (see rta_next_hop_outdated()). Returns the
 * number of replaced routes (0 when nothing changed); also re-elects
 * the best route and announces the change.
 */
static inline int
rt_next_hop_update_net(rtable *tab, net *n)
{
  rte **k, *e, *new, *old_best, **new_best;
  int count = 0;
  int free_old_best = 0;

  old_best = n->routes;
  if (!old_best)
    return 0;

  /* Replace each outdated route in place with an updated copy */
  for (k = &n->routes; e = *k; k = &e->next)
    if (rta_next_hop_outdated(e->attrs))
      {
        new = rt_next_hop_update_rte(tab, e);
        *k = new;

        rte_announce_i(tab, RA_ANY, n, new, e, NULL, NULL);
        rte_trace_in(D_ROUTES, new->sender->proto, new, "updated");

        /* Call a pre-comparison hook */
        /* Not really an efficient way to compute this */
        if (e->attrs->src->proto->rte_recalculate)
          e->attrs->src->proto->rte_recalculate(tab, n, new, e, NULL);

        if (e != old_best)
          rte_free_quick(e);
        else /* Freeing of the old best rte is postponed */
          free_old_best = 1;

        e = new;
        count++;
      }

  if (!count)
    return 0;

  /* Find the new best route */
  new_best = NULL;
  for (k = &n->routes; e = *k; k = &e->next)
    {
      if (!new_best || rte_better(e, *new_best))
        new_best = k;
    }

  /* Relink the new best route to the first position */
  new = *new_best;
  if (new != n->routes)
    {
      *new_best = new->next;
      new->next = n->routes;
      n->routes = new;
    }

  /* Announce the new best route */
  if (new != old_best)
    {
      rte_announce_i(tab, RA_OPTIMAL, n, new, old_best, NULL, NULL);
      rte_trace_in(D_ROUTES, new->sender->proto, new, "updated [best]");
    }

  /* FIXME: Better announcement of merged routes */
  rte_announce_i(tab, RA_MERGED, n, new, old_best, new, old_best);

   if (free_old_best)
    rte_free_quick(old_best);

  return count;
}
1789

    
1790
/*
 * Perform (a bounded part of) a scheduled next hop update for @tab,
 * walking its FIB and refreshing nets via rt_next_hop_update_net().
 * Processes a limited amount of work per run and reschedules itself
 * until the walk completes; nhu_state tracks progress (see
 * rt_schedule_nhu()).
 */
static void
rt_next_hop_update(rtable *tab)
{
  struct fib_iterator *fit = &tab->nhu_fit;
  int max_feed = 32;

  if (tab->nhu_state == 0)
    return;

  if (tab->nhu_state == 1)
    {
      FIB_ITERATE_INIT(fit, &tab->fib);
      tab->nhu_state = 2;
    }

  FIB_ITERATE_START(&tab->fib, fit, fn)
    {
      if (max_feed <= 0)
        {
          /* Out of budget; save position and continue in the next event run */
          FIB_ITERATE_PUT(fit, fn);
          ev_schedule(tab->rt_event);
          return;
        }
      max_feed -= rt_next_hop_update_net(tab, (net *) fn);
    }
  FIB_ITERATE_END(fn);

  /* state change 2->0, 3->1 */
  tab->nhu_state &= 1;

  if (tab->nhu_state > 0)
    ev_schedule(tab->rt_event);
}
1823

    
1824

    
1825
/*
 * Allocate and register a new table configuration for symbol @s with
 * default GC settings, defining @s as a SYM_TABLE symbol. Returns the
 * existing config when @s already names the master table.
 */
struct rtable_config *
rt_new_table(struct symbol *s)
{
  /* Hack that allows to 'redefine' the master table */
  if ((s->class == SYM_TABLE) && (s->def == new_config->master_rtc))
    return s->def;

  struct rtable_config *c = cfg_allocz(sizeof(struct rtable_config));

  cf_define_symbol(s, SYM_TABLE, c);
  c->name = s->name;
  add_tail(&new_config->tables, &c->n);
  c->gc_max_ops = 1000;        /* Default GC thresholds, see rt_schedule_gc() callers */
  c->gc_min_time = 5;
  return c;
}
1841

    
1842
/**
1843
 * rt_lock_table - lock a routing table
1844
 * @r: routing table to be locked
1845
 *
1846
 * Lock a routing table, because it's in use by a protocol,
1847
 * preventing it from being freed when it gets undefined in a new
1848
 * configuration.
1849
 */
1850
void
rt_lock_table(rtable *r)
{
  r->use_count++;                /* Balanced by rt_unlock_table() */
}
1855

    
1856
/**
1857
 * rt_unlock_table - unlock a routing table
1858
 * @r: routing table to be unlocked
1859
 *
1860
 * Unlock a routing table formerly locked by rt_lock_table(),
1861
 * that is decrease its use count and delete it if it's scheduled
1862
 * for deletion by configuration changes.
1863
 */
1864
void
rt_unlock_table(rtable *r)
{
  /* Free the table only when it is both unused and scheduled for deletion */
  if (!--r->use_count && r->deleted)
    {
      struct config *conf = r->deleted;
      DBG("Deleting routing table %s\n", r->name);
      if (r->hostcache)
        rt_free_hostcache(r);
      rem_node(&r->n);
      fib_free(&r->fib);
      rfree(r->rt_event);
      mb_free(r);
      /* Let the config that scheduled the deletion proceed */
      config_del_obstacle(conf);
    }
}
1880

    
1881
/**
1882
 * rt_commit - commit new routing table configuration
1883
 * @new: new configuration
1884
 * @old: original configuration or %NULL if it's boot time config
1885
 *
1886
 * Scan differences between @old and @new configuration and modify
1887
 * the routing tables according to these changes. If @new defines a
1888
 * previously unknown table, create it, if it omits a table existing
1889
 * in @old, schedule it for deletion (it gets deleted when all protocols
1890
 * disconnect from it by calling rt_unlock_table()), if it exists
1891
 * in both configurations, leave it unchanged.
1892
 */
1893
void
rt_commit(struct config *new, struct config *old)
{
  struct rtable_config *o, *r;

  DBG("rt_commit:\n");
  if (old)
    {
      /* Match tables of the old config against the new one */
      WALK_LIST(o, old->tables)
        {
          rtable *ot = o->table;
          if (!ot->deleted)
            {
              struct symbol *sym = cf_find_symbol(new, o->name);
              if (sym && sym->class == SYM_TABLE && !new->shutdown)
                {
                  /* Table kept: rebind the existing rtable to the new config */
                  DBG("\t%s: same\n", o->name);
                  r = sym->def;
                  r->table = ot;
                  ot->name = r->name;
                  ot->config = r;
                  if (o->sorted != r->sorted)
                    log(L_WARN "Reconfiguration of rtable sorted flag not implemented");
                }
              else
                {
                  /* Table gone: schedule deletion; the lock/unlock pair
                     frees it immediately when no protocol holds it */
                  DBG("\t%s: deleted\n", o->name);
                  ot->deleted = old;
                  config_add_obstacle(old);
                  rt_lock_table(ot);
                  rt_unlock_table(ot);
                }
            }
        }
    }

  /* Create rtables for tables newly defined in the new config */
  WALK_LIST(r, new->tables)
    if (!r->table)
      {
        rtable *t = mb_alloc(rt_table_pool, sizeof(struct rtable));
        DBG("\t%s: created\n", r->name);
        rt_setup(rt_table_pool, t, r->name, r);
        add_tail(&routing_tables, &t->n);
        r->table = t;
      }
  DBG("\tdone\n");
}
1940

    
1941
static inline void
1942
do_feed_baby(struct proto *p, int type, struct announce_hook *h, net *n, rte *e)
1943
{
1944
  rte_update_lock();
1945
  if (type == RA_ACCEPTED)
1946
    rt_notify_accepted(h, n, e, NULL, NULL, p->refeeding ? 2 : 1);
1947
  else if (type == RA_MERGED)
1948
    rt_notify_merged(h, n, NULL, NULL, e, p->refeeding ? e : NULL, p->refeeding);
1949
  else
1950
    rt_notify_basic(h, n, e, p->refeeding ? e : NULL, p->refeeding);
1951
  rte_update_unlock();
1952
}
1953

    
1954
/**
 * rt_feed_baby - advertise routes to a new protocol
 * @p: protocol to be fed
 *
 * This function performs one pass of advertisement of routes to a newly
 * initialized protocol. It's called by the protocol code as long as it
 * has something to do. (We avoid transferring all the routes in single
 * pass in order not to monopolize CPU time.)
 *
 * Returns zero when the per-call work budget ran out and the caller
 * should invoke it again later; nonzero when feeding is finished (or
 * when the protocol left the ES_FEEDING state in the meantime).
 */
int
rt_feed_baby(struct proto *p)
{
  struct announce_hook *h;
  struct fib_iterator *fit;
  int max_feed = 256;		/* Per-call route budget, keeps the main loop responsive */

  if (!p->feed_ahook)			/* Need to initialize first */
    {
      if (!p->ahooks)
	return 1;
      DBG("Announcing routes to new protocol %s\n", p->name);
      p->feed_ahook = p->ahooks;
      fit = p->feed_iterator = mb_alloc(p->pool, sizeof(struct fib_iterator));
      goto next_hook;
    }
  fit = p->feed_iterator;

again:
  h = p->feed_ahook;
  FIB_ITERATE_START(&h->table->fib, fit, fn)
    {
      net *n = (net *) fn;
      rte *e = n->routes;
      if (max_feed <= 0)
	{
	  /* Budget exhausted -- park the iterator so we can resume here */
	  FIB_ITERATE_PUT(fit, fn);
	  return 0;
	}

      /* XXXX perhaps we should change feed for RA_ACCEPTED to not use 'new' */

      /* Single-announcement modes: feed only the best route, if valid */
      if ((p->accept_ra_types == RA_OPTIMAL) ||
          (p->accept_ra_types == RA_ACCEPTED) ||
	  (p->accept_ra_types == RA_MERGED))
	if (rte_is_valid(e))
	  {
	    if (p->export_state != ES_FEEDING)
	      return 1;  /* In the meantime, the protocol fell down. */

	    do_feed_baby(p, p->accept_ra_types, h, n, e);
	    max_feed--;
	  }

      /* RA_ANY: feed every valid route on the net, not just the best one */
      if (p->accept_ra_types == RA_ANY)
	for(e = n->routes; e; e = e->next)
	  {
	    if (p->export_state != ES_FEEDING)
	      return 1;  /* In the meantime, the protocol fell down. */

	    if (!rte_is_valid(e))
	      continue;

	    do_feed_baby(p, RA_ANY, h, n, e);
	    max_feed--;
	  }
    }
  FIB_ITERATE_END(fn);
  p->feed_ahook = h->next;
  if (!p->feed_ahook)
    {
      /* All announce hooks have been fed -- release the iterator */
      mb_free(p->feed_iterator);
      p->feed_iterator = NULL;
      return 1;
    }

next_hook:
  h = p->feed_ahook;
  FIB_ITERATE_INIT(fit, &h->table->fib);
  goto again;
}
2034

    
2035
/**
2036
 * rt_feed_baby_abort - abort protocol feeding
2037
 * @p: protocol
2038
 *
2039
 * This function is called by the protocol code when the protocol
2040
 * stops or ceases to exist before the last iteration of rt_feed_baby()
2041
 * has finished.
2042
 */
2043
void
2044
rt_feed_baby_abort(struct proto *p)
2045
{
2046
  if (p->feed_ahook)
2047
    {
2048
      /* Unlink the iterator and exit */
2049
      fit_get(&p->feed_ahook->table->fib, p->feed_iterator);
2050
      p->feed_ahook = NULL;
2051
    }
2052
}
2053

    
2054

    
2055
/* Fold the bits of a pointer value into an unsigned hash */
static inline unsigned
ptr_hash(void *ptr)
{
  uintptr_t v = (uintptr_t) ptr;
  unsigned h = (unsigned) (v ^ (v << 8) ^ (v >> 16));
  return h;
}
2061

    
2062
static inline unsigned
2063
hc_hash(ip_addr a, rtable *dep)
2064
{
2065
  return (ipa_hash(a) ^ ptr_hash(dep)) & 0xffff;
2066
}
2067

    
2068
static inline void
2069
hc_insert(struct hostcache *hc, struct hostentry *he)
2070
{
2071
  uint k = he->hash_key >> hc->hash_shift;
2072
  he->next = hc->hash_table[k];
2073
  hc->hash_table[k] = he;
2074
}
2075

    
2076
static inline void
2077
hc_remove(struct hostcache *hc, struct hostentry *he)
2078
{
2079
  struct hostentry **hep;
2080
  uint k = he->hash_key >> hc->hash_shift;
2081

    
2082
  for (hep = &hc->hash_table[k]; *hep != he; hep = &(*hep)->next);
2083
  *hep = he->next;
2084
}
2085

    
2086
#define HC_DEF_ORDER 10			/* Initial hash order (1 << order buckets) */
#define HC_HI_MARK *4			/* Grow when items exceed 4x bucket count */
#define HC_HI_STEP 2			/* Grow by this many orders at once */
#define HC_HI_ORDER 16			/* Must be at most 16 */
#define HC_LO_MARK /5			/* Shrink when items fall below 1/5 of buckets */
#define HC_LO_STEP 2			/* Shrink by this many orders at once */
#define HC_LO_ORDER 10			/* Never shrink below this order */
2093

    
2094
static void
2095
hc_alloc_table(struct hostcache *hc, unsigned order)
2096
{
2097
  unsigned hsize = 1 << order;
2098
  hc->hash_order = order;
2099
  hc->hash_shift = 16 - order;
2100
  hc->hash_max = (order >= HC_HI_ORDER) ? ~0 : (hsize HC_HI_MARK);
2101
  hc->hash_min = (order <= HC_LO_ORDER) ?  0 : (hsize HC_LO_MARK);
2102

    
2103
  hc->hash_table = mb_allocz(rt_table_pool, hsize * sizeof(struct hostentry *));
2104
}
2105

    
2106
static void
2107
hc_resize(struct hostcache *hc, unsigned new_order)
2108
{
2109
  unsigned old_size = 1 << hc->hash_order;
2110
  struct hostentry **old_table = hc->hash_table;
2111
  struct hostentry *he, *hen;
2112
  int i;
2113

    
2114
  hc_alloc_table(hc, new_order);
2115
  for (i = 0; i < old_size; i++)
2116
    for (he = old_table[i]; he != NULL; he=hen)
2117
      {
2118
        hen = he->next;
2119
        hc_insert(hc, he);
2120
      }
2121
  mb_free(old_table);
2122
}
2123

    
2124
static struct hostentry *
2125
hc_new_hostentry(struct hostcache *hc, ip_addr a, ip_addr ll, rtable *dep, unsigned k)
2126
{
2127
  struct hostentry *he = sl_alloc(hc->slab);
2128

    
2129
  he->addr = a;
2130
  he->link = ll;
2131
  he->tab = dep;
2132
  he->hash_key = k;
2133
  he->uc = 0;
2134
  he->src = NULL;
2135

    
2136
  add_tail(&hc->hostentries, &he->ln);
2137
  hc_insert(hc, he);
2138

    
2139
  hc->hash_items++;
2140
  if (hc->hash_items > hc->hash_max)
2141
    hc_resize(hc, hc->hash_order + HC_HI_STEP);
2142

    
2143
  return he;
2144
}
2145

    
2146
static void
2147
hc_delete_hostentry(struct hostcache *hc, struct hostentry *he)
2148
{
2149
  rta_free(he->src);
2150

    
2151
  rem_node(&he->ln);
2152
  hc_remove(hc, he);
2153
  sl_free(hc->slab, he);
2154

    
2155
  hc->hash_items--;
2156
  if (hc->hash_items < hc->hash_min)
2157
    hc_resize(hc, hc->hash_order - HC_LO_STEP);
2158
}
2159

    
2160
static void
2161
rt_init_hostcache(rtable *tab)
2162
{
2163
  struct hostcache *hc = mb_allocz(rt_table_pool, sizeof(struct hostcache));
2164
  init_list(&hc->hostentries);
2165

    
2166
  hc->hash_items = 0;
2167
  hc_alloc_table(hc, HC_DEF_ORDER);
2168
  hc->slab = sl_new(rt_table_pool, sizeof(struct hostentry));
2169

    
2170
  hc->lp = lp_new(rt_table_pool, 1008);
2171
  hc->trie = f_new_trie(hc->lp, sizeof(struct f_trie_node));
2172

    
2173
  tab->hostcache = hc;
2174
}
2175

    
2176
static void
2177
rt_free_hostcache(rtable *tab)
2178
{
2179
  struct hostcache *hc = tab->hostcache;
2180

    
2181
  node *n;
2182
  WALK_LIST(n, hc->hostentries)
2183
    {
2184
      struct hostentry *he = SKIP_BACK(struct hostentry, ln, n);
2185
      rta_free(he->src);
2186

    
2187
      if (he->uc)
2188
        log(L_ERR "Hostcache is not empty in table %s", tab->name);
2189
    }
2190

    
2191
  rfree(hc->slab);
2192
  rfree(hc->lp);
2193
  mb_free(hc->hash_table);
2194
  mb_free(hc);
2195
}
2196

    
2197
static void
2198
rt_notify_hostcache(rtable *tab, net *net)
2199
{
2200
  struct hostcache *hc = tab->hostcache;
2201

    
2202
  if (tab->hcu_scheduled)
2203
    return;
2204

    
2205
  if (trie_match_prefix(hc->trie, net->n.prefix, net->n.pxlen))
2206
    rt_schedule_hcu(tab);
2207
}
2208

    
2209
static int
2210
if_local_addr(ip_addr a, struct iface *i)
2211
{
2212
  struct ifa *b;
2213

    
2214
  WALK_LIST(b, i->addrs)
2215
    if (ipa_equal(a, b->ip))
2216
      return 1;
2217

    
2218
  return 0;
2219
}
2220

    
2221
static u32 
2222
rt_get_igp_metric(rte *rt)
2223
{
2224
  eattr *ea = ea_find(rt->attrs->eattrs, EA_GEN_IGP_METRIC);
2225

    
2226
  if (ea)
2227
    return ea->u.data;
2228

    
2229
  rta *a = rt->attrs;
2230

    
2231
#ifdef CONFIG_OSPF
2232
  if ((a->source == RTS_OSPF) ||
2233
      (a->source == RTS_OSPF_IA) ||
2234
      (a->source == RTS_OSPF_EXT1))
2235
    return rt->u.ospf.metric1;
2236
#endif
2237

    
2238
#ifdef CONFIG_RIP
2239
  if (a->source == RTS_RIP)
2240
    return rt->u.rip.metric;
2241
#endif
2242

    
2243
  /* Device routes */
2244
  if ((a->dest != RTD_ROUTER) && (a->dest != RTD_MULTIPATH))
2245
    return 0;
2246

    
2247
  return IGP_METRIC_UNKNOWN;
2248
}
2249

    
2250
/*
 * rt_update_hostentry - re-resolve one cached host address
 *
 * Looks up the best route covering he->addr in @tab and refreshes the
 * cached gateway, destination type, source rta and IGP metric.
 * Returns nonzero when the cached source rta changed, i.e. the
 * dependent table needs a next-hop update.
 */
static int
rt_update_hostentry(rtable *tab, struct hostentry *he)
{
  rta *old_src = he->src;
  int pxlen = 0;

  /* Reset the hostentry */
  he->src = NULL;
  he->gw = IPA_NONE;
  he->dest = RTD_UNREACHABLE;
  he->igp_metric = 0;

  net *n = net_route(tab, he->addr, MAX_PREFIX_LENGTH);
  if (n)
    {
      rte *e = n->routes;
      rta *a = e->attrs;
      pxlen = n->n.pxlen;

      if (a->hostentry)
	{
	  /* Recursive route should not depend on another recursive route */
	  log(L_WARN "Next hop address %I resolvable through recursive route for %I/%d",
	      he->addr, n->n.prefix, pxlen);
	  goto done;
	}

      if (a->dest == RTD_DEVICE)
	{
	  if (if_local_addr(he->addr, a->iface))
	    {
	      /* The host address is a local address, this is not valid */
	      log(L_WARN "Next hop address %I is a local address of iface %s",
		  he->addr, a->iface->name);
	      goto done;
	    }

	  /* The host is directly reachable, use link as a gateway */
	  he->gw = he->link;
	  he->dest = RTD_ROUTER;
	}
      else
	{
	  /* The host is reachable through some route entry */
	  he->gw = a->gw;
	  he->dest = a->dest;
	}

      he->src = rta_clone(a);
      he->igp_metric = rt_get_igp_metric(e);
    }

 done:
  /* Add a prefix range to the trie so future updates to any covering
     prefix trigger a hostcache update (see rt_notify_hostcache()) */
  trie_add_prefix(tab->hostcache->trie, he->addr, MAX_PREFIX_LENGTH, pxlen, MAX_PREFIX_LENGTH);

  rta_free(old_src);
  return old_src != he->src;
}
2309

    
2310
/*
 * rt_update_hostcache - re-resolve all cached host addresses of @tab
 *
 * Rebuilds the prefix trie from scratch, drops unused hostentries and
 * schedules a next-hop update on every dependent table whose entry
 * changed. Clears the hcu_scheduled flag when done.
 */
static void
rt_update_hostcache(rtable *tab)
{
  struct hostcache *hc = tab->hostcache;
  struct hostentry *he;
  node *n, *x;

  /* Reset the trie */
  lp_flush(hc->lp);
  hc->trie = f_new_trie(hc->lp, sizeof(struct f_trie_node));

  /* DELSAFE: hc_delete_hostentry() removes nodes while we walk */
  WALK_LIST_DELSAFE(n, x, hc->hostentries)
    {
      he = SKIP_BACK(struct hostentry, ln, n);
      if (!he->uc)
	{
	  /* No route references this entry any more -- drop it */
	  hc_delete_hostentry(hc, he);
	  continue;
	}

      if (rt_update_hostentry(tab, he))
	rt_schedule_nhu(he->tab);
    }

  tab->hcu_scheduled = 0;
}
2336

    
2337
static struct hostentry *
2338
rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep)
2339
{
2340
  struct hostentry *he;
2341

    
2342
  if (!tab->hostcache)
2343
    rt_init_hostcache(tab);
2344

    
2345
  uint k = hc_hash(a, dep);
2346
  struct hostcache *hc = tab->hostcache;
2347
  for (he = hc->hash_table[k >> hc->hash_shift]; he != NULL; he = he->next)
2348
    if (ipa_equal(he->addr, a) && (he->tab == dep))
2349
      return he;
2350

    
2351
  he = hc_new_hostentry(hc, a, ll, dep, k);
2352
  rt_update_hostentry(tab, he);
2353
  return he;
2354
}
2355

    
2356
void
2357
rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr *gw, ip_addr *ll)
2358
{
2359
  rta_apply_hostentry(a, rt_get_hostentry(tab, *gw, *ll, dep));
2360
}
2361

    
2362

    
2363
/*
 *  CLI commands
 */
2366

    
2367
static void
2368
rt_format_via(rte *e, byte *via)
2369
{
2370
  rta *a = e->attrs;
2371

    
2372
  switch (a->dest)
2373
    {
2374
    case RTD_ROUTER:        bsprintf(via, "via %I on %s", a->gw, a->iface->name); break;
2375
    case RTD_DEVICE:        bsprintf(via, "dev %s", a->iface->name); break;
2376
    case RTD_BLACKHOLE:        bsprintf(via, "blackhole"); break;
2377
    case RTD_UNREACHABLE:        bsprintf(via, "unreachable"); break;
2378
    case RTD_PROHIBIT:        bsprintf(via, "prohibited"); break;
2379
    case RTD_MULTIPATH:        bsprintf(via, "multipath"); break;
2380
    default:                bsprintf(via, "???");
2381
    }
2382
}
2383

    
2384
/*
 * rt_show_rte - print one route entry for the 'show route' command
 *
 * @ia holds the formatted prefix (printed only for the first route of a
 * net, the caller blanks it afterwards); @tmpa are the temporary
 * attributes produced by the protocol's make_tmp_attrs hook.
 */
static void
rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, ea_list *tmpa)
{
  byte via[STD_ADDRESS_P_LENGTH+32], from[STD_ADDRESS_P_LENGTH+8];
  byte tm[TM_DATETIME_BUFFER_SIZE], info[256];
  rta *a = e->attrs;
  int primary = (e->net->routes == e);	/* First route on the net is the best one */
  int sync_error = (e->net->n.flags & KRF_SYNC_ERROR);
  void (*get_route_info)(struct rte *, byte *buf, struct ea_list *attrs);
  struct mpnh *nh;

  rt_format_via(e, via);
  tm_format_datetime(tm, &config->tf_route, e->lastmod);
  if (ipa_nonzero(a->from) && !ipa_equal(a->from, a->gw))
    bsprintf(from, " from %I", a->from);
  else
    from[0] = 0;

  get_route_info = a->src->proto->proto->get_route_info;
  if (get_route_info || d->verbose)
    {
      /* Need to normalize the extended attributes */
      ea_list *t = tmpa;
      t = ea_append(t, a->eattrs);
      tmpa = alloca(ea_scan(t));	/* Merged copy lives on our stack frame */
      ea_merge(t, tmpa);
      ea_sort(tmpa);
    }
  if (get_route_info)
    get_route_info(e, info, tmpa);
  else
    bsprintf(info, " (%d)", e->pref);
  /* " *" marks the primary route, " !" a primary route with sync error */
  cli_printf(c, -1007, "%-18s %s [%s %s%s]%s%s", ia, via, a->src->proto->name,
	     tm, from, primary ? (sync_error ? " !" : " *") : "", info);
  for (nh = a->nexthops; nh; nh = nh->next)
    cli_printf(c, -1007, "\tvia %I on %s weight %d", nh->gw, nh->iface->name, nh->weight + 1);
  if (d->verbose)
    rta_show(c, a, tmpa);
}
2423

    
2424
/*
 * rt_show_net - show all matching routes of one network
 *
 * Walks the route list of @n, applying the filtering/export modes
 * described in @d, updates the statistics counters and prints the
 * accepted entries via rt_show_rte().
 */
static void
rt_show_net(struct cli *c, net *n, struct rt_show_data *d)
{
  rte *e, *ee;
  byte ia[STD_ADDRESS_P_LENGTH+8];
  struct ea_list *tmpa;
  struct announce_hook *a = NULL;
  int first = 1;	/* True until the first counted route of this net */
  int pass = 0;		/* Set once the single exportable route was handled */

  bsprintf(ia, "%I/%d", n->n.prefix, n->n.pxlen);

  if (d->export_mode)
    {
      if (! d->export_protocol->rt_notify)
	return;

      a = proto_find_announce_hook(d->export_protocol, d->table);
      if (!a)
	return;
    }

  for (e = n->routes; e; e = e->next)
    {
      if (rte_is_filtered(e) != d->filtered)
	continue;

      d->rt_counter++;
      d->net_counter += first;
      first = 0;

      /* In single-export modes, later routes are only counted, not shown */
      if (pass)
	continue;

      ee = e;	/* Remember the original; filters may give us a private copy */
      rte_update_lock();		/* We use the update buffer for filtering */
      tmpa = make_tmp_attrs(e, rte_update_pool);

      /* Special case for merged export */
      if ((d->export_mode == RSEM_EXPORT) && (d->export_protocol->accept_ra_types == RA_MERGED))
	{
	  rte *rt_free;
	  e = rt_export_merged(a, n, &rt_free, &tmpa, 1);
	  pass = 1;

	  if (!e)
	  { e = ee; goto skip; }
	}
      else if (d->export_mode)
	{
	  struct proto *ep = d->export_protocol;
	  int ic = ep->import_control ? ep->import_control(ep, &e, &tmpa, rte_update_pool) : 0;

	  if (ep->accept_ra_types == RA_OPTIMAL || ep->accept_ra_types == RA_MERGED)
	    pass = 1;

	  if (ic < 0)
	    goto skip;

	  if (d->export_mode > RSEM_PREEXPORT)
	    {
	      /*
	       * FIXME - This shows what should be exported according to current
	       * filters, but not what was really exported. 'configure soft'
	       * command may change the export filter and do not update routes.
	       */
	      int do_export = (ic > 0) ||
		(f_run(a->out_filter, &e, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) <= F_ACCEPT);

	      if (do_export != (d->export_mode == RSEM_EXPORT))
		goto skip;

	      if ((d->export_mode == RSEM_EXPORT) && (ep->accept_ra_types == RA_ACCEPTED))
		pass = 1;
	    }
	}

      if (d->show_protocol && (d->show_protocol != e->attrs->src->proto))
	goto skip;

      /* Apply the user-supplied 'show route filter ...' filter */
      if (f_run(d->filter, &e, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) > F_ACCEPT)
	goto skip;

      d->show_counter++;
      if (d->stats < 2)
	rt_show_rte(c, ia, e, d, tmpa);
      ia[0] = 0;	/* Print the prefix only once per net */

    skip:
      if (e != ee)
      {
	/* A filter created a modified copy -- free it, restore the original */
	rte_free(e);
	e = ee;
      }
      rte_update_unlock();

      if (d->primary_only)
	break;
    }
}
2524

    
2525
/*
 * rt_show_cont - continuation hook for whole-table 'show route' dumps
 *
 * Invoked repeatedly by the CLI; shows a bounded batch of networks per
 * call and parks the FIB iterator in between, so the table may change
 * while the dump is in progress.
 */
static void
rt_show_cont(struct cli *c)
{
  struct rt_show_data *d = c->rover;
#ifdef DEBUGGING
  unsigned max = 4;	/* Tiny batches to exercise the continuation path */
#else
  unsigned max = 64;	/* Networks shown per continuation call */
#endif
  struct fib *fib = &d->table->fib;
  struct fib_iterator *it = &d->fit;

  FIB_ITERATE_START(fib, it, f)
    {
      net *n = (net *) f;
      /* Abort if the configuration changed under us */
      if (d->running_on_config && d->running_on_config != config)
	{
	  cli_printf(c, 8004, "Stopped due to reconfiguration");
	  goto done;
	}
      if (d->export_protocol && (d->export_protocol->export_state == ES_DOWN))
	{
	  cli_printf(c, 8005, "Protocol is down");
	  goto done;
	}
      if (!max--)
	{
	  /* Batch exhausted -- park the iterator, resume on the next call */
	  FIB_ITERATE_PUT(it, f);
	  return;
	}
      rt_show_net(c, n, d);
    }
  FIB_ITERATE_END(f);
  if (d->stats)
    cli_printf(c, 14, "%d of %d routes for %d networks", d->show_counter, d->rt_counter, d->net_counter);
  else
    cli_printf(c, 0, "");
done:
  c->cont = c->cleanup = NULL;
}
2565

    
2566
static void
2567
rt_show_cleanup(struct cli *c)
2568
{
2569
  struct rt_show_data *d = c->rover;
2570

    
2571
  /* Unlink the iterator */
2572
  fit_get(&d->table->fib, &d->fit);
2573
}
2574

    
2575
void
2576
rt_show(struct rt_show_data *d)
2577
{
2578
  net *n;
2579

    
2580
  /* Default is either a master table or a table related to a respective protocol */
2581
  if (!d->table && d->export_protocol) d->table = d->export_protocol->table;
2582
  if (!d->table && d->show_protocol) d->table = d->show_protocol->table;
2583
  if (!d->table) d->table = config->master_rtc->table;
2584

    
2585
  /* Filtered routes are neither exported nor have sensible ordering */
2586
  if (d->filtered && (d->export_mode || d->primary_only))
2587
    cli_msg(0, "");
2588

    
2589
  if (d->pxlen == 256)
2590
    {
2591
      FIB_ITERATE_INIT(&d->fit, &d->table->fib);
2592
      this_cli->cont = rt_show_cont;
2593
      this_cli->cleanup = rt_show_cleanup;
2594
      this_cli->rover = d;
2595
    }
2596
  else
2597
    {
2598
      if (d->show_for)
2599
        n = net_route(d->table, d->prefix, d->pxlen);
2600
      else
2601
        n = net_find(d->table, d->prefix, d->pxlen);
2602

    
2603
      if (n)
2604
        rt_show_net(this_cli, n, d);
2605

    
2606
      if (d->rt_counter)
2607
        cli_msg(0, "");
2608
      else
2609
        cli_msg(8001, "Network not in table");
2610
    }
2611
}
2612

    
2613
/*
 *  Documentation for functions declared inline in route.h
 */
#if 0

/**
 * net_find - find a network entry
 * @tab: a routing table
 * @addr: address of the network
 * @len: length of the network prefix
 *
 * net_find() looks up the given network in routing table @tab and
 * returns a pointer to its &net entry or %NULL if no such network
 * exists.
 */
static inline net *net_find(rtable *tab, ip_addr addr, unsigned len)
{ DUMMY; }

/**
 * net_get - obtain a network entry
 * @tab: a routing table
 * @addr: address of the network
 * @len: length of the network prefix
 *
 * net_get() looks up the given network in routing table @tab and
 * returns a pointer to its &net entry. If no such entry exists, it's
 * created.
 */
static inline net *net_get(rtable *tab, ip_addr addr, unsigned len)
{ DUMMY; }

/**
 * rte_cow - copy a route for writing
 * @r: a route entry to be copied
 *
 * rte_cow() takes a &rte and prepares it for modification. The exact action
 * taken depends on the flags of the &rte -- if it's a temporary entry, it's
 * just returned unchanged, else a new temporary entry with the same contents
 * is created.
 *
 * The primary use of this function is inside the filter machinery -- when
 * a filter wants to modify &rte contents (to change the preference or to
 * attach another set of attributes), it must ensure that the &rte is not
 * shared with anyone else (and especially that it isn't stored in any routing
 * table).
 *
 * Result: a pointer to the new writable &rte.
 */
static inline rte * rte_cow(rte *r)
{ DUMMY; }

#endif