Statistics
| Branch: | Revision:

iof-bird-daemon / nest / rt-table.c @ 600998fc

History | View | Annotate | Download (64.6 KB)

1
/*
2
 *        BIRD -- Routing Tables
3
 *
4
 *        (c) 1998--2000 Martin Mares <mj@ucw.cz>
5
 *
6
 *        Can be freely distributed and used under the terms of the GNU GPL.
7
 */
8

    
9
/**
10
 * DOC: Routing tables
11
 *
12
 * Routing tables are probably the most important structures BIRD uses. They
13
 * hold all the information about known networks, the associated routes and
14
 * their attributes.
15
 *
16
 * There are multiple routing tables (a primary one together with any
17
 * number of secondary ones if requested by the configuration). Each table
18
 * is basically a FIB containing entries describing the individual
19
 * destination networks. For each network (represented by structure &net),
20
 * there is a one-way linked list of route entries (&rte), the first entry
21
 * on the list being the best one (i.e., the one we currently use
22
 * for routing), the order of the other ones is undetermined.
23
 *
24
 * The &rte contains information specific to the route (preference, protocol
25
 * metrics, time of last modification etc.) and a pointer to a &rta structure
26
 * (see the route attribute module for a precise explanation) holding the
27
 * remaining route attributes which are expected to be shared by multiple
28
 * routes in order to conserve memory.
29
 */
30

    
31
#undef LOCAL_DEBUG
32

    
33
#include "nest/bird.h"
34
#include "nest/route.h"
35
#include "nest/protocol.h"
36
#include "nest/cli.h"
37
#include "nest/iface.h"
38
#include "lib/resource.h"
39
#include "lib/event.h"
40
#include "lib/string.h"
41
#include "conf/conf.h"
42
#include "filter/filter.h"
43
#include "lib/string.h"
44
#include "lib/alloca.h"
45

    
46
pool *rt_table_pool;
47

    
48
static slab *rte_slab;
49
static linpool *rte_update_pool;
50

    
51
static list routing_tables;
52

    
53
static void rt_format_via(rte *e, byte *via);
54
static void rt_free_hostcache(rtable *tab);
55
static void rt_notify_hostcache(rtable *tab, net *net);
56
static void rt_update_hostcache(rtable *tab);
57
static void rt_next_hop_update(rtable *tab);
58
static inline int rt_prune_table(rtable *tab);
59
static inline void rt_schedule_gc(rtable *tab);
60
static inline void rt_schedule_prune(rtable *tab);
61

    
62

    
63
static inline struct ea_list *
64
make_tmp_attrs(struct rte *rt, struct linpool *pool)
65
{
66
  struct ea_list *(*mta)(struct rte *rt, struct linpool *pool);
67
  mta = rt->attrs->src->proto->make_tmp_attrs;
68
  return mta ? mta(rt, rte_update_pool) : NULL;
69
}
70

    
71
/* Like fib_route(), but skips empty net entries */
72
/*
73
static net *
74
net_route(rtable *tab, ip_addr a, int len)
75
{
76
  ip_addr a0;
77
  net *n;
78

79
  while (len >= 0)
80
    {
81
      a0 = ipa_and(a, ipa_mkmask(len));
82
      n = fib_find(&tab->fib, &a0, len);
83
      if (n && rte_is_valid(n->routes))
84
        return n;
85
      len--;
86
    }
87
  return NULL;
88
}
89
*/
90

    
91
/**
92
 * rte_find - find a route
93
 * @net: network node
94
 * @src: route source
95
 *
96
 * The rte_find() function returns a route for destination @net
97
 * which is from route source @src.
98
 */
99
rte *
100
rte_find(net *net, struct rte_src *src)
101
{
102
  rte *e = net->routes;
103

    
104
  while (e && e->attrs->src != src)
105
    e = e->next;
106
  return e;
107
}
108

    
109
/**
110
 * rte_get_temp - get a temporary &rte
111
 * @a: attributes to assign to the new route (a &rta; in case it's
112
 * un-cached, rte_update() will create a cached copy automatically)
113
 *
114
 * Create a temporary &rte and bind it with the attributes @a.
115
 * Also set route preference to the default preference set for
116
 * the protocol.
117
 */
118
rte *
119
rte_get_temp(rta *a)
120
{
121
  rte *e = sl_alloc(rte_slab);
122

    
123
  e->attrs = a;
124
  e->flags = 0;
125
  e->pref = a->src->proto->preference;
126
  return e;
127
}
128

    
129
rte *
rte_do_cow(rte *r)
{
  rte *copy = sl_alloc(rte_slab);

  /* Start from a bitwise copy of the original, then take our own
     reference on the attributes and reset the per-entry flags */
  memcpy(copy, r, sizeof(rte));
  copy->attrs = rta_clone(r->attrs);
  copy->flags = 0;
  return copy;
}
139

    
140
/**
141
 * rte_cow_rta - get a private writable copy of &rte with writable &rta
142
 * @r: a route entry to be copied
143
 * @lp: a linpool from which to allocate &rta
144
 *
145
 * rte_cow_rta() takes a &rte and prepares it and associated &rta for
146
 * modification. There are three possibilities: First, both &rte and &rta are
147
 * private copies, in that case they are returned unchanged.  Second, &rte is
148
 * private copy, but &rta is cached, in that case &rta is duplicated using
149
 * rta_do_cow(). Third, both &rte is shared and &rta is cached, in that case
150
 * both structures are duplicated by rte_do_cow() and rta_do_cow().
151
 *
152
 * Note that in the second case, cached &rta loses one reference, while private
153
 * copy created by rta_do_cow() is a shallow copy sharing indirect data (eattrs,
154
 * nexthops, ...) with it. To work properly, original shared &rta should have
155
 * another reference during the life of created private copy.
156
 *
157
 * Result: a pointer to the new writable &rte with writable &rta.
158
 */
159
rte *
rte_cow_rta(rte *r, linpool *lp)
{
  /* Attributes already private (uncached) -- both rte and rta are writable */
  if (!rta_is_cached(r->attrs))
    return r;

  /* Get a private rte first (no-op if already private), then swap its
     cached rta for an uncached shallow copy. Note the order: the copy is
     made from r->attrs before the reference held by e->attrs is dropped,
     so the shared indirect data stays alive (see function comment above). */
  rte *e = rte_cow(r);
  rta *a = rta_do_cow(r->attrs, lp);
  rta_free(e->attrs);
  e->attrs = a;
  return e;
}
171

    
172
static int                                /* Actually better or at least as good as */
173
rte_better(rte *new, rte *old)
174
{
175
  int (*better)(rte *, rte *);
176

    
177
  if (!rte_is_valid(old))
178
    return 1;
179
  if (!rte_is_valid(new))
180
    return 0;
181

    
182
  if (new->pref > old->pref)
183
    return 1;
184
  if (new->pref < old->pref)
185
    return 0;
186
  if (new->attrs->src->proto->proto != old->attrs->src->proto->proto)
187
    {
188
      /*
189
       *  If the user has configured protocol preferences, so that two different protocols
190
       *  have the same preference, try to break the tie by comparing addresses. Not too
191
       *  useful, but keeps the ordering of routes unambiguous.
192
       */
193
      return new->attrs->src->proto->proto > old->attrs->src->proto->proto;
194
    }
195
  if (better = new->attrs->src->proto->rte_better)
196
    return better(new, old);
197
  return 0;
198
}
199

    
200
static int
201
rte_mergable(rte *pri, rte *sec)
202
{
203
  int (*mergable)(rte *, rte *);
204

    
205
  if (!rte_is_valid(pri) || !rte_is_valid(sec))
206
    return 0;
207

    
208
  if (pri->pref != sec->pref)
209
    return 0;
210

    
211
  if (pri->attrs->src->proto->proto != sec->attrs->src->proto->proto)
212
    return 0;
213

    
214
  if (mergable = pri->attrs->src->proto->rte_mergable)
215
    return mergable(pri, sec);
216

    
217
  return 0;
218
}
219

    
220
static void
221
rte_trace(struct proto *p, rte *e, int dir, char *msg)
222
{
223
  byte via[IPA_MAX_TEXT_LENGTH+32];
224

    
225
  rt_format_via(e, via);
226
  log(L_TRACE "%s %c %s %N %s", p->name, dir, msg, e->net->n.addr, via);
227
}
228

    
229
static inline void
230
rte_trace_in(uint flag, struct proto *p, rte *e, char *msg)
231
{
232
  if (p->debug & flag)
233
    rte_trace(p, e, '>', msg);
234
}
235

    
236
static inline void
237
rte_trace_out(uint flag, struct proto *p, rte *e, char *msg)
238
{
239
  if (p->debug & flag)
240
    rte_trace(p, e, '<', msg);
241
}
242

    
243
/*
 * export_filter - run the export decision chain on one route
 * @ah: announce hook binding a protocol to the table
 * @rt0: route to examine
 * @rt_free: set to the temporary rte copy if the hooks created one
 *           (caller must free it), NULL otherwise
 * @tmpa: in/out temporary-attribute list; may be NULL if caller does not care
 * @silent: suppress statistics and filter tracing (used when probing the
 *          'old' route just to learn whether it had been exported)
 *
 * Returns the accepted route (possibly a modified private copy) or NULL
 * when the route was rejected by the protocol or filtered out.
 */
static rte *
export_filter(struct announce_hook *ah, rte *rt0, rte **rt_free, ea_list **tmpa, int silent)
{
  struct proto *p = ah->proto;
  struct filter *filter = ah->out_filter;
  struct proto_stats *stats = ah->stats;
  ea_list *tmpb = NULL;
  rte *rt;
  int v;

  rt = rt0;
  *rt_free = NULL;

  /* Use a local tmpa when the caller passed none, so the hooks below
     always have somewhere to store temporary attributes */
  if (!tmpa)
    tmpa = &tmpb;

  *tmpa = make_tmp_attrs(rt, rte_update_pool);

  /* Protocol veto / force-accept: v < 0 rejects, v > 0 accepts
     bypassing the filter, 0 defers to the export filter */
  v = p->import_control ? p->import_control(p, &rt, tmpa, rte_update_pool) : 0;
  if (v < 0)
    {
      if (silent)
        goto reject;

      stats->exp_updates_rejected++;
      if (v == RIC_REJECT)
        rte_trace_out(D_FILTERS, p, rt, "rejected by protocol");
      goto reject;
    }
  if (v > 0)
    {
      if (!silent)
        rte_trace_out(D_FILTERS, p, rt, "forced accept by protocol");
      goto accept;
    }

  /* Run the export filter; FILTER_REJECT short-circuits without
     invoking the filter machinery */
  v = filter && ((filter == FILTER_REJECT) ||
                 (f_run(filter, &rt, tmpa, rte_update_pool, FF_FORCE_TMPATTR) > F_ACCEPT));
  if (v)
    {
      if (silent)
        goto reject;

      stats->exp_updates_filtered++;
      rte_trace_out(D_FILTERS, p, rt, "filtered out");
      goto reject;
    }

 accept:
  /* Hand the private copy (if one was made by the hooks/filter) back to
     the caller for later freeing */
  if (rt != rt0)
    *rt_free = rt;
  return rt;

 reject:
  /* Discard temporary rte */
  if (rt != rt0)
    rte_free(rt);
  return NULL;
}
302

    
303
/*
 * do_rt_notify - deliver one accepted update/withdraw to a protocol
 * @ah: announce hook of the target protocol
 * @net: network the change concerns
 * @new: new exported route, or NULL for a withdraw
 * @old: previously exported route, or NULL
 * @tmpa: temporary attributes to splice in front of @new's eattrs, or NULL
 * @refeed: nonzero when called during refeed (affects exp_routes accounting)
 *
 * Applies the export limit, updates export statistics and traces, and
 * finally calls the protocol's rt_notify() hook.
 */
static void
do_rt_notify(struct announce_hook *ah, net *net, rte *new, rte *old, ea_list *tmpa, int refeed)
{
  struct proto *p = ah->proto;
  struct proto_stats *stats = ah->stats;

  /*
   * First, apply export limit.
   *
   * Export route limits has several problems. Because exp_routes
   * counter is reset before refeed, we don't really know whether
   * limit is breached and whether the update is new or not. Therefore
   * the number of really exported routes may exceed the limit
   * temporarily (routes exported before and new routes in refeed).
   *
   * Minor advantage is that if the limit is decreased and refeed is
   * requested, the number of exported routes really decrease.
   *
   * Second problem is that with export limits, we don't know whether
   * old was really exported (it might be blocked by limit). When a
   * withdraw is exported, we announce it even when the previous
   * update was blocked. This is not a big issue, but the same problem
   * is in updating exp_routes counter. Therefore, to be consistent in
   * increases and decreases of exp_routes, we count exported routes
   * regardless of blocking by limits.
   *
   * Similar problem is in handling updates - when a new route is
   * received and blocking is active, the route would be blocked, but
   * when an update for the route will be received later, the update
   * would be propagated (as old != NULL). Therefore, we have to block
   * also non-new updates (contrary to import blocking).
   */

  struct proto_limit *l = ah->out_limit;
  if (l && new)
    {
      if ((!old || refeed) && (stats->exp_routes >= l->limit))
        proto_notify_limit(ah, l, PLD_OUT, stats->exp_routes);

      if (l->state == PLS_BLOCKED)
        {
          stats->exp_routes++;        /* see note above */
          stats->exp_updates_rejected++;
          rte_trace_out(D_FILTERS, p, new, "rejected [limit]");
          /* Demote the blocked update to a withdraw of @old (if any) */
          new = NULL;

          if (!old)
            return;
        }
    }

  if (new)
    stats->exp_updates_accepted++;
  else
    stats->exp_withdraws_accepted++;

  /* Hack: We do not decrease exp_routes during refeed, we instead
     reset exp_routes at the start of refeed. */
  if (new)
    stats->exp_routes++;
  if (old && !refeed)
    stats->exp_routes--;

  if (p->debug & D_ROUTES)
    {
      if (new && old)
        rte_trace_out(D_ROUTES, p, new, "replaced");
      else if (new)
        rte_trace_out(D_ROUTES, p, new, "added");
      else if (old)
        rte_trace_out(D_ROUTES, p, old, "removed");
    }
  if (!new)
    p->rt_notify(p, ah->table, net, NULL, old, NULL);
  else if (tmpa)
    {
      /* Temporarily chain the route's own eattrs behind @tmpa so the
         protocol sees a single combined list, then unlink again */
      ea_list *t = tmpa;
      while (t->next)
        t = t->next;
      t->next = new->attrs->eattrs;
      p->rt_notify(p, ah->table, net, new, old, tmpa);
      t->next = NULL;
    }
  else
    p->rt_notify(p, ah->table, net, new, old, new->attrs->eattrs);
}
391

    
392
/*
 * rt_notify_basic - announce a plain route change to one protocol
 * @ah: announce hook of the target protocol
 * @net: network in question
 * @new0: new route (unfiltered), or NULL for a withdraw
 * @old0: old route (unfiltered), or NULL
 * @refeed: nonzero during refeed (old route is then not filtered, see below)
 *
 * Runs the export filter on both routes and forwards the result to
 * do_rt_notify(). Temporary copies made by the filter are freed here.
 */
static void
rt_notify_basic(struct announce_hook *ah, net *net, rte *new0, rte *old0, int refeed)
{
  struct proto *p = ah->proto;
  struct proto_stats *stats = ah->stats;

  rte *new = new0;
  rte *old = old0;
  rte *new_free = NULL;
  rte *old_free = NULL;
  ea_list *tmpa = NULL;

  if (new)
    stats->exp_updates_received++;
  else
    stats->exp_withdraws_received++;

  /*
   * This is a tricky part - we don't know whether route 'old' was
   * exported to protocol 'p' or was filtered by the export filter.
   * We try to run the export filter to know this to have a correct
   * value in 'old' argument of rte_update (and proper filter value)
   *
   * FIXME - this is broken because 'configure soft' may change
   * filters but keep routes. Refeed is expected to be called after
   * change of the filters and with old == new, therefore we do not
   * even try to run the filter on an old route, This may lead to
   * 'spurious withdraws' but ensure that there are no 'missing
   * withdraws'.
   *
   * This is not completely safe as there is a window between
   * reconfiguration and the end of refeed - if a newly filtered
   * route disappears during this period, proper withdraw is not
   * sent (because old would be also filtered) and the route is
   * not refeeded (because it disappeared before that).
   */

  if (new)
    new = export_filter(ah, new, &new_free, &tmpa, 0);

  /* Old route is filtered silently (no stats/trace); skipped on refeed */
  if (old && !refeed)
    old = export_filter(ah, old, &old_free, NULL, 1);

  if (!new && !old)
  {
    /*
     * As mentioned above, 'old' value may be incorrect in some race conditions.
     * We generally ignore it with the exception of withdraw to pipe protocol.
     * In that case we rather propagate unfiltered withdraws regardless of
     * export filters to ensure that when a protocol is flushed, its routes are
     * removed from all tables. Possible spurious unfiltered withdraws are not
     * problem here as they are ignored if there is no corresponding route at
     * the other end of the pipe. We directly call rt_notify() hook instead of
     * do_rt_notify() to avoid logging and stat counters.
     */

#ifdef CONFIG_PIPE
    if ((p->proto == &proto_pipe) && !new0 && (p != old0->sender->proto))
      p->rt_notify(p, ah->table, net, NULL, old0, NULL);
#endif

    return;
  }

  do_rt_notify(ah, net, new, old, tmpa, refeed);

  /* Discard temporary rte's */
  if (new_free)
    rte_free(new_free);
  if (old_free)
    rte_free(old_free);
}
464

    
465
/*
 * rt_notify_accepted - announce a change in RA_ACCEPTED mode
 * @ah: announce hook of the target protocol
 * @net: network in question
 * @new_changed: the route that was added/changed, or NULL
 * @old_changed: the route that was removed/replaced, or NULL
 * @before_old: route preceding @old_changed in the net's list, or NULL
 * @feed: 0 for regular updates, 1 for feed, 2 for refeed
 *
 * In RA_ACCEPTED mode a protocol receives the first route of the network
 * accepted by its export filters. This function finds the new and old
 * such route and forwards the pair to do_rt_notify().
 */
static void
rt_notify_accepted(struct announce_hook *ah, net *net, rte *new_changed, rte *old_changed, rte *before_old, int feed)
{
  // struct proto *p = ah->proto;
  struct proto_stats *stats = ah->stats;

  rte *r;
  rte *new_best = NULL;
  rte *old_best = NULL;
  rte *new_free = NULL;
  rte *old_free = NULL;
  ea_list *tmpa = NULL;

  /* Used to track whether we met old_changed position. If before_old is NULL
     old_changed was the first and we met it implicitly before current best route. */
  int old_meet = old_changed && !before_old;

  /* Note that before_old is either NULL or valid (not rejected) route.
     If old_changed is valid, before_old have to be too. If old changed route
     was not valid, caller must use NULL for both old_changed and before_old. */

  if (new_changed)
    stats->exp_updates_received++;
  else
    stats->exp_withdraws_received++;

  /* First, find the new_best route - first accepted by filters */
  for (r=net->routes; rte_is_valid(r); r=r->next)
    {
      if (new_best = export_filter(ah, r, &new_free, &tmpa, 0))
        break;

      /* Note if we walked around the position of old_changed route */
      if (r == before_old)
        old_meet = 1;
    }

  /*
   * Second, handle the feed case. That means we do not care for
   * old_best. It is NULL for feed, and the new_best for refeed.
   * For refeed, there is a hack similar to one in rt_notify_basic()
   * to ensure withdraws in case of changed filters
   */
  if (feed)
    {
      if (feed == 2)        /* refeed */
        old_best = new_best ? new_best :
          (rte_is_valid(net->routes) ? net->routes : NULL);
      else
        old_best = NULL;

      if (!new_best && !old_best)
        return;

      goto found;
    }

  /*
   * Now, we find the old_best route. Generally, it is the same as the
   * new_best, unless new_best is the same as new_changed or
   * old_changed is accepted before new_best.
   *
   * There are four cases:
   *
   * - We would find and accept old_changed before new_best, therefore
   *   old_changed is old_best. In remaining cases we suppose this
   *   is not true.
   *
   * - We found no new_best, therefore there is also no old_best and
   *   we ignore this withdraw.
   *
   * - We found new_best different than new_changed, therefore
   *   old_best is the same as new_best and we ignore this update.
   *
   * - We found new_best the same as new_changed, therefore it cannot
   *   be old_best and we have to continue search for old_best.
   */

  /* First case */
  if (old_meet)
    if (old_best = export_filter(ah, old_changed, &old_free, NULL, 1))
      goto found;

  /* Second case */
  if (!new_best)
    return;

  /* Third case, we use r instead of new_best, because export_filter() could change it */
  if (r != new_changed)
    {
      if (new_free)
        rte_free(new_free);
      return;
    }

  /* Fourth case */
  for (r=r->next; rte_is_valid(r); r=r->next)
    {
      if (old_best = export_filter(ah, r, &old_free, NULL, 1))
        goto found;

      if (r == before_old)
        if (old_best = export_filter(ah, old_changed, &old_free, NULL, 1))
          goto found;
    }

  /* Implicitly, old_best is NULL and new_best is non-NULL */

 found:
  do_rt_notify(ah, net, new_best, old_best, tmpa, (feed == 2));

  /* Discard temporary rte's */
  if (new_free)
    rte_free(new_free);
  if (old_free)
    rte_free(old_free);
}
582

    
583

    
584
/*
 * mpnh_merge_rta - merge the next hop(s) of @a into the nexthop list @nhs,
 * keeping at most @max entries. A non-multipath rta contributes its single
 * gw/iface pair as a one-entry temporary list (copied by mpnh_merge, so the
 * stack temporary does not escape -- TODO confirm against mpnh_merge()).
 */
static struct mpnh *
mpnh_merge_rta(struct mpnh *nhs, rta *a, int max)
{
  struct mpnh nh = { .gw = a->gw, .iface = a->iface };
  struct mpnh *nh2 = (a->dest == RTD_MULTIPATH) ? a->nexthops : &nh;
  return mpnh_merge(nhs, nh2, 1, 0, max, rte_update_pool);
}
591

    
592
/*
 * rt_export_merged - build the merged (multipath) route for export
 * @ah: announce hook of the target protocol
 * @net: network whose routes are to be merged
 * @rt_free: set to the temporary rte if one was created (caller frees)
 * @tmpa: in/out temporary attributes of the best route; may be NULL
 * @silent: passed through to export_filter() for the best route
 *
 * Runs the export filter on the best route, then folds the next hops of
 * all other mergable, accepted, reachable routes into it. Returns the
 * (possibly multipath) route, or NULL when the best route was rejected.
 */
rte *
rt_export_merged(struct announce_hook *ah, net *net, rte **rt_free, ea_list **tmpa, int silent)
{
  // struct proto *p = ah->proto;
  struct mpnh *nhs = NULL;
  rte *best0, *best, *rt0, *rt, *tmp;

  best0 = net->routes;
  *rt_free = NULL;

  if (!rte_is_valid(best0))
    return NULL;

  best = export_filter(ah, best0, rt_free, tmpa, silent);

  /* A rejected or unreachable best route cannot anchor a merge */
  if (!best || !rte_is_reachable(best))
    return best;

  /* Collect next hops of the remaining mergable, accepted routes
     (their filtering is silent -- no stats, no trace) */
  for (rt0 = best0->next; rt0; rt0 = rt0->next)
  {
    if (!rte_mergable(best0, rt0))
      continue;

    rt = export_filter(ah, rt0, &tmp, NULL, 1);

    if (!rt)
      continue;

    if (rte_is_reachable(rt))
      nhs = mpnh_merge_rta(nhs, rt->attrs, ah->proto->merge_limit);

    if (tmp)
      rte_free(tmp);
  }

  if (nhs)
  {
    /* Merge the best route's own next hop(s) last */
    nhs = mpnh_merge_rta(nhs, best->attrs, ah->proto->merge_limit);

    /* More than one next hop survived -> rewrite best as multipath */
    if (nhs->next)
    {
      best = rte_cow_rta(best, rte_update_pool);
      best->attrs->dest = RTD_MULTIPATH;
      best->attrs->nexthops = nhs;
    }
  }

  if (best != best0)
    *rt_free = best;

  return best;
}
644

    
645

    
646
/*
 * rt_notify_merged - announce a change in RA_MERGED mode
 * @ah: announce hook of the target protocol
 * @net: network in question
 * @new_changed: added/changed route, or NULL
 * @old_changed: removed/replaced route, or NULL
 * @new_best: new best route of @net, or NULL
 * @old_best: old best route of @net, or NULL
 * @refeed: nonzero during refeed
 *
 * In RA_MERGED mode a protocol receives one merged (multipath) route per
 * network. Rebuilds the merged route via rt_export_merged() when the
 * change is relevant and forwards the result to do_rt_notify().
 */
static void
rt_notify_merged(struct announce_hook *ah, net *net, rte *new_changed, rte *old_changed,
                 rte *new_best, rte*old_best, int refeed)
{
  // struct proto *p = ah->proto;

  rte *new_best_free = NULL;
  rte *old_best_free = NULL;
  rte *new_changed_free = NULL;
  rte *old_changed_free = NULL;
  ea_list *tmpa = NULL;

  /* We assume that all rte arguments are either NULL or rte_is_valid() */

  /* This check should be done by the caller */
  if (!new_best && !old_best)
    return;

  /* Check whether the change is relevant to the merged route */
  if ((new_best == old_best) && !refeed)
  {
    new_changed = rte_mergable(new_best, new_changed) ?
      export_filter(ah, new_changed, &new_changed_free, NULL, 1) : NULL;

    old_changed = rte_mergable(old_best, old_changed) ?
      export_filter(ah, old_changed, &old_changed_free, NULL, 1) : NULL;

    /* Neither route participates in the merge -- nothing to re-announce */
    if (!new_changed && !old_changed)
      return;
  }

  if (new_best)
    ah->stats->exp_updates_received++;
  else
    ah->stats->exp_withdraws_received++;

  /* Prepare new merged route */
  if (new_best)
    new_best = rt_export_merged(ah, net, &new_best_free, &tmpa, 0);

  /* Prepare old merged route (without proper merged next hops) */
  /* There are some issues with running filter on old route - see rt_notify_basic() */
  if (old_best && !refeed)
    old_best = export_filter(ah, old_best, &old_best_free, NULL, 1);

  if (new_best || old_best)
    do_rt_notify(ah, net, new_best, old_best, tmpa, refeed);

  /* Discard temporary rte's */
  if (new_best_free)
    rte_free(new_best_free);
  if (old_best_free)
    rte_free(old_best_free);
  if (new_changed_free)
    rte_free(new_changed_free);
  if (old_changed_free)
    rte_free(old_changed_free);
}
704

    
705

    
706
/**
707
 * rte_announce - announce a routing table change
708
 * @tab: table the route has been added to
709
 * @type: type of route announcement (RA_OPTIMAL or RA_ANY)
710
 * @net: network in question
711
 * @new: the new route to be announced
712
 * @old: the previous route for the same network
713
 *
714
 * This function gets a routing table update and announces it
715
 * to all protocols that acccepts given type of route announcement
716
 * and are connected to the same table by their announcement hooks.
717
 *
718
 * Route announcement of type RA_OPTIMAL si generated when optimal
719
 * route (in routing table @tab) changes. In that case @old stores the
720
 * old optimal route.
721
 *
722
 * Route announcement of type RA_ANY si generated when any route (in
723
 * routing table @tab) changes In that case @old stores the old route
724
 * from the same protocol.
725
 *
726
 * For each appropriate protocol, we first call its import_control()
727
 * hook which performs basic checks on the route (each protocol has a
728
 * right to veto or force accept of the route before any filter is
729
 * asked) and adds default values of attributes specific to the new
730
 * protocol (metrics, tags etc.).  Then it consults the protocol's
731
 * export filter and if it accepts the route, the rt_notify() hook of
732
 * the protocol gets called.
733
 */
734
static void
735
rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old,
736
             rte *new_best, rte *old_best, rte *before_old)
737
{
738
  if (!rte_is_valid(new))
739
    new = NULL;
740

    
741
  if (!rte_is_valid(old))
742
    old = before_old = NULL;
743

    
744
  if (!rte_is_valid(new_best))
745
    new_best = NULL;
746

    
747
  if (!rte_is_valid(old_best))
748
    old_best = NULL;
749

    
750
  if (!old && !new)
751
    return;
752

    
753
  if (type == RA_OPTIMAL)
754
    {
755
      if (new)
756
        new->attrs->src->proto->stats.pref_routes++;
757
      if (old)
758
        old->attrs->src->proto->stats.pref_routes--;
759

    
760
      if (tab->hostcache)
761
        rt_notify_hostcache(tab, net);
762
    }
763

    
764
  struct announce_hook *a;
765
  WALK_LIST(a, tab->hooks)
766
    {
767
      ASSERT(a->proto->export_state != ES_DOWN);
768
      if (a->proto->accept_ra_types == type)
769
        if (type == RA_ACCEPTED)
770
          rt_notify_accepted(a, net, new, old, before_old, 0);
771
        else if (type == RA_MERGED)
772
          rt_notify_merged(a, net, new, old, new_best, old_best, 0);
773
        else
774
          rt_notify_basic(a, net, new, old, 0);
775
    }
776
}
777

    
778
/*
 * rte_validate - sanity-check an incoming route
 * @e: route to validate
 *
 * Rejects (with a logged warning) routes whose prefix is malformed or
 * whose destination address is not an acceptable host-reachable one.
 * Returns 1 when the route is acceptable, 0 otherwise.
 */
static inline int
rte_validate(rte *e)
{
  int c;
  net *n = e->net;

  // (n->n.pxlen > BITS_PER_IP_ADDRESS) || !ip_is_prefix(n->n.prefix,n->n.pxlen))
  if (!net_validate(n->n.addr))
  {
    log(L_WARN "Ignoring bogus prefix %N received via %s",
        n->n.addr, e->sender->proto->name);
    return 0;
  }

  /* Reject addresses that are not valid host addresses or whose scope
     is link-local or narrower */
  c = net_classify(n->n.addr);
  if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
  {
    log(L_WARN "Ignoring bogus route %N received via %s",
        n->n.addr, e->sender->proto->name);
    return 0;
  }

  return 1;
}
802

    
803
/**
804
 * rte_free - delete a &rte
805
 * @e: &rte to be deleted
806
 *
807
 * rte_free() deletes the given &rte from the routing table it's linked to.
808
 */
809
void
rte_free(rte *e)
{
  /* Drop our reference on cached attributes; uncached attributes are
     owned elsewhere (e.g. linpool-allocated temporaries) and are not
     freed here */
  if (rta_is_cached(e->attrs))
    rta_free(e->attrs);
  sl_free(rte_slab, e);
}
816

    
817
/* Free a route known to carry cached attributes -- skips the
   rta_is_cached() check done by rte_free() */
static inline void
rte_free_quick(rte *e)
{
  rta_free(e->attrs);
  sl_free(rte_slab, e);
}
823

    
824
/* Return 1 when two routes are identical: all generic fields match and
   the owning protocol (if it defines rte_same) agrees on its private data.
   Note attrs are compared by pointer -- valid because cached rta's are
   deduplicated. */
static int
rte_same(rte *x, rte *y)
{
  return
    x->attrs == y->attrs &&
    x->flags == y->flags &&
    x->pflags == y->pflags &&
    x->pref == y->pref &&
    (!x->attrs->src->proto->rte_same || x->attrs->src->proto->rte_same(x, y));
}
834

    
835
/* A route is 'ok' when it exists and was not marked as filtered on import */
static inline int rte_is_ok(rte *e) { return e && !rte_is_filtered(e); }
836

    
837
static void
838
rte_recalculate(struct announce_hook *ah, net *net, rte *new, struct rte_src *src)
839
{
840
  struct proto *p = ah->proto;
841
  struct rtable *table = ah->table;
842
  struct proto_stats *stats = ah->stats;
843
  static struct tbf rl_pipe = TBF_DEFAULT_LOG_LIMITS;
844
  rte *before_old = NULL;
845
  rte *old_best = net->routes;
846
  rte *old = NULL;
847
  rte **k;
848

    
849
  k = &net->routes;                        /* Find and remove original route from the same protocol */
850
  while (old = *k)
851
    {
852
      if (old->attrs->src == src)
853
        {
854
          /* If there is the same route in the routing table but from
855
           * a different sender, then there are two paths from the
856
           * source protocol to this routing table through transparent
857
           * pipes, which is not allowed.
858
           *
859
           * We log that and ignore the route. If it is withdraw, we
860
           * ignore it completely (there might be 'spurious withdraws',
861
           * see FIXME in do_rte_announce())
862
           */
863
          if (old->sender->proto != p)
864
            {
865
              if (new)
866
                {
867
                  log_rl(&rl_pipe, L_ERR "Pipe collision detected when sending %N to table %s",
868
                      net->n.addr, table->name);
869
                  rte_free_quick(new);
870
                }
871
              return;
872
            }
873

    
874
          if (new && rte_same(old, new))
875
            {
876
              /* No changes, ignore the new route */
877

    
878
              if (!rte_is_filtered(new))
879
                {
880
                  stats->imp_updates_ignored++;
881
                  rte_trace_in(D_ROUTES, p, new, "ignored");
882
                }
883

    
884
              rte_free_quick(new);
885
              return;
886
            }
887
          *k = old->next;
888
          break;
889
        }
890
      k = &old->next;
891
      before_old = old;
892
    }
893

    
894
  if (!old)
895
    before_old = NULL;
896

    
897
  if (!old && !new)
898
    {
899
      stats->imp_withdraws_ignored++;
900
      return;
901
    }
902

    
903
  int new_ok = rte_is_ok(new);
904
  int old_ok = rte_is_ok(old);
905

    
906
  struct proto_limit *l = ah->rx_limit;
907
  if (l && !old && new)
908
    {
909
      u32 all_routes = stats->imp_routes + stats->filt_routes;
910

    
911
      if (all_routes >= l->limit)
912
        proto_notify_limit(ah, l, PLD_RX, all_routes);
913

    
914
      if (l->state == PLS_BLOCKED)
915
        {
916
          /* In receive limit the situation is simple, old is NULL so
917
             we just free new and exit like nothing happened */
918

    
919
          stats->imp_updates_ignored++;
920
          rte_trace_in(D_FILTERS, p, new, "ignored [limit]");
921
          rte_free_quick(new);
922
          return;
923
        }
924
    }
925

    
926
  l = ah->in_limit;
927
  if (l && !old_ok && new_ok)
928
    {
929
      if (stats->imp_routes >= l->limit)
930
        proto_notify_limit(ah, l, PLD_IN, stats->imp_routes);
931

    
932
      if (l->state == PLS_BLOCKED)
933
        {
934
          /* In import limit the situation is more complicated. We
935
             shouldn't just drop the route, we should handle it like
936
             it was filtered. We also have to continue the route
937
             processing if old or new is non-NULL, but we should exit
938
             if both are NULL as this case is probably assumed to be
939
             already handled. */
940

    
941
          stats->imp_updates_ignored++;
942
          rte_trace_in(D_FILTERS, p, new, "ignored [limit]");
943

    
944
          if (ah->in_keep_filtered)
945
            new->flags |= REF_FILTERED;
946
          else
947
            { rte_free_quick(new); new = NULL; }
948

    
949
          /* Note that old && !new could be possible when
950
             ah->in_keep_filtered changed in the recent past. */
951

    
952
          if (!old && !new)
953
            return;
954

    
955
          new_ok = 0;
956
          goto skip_stats1;
957
        }
958
    }
959

    
960
  if (new_ok)
961
    stats->imp_updates_accepted++;
962
  else if (old_ok)
963
    stats->imp_withdraws_accepted++;
964
  else
965
    stats->imp_withdraws_ignored++;
966

    
967
 skip_stats1:
968

    
969
  if (new)
970
    rte_is_filtered(new) ? stats->filt_routes++ : stats->imp_routes++;
971
  if (old)
972
    rte_is_filtered(old) ? stats->filt_routes-- : stats->imp_routes--;
973

    
974
  if (table->config->sorted)
975
    {
976
      /* If routes are sorted, just insert new route to appropriate position */
977
      if (new)
978
        {
979
          if (before_old && !rte_better(new, before_old))
980
            k = &before_old->next;
981
          else
982
            k = &net->routes;
983

    
984
          for (; *k; k=&(*k)->next)
985
            if (rte_better(new, *k))
986
              break;
987

    
988
          new->next = *k;
989
          *k = new;
990
        }
991
    }
992
  else
993
    {
994
      /* If routes are not sorted, find the best route and move it on
995
         the first position. There are several optimized cases. */
996

    
997
      if (src->proto->rte_recalculate && src->proto->rte_recalculate(table, net, new, old, old_best))
998
        goto do_recalculate;
999

    
1000
      if (new && rte_better(new, old_best))
1001
        {
1002
          /* The first case - the new route is cleary optimal,
1003
             we link it at the first position */
1004

    
1005
          new->next = net->routes;
1006
          net->routes = new;
1007
        }
1008
      else if (old == old_best)
1009
        {
1010
          /* The second case - the old best route disappeared, we add the
1011
             new route (if we have any) to the list (we don't care about
1012
             position) and then we elect the new optimal route and relink
1013
             that route at the first position and announce it. New optimal
1014
             route might be NULL if there is no more routes */
1015

    
1016
        do_recalculate:
1017
          /* Add the new route to the list */
1018
          if (new)
1019
            {
1020
              new->next = net->routes;
1021
              net->routes = new;
1022
            }
1023

    
1024
          /* Find a new optimal route (if there is any) */
1025
          if (net->routes)
1026
            {
1027
              rte **bp = &net->routes;
1028
              for (k=&(*bp)->next; *k; k=&(*k)->next)
1029
                if (rte_better(*k, *bp))
1030
                  bp = k;
1031

    
1032
              /* And relink it */
1033
              rte *best = *bp;
1034
              *bp = best->next;
1035
              best->next = net->routes;
1036
              net->routes = best;
1037
            }
1038
        }
1039
      else if (new)
1040
        {
1041
          /* The third case - the new route is not better than the old
1042
             best route (therefore old_best != NULL) and the old best
1043
             route was not removed (therefore old_best == net->routes).
1044
             We just link the new route after the old best route. */
1045

    
1046
          ASSERT(net->routes != NULL);
1047
          new->next = net->routes->next;
1048
          net->routes->next = new;
1049
        }
1050
      /* The fourth (empty) case - suboptimal route was removed, nothing to do */
1051
    }
1052

    
1053
  if (new)
1054
    new->lastmod = now;
1055

    
1056
  /* Log the route change */
1057
  if (p->debug & D_ROUTES)
1058
    {
1059
      if (new_ok)
1060
        rte_trace(p, new, '>', new == net->routes ? "added [best]" : "added");
1061
      else if (old_ok)
1062
        {
1063
          if (old != old_best)
1064
            rte_trace(p, old, '>', "removed");
1065
          else if (rte_is_ok(net->routes))
1066
            rte_trace(p, old, '>', "removed [replaced]");
1067
          else
1068
            rte_trace(p, old, '>', "removed [sole]");
1069
        }
1070
    }
1071

    
1072
  /* Propagate the route change */
1073
  rte_announce(table, RA_ANY, net, new, old, NULL, NULL, NULL);
1074
  if (net->routes != old_best)
1075
    rte_announce(table, RA_OPTIMAL, net, net->routes, old_best, NULL, NULL, NULL);
1076
  if (table->config->sorted)
1077
    rte_announce(table, RA_ACCEPTED, net, new, old, NULL, NULL, before_old);
1078
  rte_announce(table, RA_MERGED, net, new, old, net->routes, old_best, NULL);
1079

    
1080
  if (!net->routes &&
1081
      (table->gc_counter++ >= table->config->gc_max_ops) &&
1082
      (table->gc_time + table->config->gc_min_time <= now))
1083
    rt_schedule_gc(table);
1084

    
1085
  if (old_ok && p->rte_remove)
1086
    p->rte_remove(net, old);
1087
  if (new_ok && p->rte_insert)
1088
    p->rte_insert(net, new);
1089

    
1090
  if (old)
1091
    rte_free_quick(old);
1092
}
1093

    
1094
static int rte_update_nest_cnt;		/* Nesting counter to allow recursive updates */

/* Enter a (possibly nested) route update region. */
static inline void
rte_update_lock(void)
{
  rte_update_nest_cnt++;
}

/* Leave an update region; the temporary linpool is flushed only when
   the outermost region ends, so nested updates may keep using it. */
static inline void
rte_update_unlock(void)
{
  if (!--rte_update_nest_cnt)
    lp_flush(rte_update_pool);
}
1108

    
1109
/* Temporarily unlink a leading RTS_DUMMY route from the net's route list,
   remembering it in *dummy so rte_unhide_dummy_routes() can restore it. */
static inline void
rte_hide_dummy_routes(net *net, rte **dummy)
{
  if (net->routes && net->routes->attrs->source == RTS_DUMMY)
  {
    *dummy = net->routes;
    net->routes = (*dummy)->next;
  }
}
1118

    
1119
/* Relink the dummy route hidden by rte_hide_dummy_routes() back to the
   head of the net's route list; no-op when *dummy is NULL. */
static inline void
rte_unhide_dummy_routes(net *net, rte **dummy)
{
  if (*dummy)
  {
    (*dummy)->next = net->routes;
    net->routes = *dummy;
  }
}
1128

    
1129
/**
 * rte_update2 - enter a new update to a routing table
 * @ah: pointer to table announce hook (also determines the target table)
 * @net: network node
 * @new: a &rte representing the new route or %NULL for route removal.
 * @src: protocol originating the update
 *
 * This function is called by the routing protocols whenever they discover
 * a new route or wish to update/remove an existing route. The right announcement
 * sequence is to build route attributes first (either un-cached with @aflags set
 * to zero or a cached one using rta_lookup(); in this case please note that
 * you need to increase the use count of the attributes yourself by calling
 * rta_clone()), call rte_get_temp() to obtain a temporary &rte, fill in all
 * the appropriate data and finally submit the new &rte by calling rte_update().
 *
 * @src specifies the protocol that originally created the route and the meaning
 * of protocol-dependent data of @new. If @new is not %NULL, @src have to be the
 * same value as @new->attrs->proto. @ah->proto is the protocol that called
 * rte_update2() and is stored into @new->sender; in most cases it is the same
 * protocol as @src.
 *
 * When rte_update2() gets any route, it automatically validates it (checks,
 * whether the network and next hop address are valid IP addresses and also
 * whether a normal routing protocol doesn't try to smuggle a host or link
 * scope route to the table), converts all protocol dependent attributes stored
 * in the &rte to temporary extended attributes, consults import filters of the
 * protocol to see if the route should be accepted and/or its attributes modified,
 * stores the temporary attributes back to the &rte.
 *
 * Now, having a "public" version of the route, we
 * automatically find any old route defined by the protocol @src
 * for network @net, replace it by the new one (or removing it if @new is %NULL),
 * recalculate the optimal route for this destination and finally broadcast
 * the change (if any) to all routing protocols by calling rte_announce().
 *
 * All memory used for attribute lists and other temporary allocations is taken
 * from a special linear pool @rte_update_pool and freed when rte_update2()
 * finishes.
 */
void
rte_update2(struct announce_hook *ah, net *net, rte *new, struct rte_src *src)
{
  struct proto *p = ah->proto;
  struct proto_stats *stats = ah->stats;
  struct filter *filter = ah->in_filter;
  ea_list *tmpa = NULL;
  rte *dummy = NULL;

  rte_update_lock();
  if (new)
    {
      new->sender = ah;

      stats->imp_updates_received++;
      if (!rte_validate(new))
	{
	  rte_trace_in(D_FILTERS, p, new, "invalid");
	  stats->imp_updates_invalid++;
	  goto drop;
	}

      if (filter == FILTER_REJECT)
	{
	  stats->imp_updates_filtered++;
	  rte_trace_in(D_FILTERS, p, new, "filtered out");

	  /* With in_keep_filtered the rejected route is kept in the
	     table, just marked as filtered. */
	  if (! ah->in_keep_filtered)
	    goto drop;

	  /* new is a private copy, i could modify it */
	  new->flags |= REF_FILTERED;
	}
      else
	{
	  tmpa = make_tmp_attrs(new, rte_update_pool);
	  if (filter && (filter != FILTER_REJECT))
	    {
	      ea_list *old_tmpa = tmpa;
	      int fr = f_run(filter, &new, &tmpa, rte_update_pool, 0);
	      if (fr > F_ACCEPT)
		{
		  stats->imp_updates_filtered++;
		  rte_trace_in(D_FILTERS, p, new, "filtered out");

		  if (! ah->in_keep_filtered)
		    goto drop;

		  new->flags |= REF_FILTERED;
		}
	      /* The filter may have replaced tmpa; give the source protocol
		 a chance to store the modified attributes back into the rte. */
	      if (tmpa != old_tmpa && src->proto->store_tmp_attrs)
		src->proto->store_tmp_attrs(new, tmpa);
	    }
	}
      if (!rta_is_cached(new->attrs)) /* Need to copy attributes */
	new->attrs = rta_lookup(new->attrs);
      new->flags |= REF_COW;
    }
  else
    {
      stats->imp_withdraws_received++;

      if (!net || !src)
	{
	  stats->imp_withdraws_ignored++;
	  rte_update_unlock();
	  return;
	}
    }

 recalc:
  rte_hide_dummy_routes(net, &dummy);
  rte_recalculate(ah, net, new, src);
  rte_unhide_dummy_routes(net, &dummy);
  rte_update_unlock();
  return;

 drop:
  /* Dropped updates still go through rte_recalculate() with new == NULL
     so a previously accepted route from the same source gets withdrawn. */
  rte_free(new);
  new = NULL;
  goto recalc;
}
1253

    
1254
/* Independent call to rte_announce(), used from next hop
   recalculation, outside of rte_update(). new must be non-NULL */
static inline void
rte_announce_i(rtable *tab, unsigned type, net *net, rte *new, rte *old,
	       rte *new_best, rte *old_best)
{
  /* Lock/unlock so the announce path may use rte_update_pool. */
  rte_update_lock();
  rte_announce(tab, type, net, new, old, new_best, old_best, NULL);
  rte_update_unlock();
}
1264

    
1265
void
rte_discard(rtable *t, rte *old)	/* Non-filtered route deletion, used during garbage collection */
{
  /* NOTE(review): @t is unused here — the table is reached through
     old->sender; parameter kept for interface compatibility. */
  rte_update_lock();
  rte_recalculate(old->sender, old->net, NULL, old->attrs->src);
  rte_update_unlock();
}
1272

    
1273
/* Check rtable for best route to given net whether it would be exported to p */
int
rt_examine(rtable *t, net_addr *a, struct proto *p, struct filter *filter)
{
  net *n = net_find(t, a);
  rte *rt = n ? n->routes : NULL;

  if (!rte_is_valid(rt))
    return 0;

  rte_update_lock();

  /* Rest is stripped down export_filter() */
  ea_list *tmpa = make_tmp_attrs(rt, rte_update_pool);
  int v = p->import_control ? p->import_control(p, &rt, &tmpa, rte_update_pool) : 0;
  if (v == RIC_PROCESS)
    v = (f_run(filter, &rt, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) <= F_ACCEPT);

  /* Discard temporary rte (import_control/f_run may have COW-copied it) */
  if (rt != n->routes)
    rte_free(rt);

  rte_update_unlock();

  return v > 0;
}
1299

    
1300

    
1301
/**
 * rt_refresh_begin - start a refresh cycle
 * @t: related routing table
 * @ah: related announce hook
 *
 * This function starts a refresh cycle for given routing table and announce
 * hook. The refresh cycle is a sequence where the protocol sends all its valid
 * routes to the routing table (by rte_update()). After that, all protocol
 * routes (more precisely routes with @ah as @sender) not sent during the
 * refresh cycle but still in the table from the past are pruned. This is
 * implemented by marking all related routes as stale by REF_STALE flag in
 * rt_refresh_begin(), then marking all related stale routes with REF_DISCARD
 * flag in rt_refresh_end() and then removing such routes in the prune loop.
 */
void
rt_refresh_begin(rtable *t, struct announce_hook *ah)
{
  FIB_WALK(&t->fib, net, n)
    {
      rte *e;
      for (e = n->routes; e; e = e->next)
	if (e->sender == ah)
	  e->flags |= REF_STALE;
    }
  FIB_WALK_END;
}
1327

    
1328
/**
 * rt_refresh_end - end a refresh cycle
 * @t: related routing table
 * @ah: related announce hook
 *
 * This function ends a refresh cycle for given routing table and announce
 * hook: routes still marked REF_STALE (i.e. not re-sent during the cycle)
 * are marked REF_DISCARD and a prune is scheduled to remove them.
 * See rt_refresh_begin() for description of refresh cycles.
 */
void
rt_refresh_end(rtable *t, struct announce_hook *ah)
{
  int prune = 0;

  FIB_WALK(&t->fib, net, n)
    {
      rte *e;
      for (e = n->routes; e; e = e->next)
	if ((e->sender == ah) && (e->flags & REF_STALE))
	  {
	    e->flags |= REF_DISCARD;
	    prune = 1;
	  }
    }
  FIB_WALK_END;

  /* Schedule the prune only when something was actually marked */
  if (prune)
    rt_schedule_prune(t);
}
1356

    
1357

    
1358
/**
 * rte_dump - dump a route
 * @e: &rte to be dumped
 *
 * This functions dumps contents of a &rte to debug output.
 */
void
rte_dump(rte *e)
{
  net *n = e->net;
  debug("%-1N ", n->n.addr);
  debug("KF=%02x PF=%02x pref=%d lm=%d ", n->n.flags, e->pflags, e->pref, now-e->lastmod);
  rta_dump(e->attrs);
  /* Let the originating protocol dump its protocol-specific attributes */
  if (e->attrs->src->proto->proto->dump_attrs)
    e->attrs->src->proto->proto->dump_attrs(e);
  debug("\n");
}
1375

    
1376
/**
 * rt_dump - dump a routing table
 * @t: routing table to be dumped
 *
 * This function dumps contents of a given routing table to debug output.
 */
void
rt_dump(rtable *t)
{
  debug("Dump of routing table <%s>\n", t->name);
#ifdef DEBUGGING
  fib_check(&t->fib);
#endif
  FIB_WALK(&t->fib, net, n)
    {
      rte *e;
      for(e=n->routes; e; e=e->next)
	rte_dump(e);
    }
  FIB_WALK_END;

  /* Also list the protocols this table announces routes to */
  struct announce_hook *a;
  WALK_LIST(a, t->hooks)
    debug("\tAnnounces routes to protocol %s\n", a->proto->name);
  debug("\n");
}
1402

    
1403
/**
 * rt_dump_all - dump all routing tables
 *
 * This function dumps contents of all routing tables to debug output.
 */
void
rt_dump_all(void)
{
  rtable *t;

  WALK_LIST(t, routing_tables)
    rt_dump(t);
}
1416

    
1417
/* Mark the table for pruning and schedule its event to run the prune. */
static inline void
rt_schedule_prune(rtable *tab)
{
  rt_mark_for_prune(tab);
  ev_schedule(tab->rt_event);
}
1423

    
1424
/* Schedule a garbage collection of orphaned FIB entries; idempotent. */
static inline void
rt_schedule_gc(rtable *tab)
{
  if (tab->gc_scheduled)
    return;

  tab->gc_scheduled = 1;
  ev_schedule(tab->rt_event);
}
1433

    
1434
/* Schedule a hostcache update for the table; idempotent. */
static inline void
rt_schedule_hcu(rtable *tab)
{
  if (tab->hcu_scheduled)
    return;

  tab->hcu_scheduled = 1;
  ev_schedule(tab->rt_event);
}
1443

    
1444
/* Schedule a next-hop update. nhu_state encodes a small state machine:
   bit 0 = scheduled, bit 1 = running (see rt_next_hop_update()). */
static inline void
rt_schedule_nhu(rtable *tab)
{
  if (tab->nhu_state == 0)
    ev_schedule(tab->rt_event);

  /* state change 0->1, 2->3 */
  tab->nhu_state |= 1;
}
1453

    
1454

    
1455
/* Remove orphaned FIB entries (nets with no routes) from the table
   and reset the GC bookkeeping counters. */
static void
rt_prune_nets(rtable *tab)
{
  struct fib_iterator fit;
  int ncnt = 0, ndel = 0;

#ifdef DEBUGGING
  fib_check(&tab->fib);
#endif

  FIB_ITERATE_INIT(&fit, &tab->fib);
again:
  FIB_ITERATE_START(&tab->fib, &fit, net, n)
    {
      ncnt++;
      if (!n->routes)		/* Orphaned FIB entry */
	{
	  /* Park the iterator before mutating the FIB, then restart */
	  FIB_ITERATE_PUT(&fit);
	  fib_delete(&tab->fib, n);
	  ndel++;
	  goto again;
	}
    }
  FIB_ITERATE_END;
  DBG("Pruned %d of %d networks\n", ndel, ncnt);

  tab->gc_counter = 0;
  tab->gc_time = now;
  tab->gc_scheduled = 0;
}
1485

    
1486
/* Per-table event hook: performs whatever maintenance work has been
   scheduled (hostcache update, next-hop update, prune, GC). */
static void
rt_event(void *ptr)
{
  rtable *tab = ptr;

  if (tab->hcu_scheduled)
    rt_update_hostcache(tab);

  if (tab->nhu_state)
    rt_next_hop_update(tab);

  if (tab->prune_state)
    if (!rt_prune_table(tab))
      {
	/* Table prune unfinished — reschedule and skip GC for now */
	ev_schedule(tab->rt_event);
	return;
      }

  if (tab->gc_scheduled)
    {
      rt_prune_nets(tab);
      rt_prune_sources(); // FIXME this should be moved to independent event
    }
}
1511

    
1512
/* Initialize a routing table structure. @cf may be NULL for internal
   tables (e.g. hostcache); such tables get no maintenance event. */
void
rt_setup(pool *p, rtable *t, char *name, struct rtable_config *cf)
{
  bzero(t, sizeof(*t));
  t->name = name;
  t->config = cf;
  t->addr_type = cf ? cf->addr_type : NET_IP4;
  fib_init(&t->fib, p, t->addr_type, sizeof(net), OFFSETOF(net, n), 0, NULL);
  init_list(&t->hooks);
  if (cf)
    {
      t->rt_event = ev_new(p);
      t->rt_event->hook = rt_event;
      t->rt_event->data = t;
      t->gc_time = now;
    }
}
1529

    
1530
/**
 * rt_init - initialize routing tables
 *
 * This function is called during BIRD startup. It initializes the
 * routing table module.
 */
void
rt_init(void)
{
  rta_init();
  rt_table_pool = rp_new(&root_pool, "Routing tables");
  rte_update_pool = lp_new(rt_table_pool, 4080);
  rte_slab = sl_new(rt_table_pool, sizeof(rte));
  init_list(&routing_tables);
}
1545

    
1546

    
1547
/* One bounded step of table pruning. Removes routes whose sender is
   flushing or that carry REF_DISCARD, up to *limit routes; decrements
   *limit per removed route. Returns 1 when the table is fully pruned,
   0 when the step ran out of budget (iterator position is saved). */
static int
rt_prune_step(rtable *tab, int *limit)
{
  struct fib_iterator *fit = &tab->prune_fit;

  DBG("Pruning route table %s\n", tab->name);
#ifdef DEBUGGING
  fib_check(&tab->fib);
#endif

  if (tab->prune_state == RPS_NONE)
    return 1;

  if (tab->prune_state == RPS_SCHEDULED)
    {
      FIB_ITERATE_INIT(fit, &tab->fib);
      tab->prune_state = RPS_RUNNING;
    }

again:
  FIB_ITERATE_START(&tab->fib, fit, net, n)
    {
      rte *e;

    rescan:
      for (e=n->routes; e; e=e->next)
	if (e->sender->proto->flushing || (e->flags & REF_DISCARD))
	  {
	    if (*limit <= 0)
	      {
		/* Budget exhausted — remember position and yield */
		FIB_ITERATE_PUT(fit);
		return 0;
	      }

	    rte_discard(tab, e);
	    (*limit)--;

	    /* The list changed under us; rescan this net from the start */
	    goto rescan;
	  }
      if (!n->routes)		/* Orphaned FIB entry */
	{
	  FIB_ITERATE_PUT(fit);
	  fib_delete(&tab->fib, n);
	  goto again;
	}
    }
  FIB_ITERATE_END;

#ifdef DEBUGGING
  fib_check(&tab->fib);
#endif

  tab->prune_state = RPS_NONE;
  return 1;
}
1602

    
1603
/**
 * rt_prune_table - prune a routing table
 * @tab: routing table to be pruned
 *
 * This function scans the routing table @tab and removes routes belonging to
 * flushing protocols, discarded routes and also stale network entries, in a
 * similar fashion like rt_prune_loop(). Returns 1 when all such routes are
 * pruned. Contrary to rt_prune_loop(), this function is not a part of the
 * protocol flushing loop, but it is called from rt_event() for just one routing
 * table.
 *
 * Note that rt_prune_table() and rt_prune_loop() share (for each table) the
 * prune state (@prune_state) and also the pruning iterator (@prune_fit).
 */
static inline int
rt_prune_table(rtable *tab)
{
  int limit = 512;
  return rt_prune_step(tab, &limit);
}
1622

    
1623
/**
 * rt_prune_loop - prune routing tables
 *
 * The prune loop scans routing tables and removes routes belonging to flushing
 * protocols, discarded routes and also stale network entries. Returns 1 when
 * all such routes are pruned. It is a part of the protocol flushing loop.
 * The 512-route budget is shared across all tables in one pass.
 */
int
rt_prune_loop(void)
{
  int limit = 512;
  rtable *t;

  WALK_LIST(t, routing_tables)
    if (! rt_prune_step(t, &limit))
      return 0;

  return 1;
}
1642

    
1643
/* Pre-configuration hook: create the implicit "master" table config. */
void
rt_preconfig(struct config *c)
{
  struct symbol *s = cf_get_symbol("master");

  init_list(&c->tables);
  c->master_rtc = rt_new_table(s, NET_IP4);
}
1651

    
1652

    
1653
/*
 * Some functions for handing internal next hop updates
 * triggered by rt_schedule_nhu().
 */

/* Return nonzero when the cached next-hop data in @a no longer matches
   the current hostentry (i.e. the route needs a next-hop update). */
static inline int
rta_next_hop_outdated(rta *a)
{
  struct hostentry *he = a->hostentry;

  if (!he)
    return 0;

  /* No source route for the host: route should be unreachable */
  if (!he->src)
    return a->dest != RTD_UNREACHABLE;

  return (a->iface != he->src->iface) || !ipa_equal(a->gw, he->gw) ||
    (a->dest != he->dest) || (a->igp_metric != he->igp_metric) ||
    !mpnh_same(a->nexthops, he->src->nexthops);
}
1673

    
1674
/* Copy next-hop related fields from the hostentry into the rta. */
static inline void
rta_apply_hostentry(rta *a, struct hostentry *he)
{
  a->hostentry = he;
  a->iface = he->src ? he->src->iface : NULL;
  a->gw = he->gw;
  a->dest = he->dest;
  a->igp_metric = he->igp_metric;
  a->nexthops = he->src ? he->src->nexthops : NULL;
}
1684

    
1685
/* Build a replacement rte for @old with refreshed next-hop attributes.
   The new rte gets a freshly cached rta; @old itself is not modified. */
static inline rte *
rt_next_hop_update_rte(rtable *tab, rte *old)
{
  rta a;
  memcpy(&a, old->attrs, sizeof(rta));
  rta_apply_hostentry(&a, old->attrs->hostentry);
  a.aflags = 0;		/* uncached copy; rta_lookup() below caches it */

  rte *e = sl_alloc(rte_slab);
  memcpy(e, old, sizeof(rte));
  e->attrs = rta_lookup(&a);

  return e;
}
1699

    
1700
/* Refresh next-hop data of all outdated routes on net @n, re-elect the
   best route and announce the changes. Returns the number of updated
   routes (used as work budget by rt_next_hop_update()). */
static inline int
rt_next_hop_update_net(rtable *tab, net *n)
{
  rte **k, *e, *new, *old_best, **new_best;
  int count = 0;
  int free_old_best = 0;

  old_best = n->routes;
  if (!old_best)
    return 0;

  /* Replace each outdated rte in place in the list */
  for (k = &n->routes; e = *k; k = &e->next)
    if (rta_next_hop_outdated(e->attrs))
      {
	new = rt_next_hop_update_rte(tab, e);
	*k = new;

	rte_announce_i(tab, RA_ANY, n, new, e, NULL, NULL);
	rte_trace_in(D_ROUTES, new->sender->proto, new, "updated");

	/* Call a pre-comparison hook */
	/* Not really an efficient way to compute this */
	if (e->attrs->src->proto->rte_recalculate)
	  e->attrs->src->proto->rte_recalculate(tab, n, new, e, NULL);

	if (e != old_best)
	  rte_free_quick(e);
	else /* Freeing of the old best rte is postponed */
	  free_old_best = 1;

	e = new;
	count++;
      }

  if (!count)
    return 0;

  /* Find the new best route */
  new_best = NULL;
  for (k = &n->routes; e = *k; k = &e->next)
    {
      if (!new_best || rte_better(e, *new_best))
	new_best = k;
    }

  /* Relink the new best route to the first position */
  new = *new_best;
  if (new != n->routes)
    {
      *new_best = new->next;
      new->next = n->routes;
      n->routes = new;
    }

  /* Announce the new best route */
  if (new != old_best)
    {
      rte_announce_i(tab, RA_OPTIMAL, n, new, old_best, NULL, NULL);
      rte_trace_in(D_ROUTES, new->sender->proto, new, "updated [best]");
    }

  /* FIXME: Better announcement of merged routes */
  rte_announce_i(tab, RA_MERGED, n, new, old_best, new, old_best);

  /* old_best was still referenced by the announcements above */
  if (free_old_best)
    rte_free_quick(old_best);

  return count;
}
1769

    
1770
/* Incremental next-hop update over the whole table, driven by rt_event().
   nhu_state: 0 = idle, 1 = scheduled, 2 = running, 3 = running with a
   new request pending (re-run after finishing). */
static void
rt_next_hop_update(rtable *tab)
{
  struct fib_iterator *fit = &tab->nhu_fit;
  int max_feed = 32;

  if (tab->nhu_state == 0)
    return;

  if (tab->nhu_state == 1)
    {
      FIB_ITERATE_INIT(fit, &tab->fib);
      tab->nhu_state = 2;
    }

  FIB_ITERATE_START(&tab->fib, fit, net, n)
    {
      if (max_feed <= 0)
	{
	  /* Budget exhausted — save position and continue later */
	  FIB_ITERATE_PUT(fit);
	  ev_schedule(tab->rt_event);
	  return;
	}
      max_feed -= rt_next_hop_update_net(tab, n);
    }
  FIB_ITERATE_END;

  /* state change 2->0, 3->1 */
  tab->nhu_state &= 1;

  if (tab->nhu_state > 0)
    ev_schedule(tab->rt_event);
}
1803

    
1804

    
1805
/* Define a new routing table in the configuration being parsed and
   return its config structure with GC defaults filled in. */
struct rtable_config *
rt_new_table(struct symbol *s, uint addr_type)
{
  /* Hack that allows to 'redefine' the master table */
  if ((s->class == SYM_TABLE) && (s->def == new_config->master_rtc))
    return s->def;

  struct rtable_config *c = cfg_allocz(sizeof(struct rtable_config));

  cf_define_symbol(s, SYM_TABLE, c);
  c->name = s->name;
  c->addr_type = addr_type;
  add_tail(&new_config->tables, &c->n);
  c->gc_max_ops = 1000;
  c->gc_min_time = 5;
  return c;
}
1822

    
1823
/**
 * rt_lock_table - lock a routing table
 * @r: routing table to be locked
 *
 * Lock a routing table, because it's in use by a protocol,
 * preventing it from being freed when it gets undefined in a new
 * configuration.
 */
void
rt_lock_table(rtable *r)
{
  r->use_count++;
}
1836

    
1837
/**
 * rt_unlock_table - unlock a routing table
 * @r: routing table to be unlocked
 *
 * Unlock a routing table formerly locked by rt_lock_table(),
 * that is decrease its use count and delete it if it's scheduled
 * for deletion by configuration changes.
 */
void
rt_unlock_table(rtable *r)
{
  if (!--r->use_count && r->deleted)
    {
      struct config *conf = r->deleted;
      DBG("Deleting routing table %s\n", r->name);
      r->config->table = NULL;
      if (r->hostcache)
	rt_free_hostcache(r);
      rem_node(&r->n);
      fib_free(&r->fib);
      rfree(r->rt_event);
      mb_free(r);
      /* The old config may now be freed once all obstacles are gone */
      config_del_obstacle(conf);
    }
}
1862

    
1863
/**
 * rt_commit - commit new routing table configuration
 * @new: new configuration
 * @old: original configuration or %NULL if it's boot time config
 *
 * Scan differences between @old and @new configuration and modify
 * the routing tables according to these changes. If @new defines a
 * previously unknown table, create it, if it omits a table existing
 * in @old, schedule it for deletion (it gets deleted when all protocols
 * disconnect from it by calling rt_unlock_table()), if it exists
 * in both configurations, leave it unchanged.
 */
void
rt_commit(struct config *new, struct config *old)
{
  struct rtable_config *o, *r;

  DBG("rt_commit:\n");
  if (old)
    {
      WALK_LIST(o, old->tables)
	{
	  rtable *ot = o->table;
	  if (!ot->deleted)
	    {
	      struct symbol *sym = cf_find_symbol(new, o->name);
	      if (sym && sym->class == SYM_TABLE && !new->shutdown)
		{
		  /* Table survives — re-point it at the new config */
		  DBG("\t%s: same\n", o->name);
		  r = sym->def;
		  r->table = ot;
		  ot->name = r->name;
		  ot->config = r;
		  if (o->sorted != r->sorted)
		    log(L_WARN "Reconfiguration of rtable sorted flag not implemented");
		}
	      else
		{
		  /* Table disappeared — schedule deletion; the transient
		     lock/unlock pair frees it immediately when unused */
		  DBG("\t%s: deleted\n", o->name);
		  ot->deleted = old;
		  config_add_obstacle(old);
		  rt_lock_table(ot);
		  rt_unlock_table(ot);
		}
	    }
	}
    }

  /* Create tables that are new in this configuration */
  WALK_LIST(r, new->tables)
    if (!r->table)
      {
	rtable *t = mb_alloc(rt_table_pool, sizeof(struct rtable));
	DBG("\t%s: created\n", r->name);
	rt_setup(rt_table_pool, t, r->name, r);
	add_tail(&routing_tables, &t->n);
	r->table = t;
      }
  DBG("\tdone\n");
}
1922

    
1923
/* Feed a single route to a protocol, dispatching on the announcement
   type it accepts. During refeed the route is passed as both new and old. */
static inline void
do_feed_baby(struct proto *p, int type, struct announce_hook *h, net *n, rte *e)
{
  rte_update_lock();
  if (type == RA_ACCEPTED)
    rt_notify_accepted(h, n, e, NULL, NULL, p->refeeding ? 2 : 1);
  else if (type == RA_MERGED)
    rt_notify_merged(h, n, NULL, NULL, e, p->refeeding ? e : NULL, p->refeeding);
  else
    rt_notify_basic(h, n, e, p->refeeding ? e : NULL, p->refeeding);
  rte_update_unlock();
}
1935

    
1936
/**
 * rt_feed_baby - advertise routes to a new protocol
 * @p: protocol to be fed
 *
 * This function performs one pass of advertisement of routes to a newly
 * initialized protocol. It's called by the protocol code as long as it
 * has something to do. (We avoid transferring all the routes in single
 * pass in order not to monopolize CPU time.)
 *
 * Returns 0 when the feed budget was exhausted (call again later),
 * 1 when the feed is complete or the protocol stopped feeding.
 */
int
rt_feed_baby(struct proto *p)
{
  struct announce_hook *h;
  struct fib_iterator *fit;
  int max_feed = 256;

  if (!p->feed_ahook)			/* Need to initialize first */
    {
      if (!p->ahooks)
	return 1;
      DBG("Announcing routes to new protocol %s\n", p->name);
      p->feed_ahook = p->ahooks;
      fit = p->feed_iterator = mb_alloc(p->pool, sizeof(struct fib_iterator));
      goto next_hook;
    }
  fit = p->feed_iterator;

again:
  h = p->feed_ahook;
  FIB_ITERATE_START(&h->table->fib, fit, net, n)
    {
      rte *e = n->routes;
      if (max_feed <= 0)
	{
	  /* Budget exhausted — save position and yield */
	  FIB_ITERATE_PUT(fit);
	  return 0;
	}

      /* XXXX perhaps we should change feed for RA_ACCEPTED to not use 'new' */

      if ((p->accept_ra_types == RA_OPTIMAL) ||
	  (p->accept_ra_types == RA_ACCEPTED) ||
	  (p->accept_ra_types == RA_MERGED))
	if (rte_is_valid(e))
	  {
	    if (p->export_state != ES_FEEDING)
	      return 1;  /* In the meantime, the protocol fell down. */

	    do_feed_baby(p, p->accept_ra_types, h, n, e);
	    max_feed--;
	  }

      if (p->accept_ra_types == RA_ANY)
	for(e = n->routes; e; e = e->next)
	  {
	    if (p->export_state != ES_FEEDING)
	      return 1;  /* In the meantime, the protocol fell down. */

	    if (!rte_is_valid(e))
	      continue;

	    do_feed_baby(p, RA_ANY, h, n, e);
	    max_feed--;
	  }
    }
  FIB_ITERATE_END;
  /* Current hook finished — move on to the next one, if any */
  p->feed_ahook = h->next;
  if (!p->feed_ahook)
    {
      mb_free(p->feed_iterator);
      p->feed_iterator = NULL;
      return 1;
    }

next_hook:
  h = p->feed_ahook;
  FIB_ITERATE_INIT(fit, &h->table->fib);
  goto again;
}
2015

    
2016
/**
 * rt_feed_baby_abort - abort protocol feeding
 * @p: protocol
 *
 * This function is called by the protocol code when the protocol
 * stops or ceases to exist before the last iteration of rt_feed_baby()
 * has finished.
 */
void
rt_feed_baby_abort(struct proto *p)
{
  if (p->feed_ahook)
    {
      /* Unlink the iterator and exit */
      fit_get(&p->feed_ahook->table->fib, p->feed_iterator);
      p->feed_ahook = NULL;
    }
}
2034

    
2035

    
2036
/* Mix the bits of a pointer into a small hash value. */
static inline unsigned
ptr_hash(void *ptr)
{
  uintptr_t v = (uintptr_t) ptr;
  uintptr_t mixed = v;

  mixed ^= v << 8;
  mixed ^= v >> 16;

  return mixed;
}
2042

    
2043
static inline unsigned
2044
hc_hash(ip_addr a, rtable *dep)
2045
{
2046
  return (ipa_hash(a) ^ ptr_hash(dep)) & 0xffff;
2047
}
2048

    
2049
static inline void
2050
hc_insert(struct hostcache *hc, struct hostentry *he)
2051
{
2052
  uint k = he->hash_key >> hc->hash_shift;
2053
  he->next = hc->hash_table[k];
2054
  hc->hash_table[k] = he;
2055
}
2056

    
2057
static inline void
2058
hc_remove(struct hostcache *hc, struct hostentry *he)
2059
{
2060
  struct hostentry **hep;
2061
  uint k = he->hash_key >> hc->hash_shift;
2062

    
2063
  for (hep = &hc->hash_table[k]; *hep != he; hep = &(*hep)->next);
2064
  *hep = he->next;
2065
}
2066

    
2067
/* Hostcache hash table parameters. The *_MARK macros are expression
 * fragments appended to the table size (see hc_alloc_table()). */
#define HC_DEF_ORDER 10			/* Initial table order (1 << order buckets) */
#define HC_HI_MARK *4			/* Grow when items exceed size * 4 */
#define HC_HI_STEP 2			/* Grow by this many orders at a time */
#define HC_HI_ORDER 16                        /* Must be at most 16 */
#define HC_LO_MARK /5			/* Shrink when items drop below size / 5 */
#define HC_LO_STEP 2			/* Shrink by this many orders at a time */
#define HC_LO_ORDER 10			/* Never shrink below this order */
2074

    
2075
static void
2076
hc_alloc_table(struct hostcache *hc, unsigned order)
2077
{
2078
  unsigned hsize = 1 << order;
2079
  hc->hash_order = order;
2080
  hc->hash_shift = 16 - order;
2081
  hc->hash_max = (order >= HC_HI_ORDER) ? ~0 : (hsize HC_HI_MARK);
2082
  hc->hash_min = (order <= HC_LO_ORDER) ?  0 : (hsize HC_LO_MARK);
2083

    
2084
  hc->hash_table = mb_allocz(rt_table_pool, hsize * sizeof(struct hostentry *));
2085
}
2086

    
2087
static void
2088
hc_resize(struct hostcache *hc, unsigned new_order)
2089
{
2090
  unsigned old_size = 1 << hc->hash_order;
2091
  struct hostentry **old_table = hc->hash_table;
2092
  struct hostentry *he, *hen;
2093
  int i;
2094

    
2095
  hc_alloc_table(hc, new_order);
2096
  for (i = 0; i < old_size; i++)
2097
    for (he = old_table[i]; he != NULL; he=hen)
2098
      {
2099
        hen = he->next;
2100
        hc_insert(hc, he);
2101
      }
2102
  mb_free(old_table);
2103
}
2104

    
2105
static struct hostentry *
2106
hc_new_hostentry(struct hostcache *hc, ip_addr a, ip_addr ll, rtable *dep, unsigned k)
2107
{
2108
  struct hostentry *he = sl_alloc(hc->slab);
2109

    
2110
  he->addr = a;
2111
  he->link = ll;
2112
  he->tab = dep;
2113
  he->hash_key = k;
2114
  he->uc = 0;
2115
  he->src = NULL;
2116

    
2117
  add_tail(&hc->hostentries, &he->ln);
2118
  hc_insert(hc, he);
2119

    
2120
  hc->hash_items++;
2121
  if (hc->hash_items > hc->hash_max)
2122
    hc_resize(hc, hc->hash_order + HC_HI_STEP);
2123

    
2124
  return he;
2125
}
2126

    
2127
static void
2128
hc_delete_hostentry(struct hostcache *hc, struct hostentry *he)
2129
{
2130
  rta_free(he->src);
2131

    
2132
  rem_node(&he->ln);
2133
  hc_remove(hc, he);
2134
  sl_free(hc->slab, he);
2135

    
2136
  hc->hash_items--;
2137
  if (hc->hash_items < hc->hash_min)
2138
    hc_resize(hc, hc->hash_order - HC_LO_STEP);
2139
}
2140

    
2141
static void
2142
rt_init_hostcache(rtable *tab)
2143
{
2144
  struct hostcache *hc = mb_allocz(rt_table_pool, sizeof(struct hostcache));
2145
  init_list(&hc->hostentries);
2146

    
2147
  hc->hash_items = 0;
2148
  hc_alloc_table(hc, HC_DEF_ORDER);
2149
  hc->slab = sl_new(rt_table_pool, sizeof(struct hostentry));
2150

    
2151
  hc->lp = lp_new(rt_table_pool, 1008);
2152
  hc->trie = f_new_trie(hc->lp, sizeof(struct f_trie_node));
2153

    
2154
  tab->hostcache = hc;
2155
}
2156

    
2157
/* Free the hostcache of table @tab: drop cached route attributes of all
 * entries, then release the entry slab, the trie linpool and the table. */
static void
rt_free_hostcache(rtable *tab)
{
  struct hostcache *hc = tab->hostcache;

  node *n;
  WALK_LIST(n, hc->hostentries)
    {
      struct hostentry *he = SKIP_BACK(struct hostentry, ln, n);
      rta_free(he->src);

      /* All entries should be unreferenced by now; complain if not */
      if (he->uc)
	log(L_ERR "Hostcache is not empty in table %s", tab->name);
    }

  /* Freeing the slab releases all hostentries at once */
  rfree(hc->slab);
  rfree(hc->lp);
  mb_free(hc->hash_table);
  mb_free(hc);
}
2177

    
2178
/* Called when @net changed in table @tab; should schedule a hostcache
 * update when the change may affect a cached nexthop resolution. */
static void
rt_notify_hostcache(rtable *tab, net *net)
{
  struct hostcache *hc = tab->hostcache;

  /* An update is already pending; nothing more to do */
  if (tab->hcu_scheduled)
    return;

  /* XXXX: trie matching disabled pending net_addr conversion; until it is
   * restored, changes never schedule a hostcache update from here. */
  // if (trie_match_prefix(hc->trie, net->n.prefix, net->n.pxlen))
  // rt_schedule_hcu(tab);
}
2190

    
2191
/* Test whether address @a is one of the local addresses of iface @i.
 * Returns 1 when found, 0 otherwise. */
static int
if_local_addr(ip_addr a, struct iface *i)
{
  struct ifa *b;

  WALK_LIST(b, i->addrs)
    if (ipa_equal(a, b->ip))
      return 1;

  return 0;
}
2202

    
2203
/* Compute the IGP metric of route @rt for recursive nexthop resolution.
 * Preference order: explicit EA_GEN_IGP_METRIC attribute, then the
 * protocol-specific metric (OSPF/RIP when compiled in), then 0 for
 * device routes, IGP_METRIC_UNKNOWN for everything else. */
static u32 
rt_get_igp_metric(rte *rt)
{
  eattr *ea = ea_find(rt->attrs->eattrs, EA_GEN_IGP_METRIC);

  if (ea)
    return ea->u.data;

  rta *a = rt->attrs;

#ifdef CONFIG_OSPF
  if ((a->source == RTS_OSPF) ||
      (a->source == RTS_OSPF_IA) ||
      (a->source == RTS_OSPF_EXT1))
    return rt->u.ospf.metric1;
#endif

#ifdef CONFIG_RIP
  if (a->source == RTS_RIP)
    return rt->u.rip.metric;
#endif

  /* Device routes */
  if ((a->dest != RTD_ROUTER) && (a->dest != RTD_MULTIPATH))
    return 0;

  return IGP_METRIC_UNKNOWN;
}
2231

    
2232
/* Re-resolve hostentry @he against table @tab: find the best route
 * covering he->addr and refresh the cached gateway, destination type,
 * source rta and IGP metric. Returns nonzero when the cached source
 * rta changed (so dependent tables need a nexthop update). */
static int
rt_update_hostentry(rtable *tab, struct hostentry *he)
{
  rta *old_src = he->src;
  int pxlen = 0;

  /* Reset the hostentry */ 
  he->src = NULL;
  he->gw = IPA_NONE;
  he->dest = RTD_UNREACHABLE;
  he->igp_metric = 0;

  /* XXXX: longest-prefix lookup disabled pending net_addr conversion;
   * until restored, every hostentry resolves as unreachable. */
  // net *n = net_route(tab, he->addr, MAX_PREFIX_LENGTH);
  net *n = NULL;
  if (n)
    {
      rte *e = n->routes;
      rta *a = e->attrs;
      pxlen = n->n.addr->pxlen;

      if (a->hostentry)
	{
	  /* Recursive route should not depend on another recursive route */
	  log(L_WARN "Next hop address %I resolvable through recursive route for %N",
	      he->addr, n->n.addr);
	  goto done;
	}

      if (a->dest == RTD_DEVICE)
	{
	  if (if_local_addr(he->addr, a->iface))
	    {
	      /* The host address is a local address, this is not valid */
	      log(L_WARN "Next hop address %I is a local address of iface %s",
		  he->addr, a->iface->name);
	      goto done;
	    }

	  /* The host is directly reachable, use link as a gateway */
	  he->gw = he->link;
	  he->dest = RTD_ROUTER;
	}
      else
	{
	  /* The host is reachable through some route entry */
	  he->gw = a->gw;
	  he->dest = a->dest;
	}

      he->src = rta_clone(a);
      he->igp_metric = rt_get_igp_metric(e);
    }

 done:
  /* Add a prefix range to the trie so rt_notify_hostcache() can tell
   * which network changes may affect this resolution. */
  /* XXXX
  if (ipa_is_ip4(he->addr))
    trie_add_prefix(tab->hostcache->trie, he->addr, IP4_MAX_PREFIX_LENGTH, pxlen, IP4_MAX_PREFIX_LENGTH);
  else
    trie_add_prefix(tab->hostcache->trie, he->addr, IP6_MAX_PREFIX_LENGTH, pxlen, IP6_MAX_PREFIX_LENGTH);
  */

  rta_free(old_src);
  return old_src != he->src;
}
2298

    
2299
/* Refresh all hostentries of table @tab: drop unused entries, re-resolve
 * the rest, and schedule nexthop updates in dependent tables whose
 * entries changed. Clears the hcu_scheduled flag when done. */
static void
rt_update_hostcache(rtable *tab)
{
  struct hostcache *hc = tab->hostcache;
  struct hostentry *he;
  node *n, *x;

  /* Reset the trie; rt_update_hostentry() repopulates it per entry */
  lp_flush(hc->lp);
  hc->trie = f_new_trie(hc->lp, sizeof(struct f_trie_node));

  /* DELSAFE: entries may be deleted while walking */
  WALK_LIST_DELSAFE(n, x, hc->hostentries)
    {
      he = SKIP_BACK(struct hostentry, ln, n);
      if (!he->uc)
	{
	  /* No route references this entry any more */
	  hc_delete_hostentry(hc, he);
	  continue;
	}

      if (rt_update_hostentry(tab, he))
	rt_schedule_nhu(he->tab);
    }

  tab->hcu_scheduled = 0;
}
2325

    
2326
static struct hostentry *
2327
rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep)
2328
{
2329
  struct hostentry *he;
2330

    
2331
  if (!tab->hostcache)
2332
    rt_init_hostcache(tab);
2333

    
2334
  uint k = hc_hash(a, dep);
2335
  struct hostcache *hc = tab->hostcache;
2336
  for (he = hc->hash_table[k >> hc->hash_shift]; he != NULL; he = he->next)
2337
    if (ipa_equal(he->addr, a) && (he->tab == dep))
2338
      return he;
2339

    
2340
  he = hc_new_hostentry(hc, a, ll, dep, k);
2341
  rt_update_hostentry(tab, he);
2342
  return he;
2343
}
2344

    
2345
/* Bind rta @a to a hostentry resolving gateway *@gw (link-local *@ll)
 * in table @tab; dependent table @dep is notified on re-resolution. */
void
rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr *gw, ip_addr *ll)
{
  rta_apply_hostentry(a, rt_get_hostentry(tab, *gw, *ll, dep));
}
2350

    
2351

    
2352
/*
2353
 *  CLI commands
2354
 */
2355

    
2356
static void
2357
rt_format_via(rte *e, byte *via)
2358
{
2359
  rta *a = e->attrs;
2360

    
2361
  switch (a->dest)
2362
    {
2363
    case RTD_ROUTER:        bsprintf(via, "via %I on %s", a->gw, a->iface->name); break;
2364
    case RTD_DEVICE:        bsprintf(via, "dev %s", a->iface->name); break;
2365
    case RTD_BLACKHOLE:        bsprintf(via, "blackhole"); break;
2366
    case RTD_UNREACHABLE:        bsprintf(via, "unreachable"); break;
2367
    case RTD_PROHIBIT:        bsprintf(via, "prohibited"); break;
2368
    case RTD_MULTIPATH:        bsprintf(via, "multipath"); break;
2369
    default:                bsprintf(via, "???");
2370
    }
2371
}
2372

    
2373
/* Print one route @e (network prefix text in @ia, printed only for the
 * first route of a network) to CLI @c, honoring the show options in @d.
 * @tmpa holds the protocol's temporary attributes for this route. */
static void
rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, ea_list *tmpa)
{
  byte via[IPA_MAX_TEXT_LENGTH+32];
  byte from[IPA_MAX_TEXT_LENGTH+8];
  byte tm[TM_DATETIME_BUFFER_SIZE], info[256];
  rta *a = e->attrs;
  int primary = (e->net->routes == e);
  int sync_error = (e->net->n.flags & KRF_SYNC_ERROR);
  void (*get_route_info)(struct rte *, byte *buf, struct ea_list *attrs);
  struct mpnh *nh;

  rt_format_via(e, via);
  tm_format_datetime(tm, &config->tf_route, e->lastmod);
  /* Show "from" only when it differs from the gateway */
  if (ipa_nonzero(a->from) && !ipa_equal(a->from, a->gw))
    bsprintf(from, " from %I", a->from);
  else
    from[0] = 0;

  get_route_info = a->src->proto->proto->get_route_info;
  if (get_route_info || d->verbose)
    {
      /* Need to normalize the extended attributes */
      ea_list *t = tmpa;
      t = ea_append(t, a->eattrs);
      tmpa = alloca(ea_scan(t));	/* stack buffer for the merged list */
      ea_merge(t, tmpa);
      ea_sort(tmpa);
    }
  if (get_route_info)
    get_route_info(e, info, tmpa);
  else
    bsprintf(info, " (%d)", e->pref);
  /* " *" marks the primary route, " !" a primary route with sync error */
  cli_printf(c, -1007, "%-18s %s [%s %s%s]%s%s", ia, via, a->src->proto->name,
	     tm, from, primary ? (sync_error ? " !" : " *") : "", info);
  for (nh = a->nexthops; nh; nh = nh->next)
    cli_printf(c, -1007, "\tvia %I on %s weight %d", nh->gw, nh->iface->name, nh->weight + 1);
  if (d->verbose)
    rta_show(c, a, tmpa);
}
2413

    
2414
/* Show all routes of network @n on CLI @c according to the options in
 * @d (filtering, export simulation, per-protocol restriction, stats).
 * Updates the route/network counters in @d as a side effect. */
static void
rt_show_net(struct cli *c, net *n, struct rt_show_data *d)
{
  rte *e, *ee;
  byte ia[NET_MAX_TEXT_LENGTH+1];
  struct ea_list *tmpa;
  struct announce_hook *a = NULL;
  int first = 1;	/* first matching route of this network */
  int pass = 0;		/* later routes are only counted, not shown */

  bsprintf(ia, "%N", n->n.addr);

  if (d->export_mode)
    {
      /* Export simulation needs the protocol's announce hook */
      if (! d->export_protocol->rt_notify)
	return;

      a = proto_find_announce_hook(d->export_protocol, d->table);
      if (!a)
	return;
    }

  for (e = n->routes; e; e = e->next)
    {
      if (rte_is_filtered(e) != d->filtered)
	continue;

      d->rt_counter++;
      d->net_counter += first;
      first = 0;

      if (pass)
	continue;

      /* Keep the original route; filters may substitute a writable copy */
      ee = e;
      rte_update_lock();		/* We use the update buffer for filtering */
      tmpa = make_tmp_attrs(e, rte_update_pool);

      /* Special case for merged export */
      if ((d->export_mode == RSEM_EXPORT) && (d->export_protocol->accept_ra_types == RA_MERGED))
	{
	  rte *rt_free;
	  e = rt_export_merged(a, n, &rt_free, &tmpa, 1);
	  pass = 1;

	  if (!e)
	  { e = ee; goto skip; }
	}
      else if (d->export_mode)
	{
	  struct proto *ep = d->export_protocol;
	  int ic = ep->import_control ? ep->import_control(ep, &e, &tmpa, rte_update_pool) : 0;

	  /* These modes consider only the best (or merged) route */
	  if (ep->accept_ra_types == RA_OPTIMAL || ep->accept_ra_types == RA_MERGED)
	    pass = 1;

	  if (ic < 0)
	    goto skip;

	  if (d->export_mode > RSEM_PREEXPORT)
	    {
	      /*
	       * FIXME - This shows what should be exported according to current
	       * filters, but not what was really exported. 'configure soft'
	       * command may change the export filter and do not update routes.
	       */
	      int do_export = (ic > 0) ||
		(f_run(a->out_filter, &e, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) <= F_ACCEPT);

	      if (do_export != (d->export_mode == RSEM_EXPORT))
		goto skip;

	      if ((d->export_mode == RSEM_EXPORT) && (ep->accept_ra_types == RA_ACCEPTED))
		pass = 1;
	    }
	}

      if (d->show_protocol && (d->show_protocol != e->attrs->src->proto))
	goto skip;

      if (f_run(d->filter, &e, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) > F_ACCEPT)
	goto skip;

      d->show_counter++;
      if (d->stats < 2)
	rt_show_rte(c, ia, e, d, tmpa);
      ia[0] = 0;	/* print the prefix only once per network */

    skip:
      /* Free the writable copy a filter may have produced */
      if (e != ee)
      {
	rte_free(e);
	e = ee;
      }
      rte_update_unlock();

      if (d->primary_only)
	break;
    }
}
2514

    
2515
/* CLI continuation hook for 'show route': resume the FIB walk stored in
 * c->rover, print up to @max networks per invocation, and finish with
 * either statistics or an empty终 message. Aborts on reconfiguration or
 * when the export protocol goes down. */
static void
rt_show_cont(struct cli *c)
{
  struct rt_show_data *d = c->rover;
#ifdef DEBUGGING
  unsigned max = 4;	/* small batches to exercise the continuation path */
#else
  unsigned max = 64;	/* networks printed per poll */
#endif
  struct fib *fib = &d->table->fib;
  struct fib_iterator *it = &d->fit;

  FIB_ITERATE_START(fib, it, net, n)
    {
      if (d->running_on_config && d->running_on_config != config)
	{
	  cli_printf(c, 8004, "Stopped due to reconfiguration");
	  goto done;
	}
      if (d->export_protocol && (d->export_protocol->export_state == ES_DOWN))
	{
	  cli_printf(c, 8005, "Protocol is down");
	  goto done;
	}
      if (!max--)
	{
	  /* Batch exhausted: park the iterator and continue next poll */
	  FIB_ITERATE_PUT(it);
	  return;
	}
      rt_show_net(c, n, d);
    }
  FIB_ITERATE_END;
  if (d->stats)
    cli_printf(c, 14, "%d of %d routes for %d networks", d->show_counter, d->rt_counter, d->net_counter);
  else
    cli_printf(c, 0, "");
done:
  /* Detach the continuation; cleanup hook is no longer needed either */
  c->cont = c->cleanup = NULL;
}
2554

    
2555
/* CLI cleanup hook for 'show route': called when the session ends while
 * the FIB walk is still in progress. */
static void
rt_show_cleanup(struct cli *c)
{
  struct rt_show_data *d = c->rover;

  /* Unlink the iterator from the FIB so the table can change freely */
  fit_get(&d->table->fib, &d->fit);
}
2563

    
2564
/* Entry point of the 'show route' CLI command: pick the table, then
 * either start a continued FIB walk (no prefix given) or show the
 * single requested network. */
void
rt_show(struct rt_show_data *d)
{
  net *n;

  /* Default is either a master table or a table related to a respective protocol */
  if (!d->table && d->export_protocol) d->table = d->export_protocol->table;
  if (!d->table && d->show_protocol) d->table = d->show_protocol->table;
  if (!d->table) d->table = config->master_rtc->table;

  /* Filtered routes are neither exported nor have sensible ordering */
  /* NOTE(review): no return after this cli_msg -- it looks like the
   * command should terminate here; confirm intended behavior. */
  if (d->filtered && (d->export_mode || d->primary_only))
    cli_msg(0, "");

  if (!d->prefix)
    {
      /* Whole-table dump: drive it through the CLI continuation hooks */
      FIB_ITERATE_INIT(&d->fit, &d->table->fib);
      this_cli->cont = rt_show_cont;
      this_cli->cleanup = rt_show_cleanup;
      this_cli->rover = d;
    }
  else
    {
      /* XXXX: single-prefix lookup disabled pending net_addr conversion;
       * until restored, a prefix query always reports "not in table".
      if (d->show_for)
	n = net_route(d->table, d->prefix, d->pxlen);
      else
	n = net_find(d->table, d->prefix, d->pxlen);
      */
      n = NULL;

      if (n)
	rt_show_net(this_cli, n, d);

      if (d->rt_counter)
	cli_msg(0, "");
      else
	cli_msg(8001, "Network not in table");
    }
}
2604

    
2605
/*
2606
 *  Documentation for functions declared inline in route.h
2607
 */
2608
#if 0
2609

2610
/**
2611
 * net_find - find a network entry
2612
 * @tab: a routing table
2613
 * @addr: address of the network
2614
 *
2615
 * net_find() looks up the given network in routing table @tab and
2616
 * returns a pointer to its &net entry or %NULL if no such network
2617
 * exists.
2618
 */
2619
static inline net *net_find(rtable *tab, net_addr *addr)
2620
{ DUMMY; }
2621

2622
/**
2623
 * net_get - obtain a network entry
2624
 * @tab: a routing table
2625
 * @addr: address of the network
2626
 *
2627
 * net_get() looks up the given network in routing table @tab and
2628
 * returns a pointer to its &net entry. If no such entry exists, it's
2629
 * created.
2630
 */
2631
static inline net *net_get(rtable *tab, net_addr *addr)
2632
{ DUMMY; }
2633

2634
/**
2635
 * rte_cow - copy a route for writing
2636
 * @r: a route entry to be copied
2637
 *
2638
 * rte_cow() takes a &rte and prepares it for modification. The exact action
2639
 * taken depends on the flags of the &rte -- if it's a temporary entry, it's
2640
 * just returned unchanged, else a new temporary entry with the same contents
2641
 * is created.
2642
 *
2643
 * The primary use of this function is inside the filter machinery -- when
2644
 * a filter wants to modify &rte contents (to change the preference or to
2645
 * attach another set of attributes), it must ensure that the &rte is not
2646
 * shared with anyone else (and especially that it isn't stored in any routing
2647
 * table).
2648
 *
2649
 * Result: a pointer to the new writable &rte.
2650
 */
2651
static inline rte * rte_cow(rte *r)
2652
{ DUMMY; }
2653

2654
#endif