Statistics
| Branch: | Revision:

iof-bird-daemon / nest / rt-table.c @ fe9f1a6d

History | View | Annotate | Download (64.6 KB)

1
/*
2
 *        BIRD -- Routing Tables
3
 *
4
 *        (c) 1998--2000 Martin Mares <mj@ucw.cz>
5
 *
6
 *        Can be freely distributed and used under the terms of the GNU GPL.
7
 */
8

    
9
/**
10
 * DOC: Routing tables
11
 *
12
 * Routing tables are probably the most important structures BIRD uses. They
13
 * hold all the information about known networks, the associated routes and
14
 * their attributes.
15
 *
16
 * There are multiple routing tables (a primary one together with any
17
 * number of secondary ones if requested by the configuration). Each table
18
 * is basically a FIB containing entries describing the individual
19
 * destination networks. For each network (represented by structure &net),
20
 * there is a one-way linked list of route entries (&rte), the first entry
21
 * on the list being the best one (i.e., the one we currently use
22
 * for routing), the order of the other ones is undetermined.
23
 *
24
 * The &rte contains information specific to the route (preference, protocol
25
 * metrics, time of last modification etc.) and a pointer to a &rta structure
26
 * (see the route attribute module for a precise explanation) holding the
27
 * remaining route attributes which are expected to be shared by multiple
28
 * routes in order to conserve memory.
29
 */
30

    
31
#undef LOCAL_DEBUG
32

    
33
#include "nest/bird.h"
34
#include "nest/route.h"
35
#include "nest/protocol.h"
36
#include "nest/cli.h"
37
#include "nest/iface.h"
38
#include "lib/resource.h"
39
#include "lib/event.h"
40
#include "lib/string.h"
41
#include "conf/conf.h"
42
#include "filter/filter.h"
43
#include "lib/string.h"
44
#include "lib/alloca.h"
45

    
46
pool *rt_table_pool;
47

    
48
static slab *rte_slab;
49
static linpool *rte_update_pool;
50

    
51
static list routing_tables;
52

    
53
static void rt_format_via(rte *e, byte *via);
54
static void rt_free_hostcache(rtable *tab);
55
static void rt_notify_hostcache(rtable *tab, net *net);
56
static void rt_update_hostcache(rtable *tab);
57
static void rt_next_hop_update(rtable *tab);
58
static inline int rt_prune_table(rtable *tab);
59
static inline void rt_schedule_gc(rtable *tab);
60
static inline void rt_schedule_prune(rtable *tab);
61

    
62

    
63
static inline struct ea_list *
64
make_tmp_attrs(struct rte *rt, struct linpool *pool)
65
{
66
  struct ea_list *(*mta)(struct rte *rt, struct linpool *pool);
67
  mta = rt->attrs->src->proto->make_tmp_attrs;
68
  return mta ? mta(rt, rte_update_pool) : NULL;
69
}
70

    
71
/* Like fib_route(), but skips empty net entries */
72
/*
73
static net *
74
net_route(rtable *tab, ip_addr a, int len)
75
{
76
  ip_addr a0;
77
  net *n;
78

79
  while (len >= 0)
80
    {
81
      a0 = ipa_and(a, ipa_mkmask(len));
82
      n = fib_find(&tab->fib, &a0, len);
83
      if (n && rte_is_valid(n->routes))
84
        return n;
85
      len--;
86
    }
87
  return NULL;
88
}
89
*/
90

    
91
/**
92
 * rte_find - find a route
93
 * @net: network node
94
 * @src: route source
95
 *
96
 * The rte_find() function returns a route for destination @net
97
 * which is from route source @src.
98
 */
99
rte *
100
rte_find(net *net, struct rte_src *src)
101
{
102
  rte *e = net->routes;
103

    
104
  while (e && e->attrs->src != src)
105
    e = e->next;
106
  return e;
107
}
108

    
109
/**
110
 * rte_get_temp - get a temporary &rte
111
 * @a: attributes to assign to the new route (a &rta; in case it's
112
 * un-cached, rte_update() will create a cached copy automatically)
113
 *
114
 * Create a temporary &rte and bind it with the attributes @a.
115
 * Also set route preference to the default preference set for
116
 * the protocol.
117
 */
118
rte *
119
rte_get_temp(rta *a)
120
{
121
  rte *e = sl_alloc(rte_slab);
122

    
123
  e->attrs = a;
124
  e->flags = 0;
125
  e->pref = a->src->proto->preference;
126
  return e;
127
}
128

    
129
/*
 * rte_do_cow - create a private copy of @r.
 *
 * Duplicates the whole &rte, takes a new reference on its &rta via
 * rta_clone() and clears the copy's flags. The caller owns the copy.
 */
rte *
rte_do_cow(rte *r)
{
  rte *copy = sl_alloc(rte_slab);

  memcpy(copy, r, sizeof(rte));
  copy->attrs = rta_clone(r->attrs);
  copy->flags = 0;
  return copy;
}
139

    
140
/**
141
 * rte_cow_rta - get a private writable copy of &rte with writable &rta
142
 * @r: a route entry to be copied
143
 * @lp: a linpool from which to allocate &rta
144
 *
145
 * rte_cow_rta() takes a &rte and prepares it and associated &rta for
146
 * modification. There are three possibilities: First, both &rte and &rta are
147
 * private copies, in that case they are returned unchanged.  Second, &rte is
148
 * private copy, but &rta is cached, in that case &rta is duplicated using
149
 * rta_do_cow(). Third, both &rte is shared and &rta is cached, in that case
150
 * both structures are duplicated by rte_do_cow() and rta_do_cow().
151
 *
152
 * Note that in the second case, cached &rta loses one reference, while private
153
 * copy created by rta_do_cow() is a shallow copy sharing indirect data (eattrs,
154
 * nexthops, ...) with it. To work properly, original shared &rta should have
155
 * another reference during the life of created private copy.
156
 *
157
 * Result: a pointer to the new writable &rte with writable &rta.
158
 */
159
rte *
160
rte_cow_rta(rte *r, linpool *lp)
161
{
162
  if (!rta_is_cached(r->attrs))
163
    return r;
164

    
165
  rte *e = rte_cow(r);
166
  rta *a = rta_do_cow(r->attrs, lp);
167
  rta_free(e->attrs);
168
  e->attrs = a;
169
  return e;
170
}
171

    
172
static int                                /* Actually better or at least as good as */
173
rte_better(rte *new, rte *old)
174
{
175
  int (*better)(rte *, rte *);
176

    
177
  if (!rte_is_valid(old))
178
    return 1;
179
  if (!rte_is_valid(new))
180
    return 0;
181

    
182
  if (new->pref > old->pref)
183
    return 1;
184
  if (new->pref < old->pref)
185
    return 0;
186
  if (new->attrs->src->proto->proto != old->attrs->src->proto->proto)
187
    {
188
      /*
189
       *  If the user has configured protocol preferences, so that two different protocols
190
       *  have the same preference, try to break the tie by comparing addresses. Not too
191
       *  useful, but keeps the ordering of routes unambiguous.
192
       */
193
      return new->attrs->src->proto->proto > old->attrs->src->proto->proto;
194
    }
195
  if (better = new->attrs->src->proto->rte_better)
196
    return better(new, old);
197
  return 0;
198
}
199

    
200
static int
201
rte_mergable(rte *pri, rte *sec)
202
{
203
  int (*mergable)(rte *, rte *);
204

    
205
  if (!rte_is_valid(pri) || !rte_is_valid(sec))
206
    return 0;
207

    
208
  if (pri->pref != sec->pref)
209
    return 0;
210

    
211
  if (pri->attrs->src->proto->proto != sec->attrs->src->proto->proto)
212
    return 0;
213

    
214
  if (mergable = pri->attrs->src->proto->rte_mergable)
215
    return mergable(pri, sec);
216

    
217
  return 0;
218
}
219

    
220
static void
221
rte_trace(struct proto *p, rte *e, int dir, char *msg)
222
{
223
  byte via[STD_ADDRESS_P_LENGTH+32];
224

    
225
  rt_format_via(e, via);
226
  log(L_TRACE "%s %c %s %N %s", p->name, dir, msg, e->net->n.addr, via);
227
}
228

    
229
static inline void
230
rte_trace_in(uint flag, struct proto *p, rte *e, char *msg)
231
{
232
  if (p->debug & flag)
233
    rte_trace(p, e, '>', msg);
234
}
235

    
236
static inline void
237
rte_trace_out(uint flag, struct proto *p, rte *e, char *msg)
238
{
239
  if (p->debug & flag)
240
    rte_trace(p, e, '<', msg);
241
}
242

    
243
/*
 * export_filter - run the export path checks on route @rt0.
 *
 * Applies the protocol's import_control() hook and then the announce
 * hook's export filter. On accept, returns the (possibly modified)
 * route; if the filter produced a private copy, it is stored in
 * *@rt_free so the caller can release it later. On reject, returns
 * NULL (and frees any private copy). When @silent is set, statistics
 * and trace logging are suppressed (used for re-evaluating old routes).
 * @tmpa may be NULL; in that case a local ea_list pointer is used.
 */
static rte *
export_filter(struct announce_hook *ah, rte *rt0, rte **rt_free, ea_list **tmpa, int silent)
{
  struct proto *p = ah->proto;
  struct filter *filter = ah->out_filter;
  struct proto_stats *stats = ah->stats;
  ea_list *tmpb = NULL;
  rte *rt;
  int v;

  rt = rt0;
  *rt_free = NULL;

  /* Caller may not care about tmp attributes; point at a local slot */
  if (!tmpa)
    tmpa = &tmpb;

  *tmpa = make_tmp_attrs(rt, rte_update_pool);

  /* Protocol veto / force-accept: v < 0 rejects, v > 0 forces accept */
  v = p->import_control ? p->import_control(p, &rt, tmpa, rte_update_pool) : 0;
  if (v < 0)
    {
      if (silent)
        goto reject;

      stats->exp_updates_rejected++;
      if (v == RIC_REJECT)
        rte_trace_out(D_FILTERS, p, rt, "rejected by protocol");
      goto reject;
    }
  if (v > 0)
    {
      if (!silent)
        rte_trace_out(D_FILTERS, p, rt, "forced accept by protocol");
      goto accept;
    }

  /* Run the export filter (FILTER_REJECT short-circuits to reject) */
  v = filter && ((filter == FILTER_REJECT) ||
                 (f_run(filter, &rt, tmpa, rte_update_pool, FF_FORCE_TMPATTR) > F_ACCEPT));
  if (v)
    {
      if (silent)
        goto reject;

      stats->exp_updates_filtered++;
      rte_trace_out(D_FILTERS, p, rt, "filtered out");
      goto reject;
    }

 accept:
  /* Hand ownership of a filter-created copy to the caller */
  if (rt != rt0)
    *rt_free = rt;
  return rt;

 reject:
  /* Discard temporary rte */
  if (rt != rt0)
    rte_free(rt);
  return NULL;
}
302

    
303
/*
 * do_rt_notify - deliver a filtered route change to one protocol.
 *
 * Applies the export limit, updates export statistics, traces the
 * change and finally calls the protocol's rt_notify() hook. @new/@old
 * have already passed the export filters; @tmpa, if non-NULL, is
 * temporarily spliced in front of new->attrs->eattrs for the call.
 */
static void
do_rt_notify(struct announce_hook *ah, net *net, rte *new, rte *old, ea_list *tmpa, int refeed)
{
  struct proto *p = ah->proto;
  struct proto_stats *stats = ah->stats;

  /*
   * First, apply export limit.
   *
   * Export route limits has several problems. Because exp_routes
   * counter is reset before refeed, we don't really know whether
   * limit is breached and whether the update is new or not. Therefore
   * the number of really exported routes may exceed the limit
   * temporarily (routes exported before and new routes in refeed).
   *
   * Minor advantage is that if the limit is decreased and refeed is
   * requested, the number of exported routes really decrease.
   *
   * Second problem is that with export limits, we don't know whether
   * old was really exported (it might be blocked by limit). When a
   * withdraw is exported, we announce it even when the previous
   * update was blocked. This is not a big issue, but the same problem
   * is in updating exp_routes counter. Therefore, to be consistent in
   * increases and decreases of exp_routes, we count exported routes
   * regardless of blocking by limits.
   *
   * Similar problem is in handling updates - when a new route is
   * received and blocking is active, the route would be blocked, but
   * when an update for the route will be received later, the update
   * would be propagated (as old != NULL). Therefore, we have to block
   * also non-new updates (contrary to import blocking).
   */

  struct proto_limit *l = ah->out_limit;
  if (l && new)
    {
      if ((!old || refeed) && (stats->exp_routes >= l->limit))
        proto_notify_limit(ah, l, PLD_OUT, stats->exp_routes);

      if (l->state == PLS_BLOCKED)
        {
          stats->exp_routes++;        /* see note above */
          stats->exp_updates_rejected++;
          rte_trace_out(D_FILTERS, p, new, "rejected [limit]");
          new = NULL;

          /* Blocked update with no old route - nothing to announce */
          if (!old)
            return;
        }
    }

  if (new)
    stats->exp_updates_accepted++;
  else
    stats->exp_withdraws_accepted++;

  /* Hack: We do not decrease exp_routes during refeed, we instead
     reset exp_routes at the start of refeed. */
  if (new)
    stats->exp_routes++;
  if (old && !refeed)
    stats->exp_routes--;

  if (p->debug & D_ROUTES)
    {
      if (new && old)
        rte_trace_out(D_ROUTES, p, new, "replaced");
      else if (new)
        rte_trace_out(D_ROUTES, p, new, "added");
      else if (old)
        rte_trace_out(D_ROUTES, p, old, "removed");
    }
  if (!new)
    p->rt_notify(p, ah->table, net, NULL, old, NULL);
  else if (tmpa)
    {
      /* Splice tmpa in front of the route's eattrs for the duration
         of the callback, then unlink it again */
      ea_list *t = tmpa;
      while (t->next)
        t = t->next;
      t->next = new->attrs->eattrs;
      p->rt_notify(p, ah->table, net, new, old, tmpa);
      t->next = NULL;
    }
  else
    p->rt_notify(p, ah->table, net, new, old, new->attrs->eattrs);
}
391

    
392
/*
 * rt_notify_basic - announce a single route change (RA_ANY / RA_OPTIMAL)
 * to one announce hook. Runs the export filter on both the new and the
 * old route (the latter only outside refeed) and forwards the result to
 * do_rt_notify(). Private copies created by the filters are freed here.
 */
static void
rt_notify_basic(struct announce_hook *ah, net *net, rte *new0, rte *old0, int refeed)
{
  struct proto *p = ah->proto;
  struct proto_stats *stats = ah->stats;

  rte *new = new0;
  rte *old = old0;
  rte *new_free = NULL;
  rte *old_free = NULL;
  ea_list *tmpa = NULL;

  if (new)
    stats->exp_updates_received++;
  else
    stats->exp_withdraws_received++;

  /*
   * This is a tricky part - we don't know whether route 'old' was
   * exported to protocol 'p' or was filtered by the export filter.
   * We try to run the export filter to know this to have a correct
   * value in 'old' argument of rte_update (and proper filter value)
   *
   * FIXME - this is broken because 'configure soft' may change
   * filters but keep routes. Refeed is expected to be called after
   * change of the filters and with old == new, therefore we do not
   * even try to run the filter on an old route, This may lead to
   * 'spurious withdraws' but ensure that there are no 'missing
   * withdraws'.
   *
   * This is not completely safe as there is a window between
   * reconfiguration and the end of refeed - if a newly filtered
   * route disappears during this period, proper withdraw is not
   * sent (because old would be also filtered) and the route is
   * not refeeded (because it disappeared before that).
   */

  if (new)
    new = export_filter(ah, new, &new_free, &tmpa, 0);

  /* Old route is re-filtered silently (no stats/trace) */
  if (old && !refeed)
    old = export_filter(ah, old, &old_free, NULL, 1);

  if (!new && !old)
  {
    /*
     * As mentioned above, 'old' value may be incorrect in some race conditions.
     * We generally ignore it with the exception of withdraw to pipe protocol.
     * In that case we rather propagate unfiltered withdraws regardless of
     * export filters to ensure that when a protocol is flushed, its routes are
     * removed from all tables. Possible spurious unfiltered withdraws are not
     * problem here as they are ignored if there is no corresponding route at
     * the other end of the pipe. We directly call rt_notify() hook instead of
     * do_rt_notify() to avoid logging and stat counters.
     */

#ifdef CONFIG_PIPE
    if ((p->proto == &proto_pipe) && !new0 && (p != old0->sender->proto))
      p->rt_notify(p, ah->table, net, NULL, old0, NULL);
#endif

    return;
  }

  do_rt_notify(ah, net, new, old, tmpa, refeed);

  /* Discard temporary rte's */
  if (new_free)
    rte_free(new_free);
  if (old_free)
    rte_free(old_free);
}
464

    
465
/*
 * rt_notify_accepted - announce a change for RA_ACCEPTED hooks, i.e.
 * protocols that only want the best route accepted by their export
 * filter. Determines the new and old "first accepted" routes and
 * forwards the difference to do_rt_notify(). @feed: 0 for a regular
 * update, 1 for initial feed, 2 for refeed.
 */
static void
rt_notify_accepted(struct announce_hook *ah, net *net, rte *new_changed, rte *old_changed, rte *before_old, int feed)
{
  // struct proto *p = ah->proto;
  struct proto_stats *stats = ah->stats;

  rte *r;
  rte *new_best = NULL;
  rte *old_best = NULL;
  rte *new_free = NULL;
  rte *old_free = NULL;
  ea_list *tmpa = NULL;

  /* Used to track whether we met old_changed position. If before_old is NULL
     old_changed was the first and we met it implicitly before current best route. */
  int old_meet = old_changed && !before_old;

  /* Note that before_old is either NULL or valid (not rejected) route.
     If old_changed is valid, before_old have to be too. If old changed route
     was not valid, caller must use NULL for both old_changed and before_old. */

  if (new_changed)
    stats->exp_updates_received++;
  else
    stats->exp_withdraws_received++;

  /* First, find the new_best route - first accepted by filters */
  for (r=net->routes; rte_is_valid(r); r=r->next)
    {
      if (new_best = export_filter(ah, r, &new_free, &tmpa, 0))
        break;

      /* Note if we walked around the position of old_changed route */
      if (r == before_old)
        old_meet = 1;
    }

  /* 
   * Second, handle the feed case. That means we do not care for
   * old_best. It is NULL for feed, and the new_best for refeed. 
   * For refeed, there is a hack similar to one in rt_notify_basic()
   * to ensure withdraws in case of changed filters
   */
  if (feed)
    {
      if (feed == 2)        /* refeed */
        old_best = new_best ? new_best :
          (rte_is_valid(net->routes) ? net->routes : NULL);
      else
        old_best = NULL;

      if (!new_best && !old_best)
        return;

      goto found;
    }

  /*
   * Now, we find the old_best route. Generally, it is the same as the
   * new_best, unless new_best is the same as new_changed or
   * old_changed is accepted before new_best.
   *
   * There are four cases:
   *
   * - We would find and accept old_changed before new_best, therefore
   *   old_changed is old_best. In remaining cases we suppose this
   *   is not true.
   *
   * - We found no new_best, therefore there is also no old_best and
   *   we ignore this withdraw.
   *
   * - We found new_best different than new_changed, therefore
   *   old_best is the same as new_best and we ignore this update.
   *
   * - We found new_best the same as new_changed, therefore it cannot
   *   be old_best and we have to continue search for old_best.
   */

  /* First case */
  if (old_meet)
    if (old_best = export_filter(ah, old_changed, &old_free, NULL, 1))
      goto found;

  /* Second case */
  if (!new_best)
    return;

  /* Third case, we use r instead of new_best, because export_filter() could change it */
  if (r != new_changed)
    {
      if (new_free)
        rte_free(new_free);
      return;
    }

  /* Fourth case */
  for (r=r->next; rte_is_valid(r); r=r->next)
    {
      if (old_best = export_filter(ah, r, &old_free, NULL, 1))
        goto found;

      if (r == before_old)
        if (old_best = export_filter(ah, old_changed, &old_free, NULL, 1))
          goto found;
    }

  /* Implicitly, old_best is NULL and new_best is non-NULL */

 found:
  do_rt_notify(ah, net, new_best, old_best, tmpa, (feed == 2));

  /* Discard temporary rte's */
  if (new_free)
    rte_free(new_free);
  if (old_free)
    rte_free(old_free);
}
582

    
583

    
584
static struct mpnh *
585
mpnh_merge_rta(struct mpnh *nhs, rta *a, int max)
586
{
587
  struct mpnh nh = { .gw = a->gw, .iface = a->iface };
588
  struct mpnh *nh2 = (a->dest == RTD_MULTIPATH) ? a->nexthops : &nh;
589
  return mpnh_merge(nhs, nh2, 1, 0, max, rte_update_pool);
590
}
591

    
592
/*
 * rt_export_merged - build the merged (multipath) export route for @net.
 *
 * Runs export_filter() on the best route; if it is accepted and
 * reachable, merges the next hops of all other mergable, accepted,
 * reachable routes into it (up to the protocol's merge_limit). A
 * private copy, if created, is stored in *@rt_free for the caller to
 * release. Returns the resulting route or NULL when the best route
 * is invalid or filtered out.
 */
rte *
rt_export_merged(struct announce_hook *ah, net *net, rte **rt_free, ea_list **tmpa, int silent)
{
  // struct proto *p = ah->proto;
  struct mpnh *nhs = NULL;
  rte *best0, *best, *rt0, *rt, *tmp;

  best0 = net->routes;
  *rt_free = NULL;

  if (!rte_is_valid(best0))
    return NULL;

  best = export_filter(ah, best0, rt_free, tmpa, silent);

  /* Rejected or unreachable best route - nothing to merge into */
  if (!best || !rte_is_reachable(best))
    return best;

  for (rt0 = best0->next; rt0; rt0 = rt0->next)
  {
    if (!rte_mergable(best0, rt0))
      continue;

    /* Silent filtering of candidate routes - no stats/trace */
    rt = export_filter(ah, rt0, &tmp, NULL, 1);

    if (!rt)
      continue;

    if (rte_is_reachable(rt))
      nhs = mpnh_merge_rta(nhs, rt->attrs, ah->proto->merge_limit);

    if (tmp)
      rte_free(tmp);
  }

  if (nhs)
  {
    nhs = mpnh_merge_rta(nhs, best->attrs, ah->proto->merge_limit);

    /* More than one next hop: turn the best route into a multipath one */
    if (nhs->next)
    {
      best = rte_cow_rta(best, rte_update_pool);
      best->attrs->dest = RTD_MULTIPATH;
      best->attrs->nexthops = nhs;
    }
  }

  if (best != best0)
    *rt_free = best;

  return best;
}
644

    
645

    
646
/*
 * rt_notify_merged - announce a change for RA_MERGED hooks, i.e.
 * protocols that export one merged multipath route per network.
 * Skips changes irrelevant to the merged route, rebuilds the merged
 * route via rt_export_merged() and forwards it to do_rt_notify().
 */
static void
rt_notify_merged(struct announce_hook *ah, net *net, rte *new_changed, rte *old_changed,
                 rte *new_best, rte*old_best, int refeed)
{
  // struct proto *p = ah->proto;

  rte *new_best_free = NULL;
  rte *old_best_free = NULL;
  rte *new_changed_free = NULL;
  rte *old_changed_free = NULL;
  ea_list *tmpa = NULL;

  /* We assume that all rte arguments are either NULL or rte_is_valid() */

  /* This check should be done by the caller */
  if (!new_best && !old_best)
    return;

  /* Check whether the change is relevant to the merged route */
  if ((new_best == old_best) && !refeed)
  {
    new_changed = rte_mergable(new_best, new_changed) ?
      export_filter(ah, new_changed, &new_changed_free, NULL, 1) : NULL;

    old_changed = rte_mergable(old_best, old_changed) ?
      export_filter(ah, old_changed, &old_changed_free, NULL, 1) : NULL;

    /* Neither changed route takes part in the merged route - nothing to do */
    if (!new_changed && !old_changed)
      return;
  }

  if (new_best)
    ah->stats->exp_updates_received++;
  else
    ah->stats->exp_withdraws_received++;

  /* Prepare new merged route */
  if (new_best)
    new_best = rt_export_merged(ah, net, &new_best_free, &tmpa, 0);

  /* Prepare old merged route (without proper merged next hops) */
  /* There are some issues with running filter on old route - see rt_notify_basic() */
  if (old_best && !refeed)
    old_best = export_filter(ah, old_best, &old_best_free, NULL, 1);

  if (new_best || old_best)
    do_rt_notify(ah, net, new_best, old_best, tmpa, refeed);

  /* Discard temporary rte's */
  if (new_best_free)
    rte_free(new_best_free);
  if (old_best_free)
    rte_free(old_best_free);
  if (new_changed_free)
    rte_free(new_changed_free);
  if (old_changed_free)
    rte_free(old_changed_free);
}
704

    
705

    
706
/**
 * rte_announce - announce a routing table change
 * @tab: table the route has been added to
 * @type: type of route announcement (RA_OPTIMAL or RA_ANY)
 * @net: network in question
 * @new: the new route to be announced
 * @old: the previous route for the same network
 *
 * This function gets a routing table update and announces it
 * to all protocols that accept given type of route announcement
 * and are connected to the same table by their announcement hooks.
 *
 * Route announcement of type RA_OPTIMAL is generated when optimal
 * route (in routing table @tab) changes. In that case @old stores the
 * old optimal route.
 *
 * Route announcement of type RA_ANY is generated when any route (in
 * routing table @tab) changes. In that case @old stores the old route
 * from the same protocol.
 *
 * For each appropriate protocol, we first call its import_control()
 * hook which performs basic checks on the route (each protocol has a
 * right to veto or force accept of the route before any filter is
 * asked) and adds default values of attributes specific to the new
 * protocol (metrics, tags etc.).  Then it consults the protocol's
 * export filter and if it accepts the route, the rt_notify() hook of
 * the protocol gets called.
 */
static void
rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old,
             rte *new_best, rte *old_best, rte *before_old)
{
  /* Normalize: invalid routes are treated as absent */
  if (!rte_is_valid(new))
    new = NULL;

  if (!rte_is_valid(old))
    old = before_old = NULL;

  if (!rte_is_valid(new_best))
    new_best = NULL;

  if (!rte_is_valid(old_best))
    old_best = NULL;

  if (!old && !new)
    return;

  if (type == RA_OPTIMAL)
    {
      /* Keep per-protocol preferred-route counters in sync */
      if (new)
        new->attrs->src->proto->stats.pref_routes++;
      if (old)
        old->attrs->src->proto->stats.pref_routes--;

      if (tab->hostcache)
        rt_notify_hostcache(tab, net);
    }

  /* Dispatch to every hook that accepts this announcement type */
  struct announce_hook *a;
  WALK_LIST(a, tab->hooks)
    {
      ASSERT(a->proto->export_state != ES_DOWN);
      if (a->proto->accept_ra_types == type)
        if (type == RA_ACCEPTED)
          rt_notify_accepted(a, net, new, old, before_old, 0);
        else if (type == RA_MERGED)
          rt_notify_merged(a, net, new, old, new_best, old_best, 0);
        else
          rt_notify_basic(a, net, new, old, 0);
    }
}
777

    
778
static inline int
779
rte_validate(rte *e)
780
{
781
  int c;
782
  net *n = e->net;
783

    
784
  // (n->n.pxlen > BITS_PER_IP_ADDRESS) || !ip_is_prefix(n->n.prefix,n->n.pxlen))
785
  if (!net_validate(n->n.addr))
786
  {
787
    log(L_WARN "Ignoring bogus prefix %N received via %s",
788
        n->n.addr, e->sender->proto->name);
789
    return 0;
790
  }
791

    
792
  c = net_classify(n->n.addr);
793
  if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
794
  {
795
    log(L_WARN "Ignoring bogus route %N received via %s",
796
        n->n.addr, e->sender->proto->name);
797
    return 0;
798
  }
799

    
800
  return 1;
801
}
802

    
803
/**
 * rte_free - delete a &rte
 * @e: &rte to be deleted
 *
 * rte_free() deletes the given &rte from the routing table it's linked to.
 */
void
rte_free(rte *e)
{
  /* Drop the attribute reference only when the rta is cached;
     un-cached attributes are not owned via a refcount */
  if (rta_is_cached(e->attrs))
    rta_free(e->attrs);
  sl_free(rte_slab, e);
}
816

    
817
/* Free a &rte whose attributes are known to be cached -
   skips the rta_is_cached() check done by rte_free(). */
static inline void
rte_free_quick(rte *e)
{
  rta_free(e->attrs);
  sl_free(rte_slab, e);
}
823

    
824
static int
825
rte_same(rte *x, rte *y)
826
{
827
  return
828
    x->attrs == y->attrs &&
829
    x->flags == y->flags &&
830
    x->pflags == y->pflags &&
831
    x->pref == y->pref &&
832
    (!x->attrs->src->proto->rte_same || x->attrs->src->proto->rte_same(x, y));
833
}
834

    
835
/* A route counts as "ok" when it exists and is not marked filtered. */
static inline int rte_is_ok(rte *e) { return (e != NULL) && !rte_is_filtered(e); }
836

    
837
static void
838
rte_recalculate(struct announce_hook *ah, net *net, rte *new, struct rte_src *src)
839
{
840
  struct proto *p = ah->proto;
841
  struct rtable *table = ah->table;
842
  struct proto_stats *stats = ah->stats;
843
  static struct tbf rl_pipe = TBF_DEFAULT_LOG_LIMITS;
844
  rte *before_old = NULL;
845
  rte *old_best = net->routes;
846
  rte *old = NULL;
847
  rte **k;
848

    
849
  k = &net->routes;                        /* Find and remove original route from the same protocol */
850
  while (old = *k)
851
    {
852
      if (old->attrs->src == src)
853
        {
854
          /* If there is the same route in the routing table but from
855
           * a different sender, then there are two paths from the
856
           * source protocol to this routing table through transparent
857
           * pipes, which is not allowed.
858
           *
859
           * We log that and ignore the route. If it is withdraw, we
860
           * ignore it completely (there might be 'spurious withdraws',
861
           * see FIXME in do_rte_announce())
862
           */
863
          if (old->sender->proto != p)
864
            {
865
              if (new)
866
                {
867
                  log_rl(&rl_pipe, L_ERR "Pipe collision detected when sending %N to table %s",
868
                      net->n.addr, table->name);
869
                  rte_free_quick(new);
870
                }
871
              return;
872
            }
873

    
874
          if (new && rte_same(old, new))
875
            {
876
              /* No changes, ignore the new route */
877

    
878
              if (!rte_is_filtered(new))
879
                {
880
                  stats->imp_updates_ignored++;
881
                  rte_trace_in(D_ROUTES, p, new, "ignored");
882
                }
883

    
884
              rte_free_quick(new);
885
              return;
886
            }
887
          *k = old->next;
888
          break;
889
        }
890
      k = &old->next;
891
      before_old = old;
892
    }
893

    
894
  if (!old)
895
    before_old = NULL;
896

    
897
  if (!old && !new)
898
    {
899
      stats->imp_withdraws_ignored++;
900
      return;
901
    }
902

    
903
  int new_ok = rte_is_ok(new);
904
  int old_ok = rte_is_ok(old);
905

    
906
  struct proto_limit *l = ah->rx_limit;
907
  if (l && !old && new)
908
    {
909
      u32 all_routes = stats->imp_routes + stats->filt_routes;
910

    
911
      if (all_routes >= l->limit)
912
        proto_notify_limit(ah, l, PLD_RX, all_routes);
913

    
914
      if (l->state == PLS_BLOCKED)
915
        {
916
          /* In receive limit the situation is simple, old is NULL so
917
             we just free new and exit like nothing happened */
918

    
919
          stats->imp_updates_ignored++;
920
          rte_trace_in(D_FILTERS, p, new, "ignored [limit]");
921
          rte_free_quick(new);
922
          return;
923
        }
924
    }
925

    
926
  l = ah->in_limit;
927
  if (l && !old_ok && new_ok)
928
    {
929
      if (stats->imp_routes >= l->limit)
930
        proto_notify_limit(ah, l, PLD_IN, stats->imp_routes);
931

    
932
      if (l->state == PLS_BLOCKED)
933
        {
934
          /* In import limit the situation is more complicated. We
935
             shouldn't just drop the route, we should handle it like
936
             it was filtered. We also have to continue the route
937
             processing if old or new is non-NULL, but we should exit
938
             if both are NULL as this case is probably assumed to be
939
             already handled. */
940

    
941
          stats->imp_updates_ignored++;
942
          rte_trace_in(D_FILTERS, p, new, "ignored [limit]");
943

    
944
          if (ah->in_keep_filtered)
945
            new->flags |= REF_FILTERED;
946
          else
947
            { rte_free_quick(new); new = NULL; }
948

    
949
          /* Note that old && !new could be possible when
950
             ah->in_keep_filtered changed in the recent past. */
951

    
952
          if (!old && !new)
953
            return;
954

    
955
          new_ok = 0;
956
          goto skip_stats1;
957
        }
958
    }
959

    
960
  if (new_ok)
961
    stats->imp_updates_accepted++;
962
  else if (old_ok)
963
    stats->imp_withdraws_accepted++;
964
  else
965
    stats->imp_withdraws_ignored++;
966

    
967
 skip_stats1:
968

    
969
  if (new)
970
    rte_is_filtered(new) ? stats->filt_routes++ : stats->imp_routes++;
971
  if (old)
972
    rte_is_filtered(old) ? stats->filt_routes-- : stats->imp_routes--;
973

    
974
  if (table->config->sorted)
975
    {
976
      /* If routes are sorted, just insert new route to appropriate position */
977
      if (new)
978
        {
979
          if (before_old && !rte_better(new, before_old))
980
            k = &before_old->next;
981
          else
982
            k = &net->routes;
983

    
984
          for (; *k; k=&(*k)->next)
985
            if (rte_better(new, *k))
986
              break;
987

    
988
          new->next = *k;
989
          *k = new;
990
        }
991
    }
992
  else
993
    {
994
      /* If routes are not sorted, find the best route and move it on
995
         the first position. There are several optimized cases. */
996

    
997
      if (src->proto->rte_recalculate && src->proto->rte_recalculate(table, net, new, old, old_best))
998
        goto do_recalculate;
999

    
1000
      if (new && rte_better(new, old_best))
1001
        {
1002
          /* The first case - the new route is cleary optimal,
1003
             we link it at the first position */
1004

    
1005
          new->next = net->routes;
1006
          net->routes = new;
1007
        }
1008
      else if (old == old_best)
1009
        {
1010
          /* The second case - the old best route disappeared, we add the
1011
             new route (if we have any) to the list (we don't care about
1012
             position) and then we elect the new optimal route and relink
1013
             that route at the first position and announce it. New optimal
1014
             route might be NULL if there is no more routes */
1015

    
1016
        do_recalculate:
1017
          /* Add the new route to the list */
1018
          if (new)
1019
            {
1020
              new->next = net->routes;
1021
              net->routes = new;
1022
            }
1023

    
1024
          /* Find a new optimal route (if there is any) */
1025
          if (net->routes)
1026
            {
1027
              rte **bp = &net->routes;
1028
              for (k=&(*bp)->next; *k; k=&(*k)->next)
1029
                if (rte_better(*k, *bp))
1030
                  bp = k;
1031

    
1032
              /* And relink it */
1033
              rte *best = *bp;
1034
              *bp = best->next;
1035
              best->next = net->routes;
1036
              net->routes = best;
1037
            }
1038
        }
1039
      else if (new)
1040
        {
1041
          /* The third case - the new route is not better than the old
1042
             best route (therefore old_best != NULL) and the old best
1043
             route was not removed (therefore old_best == net->routes).
1044
             We just link the new route after the old best route. */
1045

    
1046
          ASSERT(net->routes != NULL);
1047
          new->next = net->routes->next;
1048
          net->routes->next = new;
1049
        }
1050
      /* The fourth (empty) case - suboptimal route was removed, nothing to do */
1051
    }
1052

    
1053
  if (new)
1054
    new->lastmod = now;
1055

    
1056
  /* Log the route change */
1057
  if (p->debug & D_ROUTES)
1058
    {
1059
      if (new_ok)
1060
        rte_trace(p, new, '>', new == net->routes ? "added [best]" : "added");
1061
      else if (old_ok)
1062
        {
1063
          if (old != old_best)
1064
            rte_trace(p, old, '>', "removed");
1065
          else if (rte_is_ok(net->routes))
1066
            rte_trace(p, old, '>', "removed [replaced]");
1067
          else
1068
            rte_trace(p, old, '>', "removed [sole]");
1069
        }
1070
    }
1071

    
1072
  /* Propagate the route change */
1073
  rte_announce(table, RA_ANY, net, new, old, NULL, NULL, NULL);
1074
  if (net->routes != old_best)
1075
    rte_announce(table, RA_OPTIMAL, net, net->routes, old_best, NULL, NULL, NULL);
1076
  if (table->config->sorted)
1077
    rte_announce(table, RA_ACCEPTED, net, new, old, NULL, NULL, before_old);
1078
  rte_announce(table, RA_MERGED, net, new, old, net->routes, old_best, NULL);
1079

    
1080
  if (!net->routes &&
1081
      (table->gc_counter++ >= table->config->gc_max_ops) &&
1082
      (table->gc_time + table->config->gc_min_time <= now))
1083
    rt_schedule_gc(table);
1084

    
1085
  if (old_ok && p->rte_remove)
1086
    p->rte_remove(net, old);
1087
  if (new_ok && p->rte_insert)
1088
    p->rte_insert(net, new);
1089

    
1090
  if (old)
1091
    rte_free_quick(old);
1092
}
1093

    
1094
static int rte_update_nest_cnt;                /* Nesting counter to allow recursive updates */

/* Enter a (possibly nested) route-update section. Must be balanced by
   rte_update_unlock(); the temporary linpool lives until the outermost
   section is closed. */
static inline void
rte_update_lock(void)
{
  rte_update_nest_cnt++;
}
1101

    
1102
static inline void
1103
rte_update_unlock(void)
1104
{
1105
  if (!--rte_update_nest_cnt)
1106
    lp_flush(rte_update_pool);
1107
}
1108

    
1109
static inline void
1110
rte_hide_dummy_routes(net *net, rte **dummy)
1111
{
1112
  if (net->routes && net->routes->attrs->source == RTS_DUMMY)
1113
  {
1114
    *dummy = net->routes;
1115
    net->routes = (*dummy)->next;
1116
  }
1117
}
1118

    
1119
static inline void
1120
rte_unhide_dummy_routes(net *net, rte **dummy)
1121
{
1122
  if (*dummy)
1123
  {
1124
    (*dummy)->next = net->routes;
1125
    net->routes = *dummy;
1126
  }
1127
}
1128

    
1129
/**
1130
 * rte_update - enter a new update to a routing table
1131
 * @ah: pointer to the announce hook of the table to be updated
1133
 * @net: network node
1134
 * @p: protocol submitting the update
1135
 * @src: protocol originating the update
1136
 * @new: a &rte representing the new route or %NULL for route removal.
1137
 *
1138
 * This function is called by the routing protocols whenever they discover
1139
 * a new route or wish to update/remove an existing route. The right announcement
1140
 * sequence is to build route attributes first (either un-cached with @aflags set
1141
 * to zero or a cached one using rta_lookup(); in this case please note that
1142
 * you need to increase the use count of the attributes yourself by calling
1143
 * rta_clone()), call rte_get_temp() to obtain a temporary &rte, fill in all
1144
 * the appropriate data and finally submit the new &rte by calling rte_update().
1145
 *
1146
 * @src specifies the protocol that originally created the route and the meaning
1147
 * of protocol-dependent data of @new. If @new is not %NULL, @src has to be the
1148
 * same value as @new->attrs->proto. @p specifies the protocol that called
1149
 * rte_update(). In most cases it is the same protocol as @src. rte_update()
1150
 * stores @p in @new->sender;
1151
 *
1152
 * When rte_update() gets any route, it automatically validates it (checks,
1153
 * whether the network and next hop address are valid IP addresses and also
1154
 * whether a normal routing protocol doesn't try to smuggle a host or link
1155
 * scope route to the table), converts all protocol dependent attributes stored
1156
 * in the &rte to temporary extended attributes, consults import filters of the
1157
 * protocol to see if the route should be accepted and/or its attributes modified,
1158
 * stores the temporary attributes back to the &rte.
1159
 *
1160
 * Now, having a "public" version of the route, we
1161
 * automatically find any old route defined by the protocol @src
1162
 * for network @n, replace it by the new one (or removing it if @new is %NULL),
1163
 * recalculate the optimal route for this destination and finally broadcast
1164
 * the change (if any) to all routing protocols by calling rte_announce().
1165
 *
1166
 * All memory used for attribute lists and other temporary allocations is taken
1167
 * from a special linear pool @rte_update_pool and freed when rte_update()
1168
 * finishes.
1169
 */
1170

    
1171
void
rte_update2(struct announce_hook *ah, net *net, rte *new, struct rte_src *src)
{
  struct proto *p = ah->proto;
  struct proto_stats *stats = ah->stats;
  struct filter *filter = ah->in_filter;
  ea_list *tmpa = NULL;
  rte *dummy = NULL;

  rte_update_lock();
  if (new)
    {
      new->sender = ah;

      stats->imp_updates_received++;
      if (!rte_validate(new))
        {
          /* Malformed route (bad network/next hop) — count and drop */
          rte_trace_in(D_FILTERS, p, new, "invalid");
          stats->imp_updates_invalid++;
          goto drop;
        }

      if (filter == FILTER_REJECT)
        {
          stats->imp_updates_filtered++;
          rte_trace_in(D_FILTERS, p, new, "filtered out");

          /* With in_keep_filtered, the route is stored marked as
             filtered instead of being dropped */
          if (! ah->in_keep_filtered)
            goto drop;

          /* new is a private copy, i could modify it */
          new->flags |= REF_FILTERED;
        }
      else
        {
          /* Build temporary attributes and run the import filter */
          tmpa = make_tmp_attrs(new, rte_update_pool);
          if (filter && (filter != FILTER_REJECT))
            {
              ea_list *old_tmpa = tmpa;
              int fr = f_run(filter, &new, &tmpa, rte_update_pool, 0);
              if (fr > F_ACCEPT)
                {
                  stats->imp_updates_filtered++;
                  rte_trace_in(D_FILTERS, p, new, "filtered out");

                  if (! ah->in_keep_filtered)
                    goto drop;

                  new->flags |= REF_FILTERED;
                }
              /* If the filter modified temporary attributes, let the
                 originating protocol store them back into the rte */
              if (tmpa != old_tmpa && src->proto->store_tmp_attrs)
                src->proto->store_tmp_attrs(new, tmpa);
            }
        }
      if (!rta_is_cached(new->attrs)) /* Need to copy attributes */
        new->attrs = rta_lookup(new->attrs);
      new->flags |= REF_COW;
    }
  else
    {
      /* Withdraw: without a known net or source there is nothing to remove */
      stats->imp_withdraws_received++;

      if (!net || !src)
        {
          stats->imp_withdraws_ignored++;
          rte_update_unlock();
          return;
        }
    }

 recalc:
  /* Hide a possible leading dummy route during recalculation */
  rte_hide_dummy_routes(net, &dummy);
  rte_recalculate(ah, net, new, src);
  rte_unhide_dummy_routes(net, &dummy);
  rte_update_unlock();
  return;

 drop:
  /* Dropped routes are still processed as withdrawals (new == NULL) */
  rte_free(new);
  new = NULL;
  goto recalc;
}
1253

    
1254
/* Independent call to rte_announce(), used from next hop
1255
   recalculation, outside of rte_update(). new must be non-NULL */
1256
static inline void 
rte_announce_i(rtable *tab, unsigned type, net *net, rte *new, rte *old,
               rte *new_best, rte *old_best)
{
  /* Wrap the announcement in a lock/unlock pair so the temporary
     linpool used by export processing is flushed afterwards */
  rte_update_lock();
  rte_announce(tab, type, net, new, old, new_best, old_best, NULL);
  rte_update_unlock();
}
1264

    
1265
void
rte_discard(rtable *t, rte *old)        /* Non-filtered route deletion, used during garbage collection */
{
  /* Withdraw @old on behalf of its original sender and source,
     bypassing import filters.
     NOTE(review): @t is unused here; the table is implied by old->sender. */
  rte_update_lock();
  rte_recalculate(old->sender, old->net, NULL, old->attrs->src);
  rte_update_unlock();
}
1272

    
1273
/* Check rtable for best route to given net whether it would be exported to p */
int
rt_examine(rtable *t, net_addr *a, struct proto *p, struct filter *filter)
{
  net *n = net_find(t, a);
  rte *rt = n ? n->routes : NULL;

  /* Only a valid best route can be exported */
  if (!rte_is_valid(rt))
    return 0;

  rte_update_lock();

  /* Rest is stripped down export_filter() */
  ea_list *tmpa = make_tmp_attrs(rt, rte_update_pool);
  int v = p->import_control ? p->import_control(p, &rt, &tmpa, rte_update_pool) : 0;
  if (v == RIC_PROCESS)
    v = (f_run(filter, &rt, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) <= F_ACCEPT);

   /* Discard temporary rte (import_control/filter may have COW-copied it) */
  if (rt != n->routes)
    rte_free(rt);

  rte_update_unlock();

  return v > 0;
}
1299

    
1300

    
1301
/**
1302
 * rt_refresh_begin - start a refresh cycle
1303
 * @t: related routing table
1304
 * @ah: related announce hook 
1305
 *
1306
 * This function starts a refresh cycle for given routing table and announce
1307
 * hook. The refresh cycle is a sequence where the protocol sends all its valid
1308
 * routes to the routing table (by rte_update()). After that, all protocol
1309
 * routes (more precisely routes with @ah as @sender) not sent during the
1310
 * refresh cycle but still in the table from the past are pruned. This is
1311
 * implemented by marking all related routes as stale by REF_STALE flag in
1312
 * rt_refresh_begin(), then marking all related stale routes with REF_DISCARD
1313
 * flag in rt_refresh_end() and then removing such routes in the prune loop.
1314
 */
1315
void
rt_refresh_begin(rtable *t, struct announce_hook *ah)
{
  net *n;
  rte *e;

  /* Mark every route sent by @ah as stale; routes re-announced during
     the refresh cycle get the flag cleared, the rest is discarded in
     rt_refresh_end() / the prune loop. */
  FIB_WALK(&t->fib, fn)
    {
      n = (net *) fn;
      for (e = n->routes; e; e = e->next)
        if (e->sender == ah)
          e->flags |= REF_STALE;
    }
  FIB_WALK_END;
}
1330

    
1331
/**
1332
 * rt_refresh_end - end a refresh cycle
1333
 * @t: related routing table
1334
 * @ah: related announce hook 
1335
 *
1336
 * This function ends a refresh cycle for given routing table and announce
1337
 * hook. See rt_refresh_begin() for description of refresh cycles.
1338
 */
1339
void
rt_refresh_end(rtable *t, struct announce_hook *ah)
{
  int prune = 0;
  net *n;
  rte *e;

  /* Any route from @ah still carrying REF_STALE was not re-announced
     during the refresh cycle — mark it for discarding */
  FIB_WALK(&t->fib, fn)
    {
      n = (net *) fn;
      for (e = n->routes; e; e = e->next)
        if ((e->sender == ah) && (e->flags & REF_STALE))
          {
            e->flags |= REF_DISCARD;
            prune = 1;
          }
    }
  FIB_WALK_END;

  /* Schedule the prune loop only if something was actually marked */
  if (prune)
    rt_schedule_prune(t);
}
1361

    
1362

    
1363
/**
1364
 * rte_dump - dump a route
1365
 * @e: &rte to be dumped
1366
 *
1367
 * This functions dumps contents of a &rte to debug output.
1368
 */
1369
void
rte_dump(rte *e)
{
  net *n = e->net;
  debug("%-1N ", n->n.addr);
  /* KF = kernel/FIB node flags, PF = protocol flags, lm = seconds since last modification */
  debug("KF=%02x PF=%02x pref=%d lm=%d ", n->n.flags, e->pflags, e->pref, now-e->lastmod);
  rta_dump(e->attrs);
  /* Let the originating protocol dump its protocol-specific attributes */
  if (e->attrs->src->proto->proto->dump_attrs)
    e->attrs->src->proto->proto->dump_attrs(e);
  debug("\n");
}
1380

    
1381
/**
1382
 * rt_dump - dump a routing table
1383
 * @t: routing table to be dumped
1384
 *
1385
 * This function dumps contents of a given routing table to debug output.
1386
 */
1387
void
rt_dump(rtable *t)
{
  rte *e;
  net *n;
  struct announce_hook *a;

  debug("Dump of routing table <%s>\n", t->name);
#ifdef DEBUGGING
  fib_check(&t->fib);
#endif
  /* Dump every route of every network in the table */
  FIB_WALK(&t->fib, fn)
    {
      n = (net *) fn;
      for(e=n->routes; e; e=e->next)
        rte_dump(e);
    }
  FIB_WALK_END;
  /* List the protocols attached to this table */
  WALK_LIST(a, t->hooks)
    debug("\tAnnounces routes to protocol %s\n", a->proto->name);
  debug("\n");
}
1409

    
1410
/**
1411
 * rt_dump_all - dump all routing tables
1412
 *
1413
 * This function dumps contents of all routing tables to debug output.
1414
 */
1415
void
1416
rt_dump_all(void)
1417
{
1418
  rtable *t;
1419

    
1420
  WALK_LIST(t, routing_tables)
1421
    rt_dump(t);
1422
}
1423

    
1424
/* Mark the table for pruning and kick its maintenance event */
static inline void
rt_schedule_prune(rtable *tab)
{
  rt_mark_for_prune(tab);
  ev_schedule(tab->rt_event);
}
1430

    
1431
static inline void
1432
rt_schedule_gc(rtable *tab)
1433
{
1434
  if (tab->gc_scheduled)
1435
    return;
1436

    
1437
  tab->gc_scheduled = 1;
1438
  ev_schedule(tab->rt_event);
1439
}
1440

    
1441
static inline void
1442
rt_schedule_hcu(rtable *tab)
1443
{
1444
  if (tab->hcu_scheduled)
1445
    return;
1446

    
1447
  tab->hcu_scheduled = 1;
1448
  ev_schedule(tab->rt_event);
1449
}
1450

    
1451
/* Request a next hop update. nhu_state: 0 = idle, 1 = scheduled,
   2 = running, 3 = running with another round requested
   (see rt_next_hop_update() for the 2->0 / 3->1 transitions). */
static inline void
rt_schedule_nhu(rtable *tab)
{
  if (tab->nhu_state == 0)
    ev_schedule(tab->rt_event);

  /* state change 0->1, 2->3 */
  tab->nhu_state |= 1;
}
1460

    
1461

    
1462
/* Remove FIB entries that no longer hold any route and reset the
   table's GC bookkeeping. */
static void
rt_prune_nets(rtable *tab)
{
  struct fib_iterator fit;
  int ncnt = 0, ndel = 0;

#ifdef DEBUGGING
  fib_check(&tab->fib);
#endif

  FIB_ITERATE_INIT(&fit, &tab->fib);
again:
  FIB_ITERATE_START(&tab->fib, &fit, f)
    {
      net *n = (net *) f;
      ncnt++;
      if (!n->routes)                /* Orphaned FIB entry */
        {
          /* fib_delete() invalidates the iterator position, so park
             it first and restart the walk */
          FIB_ITERATE_PUT(&fit, f);
          fib_delete(&tab->fib, f);
          ndel++;
          goto again;
        }
    }
  FIB_ITERATE_END(f);
  DBG("Pruned %d of %d networks\n", ndel, ncnt);

  tab->gc_counter = 0;
  tab->gc_time = now;
  tab->gc_scheduled = 0;
}
1493

    
1494
/* Table maintenance event handler: performs any pending hostcache
   update, next hop update, prune and GC work for the table. */
static void
rt_event(void *ptr)
{
  rtable *tab = ptr;

  if (tab->hcu_scheduled)
    rt_update_hostcache(tab);

  if (tab->nhu_state)
    rt_next_hop_update(tab);

  if (tab->prune_state)
    if (!rt_prune_table(tab))
      {
        /* Table prune unfinished — reschedule ourselves and retry later */
        ev_schedule(tab->rt_event);
        return;
      }

  if (tab->gc_scheduled)
    {
      rt_prune_nets(tab);
      rt_prune_sources(); // FIXME this should be moved to independent event
    }
}
1519

    
1520
void
1521
rt_setup(pool *p, rtable *t, char *name, struct rtable_config *cf)
1522
{
1523
  bzero(t, sizeof(*t));
1524
  t->name = name;
1525
  t->config = cf;
1526
  t->addr_type = cf ? cf->addr_type : NET_IP4;
1527
  fib_init(&t->fib, p, t->addr_type, sizeof(net), OFFSETOF(net, n), 0, NULL);
1528
  init_list(&t->hooks);
1529
  if (cf)
1530
    {
1531
      t->rt_event = ev_new(p);
1532
      t->rt_event->hook = rt_event;
1533
      t->rt_event->data = t;
1534
      t->gc_time = now;
1535
    }
1536
}
1537

    
1538
/**
1539
 * rt_init - initialize routing tables
1540
 *
1541
 * This function is called during BIRD startup. It initializes the
1542
 * routing table module.
1543
 */
1544
void
rt_init(void)
{
  rta_init();
  rt_table_pool = rp_new(&root_pool, "Routing tables");
  /* Linear pool for temporary per-update allocations (filters, tmp attrs) */
  rte_update_pool = lp_new(rt_table_pool, 4080);
  /* Slab for the fixed-size rte structures */
  rte_slab = sl_new(rt_table_pool, sizeof(rte));
  init_list(&routing_tables);
}
1553

    
1554

    
1555
/* One bounded step of table pruning: remove up to *limit routes that
   belong to flushing protocols or carry REF_DISCARD, deleting orphaned
   FIB entries along the way. Returns 1 when the table is fully pruned,
   0 when the step ran out of budget (iterator position is saved). */
static int
rt_prune_step(rtable *tab, int *limit)
{
  struct fib_iterator *fit = &tab->prune_fit;

  DBG("Pruning route table %s\n", tab->name);
#ifdef DEBUGGING
  fib_check(&tab->fib);
#endif

  if (tab->prune_state == RPS_NONE)
    return 1;

  if (tab->prune_state == RPS_SCHEDULED)
    {
      FIB_ITERATE_INIT(fit, &tab->fib);
      tab->prune_state = RPS_RUNNING;
    }

again:
  FIB_ITERATE_START(&tab->fib, fit, fn)
    {
      net *n = (net *) fn;
      rte *e;

    rescan:
      for (e=n->routes; e; e=e->next)
        if (e->sender->proto->flushing || (e->flags & REF_DISCARD))
          {
            if (*limit <= 0)
              {
                /* Budget exhausted — save position and resume later */
                FIB_ITERATE_PUT(fit, fn);
                return 0;
              }

            rte_discard(tab, e);
            (*limit)--;

            /* rte_discard() relinked the list; restart the scan of this net */
            goto rescan;
          }
      if (!n->routes)                /* Orphaned FIB entry */
        {
          FIB_ITERATE_PUT(fit, fn);
          fib_delete(&tab->fib, fn);
          goto again;
        }
    }
  FIB_ITERATE_END(fn);

#ifdef DEBUGGING
  fib_check(&tab->fib);
#endif

  tab->prune_state = RPS_NONE;
  return 1;
}
1611

    
1612
/**
1613
 * rt_prune_table - prune a routing table
 * @tab: routing table to be pruned
 *
1615
 * This function scans the routing table @tab and removes routes belonging to
1616
 * flushing protocols, discarded routes and also stale network entries, in a
1617
 * similar fashion like rt_prune_loop(). Returns 1 when all such routes are
1618
 * pruned. Contrary to rt_prune_loop(), this function is not a part of the
1619
 * protocol flushing loop, but it is called from rt_event() for just one routing
1620
 * table.
1621
 *
1622
 * Note that rt_prune_table() and rt_prune_loop() share (for each table) the
1623
 * prune state (@prune_state) and also the pruning iterator (@prune_fit).
1624
 */
1625
static inline int
1626
rt_prune_table(rtable *tab)
1627
{
1628
  int limit = 512;
1629
  return rt_prune_step(tab, &limit);
1630
}
1631

    
1632
/**
1633
 * rt_prune_loop - prune routing tables
1634
 *
1635
 * The prune loop scans routing tables and removes routes belonging to flushing
1636
 * protocols, discarded routes and also stale network entries. Returns 1 when
1637
 * all such routes are pruned. It is a part of the protocol flushing loop.
1638
 */
1639
int
1640
rt_prune_loop(void)
1641
{
1642
  int limit = 512;
1643
  rtable *t;
1644

    
1645
  WALK_LIST(t, routing_tables)
1646
    if (! rt_prune_step(t, &limit))
1647
      return 0;
1648

    
1649
  return 1;
1650
}
1651

    
1652
/* Pre-configuration hook: create the default (master) routing table */
void
rt_preconfig(struct config *c)
{
  struct symbol *s = cf_find_symbol("master");

  init_list(&c->tables);
  c->master_rtc = rt_new_table(s, NET_IP4);
}
1660

    
1661

    
1662
/* 
1663
 * Some functions for handing internal next hop updates
1664
 * triggered by rt_schedule_nhu().
1665
 */
1666

    
1667
/* Return nonzero if the cached next hop data in @a no longer matches
   its hostentry (and the rta therefore needs to be rebuilt). */
static inline int
rta_next_hop_outdated(rta *a)
{
  struct hostentry *he = a->hostentry;

  /* Not a recursive route — nothing to update */
  if (!he)
    return 0;

  /* Host became unresolvable — outdated unless already unreachable */
  if (!he->src)
    return a->dest != RTD_UNREACHABLE;

  return (a->iface != he->src->iface) || !ipa_equal(a->gw, he->gw) ||
    (a->dest != he->dest) || (a->igp_metric != he->igp_metric) ||
    !mpnh_same(a->nexthops, he->src->nexthops);
}
1682

    
1683
static inline void
1684
rta_apply_hostentry(rta *a, struct hostentry *he)
1685
{
1686
  a->hostentry = he;
1687
  a->iface = he->src ? he->src->iface : NULL;
1688
  a->gw = he->gw;
1689
  a->dest = he->dest;
1690
  a->igp_metric = he->igp_metric;
1691
  a->nexthops = he->src ? he->src->nexthops : NULL;
1692
}
1693

    
1694
/* Build a copy of @old with next hop data refreshed from its hostentry.
   Returns a newly allocated rte with cached attributes; the caller owns it. */
static inline rte *
rt_next_hop_update_rte(rtable *tab, rte *old)
{
  /* Work on an uncached copy of the attributes */
  rta a;
  memcpy(&a, old->attrs, sizeof(rta));
  rta_apply_hostentry(&a, old->attrs->hostentry);
  a.aflags = 0;                /* clear cache-related flags before re-lookup */

  rte *e = sl_alloc(rte_slab);
  memcpy(e, old, sizeof(rte));
  e->attrs = rta_lookup(&a);

  return e;
}
1708

    
1709
/* Refresh all routes with outdated next hops in one net, re-elect and
   relink the best route, and announce the changes. Returns the number
   of updated routes (used as work budget by the caller). */
static inline int
rt_next_hop_update_net(rtable *tab, net *n)
{
  rte **k, *e, *new, *old_best, **new_best;
  int count = 0;
  int free_old_best = 0;

  old_best = n->routes;
  if (!old_best)
    return 0;

  /* Replace each outdated route in place with a refreshed copy */
  for (k = &n->routes; e = *k; k = &e->next)
    if (rta_next_hop_outdated(e->attrs))
      {
        new = rt_next_hop_update_rte(tab, e);
        *k = new;

        rte_announce_i(tab, RA_ANY, n, new, e, NULL, NULL);
        rte_trace_in(D_ROUTES, new->sender->proto, new, "updated");

        /* Call a pre-comparison hook */
        /* Not really an efficient way to compute this */
        if (e->attrs->src->proto->rte_recalculate)
          e->attrs->src->proto->rte_recalculate(tab, n, new, e, NULL);

        if (e != old_best)
          rte_free_quick(e);
        else /* Freeing of the old best rte is postponed */
          free_old_best = 1;

        e = new;
        count++;
      }

  if (!count)
    return 0;

  /* Find the new best route */
  new_best = NULL;
  for (k = &n->routes; e = *k; k = &e->next)
    {
      if (!new_best || rte_better(e, *new_best))
        new_best = k;
    }

  /* Relink the new best route to the first position */
  new = *new_best;
  if (new != n->routes)
    {
      *new_best = new->next;
      new->next = n->routes;
      n->routes = new;
    }

  /* Announce the new best route */
  if (new != old_best)
    {
      rte_announce_i(tab, RA_OPTIMAL, n, new, old_best, NULL, NULL);
      rte_trace_in(D_ROUTES, new->sender->proto, new, "updated [best]");
    }

  /* FIXME: Better announcement of merged routes */
  rte_announce_i(tab, RA_MERGED, n, new, old_best, new, old_best);

   if (free_old_best)
    rte_free_quick(old_best);

  return count;
}
1778

    
1779
/* Incremental next hop update over the whole table, driven by the
   nhu_state machine (see rt_schedule_nhu()); reschedules itself when
   the per-run budget is exhausted or another round was requested. */
static void
rt_next_hop_update(rtable *tab)
{
  struct fib_iterator *fit = &tab->nhu_fit;
  int max_feed = 32;

  if (tab->nhu_state == 0)
    return;

  if (tab->nhu_state == 1)
    {
      /* Fresh run — start iterating from the beginning */
      FIB_ITERATE_INIT(fit, &tab->fib);
      tab->nhu_state = 2;
    }

  FIB_ITERATE_START(&tab->fib, fit, fn)
    {
      if (max_feed <= 0)
        {
          /* Budget exhausted — save position and continue in next event */
          FIB_ITERATE_PUT(fit, fn);
          ev_schedule(tab->rt_event);
          return;
        }
      max_feed -= rt_next_hop_update_net(tab, (net *) fn);
    }
  FIB_ITERATE_END(fn);

  /* state change 2->0, 3->1 */
  tab->nhu_state &= 1;

  /* Another round was requested while we were running */
  if (tab->nhu_state > 0)
    ev_schedule(tab->rt_event);
}
1812

    
1813

    
1814
/* Define a new table in the configuration being parsed and return its
   config structure (with default GC parameters). */
struct rtable_config *
rt_new_table(struct symbol *s, uint addr_type)
{
  /* Hack that allows to 'redefine' the master table */
  if ((s->class == SYM_TABLE) && (s->def == new_config->master_rtc))
    return s->def;

  struct rtable_config *c = cfg_allocz(sizeof(struct rtable_config));

  cf_define_symbol(s, SYM_TABLE, c);
  c->name = s->name;
  c->addr_type = addr_type;
  add_tail(&new_config->tables, &c->n);
  /* Default GC tuning: collect after 1000 ops, at most once per 5 s */
  c->gc_max_ops = 1000;
  c->gc_min_time = 5;
  return c;
}
1831

    
1832
/**
1833
 * rt_lock_table - lock a routing table
1834
 * @r: routing table to be locked
1835
 *
1836
 * Lock a routing table, because it's in use by a protocol,
1837
 * preventing it from being freed when it gets undefined in a new
1838
 * configuration.
1839
 */
1840
void
1841
rt_lock_table(rtable *r)
1842
{
1843
  r->use_count++;
1844
}
1845

    
1846
/**
1847
 * rt_unlock_table - unlock a routing table
1848
 * @r: routing table to be unlocked
1849
 *
1850
 * Unlock a routing table formerly locked by rt_lock_table(),
1851
 * that is decrease its use count and delete it if it's scheduled
1852
 * for deletion by configuration changes.
1853
 */
1854
void
rt_unlock_table(rtable *r)
{
  /* Last user gone and the table was removed from config — tear it down */
  if (!--r->use_count && r->deleted)
    {
      struct config *conf = r->deleted;
      DBG("Deleting routing table %s\n", r->name);
      if (r->hostcache)
        rt_free_hostcache(r);
      rem_node(&r->n);
      fib_free(&r->fib);
      rfree(r->rt_event);
      mb_free(r);
      /* Let the old configuration be freed once all its obstacles are gone */
      config_del_obstacle(conf);
    }
}
1870

    
1871
/**
1872
 * rt_commit - commit new routing table configuration
1873
 * @new: new configuration
1874
 * @old: original configuration or %NULL if it's boot time config
1875
 *
1876
 * Scan differences between @old and @new configuration and modify
1877
 * the routing tables according to these changes. If @new defines a
1878
 * previously unknown table, create it, if it omits a table existing
1879
 * in @old, schedule it for deletion (it gets deleted when all protocols
1880
 * disconnect from it by calling rt_unlock_table()), if it exists
1881
 * in both configurations, leave it unchanged.
1882
 */
1883
void
rt_commit(struct config *new, struct config *old)
{
  struct rtable_config *o, *r;

  DBG("rt_commit:\n");
  if (old)
    {
      WALK_LIST(o, old->tables)
        {
          rtable *ot = o->table;
          if (!ot->deleted)
            {
              /* Table survives if a table of the same name exists in the
                 new config and we are not shutting down */
              struct symbol *sym = cf_find_symbol(o->name);
              if (sym && sym->class == SYM_TABLE && !new->shutdown)
                {
                  DBG("\t%s: same\n", o->name);
                  r = sym->def;
                  /* Rebind the existing table to the new config structure */
                  r->table = ot;
                  ot->name = r->name;
                  ot->config = r;
                  if (o->sorted != r->sorted)
                    log(L_WARN "Reconfiguration of rtable sorted flag not implemented");
                }
              else
                {
                  /* Schedule deletion; the lock/unlock pair triggers the
                     actual teardown once no protocol holds the table */
                  DBG("\t%s: deleted\n", o->name);
                  ot->deleted = old;
                  config_add_obstacle(old);
                  rt_lock_table(ot);
                  rt_unlock_table(ot);
                }
            }
        }
    }

  /* Create tables that are new in this configuration */
  WALK_LIST(r, new->tables)
    if (!r->table)
      {
        rtable *t = mb_alloc(rt_table_pool, sizeof(struct rtable));
        DBG("\t%s: created\n", r->name);
        rt_setup(rt_table_pool, t, r->name, r);
        add_tail(&routing_tables, &t->n);
        r->table = t;
      }
  DBG("\tdone\n");
}
1930

    
1931
static inline void
1932
do_feed_baby(struct proto *p, int type, struct announce_hook *h, net *n, rte *e)
1933
{
1934
  rte_update_lock();
1935
  if (type == RA_ACCEPTED)
1936
    rt_notify_accepted(h, n, e, NULL, NULL, p->refeeding ? 2 : 1);
1937
  else if (type == RA_MERGED)
1938
    rt_notify_merged(h, n, NULL, NULL, e, p->refeeding ? e : NULL, p->refeeding);
1939
  else
1940
    rt_notify_basic(h, n, e, p->refeeding ? e : NULL, p->refeeding);
1941
  rte_update_unlock();
1942
}
1943

    
1944
/**
1945
 * rt_feed_baby - advertise routes to a new protocol
1946
 * @p: protocol to be fed
1947
 *
1948
 * This function performs one pass of advertisement of routes to a newly
1949
 * initialized protocol. It's called by the protocol code as long as it
1950
 * has something to do. (We avoid transferring all the routes in single
1951
 * pass in order not to monopolize CPU time.)
1952
 */
1953
int
rt_feed_baby(struct proto *p)
{
  struct announce_hook *h;
  struct fib_iterator *fit;
  int max_feed = 256;                /* per-pass budget of announced routes */

  if (!p->feed_ahook)                        /* Need to initialize first */
    {
      if (!p->ahooks)
        return 1;
      DBG("Announcing routes to new protocol %s\n", p->name);
      p->feed_ahook = p->ahooks;
      fit = p->feed_iterator = mb_alloc(p->pool, sizeof(struct fib_iterator));
      goto next_hook;
    }
  fit = p->feed_iterator;

again:
  h = p->feed_ahook;
  FIB_ITERATE_START(&h->table->fib, fit, fn)
    {
      net *n = (net *) fn;
      rte *e = n->routes;
      if (max_feed <= 0)
        {
          /* Budget exhausted — save position, continue in the next pass */
          FIB_ITERATE_PUT(fit, fn);
          return 0;
        }

      /* XXXX perhaps we should change feed for RA_ACCEPTED to not use 'new' */

      /* Best-route-only modes: feed just the head of the route list */
      if ((p->accept_ra_types == RA_OPTIMAL) ||
          (p->accept_ra_types == RA_ACCEPTED) ||
          (p->accept_ra_types == RA_MERGED))
        if (rte_is_valid(e))
          {
            if (p->export_state != ES_FEEDING)
              return 1;  /* In the meantime, the protocol fell down. */

            do_feed_baby(p, p->accept_ra_types, h, n, e);
            max_feed--;
          }

      /* RA_ANY: feed every valid route of the net */
      if (p->accept_ra_types == RA_ANY)
        for(e = n->routes; e; e = e->next)
          {
            if (p->export_state != ES_FEEDING)
              return 1;  /* In the meantime, the protocol fell down. */

            if (!rte_is_valid(e))
              continue;

            do_feed_baby(p, RA_ANY, h, n, e);
            max_feed--;
          }
    }
  FIB_ITERATE_END(fn);
  /* This hook's table is done — move on to the next announce hook */
  p->feed_ahook = h->next;
  if (!p->feed_ahook)
    {
      mb_free(p->feed_iterator);
      p->feed_iterator = NULL;
      return 1;
    }

next_hook:
  h = p->feed_ahook;
  FIB_ITERATE_INIT(fit, &h->table->fib);
  goto again;
}
2024

    
2025
/**
2026
 * rt_feed_baby_abort - abort protocol feeding
2027
 * @p: protocol
2028
 *
2029
 * This function is called by the protocol code when the protocol
2030
 * stops or ceases to exist before the last iteration of rt_feed_baby()
2031
 * has finished.
2032
 */
2033
void
2034
rt_feed_baby_abort(struct proto *p)
2035
{
2036
  if (p->feed_ahook)
2037
    {
2038
      /* Unlink the iterator and exit */
2039
      fit_get(&p->feed_ahook->table->fib, p->feed_iterator);
2040
      p->feed_ahook = NULL;
2041
    }
2042
}
2043

    
2044

    
2045
/* Fold a pointer value into a hash by XOR-ing shifted copies of its bits */
static inline unsigned
ptr_hash(void *ptr)
{
  uintptr_t v = (uintptr_t) ptr;

  return (unsigned) (v ^ (v << 8) ^ (v >> 16));
}
2051

    
2052
static inline unsigned
2053
hc_hash(ip_addr a, rtable *dep)
2054
{
2055
  return (ipa_hash(a) ^ ptr_hash(dep)) & 0xffff;
2056
}
2057

    
2058
static inline void
2059
hc_insert(struct hostcache *hc, struct hostentry *he)
2060
{
2061
  uint k = he->hash_key >> hc->hash_shift;
2062
  he->next = hc->hash_table[k];
2063
  hc->hash_table[k] = he;
2064
}
2065

    
2066
static inline void
2067
hc_remove(struct hostcache *hc, struct hostentry *he)
2068
{
2069
  struct hostentry **hep;
2070
  uint k = he->hash_key >> hc->hash_shift;
2071

    
2072
  for (hep = &hc->hash_table[k]; *hep != he; hep = &(*hep)->next);
2073
  *hep = he->next;
2074
}
2075

    
2076
#define HC_DEF_ORDER 10                        /* Initial hash order; table has 1 << order buckets */
#define HC_HI_MARK *4                        /* Grow threshold: textual suffix, expands to 'size *4' */
#define HC_HI_STEP 2                        /* Orders added when growing */
#define HC_HI_ORDER 16                        /* Must be at most 16 */
#define HC_LO_MARK /5                        /* Shrink threshold: textual suffix, expands to 'size /5' */
#define HC_LO_STEP 2                        /* Orders removed when shrinking */
#define HC_LO_ORDER 10                        /* Never shrink below this order */
2083

    
2084
static void
2085
hc_alloc_table(struct hostcache *hc, unsigned order)
2086
{
2087
  unsigned hsize = 1 << order;
2088
  hc->hash_order = order;
2089
  hc->hash_shift = 16 - order;
2090
  hc->hash_max = (order >= HC_HI_ORDER) ? ~0 : (hsize HC_HI_MARK);
2091
  hc->hash_min = (order <= HC_LO_ORDER) ?  0 : (hsize HC_LO_MARK);
2092

    
2093
  hc->hash_table = mb_allocz(rt_table_pool, hsize * sizeof(struct hostentry *));
2094
}
2095

    
2096
static void
2097
hc_resize(struct hostcache *hc, unsigned new_order)
2098
{
2099
  unsigned old_size = 1 << hc->hash_order;
2100
  struct hostentry **old_table = hc->hash_table;
2101
  struct hostentry *he, *hen;
2102
  int i;
2103

    
2104
  hc_alloc_table(hc, new_order);
2105
  for (i = 0; i < old_size; i++)
2106
    for (he = old_table[i]; he != NULL; he=hen)
2107
      {
2108
        hen = he->next;
2109
        hc_insert(hc, he);
2110
      }
2111
  mb_free(old_table);
2112
}
2113

    
2114
static struct hostentry *
2115
hc_new_hostentry(struct hostcache *hc, ip_addr a, ip_addr ll, rtable *dep, unsigned k)
2116
{
2117
  struct hostentry *he = sl_alloc(hc->slab);
2118

    
2119
  he->addr = a;
2120
  he->link = ll;
2121
  he->tab = dep;
2122
  he->hash_key = k;
2123
  he->uc = 0;
2124
  he->src = NULL;
2125

    
2126
  add_tail(&hc->hostentries, &he->ln);
2127
  hc_insert(hc, he);
2128

    
2129
  hc->hash_items++;
2130
  if (hc->hash_items > hc->hash_max)
2131
    hc_resize(hc, hc->hash_order + HC_HI_STEP);
2132

    
2133
  return he;
2134
}
2135

    
2136
static void
2137
hc_delete_hostentry(struct hostcache *hc, struct hostentry *he)
2138
{
2139
  rta_free(he->src);
2140

    
2141
  rem_node(&he->ln);
2142
  hc_remove(hc, he);
2143
  sl_free(hc->slab, he);
2144

    
2145
  hc->hash_items--;
2146
  if (hc->hash_items < hc->hash_min)
2147
    hc_resize(hc, hc->hash_order - HC_LO_STEP);
2148
}
2149

    
2150
static void
2151
rt_init_hostcache(rtable *tab)
2152
{
2153
  struct hostcache *hc = mb_allocz(rt_table_pool, sizeof(struct hostcache));
2154
  init_list(&hc->hostentries);
2155

    
2156
  hc->hash_items = 0;
2157
  hc_alloc_table(hc, HC_DEF_ORDER);
2158
  hc->slab = sl_new(rt_table_pool, sizeof(struct hostentry));
2159

    
2160
  hc->lp = lp_new(rt_table_pool, 1008);
2161
  hc->trie = f_new_trie(hc->lp, sizeof(struct f_trie_node));
2162

    
2163
  tab->hostcache = hc;
2164
}
2165

    
2166
static void
2167
rt_free_hostcache(rtable *tab)
2168
{
2169
  struct hostcache *hc = tab->hostcache;
2170

    
2171
  node *n;
2172
  WALK_LIST(n, hc->hostentries)
2173
    {
2174
      struct hostentry *he = SKIP_BACK(struct hostentry, ln, n);
2175
      rta_free(he->src);
2176

    
2177
      if (he->uc)
2178
        log(L_ERR "Hostcache is not empty in table %s", tab->name);
2179
    }
2180

    
2181
  rfree(hc->slab);
2182
  rfree(hc->lp);
2183
  mb_free(hc->hash_table);
2184
  mb_free(hc);
2185
}
2186

    
2187
/*
 * Called when network @net changes in table @tab; schedules a hostcache
 * update unless one is already pending.
 *
 * NOTE(review): the trie-based relevance check is disabled during the
 * net_addr migration (XXXX below), so currently no update is scheduled
 * here at all and @hc is left unused on purpose.
 */
static void
rt_notify_hostcache(rtable *tab, net *net)
{
  struct hostcache *hc = tab->hostcache;

  if (tab->hcu_scheduled)
    return;

  // XXXX
  // if (trie_match_prefix(hc->trie, net->n.prefix, net->n.pxlen))
  // rt_schedule_hcu(tab);
}
2199

    
2200
static int
2201
if_local_addr(ip_addr a, struct iface *i)
2202
{
2203
  struct ifa *b;
2204

    
2205
  WALK_LIST(b, i->addrs)
2206
    if (ipa_equal(a, b->ip))
2207
      return 1;
2208

    
2209
  return 0;
2210
}
2211

    
2212
static u32 
2213
rt_get_igp_metric(rte *rt)
2214
{
2215
  eattr *ea = ea_find(rt->attrs->eattrs, EA_GEN_IGP_METRIC);
2216

    
2217
  if (ea)
2218
    return ea->u.data;
2219

    
2220
  rta *a = rt->attrs;
2221

    
2222
#ifdef CONFIG_OSPF
2223
  if ((a->source == RTS_OSPF) ||
2224
      (a->source == RTS_OSPF_IA) ||
2225
      (a->source == RTS_OSPF_EXT1))
2226
    return rt->u.ospf.metric1;
2227
#endif
2228

    
2229
#ifdef CONFIG_RIP
2230
  if (a->source == RTS_RIP)
2231
    return rt->u.rip.metric;
2232
#endif
2233

    
2234
  /* Device routes */
2235
  if ((a->dest != RTD_ROUTER) && (a->dest != RTD_MULTIPATH))
2236
    return 0;
2237

    
2238
  return IGP_METRIC_UNKNOWN;
2239
}
2240

    
2241
/*
 * Recompute the resolved state (gw, dest, src, igp_metric) of hostentry
 * @he by looking up its address in table @tab. Returns nonzero when the
 * cached route attributes changed, so the dependent table can be
 * scheduled for a next hop update.
 *
 * NOTE(review): the actual route lookup is disabled during the net_addr
 * migration (XXXX below), so every entry currently resolves as
 * RTD_UNREACHABLE and the if (n) branch is dead code for now.
 */
static int
rt_update_hostentry(rtable *tab, struct hostentry *he)
{
  /* Keep the previous attributes so we can detect a change at the end */
  rta *old_src = he->src;
  int pxlen = 0;

  /* Reset the hostentry */ 
  he->src = NULL;
  he->gw = IPA_NONE;
  he->dest = RTD_UNREACHABLE;
  he->igp_metric = 0;

  // XXXX
  // net *n = net_route(tab, he->addr, MAX_PREFIX_LENGTH);
  net *n = NULL;
  if (n)
    {
      /* Resolve through the best route of the covering network */
      rte *e = n->routes;
      rta *a = e->attrs;
      pxlen = n->n.addr->pxlen;

      if (a->hostentry)
        {
          /* Recursive route should not depend on another recursive route */
          log(L_WARN "Next hop address %I resolvable through recursive route for %N",
              he->addr, n->n.addr);
          goto done;
        }

      if (a->dest == RTD_DEVICE)
        {
          if (if_local_addr(he->addr, a->iface))
            {
              /* The host address is a local address, this is not valid */
              log(L_WARN "Next hop address %I is a local address of iface %s",
                  he->addr, a->iface->name);
              goto done;
                  }

          /* The host is directly reachable, use link as a gateway */
          he->gw = he->link;
          he->dest = RTD_ROUTER;
        }
      else
        {
          /* The host is reachable through some route entry */
          he->gw = a->gw;
          he->dest = a->dest;
        }

      he->src = rta_clone(a);
      he->igp_metric = rt_get_igp_metric(e);
    }

 done:
  /* Add a prefix range to the trie */
  trie_add_prefix(tab->hostcache->trie, he->addr, MAX_PREFIX_LENGTH, pxlen, MAX_PREFIX_LENGTH);

  /* Drop the old attributes; report whether the resolution changed */
  rta_free(old_src);
  return old_src != he->src;
}
2302

    
2303
static void
2304
rt_update_hostcache(rtable *tab)
2305
{
2306
  struct hostcache *hc = tab->hostcache;
2307
  struct hostentry *he;
2308
  node *n, *x;
2309

    
2310
  /* Reset the trie */
2311
  lp_flush(hc->lp);
2312
  hc->trie = f_new_trie(hc->lp, sizeof(struct f_trie_node));
2313

    
2314
  WALK_LIST_DELSAFE(n, x, hc->hostentries)
2315
    {
2316
      he = SKIP_BACK(struct hostentry, ln, n);
2317
      if (!he->uc)
2318
        {
2319
          hc_delete_hostentry(hc, he);
2320
          continue;
2321
        }
2322

    
2323
      if (rt_update_hostentry(tab, he))
2324
        rt_schedule_nhu(he->tab);
2325
    }
2326

    
2327
  tab->hcu_scheduled = 0;
2328
}
2329

    
2330
static struct hostentry *
2331
rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep)
2332
{
2333
  struct hostentry *he;
2334

    
2335
  if (!tab->hostcache)
2336
    rt_init_hostcache(tab);
2337

    
2338
  uint k = hc_hash(a, dep);
2339
  struct hostcache *hc = tab->hostcache;
2340
  for (he = hc->hash_table[k >> hc->hash_shift]; he != NULL; he = he->next)
2341
    if (ipa_equal(he->addr, a) && (he->tab == dep))
2342
      return he;
2343

    
2344
  he = hc_new_hostentry(hc, a, ll, dep, k);
2345
  rt_update_hostentry(tab, he);
2346
  return he;
2347
}
2348

    
2349
void
2350
rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr *gw, ip_addr *ll)
2351
{
2352
  rta_apply_hostentry(a, rt_get_hostentry(tab, *gw, *ll, dep));
2353
}
2354

    
2355

    
2356
/*
2357
 *  CLI commands
2358
 */
2359

    
2360
static void
2361
rt_format_via(rte *e, byte *via)
2362
{
2363
  rta *a = e->attrs;
2364

    
2365
  switch (a->dest)
2366
    {
2367
    case RTD_ROUTER:        bsprintf(via, "via %I on %s", a->gw, a->iface->name); break;
2368
    case RTD_DEVICE:        bsprintf(via, "dev %s", a->iface->name); break;
2369
    case RTD_BLACKHOLE:        bsprintf(via, "blackhole"); break;
2370
    case RTD_UNREACHABLE:        bsprintf(via, "unreachable"); break;
2371
    case RTD_PROHIBIT:        bsprintf(via, "prohibited"); break;
2372
    case RTD_MULTIPATH:        bsprintf(via, "multipath"); break;
2373
    default:                bsprintf(via, "???");
2374
    }
2375
}
2376

    
2377
/*
 * Print a single route @e of network @ia to CLI @c in 'show route'
 * format: "prefix via ... [proto time from] info", one extra line per
 * additional multipath next hop, and the full attribute dump when
 * verbose. @tmpa holds the protocol's temporary attributes for @e.
 * @ia is printed as given; the caller blanks it after the first route
 * so the prefix appears only once per network.
 */
static void
rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, ea_list *tmpa)
{
  byte via[STD_ADDRESS_P_LENGTH+32], from[STD_ADDRESS_P_LENGTH+8];
  byte tm[TM_DATETIME_BUFFER_SIZE], info[256];
  rta *a = e->attrs;
  int primary = (e->net->routes == e);        /* First route on the list is the one in use */
  int sync_error = (e->net->n.flags & KRF_SYNC_ERROR);
  void (*get_route_info)(struct rte *, byte *buf, struct ea_list *attrs);
  struct mpnh *nh;

  rt_format_via(e, via);
  tm_format_datetime(tm, &config->tf_route, e->lastmod);
  /* Show the learnt-from address only when it differs from the gateway */
  if (ipa_nonzero(a->from) && !ipa_equal(a->from, a->gw))
    bsprintf(from, " from %I", a->from);
  else
    from[0] = 0;

  get_route_info = a->src->proto->proto->get_route_info;
  if (get_route_info || d->verbose)
    {
      /* Need to normalize the extended attributes */
      ea_list *t = tmpa;
      t = ea_append(t, a->eattrs);
      tmpa = alloca(ea_scan(t));
      ea_merge(t, tmpa);
      ea_sort(tmpa);
    }
  if (get_route_info)
    get_route_info(e, info, tmpa);
  else
    bsprintf(info, " (%d)", e->pref);
  /* Primary route is flagged '*', or '!' when kernel sync failed */
  cli_printf(c, -1007, "%-18s %s [%s %s%s]%s%s", ia, via, a->src->proto->name,
             tm, from, primary ? (sync_error ? " !" : " *") : "", info);
  for (nh = a->nexthops; nh; nh = nh->next)
    cli_printf(c, -1007, "\tvia %I on %s weight %d", nh->gw, nh->iface->name, nh->weight + 1);
  if (d->verbose)
    rta_show(c, a, tmpa);
}
2416

    
2417
/*
 * Print all matching routes of network @n to CLI @c according to the
 * query in @d: optional export-mode simulation against a protocol,
 * show/filter restrictions, and route/network counters used for the
 * final statistics line.
 */
static void
rt_show_net(struct cli *c, net *n, struct rt_show_data *d)
{
  rte *e, *ee;
  byte ia[STD_ADDRESS_P_LENGTH+8];
  struct ea_list *tmpa;
  struct announce_hook *a = NULL;
  int first = 1;
  int pass = 0;

  bsprintf(ia, "%N", n->n.addr);

  if (d->export_mode)
    {
      /* Export simulation only makes sense for protocols that receive routes */
      if (! d->export_protocol->rt_notify)
        return;

      a = proto_find_announce_hook(d->export_protocol, d->table);
      if (!a)
        return;
    }

  for (e = n->routes; e; e = e->next)
    {
      if (rte_is_filtered(e) != d->filtered)
        continue;

      d->rt_counter++;
      d->net_counter += first;
      first = 0;

      /* 'pass' is set once the remaining routes of this net can no longer
         be exported (single-route export modes); they are still counted
         above but not printed */
      if (pass)
        continue;

      ee = e;
      rte_update_lock();                /* We use the update buffer for filtering */
      tmpa = make_tmp_attrs(e, rte_update_pool);

      /* Special case for merged export */
      if ((d->export_mode == RSEM_EXPORT) && (d->export_protocol->accept_ra_types == RA_MERGED))
        {
          rte *rt_free;
          e = rt_export_merged(a, n, &rt_free, &tmpa, 1);
          pass = 1;

          if (!e)
          { e = ee; goto skip; }
        }
      else if (d->export_mode)
        {
          struct proto *ep = d->export_protocol;
          int ic = ep->import_control ? ep->import_control(ep, &e, &tmpa, rte_update_pool) : 0;

          if (ep->accept_ra_types == RA_OPTIMAL || ep->accept_ra_types == RA_MERGED)
            pass = 1;

          if (ic < 0)
            goto skip;

          if (d->export_mode > RSEM_PREEXPORT)
            {
              /*
               * FIXME - This shows what should be exported according to current
               * filters, but not what was really exported. 'configure soft'
               * command may change the export filter and do not update routes.
               */
              int do_export = (ic > 0) ||
                (f_run(a->out_filter, &e, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) <= F_ACCEPT);

              /* 'show route export' wants accepted routes, 'noexport' the rejected ones */
              if (do_export != (d->export_mode == RSEM_EXPORT))
                goto skip;

              if ((d->export_mode == RSEM_EXPORT) && (ep->accept_ra_types == RA_ACCEPTED))
                pass = 1;
            }
        }

      if (d->show_protocol && (d->show_protocol != e->attrs->src->proto))
        goto skip;

      if (f_run(d->filter, &e, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) > F_ACCEPT)
        goto skip;

      d->show_counter++;
      if (d->stats < 2)
        rt_show_rte(c, ia, e, d, tmpa);
      ia[0] = 0;                /* Print the prefix only once per network */

    skip:
      /* Filters/merge may have produced a private copy of the route */
      if (e != ee)
      {
        rte_free(e);
        e = ee;
      }
      rte_update_unlock();

      if (d->primary_only)
        break;
    }
}
2517

    
2518
/*
 * CLI continuation for 'show route' over a whole table: dumps up to
 * 'max' networks per invocation, then parks the FIB iterator and
 * returns so the CLI can resume later; finishes with the statistics
 * line and clears the continuation hooks.
 */
static void
rt_show_cont(struct cli *c)
{
  struct rt_show_data *d = c->rover;
#ifdef DEBUGGING
  unsigned max = 4;                /* Small batches to exercise the resume path */
#else
  unsigned max = 64;
#endif
  struct fib *fib = &d->table->fib;
  struct fib_iterator *it = &d->fit;

  FIB_ITERATE_START(fib, it, f)
    {
      net *n = (net *) f;
      /* Abort if the config was reloaded while we were suspended */
      if (d->running_on_config && d->running_on_config != config)
        {
          cli_printf(c, 8004, "Stopped due to reconfiguration");
          goto done;
        }
      if (d->export_protocol && (d->export_protocol->export_state == ES_DOWN))
        {
          cli_printf(c, 8005, "Protocol is down");
          goto done;
        }
      /* Batch limit reached -- park the iterator and continue next time */
      if (!max--)
        {
          FIB_ITERATE_PUT(it, f);
          return;
        }
      rt_show_net(c, n, d);
    }
  FIB_ITERATE_END(f);
  if (d->stats)
    cli_printf(c, 14, "%d of %d routes for %d networks", d->show_counter, d->rt_counter, d->net_counter);
  else
    cli_printf(c, 0, "");
done:
  c->cont = c->cleanup = NULL;
}
2558

    
2559
static void
2560
rt_show_cleanup(struct cli *c)
2561
{
2562
  struct rt_show_data *d = c->rover;
2563

    
2564
  /* Unlink the iterator */
2565
  fit_get(&d->table->fib, &d->fit);
2566
}
2567

    
2568
/*
 * Entry point of the 'show route' command. Chooses the table to query,
 * then either walks the whole table via the rt_show_cont() continuation
 * (pxlen == 256 means 'no prefix given') or shows a single network.
 */
void
rt_show(struct rt_show_data *d)
{
  net *n;

  /* Default is either a master table or a table related to a respective protocol */
  if (!d->table && d->export_protocol) d->table = d->export_protocol->table;
  if (!d->table && d->show_protocol) d->table = d->show_protocol->table;
  if (!d->table) d->table = config->master_rtc->table;

  /* Filtered routes are neither exported nor have sensible ordering */
  if (d->filtered && (d->export_mode || d->primary_only))
    cli_msg(0, "");

  if (d->pxlen == 256)
    {
      FIB_ITERATE_INIT(&d->fit, &d->table->fib);
      this_cli->cont = rt_show_cont;
      this_cli->cleanup = rt_show_cleanup;
      this_cli->rover = d;
    }
  else
    {
      /* XXXX 
      if (d->show_for)
        n = net_route(d->table, d->prefix, d->pxlen);
      else
        n = net_find(d->table, d->prefix, d->pxlen);
      */
      /* NOTE(review): single-net lookup disabled during the net_addr
         migration, so a specific-prefix query currently always reports
         'Network not in table' */
      n = NULL;

      if (n)
        rt_show_net(this_cli, n, d);

      if (d->rt_counter)
        cli_msg(0, "");
      else
        cli_msg(8001, "Network not in table");
    }
}
2608

    
2609
/*
2610
 *  Documentation for functions declared inline in route.h
2611
 */
2612
#if 0
2613

2614
/**
2615
 * net_find - find a network entry
2616
 * @tab: a routing table
2617
 * @addr: address of the network
2618
 *
2619
 * net_find() looks up the given network in routing table @tab and
2620
 * returns a pointer to its &net entry or %NULL if no such network
2621
 * exists.
2622
 */
2623
static inline net *net_find(rtable *tab, net_addr *addr)
2624
{ DUMMY; }
2625

2626
/**
2627
 * net_get - obtain a network entry
2628
 * @tab: a routing table
2629
 * @addr: address of the network
2630
 *
2631
 * net_get() looks up the given network in routing table @tab and
2632
 * returns a pointer to its &net entry. If no such entry exists, it's
2633
 * created.
2634
 */
2635
static inline net *net_get(rtable *tab, net_addr *addr)
2636
{ DUMMY; }
2637

2638
/**
2639
 * rte_cow - copy a route for writing
2640
 * @r: a route entry to be copied
2641
 *
2642
 * rte_cow() takes a &rte and prepares it for modification. The exact action
2643
 * taken depends on the flags of the &rte -- if it's a temporary entry, it's
2644
 * just returned unchanged, else a new temporary entry with the same contents
2645
 * is created.
2646
 *
2647
 * The primary use of this function is inside the filter machinery -- when
2648
 * a filter wants to modify &rte contents (to change the preference or to
2649
 * attach another set of attributes), it must ensure that the &rte is not
2650
 * shared with anyone else (and especially that it isn't stored in any routing
2651
 * table).
2652
 *
2653
 * Result: a pointer to the new writable &rte.
2654
 */
2655
static inline rte * rte_cow(rte *r)
2656
{ DUMMY; }
2657

2658
#endif