Statistics
| Branch: | Revision:

iof-bird-daemon / nest / rt-table.c @ f4a60a9b

History | View | Annotate | Download (65.7 KB)

1
/*
2
 *        BIRD -- Routing Tables
3
 *
4
 *        (c) 1998--2000 Martin Mares <mj@ucw.cz>
5
 *
6
 *        Can be freely distributed and used under the terms of the GNU GPL.
7
 */
8

    
9
/**
10
 * DOC: Routing tables
11
 *
12
 * Routing tables are probably the most important structures BIRD uses. They
13
 * hold all the information about known networks, the associated routes and
14
 * their attributes.
15
 *
16
 * There are multiple routing tables (a primary one together with any
17
 * number of secondary ones if requested by the configuration). Each table
18
 * is basically a FIB containing entries describing the individual
19
 * destination networks. For each network (represented by structure &net),
20
 * there is a one-way linked list of route entries (&rte), the first entry
21
 * on the list being the best one (i.e., the one we currently use
22
 * for routing), the order of the other ones is undetermined.
23
 *
24
 * The &rte contains information specific to the route (preference, protocol
25
 * metrics, time of last modification etc.) and a pointer to a &rta structure
26
 * (see the route attribute module for a precise explanation) holding the
27
 * remaining route attributes which are expected to be shared by multiple
28
 * routes in order to conserve memory.
29
 */
30

    
31
#undef LOCAL_DEBUG
32

    
33
#include "nest/bird.h"
34
#include "nest/route.h"
35
#include "nest/protocol.h"
36
#include "nest/cli.h"
37
#include "nest/iface.h"
38
#include "lib/resource.h"
39
#include "lib/event.h"
40
#include "lib/string.h"
41
#include "conf/conf.h"
42
#include "filter/filter.h"
43
#include "lib/string.h"
44
#include "lib/alloca.h"
45

    
46
/* Resource pool from which routing-table data is allocated */
pool *rt_table_pool;

/* Slab for &rte allocation; linpool for temporary data (filter runs,
   temporary attribute lists) during route updates */
static slab *rte_slab;
static linpool *rte_update_pool;

/* List of routing tables — presumably all configured tables; walked
   elsewhere in this file (TODO confirm against rt_init/config code) */
static list routing_tables;

/* Forward declarations for helpers defined later in this file */
static void rt_format_via(rte *e, byte *via);
static void rt_free_hostcache(rtable *tab);
static void rt_notify_hostcache(rtable *tab, net *net);
static void rt_update_hostcache(rtable *tab);
static void rt_next_hop_update(rtable *tab);
static inline void rt_prune_table(rtable *tab);
59

    
60

    
61
static inline struct ea_list *
62
make_tmp_attrs(struct rte *rt, struct linpool *pool)
63
{
64
  struct ea_list *(*mta)(struct rte *rt, struct linpool *pool);
65
  mta = rt->attrs->src->proto->make_tmp_attrs;
66
  return mta ? mta(rt, rte_update_pool) : NULL;
67
}
68

    
69

    
70
/* Like fib_route(), but skips empty net entries */
71
static inline void *
72
net_route_ip4(struct fib *f, net_addr_ip4 *n)
73
{
74
  net *r;
75

    
76
  while (r = fib_find(f, (net_addr *) n),
77
         !(r && rte_is_valid(r->routes)) && (n->pxlen > 0))
78
  {
79
    n->pxlen--;
80
    ip4_clrbit(&n->prefix, n->pxlen);
81
  }
82

    
83
  return r;
84
}
85

    
86
static inline void *
87
net_route_ip6(struct fib *f, net_addr_ip6 *n)
88
{
89
  net *r;
90

    
91
  while (r = fib_find(f, (net_addr *) n),
92
         !(r && rte_is_valid(r->routes)) && (n->pxlen > 0))
93
  {
94
    n->pxlen--;
95
    ip6_clrbit(&n->prefix, n->pxlen);
96
  }
97

    
98
  return r;
99
}
100

    
101
static byte
102
net_roa4_check(rtable *tab, const net_addr_ip4 *px, u32 asn)
103
{
104
  struct net_addr_roa4 n = NET_ADDR_ROA4(px->prefix, px->pxlen, 0, 0);
105
  byte anything = 0;
106

    
107
  struct fib_node *fn;
108
  while (1)
109
  {
110
    for (fn = fib_get_chain(&tab->fib, (net_addr *) &n); fn; fn = fn->next)
111
    {
112
      net *r = fib_node_to_user(&tab->fib, fn);
113
      if (rte_is_valid(r->routes) && ipa_in_netX(ipa_from_ip4(px->prefix), r->n.addr))
114
      {
115
        net_addr_roa4 *roa = (void *) r->n.addr;
116
        anything = 1;
117
        if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen))
118
          return ROA_VALID;
119
      }
120
    }
121

    
122
    if (n.pxlen == 0)
123
      break;
124

    
125
    n.pxlen--;
126
    ip4_clrbit(&n.prefix, n.pxlen);
127
  }
128

    
129
  return anything ? ROA_INVALID : ROA_UNKNOWN;
130
}
131

    
132
static byte
133
net_roa6_check(rtable *tab, const net_addr_ip6 *px, u32 asn)
134
{
135
  struct net_addr_roa6 n = NET_ADDR_ROA6(px->prefix, px->pxlen, 0, 0);
136
  byte anything = 0;
137

    
138
  struct fib_node *fn;
139
  while (1)
140
  {
141
    for (fn = fib_get_chain(&tab->fib, (net_addr *) &n); fn; fn = fn->next)
142
    {
143
      net *r = fib_node_to_user(&tab->fib, fn);
144
      if (rte_is_valid(r->routes) && ipa_in_netX(ipa_from_ip6(px->prefix), r->n.addr))
145
      {
146
        net_addr_roa6 *roa = (void *) r->n.addr;
147
        anything = 1;
148
        if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen))
149
          return ROA_VALID;
150
      }
151
    }
152

    
153
    if (n.pxlen == 0)
154
      break;
155

    
156
    n.pxlen--;
157
    ip6_clrbit(&n.prefix, n.pxlen);
158
  }
159

    
160
  return anything ? ROA_INVALID : ROA_UNKNOWN;
161
}
162

    
163
byte
164
net_roa_check(rtable *tab, const net_addr *n, u32 asn)
165
{
166
  if (tab->addr_type == NET_ROA4)
167
    return net_roa4_check(tab, (const net_addr_ip4 *) n, asn);
168
  else
169
    return net_roa6_check(tab, (const net_addr_ip6 *) n, asn);
170
}
171

    
172
void *
173
net_route(rtable *tab, const net_addr *n)
174
{
175
  ASSERT(tab->addr_type == n->type);
176

    
177
  net_addr *n0 = alloca(n->length);
178
  net_copy(n0, n);
179

    
180
  switch (n->type)
181
  {
182
  case NET_IP4:
183
  case NET_VPN4:
184
  case NET_ROA4:
185
    return net_route_ip4(&tab->fib, (net_addr_ip4 *) n0);
186

    
187
  case NET_IP6:
188
  case NET_VPN6:
189
  case NET_ROA6:
190
    return net_route_ip6(&tab->fib, (net_addr_ip6 *) n0);
191

    
192
  default:
193
    return NULL;
194
  }
195
}
196

    
197
/**
198
 * rte_find - find a route
199
 * @net: network node
200
 * @src: route source
201
 *
202
 * The rte_find() function returns a route for destination @net
203
 * which is from route source @src.
204
 */
205
rte *
206
rte_find(net *net, struct rte_src *src)
207
{
208
  rte *e = net->routes;
209

    
210
  while (e && e->attrs->src != src)
211
    e = e->next;
212
  return e;
213
}
214

    
215
/**
216
 * rte_get_temp - get a temporary &rte
217
 * @a: attributes to assign to the new route (a &rta; in case it's
218
 * un-cached, rte_update() will create a cached copy automatically)
219
 *
220
 * Create a temporary &rte and bind it with the attributes @a.
221
 * Also set route preference to the default preference set for
222
 * the protocol.
223
 */
224
rte *
225
rte_get_temp(rta *a)
226
{
227
  rte *e = sl_alloc(rte_slab);
228

    
229
  e->attrs = a;
230
  e->flags = 0;
231
  e->pref = 0;
232
  return e;
233
}
234

    
235
/* Make a private copy of route @r: clone the rte itself and take a
   new reference on its attribute list; flags start cleared. */
rte *
rte_do_cow(rte *r)
{
  rte *copy = sl_alloc(rte_slab);

  *copy = *r;
  copy->attrs = rta_clone(r->attrs);
  copy->flags = 0;
  return copy;
}
245

    
246
/**
247
 * rte_cow_rta - get a private writable copy of &rte with writable &rta
248
 * @r: a route entry to be copied
249
 * @lp: a linpool from which to allocate &rta
250
 *
251
 * rte_cow_rta() takes a &rte and prepares it and associated &rta for
252
 * modification. There are three possibilities: First, both &rte and &rta are
253
 * private copies, in that case they are returned unchanged.  Second, &rte is
254
 * private copy, but &rta is cached, in that case &rta is duplicated using
255
 * rta_do_cow(). Third, both &rte is shared and &rta is cached, in that case
256
 * both structures are duplicated by rte_do_cow() and rta_do_cow().
257
 *
258
 * Note that in the second case, cached &rta loses one reference, while private
259
 * copy created by rta_do_cow() is a shallow copy sharing indirect data (eattrs,
260
 * nexthops, ...) with it. To work properly, original shared &rta should have
261
 * another reference during the life of created private copy.
262
 *
263
 * Result: a pointer to the new writable &rte with writable &rta.
264
 */
265
rte *
266
rte_cow_rta(rte *r, linpool *lp)
267
{
268
  if (!rta_is_cached(r->attrs))
269
    return r;
270

    
271
  rte *e = rte_cow(r);
272
  rta *a = rta_do_cow(r->attrs, lp);
273
  rta_free(e->attrs);
274
  e->attrs = a;
275
  return e;
276
}
277

    
278
static int                                /* Actually better or at least as good as */
279
rte_better(rte *new, rte *old)
280
{
281
  int (*better)(rte *, rte *);
282

    
283
  if (!rte_is_valid(old))
284
    return 1;
285
  if (!rte_is_valid(new))
286
    return 0;
287

    
288
  if (new->pref > old->pref)
289
    return 1;
290
  if (new->pref < old->pref)
291
    return 0;
292
  if (new->attrs->src->proto->proto != old->attrs->src->proto->proto)
293
    {
294
      /*
295
       *  If the user has configured protocol preferences, so that two different protocols
296
       *  have the same preference, try to break the tie by comparing addresses. Not too
297
       *  useful, but keeps the ordering of routes unambiguous.
298
       */
299
      return new->attrs->src->proto->proto > old->attrs->src->proto->proto;
300
    }
301
  if (better = new->attrs->src->proto->rte_better)
302
    return better(new, old);
303
  return 0;
304
}
305

    
306
static int
307
rte_mergable(rte *pri, rte *sec)
308
{
309
  int (*mergable)(rte *, rte *);
310

    
311
  if (!rte_is_valid(pri) || !rte_is_valid(sec))
312
    return 0;
313

    
314
  if (pri->pref != sec->pref)
315
    return 0;
316

    
317
  if (pri->attrs->src->proto->proto != sec->attrs->src->proto->proto)
318
    return 0;
319

    
320
  if (mergable = pri->attrs->src->proto->rte_mergable)
321
    return mergable(pri, sec);
322

    
323
  return 0;
324
}
325

    
326
static void
327
rte_trace(struct proto *p, rte *e, int dir, char *msg)
328
{
329
  byte via[IPA_MAX_TEXT_LENGTH+32];
330

    
331
  rt_format_via(e, via);
332
  log(L_TRACE "%s %c %s %N %s", p->name, dir, msg, e->net->n.addr, via);
333
}
334

    
335
static inline void
336
rte_trace_in(uint flag, struct proto *p, rte *e, char *msg)
337
{
338
  if (p->debug & flag)
339
    rte_trace(p, e, '>', msg);
340
}
341

    
342
static inline void
343
rte_trace_out(uint flag, struct proto *p, rte *e, char *msg)
344
{
345
  if (p->debug & flag)
346
    rte_trace(p, e, '<', msg);
347
}
348

    
349
/*
 * export_filter - run the export path (protocol import_control hook
 * plus the channel's export filter) on route @rt0 for channel @c.
 *
 * Returns the route to export (possibly a private, modified copy), or
 * NULL when rejected. If a private copy was created, it is also stored
 * in *@rt_free so the caller can free it after use. When @tmpa is
 * non-NULL, it receives the temporary attribute list built for @rt0.
 * With @silent set, rejections neither log nor bump statistics — used
 * when probing whether an old route would have been exported.
 */
static rte *
export_filter(struct channel *c, rte *rt0, rte **rt_free, ea_list **tmpa, int silent)
{
  struct proto *p = c->proto;
  struct filter *filter = c->out_filter;
  struct proto_stats *stats = &c->stats;
  ea_list *tmpb = NULL;
  rte *rt;
  int v;

  rt = rt0;
  *rt_free = NULL;

  /* Use a local slot when the caller does not want the tmp attrs back */
  if (!tmpa)
    tmpa = &tmpb;

  *tmpa = make_tmp_attrs(rt, rte_update_pool);

  /* Protocol hook: v < 0 rejects, v > 0 forces accept, 0 continues */
  v = p->import_control ? p->import_control(p, &rt, tmpa, rte_update_pool) : 0;
  if (v < 0)
    {
      if (silent)
        goto reject;

      stats->exp_updates_rejected++;
      if (v == RIC_REJECT)
        rte_trace_out(D_FILTERS, p, rt, "rejected by protocol");
      goto reject;
    }
  if (v > 0)
    {
      if (!silent)
        rte_trace_out(D_FILTERS, p, rt, "forced accept by protocol");
      goto accept;
    }

  /* Export filter: FILTER_REJECT short-circuits without running f_run() */
  v = filter && ((filter == FILTER_REJECT) ||
                 (f_run(filter, &rt, tmpa, rte_update_pool, FF_FORCE_TMPATTR) > F_ACCEPT));
  if (v)
    {
      if (silent)
        goto reject;

      stats->exp_updates_filtered++;
      rte_trace_out(D_FILTERS, p, rt, "filtered out");
      goto reject;
    }

 accept:
  /* Hand ownership of a copied route to the caller via *rt_free */
  if (rt != rt0)
    *rt_free = rt;
  return rt;

 reject:
  /* Discard temporary rte */
  if (rt != rt0)
    rte_free(rt);
  return NULL;
}
408

    
409
/*
 * do_rt_notify - deliver an already-filtered update/withdraw to the
 * channel's protocol via its rt_notify() hook, applying the export
 * limit and updating export statistics. @tmpa, when present, is
 * temporarily chained in front of @new's extended attributes for the
 * duration of the callback.
 */
static void
do_rt_notify(struct channel *c, net *net, rte *new, rte *old, ea_list *tmpa, int refeed)
{
  struct proto *p = c->proto;
  struct proto_stats *stats = &c->stats;

  /*
   * First, apply export limit.
   *
   * Export route limits has several problems. Because exp_routes
   * counter is reset before refeed, we don't really know whether
   * limit is breached and whether the update is new or not. Therefore
   * the number of really exported routes may exceed the limit
   * temporarily (routes exported before and new routes in refeed).
   *
   * Minor advantage is that if the limit is decreased and refeed is
   * requested, the number of exported routes really decrease.
   *
   * Second problem is that with export limits, we don't know whether
   * old was really exported (it might be blocked by limit). When a
   * withdraw is exported, we announce it even when the previous
   * update was blocked. This is not a big issue, but the same problem
   * is in updating exp_routes counter. Therefore, to be consistent in
   * increases and decreases of exp_routes, we count exported routes
   * regardless of blocking by limits.
   *
   * Similar problem is in handling updates - when a new route is
   * received and blocking is active, the route would be blocked, but
   * when an update for the route will be received later, the update
   * would be propagated (as old != NULL). Therefore, we have to block
   * also non-new updates (contrary to import blocking).
   */

  struct channel_limit *l = &c->out_limit;
  if (l->action && new)
    {
      if ((!old || refeed) && (stats->exp_routes >= l->limit))
        channel_notify_limit(c, l, PLD_OUT, stats->exp_routes);

      if (l->state == PLS_BLOCKED)
        {
          stats->exp_routes++;        /* see note above */
          stats->exp_updates_rejected++;
          rte_trace_out(D_FILTERS, p, new, "rejected [limit]");
          new = NULL;

          /* Pure update blocked by limit with nothing to withdraw */
          if (!old)
            return;
        }
    }

  if (new)
    stats->exp_updates_accepted++;
  else
    stats->exp_withdraws_accepted++;

  /* Hack: We do not decrease exp_routes during refeed, we instead
     reset exp_routes at the start of refeed. */
  if (new)
    stats->exp_routes++;
  if (old && !refeed)
    stats->exp_routes--;

  if (p->debug & D_ROUTES)
    {
      if (new && old)
        rte_trace_out(D_ROUTES, p, new, "replaced");
      else if (new)
        rte_trace_out(D_ROUTES, p, new, "added");
      else if (old)
        rte_trace_out(D_ROUTES, p, old, "removed");
    }
  if (!new)
    p->rt_notify(p, c->table, net, NULL, old, NULL);
  else if (tmpa)
    {
      /* Temporarily splice tmpa in front of new's eattrs for the
         callback, then unlink it again */
      ea_list *t = tmpa;
      while (t->next)
        t = t->next;
      t->next = new->attrs->eattrs;
      p->rt_notify(p, c->table, net, new, old, tmpa);
      t->next = NULL;
    }
  else
    p->rt_notify(p, c->table, net, new, old, new->attrs->eattrs);
}
497

    
498
/*
 * rt_notify_basic - RA_ANY/RA_OPTIMAL announcement path: run export
 * filters on the new and old routes and hand the result to
 * do_rt_notify(). @refeed skips filtering of @old0 (see comment below).
 */
static void
rt_notify_basic(struct channel *c, net *net, rte *new0, rte *old0, int refeed)
{
  struct proto *p = c->proto;

  rte *new = new0;
  rte *old = old0;
  rte *new_free = NULL;
  rte *old_free = NULL;
  ea_list *tmpa = NULL;

  if (new)
    c->stats.exp_updates_received++;
  else
    c->stats.exp_withdraws_received++;

  /*
   * This is a tricky part - we don't know whether route 'old' was
   * exported to protocol 'p' or was filtered by the export filter.
   * We try to run the export filter to know this to have a correct
   * value in 'old' argument of rte_update (and proper filter value)
   *
   * FIXME - this is broken because 'configure soft' may change
   * filters but keep routes. Refeed is expected to be called after
   * change of the filters and with old == new, therefore we do not
   * even try to run the filter on an old route, This may lead to
   * 'spurious withdraws' but ensure that there are no 'missing
   * withdraws'.
   *
   * This is not completely safe as there is a window between
   * reconfiguration and the end of refeed - if a newly filtered
   * route disappears during this period, proper withdraw is not
   * sent (because old would be also filtered) and the route is
   * not refeeded (because it disappeared before that).
   */

  if (new)
    new = export_filter(c, new, &new_free, &tmpa, 0);

  /* Silent probe: only to learn whether 'old' had been exported */
  if (old && !refeed)
    old = export_filter(c, old, &old_free, NULL, 1);

  if (!new && !old)
  {
    /*
     * As mentioned above, 'old' value may be incorrect in some race conditions.
     * We generally ignore it with the exception of withdraw to pipe protocol.
     * In that case we rather propagate unfiltered withdraws regardless of
     * export filters to ensure that when a protocol is flushed, its routes are
     * removed from all tables. Possible spurious unfiltered withdraws are not
     * problem here as they are ignored if there is no corresponding route at
     * the other end of the pipe. We directly call rt_notify() hook instead of
     * do_rt_notify() to avoid logging and stat counters.
     */

#ifdef CONFIG_PIPE
    if ((p->proto == &proto_pipe) && !new0 && (p != old0->sender->proto))
      p->rt_notify(p, c->table, net, NULL, old0, NULL);
#endif

    return;
  }

  do_rt_notify(c, net, new, old, tmpa, refeed);

  /* Discard temporary rte's */
  if (new_free)
    rte_free(new_free);
  if (old_free)
    rte_free(old_free);
}
569

    
570
/*
 * rt_notify_accepted - RA_ACCEPTED announcement path: the exported
 * route is the first one in the net's list accepted by the export
 * filters. Determines the new and old "accepted best" routes and
 * passes them to do_rt_notify(). @feed: 0 = regular update,
 * 1 = initial feed, 2 = refeed.
 */
static void
rt_notify_accepted(struct channel *c, net *net, rte *new_changed, rte *old_changed, rte *before_old, int feed)
{
  // struct proto *p = c->proto;

  rte *r;
  rte *new_best = NULL;
  rte *old_best = NULL;
  rte *new_free = NULL;
  rte *old_free = NULL;
  ea_list *tmpa = NULL;

  /* Used to track whether we met old_changed position. If before_old is NULL
     old_changed was the first and we met it implicitly before current best route. */
  int old_meet = old_changed && !before_old;

  /* Note that before_old is either NULL or valid (not rejected) route.
     If old_changed is valid, before_old have to be too. If old changed route
     was not valid, caller must use NULL for both old_changed and before_old. */

  if (new_changed)
    c->stats.exp_updates_received++;
  else
    c->stats.exp_withdraws_received++;

  /* First, find the new_best route - first accepted by filters */
  for (r=net->routes; rte_is_valid(r); r=r->next)
    {
      if (new_best = export_filter(c, r, &new_free, &tmpa, 0))
        break;

      /* Note if we walked around the position of old_changed route */
      if (r == before_old)
        old_meet = 1;
    }

  /*
   * Second, handle the feed case. That means we do not care for
   * old_best. It is NULL for feed, and the new_best for refeed.
   * For refeed, there is a hack similar to one in rt_notify_basic()
   * to ensure withdraws in case of changed filters
   */
  if (feed)
    {
      if (feed == 2)        /* refeed */
        old_best = new_best ? new_best :
          (rte_is_valid(net->routes) ? net->routes : NULL);
      else
        old_best = NULL;

      if (!new_best && !old_best)
        return;

      goto found;
    }

  /*
   * Now, we find the old_best route. Generally, it is the same as the
   * new_best, unless new_best is the same as new_changed or
   * old_changed is accepted before new_best.
   *
   * There are four cases:
   *
   * - We would find and accept old_changed before new_best, therefore
   *   old_changed is old_best. In remaining cases we suppose this
   *   is not true.
   *
   * - We found no new_best, therefore there is also no old_best and
   *   we ignore this withdraw.
   *
   * - We found new_best different than new_changed, therefore
   *   old_best is the same as new_best and we ignore this update.
   *
   * - We found new_best the same as new_changed, therefore it cannot
   *   be old_best and we have to continue search for old_best.
   */

  /* First case */
  if (old_meet)
    if (old_best = export_filter(c, old_changed, &old_free, NULL, 1))
      goto found;

  /* Second case */
  if (!new_best)
    return;

  /* Third case, we use r instead of new_best, because export_filter() could change it */
  if (r != new_changed)
    {
      if (new_free)
        rte_free(new_free);
      return;
    }

  /* Fourth case */
  for (r=r->next; rte_is_valid(r); r=r->next)
    {
      if (old_best = export_filter(c, r, &old_free, NULL, 1))
        goto found;

      if (r == before_old)
        if (old_best = export_filter(c, old_changed, &old_free, NULL, 1))
          goto found;
    }

  /* Implicitly, old_best is NULL and new_best is non-NULL */

 found:
  do_rt_notify(c, net, new_best, old_best, tmpa, (feed == 2));

  /* Discard temporary rte's */
  if (new_free)
    rte_free(new_free);
  if (old_free)
    rte_free(old_free);
}
686

    
687

    
688
static struct mpnh *
689
mpnh_merge_rta(struct mpnh *nhs, rta *a, int max)
690
{
691
  struct mpnh nh = { .gw = a->gw, .iface = a->iface };
692
  struct mpnh *nh2 = (a->dest == RTD_MULTIPATH) ? a->nexthops : &nh;
693
  return mpnh_merge(nhs, nh2, 1, 0, max, rte_update_pool);
694
}
695

    
696
/*
 * rt_export_merged - build the merged (multipath) export route for
 * @net on channel @c: the filtered best route, with next hops of all
 * other mergable, filter-accepted, reachable routes folded in (capped
 * by c->merge_limit). Returns NULL when the best route is invalid or
 * filtered out; a private copy, if created, is stored in *@rt_free.
 */
rte *
rt_export_merged(struct channel *c, net *net, rte **rt_free, ea_list **tmpa, int silent)
{
  // struct proto *p = c->proto;
  struct mpnh *nhs = NULL;
  rte *best0, *best, *rt0, *rt, *tmp;

  best0 = net->routes;
  *rt_free = NULL;

  if (!rte_is_valid(best0))
    return NULL;

  best = export_filter(c, best0, rt_free, tmpa, silent);

  /* Unreachable (or rejected) best route cannot be merged — return as is */
  if (!best || !rte_is_reachable(best))
    return best;

  /* Collect next hops from the remaining mergable, accepted routes */
  for (rt0 = best0->next; rt0; rt0 = rt0->next)
  {
    if (!rte_mergable(best0, rt0))
      continue;

    rt = export_filter(c, rt0, &tmp, NULL, 1);

    if (!rt)
      continue;

    if (rte_is_reachable(rt))
      nhs = mpnh_merge_rta(nhs, rt->attrs, c->merge_limit);

    if (tmp)
      rte_free(tmp);
  }

  if (nhs)
  {
    /* Merge in the best route's own next hops last */
    nhs = mpnh_merge_rta(nhs, best->attrs, c->merge_limit);

    /* Only rewrite the rta when more than one next hop survived */
    if (nhs->next)
    {
      best = rte_cow_rta(best, rte_update_pool);
      best->attrs->dest = RTD_MULTIPATH;
      best->attrs->nexthops = nhs;
    }
  }

  if (best != best0)
    *rt_free = best;

  return best;
}
748

    
749

    
750
/*
 * rt_notify_merged - RA_MERGED announcement path: exports one merged
 * (multipath) route per net. Skips the work when the change cannot
 * affect the merged result, otherwise rebuilds the merged route via
 * rt_export_merged() and calls do_rt_notify().
 */
static void
rt_notify_merged(struct channel *c, net *net, rte *new_changed, rte *old_changed,
                 rte *new_best, rte*old_best, int refeed)
{
  // struct proto *p = c->proto;

  rte *new_best_free = NULL;
  rte *old_best_free = NULL;
  rte *new_changed_free = NULL;
  rte *old_changed_free = NULL;
  ea_list *tmpa = NULL;

  /* We assume that all rte arguments are either NULL or rte_is_valid() */

  /* This check should be done by the caller */
  if (!new_best && !old_best)
    return;

  /* Check whether the change is relevant to the merged route */
  if ((new_best == old_best) && !refeed)
  {
    new_changed = rte_mergable(new_best, new_changed) ?
      export_filter(c, new_changed, &new_changed_free, NULL, 1) : NULL;

    old_changed = rte_mergable(old_best, old_changed) ?
      export_filter(c, old_changed, &old_changed_free, NULL, 1) : NULL;

    /* Neither changed route takes part in the merged route — nothing to do */
    if (!new_changed && !old_changed)
      return;
  }

  if (new_best)
    c->stats.exp_updates_received++;
  else
    c->stats.exp_withdraws_received++;

  /* Prepare new merged route */
  if (new_best)
    new_best = rt_export_merged(c, net, &new_best_free, &tmpa, 0);

  /* Prepare old merged route (without proper merged next hops) */
  /* There are some issues with running filter on old route - see rt_notify_basic() */
  if (old_best && !refeed)
    old_best = export_filter(c, old_best, &old_best_free, NULL, 1);

  if (new_best || old_best)
    do_rt_notify(c, net, new_best, old_best, tmpa, refeed);

  /* Discard temporary rte's */
  if (new_best_free)
    rte_free(new_best_free);
  if (old_best_free)
    rte_free(old_best_free);
  if (new_changed_free)
    rte_free(new_changed_free);
  if (old_changed_free)
    rte_free(old_changed_free);
}
808

    
809

    
810
/**
 * rte_announce - announce a routing table change
 * @tab: table the route has been added to
 * @type: type of route announcement (RA_OPTIMAL, RA_ANY, RA_ACCEPTED
 * or RA_MERGED)
 * @net: network in question
 * @new: the new route to be announced
 * @old: the previous route for the same network
 * @new_best: the new best route for the network (used by RA_MERGED)
 * @old_best: the old best route for the network (used by RA_MERGED)
 * @before_old: the route preceding @old in the net's list (used by
 * RA_ACCEPTED)
 *
 * This function gets a routing table update and announces it
 * to all protocols that accept the given type of route announcement
 * and are connected to the same table by their announcement hooks.
 *
 * Route announcement of type RA_OPTIMAL is generated when the optimal
 * route (in routing table @tab) changes. In that case @old stores the
 * old optimal route.
 *
 * Route announcement of type RA_ANY is generated when any route (in
 * routing table @tab) changes. In that case @old stores the old route
 * from the same protocol.
 *
 * For each appropriate protocol, we first call its import_control()
 * hook which performs basic checks on the route (each protocol has a
 * right to veto or force accept of the route before any filter is
 * asked) and adds default values of attributes specific to the new
 * protocol (metrics, tags etc.).  Then it consults the protocol's
 * export filter and if it accepts the route, the rt_notify() hook of
 * the protocol gets called.
 */
static void
rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old,
             rte *new_best, rte *old_best, rte *before_old)
{
  /* Normalize invalid routes to NULL before dispatching */
  if (!rte_is_valid(new))
    new = NULL;

  if (!rte_is_valid(old))
    old = before_old = NULL;

  if (!rte_is_valid(new_best))
    new_best = NULL;

  if (!rte_is_valid(old_best))
    old_best = NULL;

  if (!old && !new)
    return;

  if ((type == RA_OPTIMAL) && tab->hostcache)
    rt_notify_hostcache(tab, net);

  struct channel *c; node *n;
  WALK_LIST2(c, n, tab->channels, table_node)
    {
      if (c->export_state == ES_DOWN)
        continue;

      if (c->ra_mode == type)
        /* NOTE: the else branches below bind to the inner if, as intended */
        if (type == RA_ACCEPTED)
          rt_notify_accepted(c, net, new, old, before_old, 0);
        else if (type == RA_MERGED)
          rt_notify_merged(c, net, new, old, new_best, old_best, 0);
        else
          rt_notify_basic(c, net, new, old, 0);
    }
}
875

    
876
static inline int
877
rte_validate(rte *e)
878
{
879
  int c;
880
  net *n = e->net;
881

    
882
  // (n->n.pxlen > BITS_PER_IP_ADDRESS) || !ip_is_prefix(n->n.prefix,n->n.pxlen))
883
  if (!net_validate(n->n.addr))
884
  {
885
    log(L_WARN "Ignoring bogus prefix %N received via %s",
886
        n->n.addr, e->sender->proto->name);
887
    return 0;
888
  }
889

    
890
  c = net_classify(n->n.addr);
891
  if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
892
  {
893
    log(L_WARN "Ignoring bogus route %N received via %s",
894
        n->n.addr, e->sender->proto->name);
895
    return 0;
896
  }
897

    
898
  return 1;
899
}
900

    
901
/**
902
 * rte_free - delete a &rte
903
 * @e: &rte to be deleted
904
 *
905
 * rte_free() deletes the given &rte from the routing table it's linked to.
906
 */
907
void
908
rte_free(rte *e)
909
{
910
  if (rta_is_cached(e->attrs))
911
    rta_free(e->attrs);
912
  sl_free(rte_slab, e);
913
}
914

    
915
/* Free a route whose attributes are known to be cached — skips the
   rta_is_cached() check done by rte_free() */
static inline void
rte_free_quick(rte *e)
{
  rta_free(e->attrs);
  sl_free(rte_slab, e);
}
921

    
922
static int
923
rte_same(rte *x, rte *y)
924
{
925
  return
926
    x->attrs == y->attrs &&
927
    x->flags == y->flags &&
928
    x->pflags == y->pflags &&
929
    x->pref == y->pref &&
930
    (!x->attrs->src->proto->rte_same || x->attrs->src->proto->rte_same(x, y));
931
}
932

    
933
/* A route is "ok" when it exists and has not been filtered out */
static inline int rte_is_ok(rte *e)
{
  if (!e)
    return 0;

  return !rte_is_filtered(e);
}
934

    
935
static void
936
rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src)
937
{
938
  struct proto *p = c->proto;
939
  struct rtable *table = c->table;
940
  struct proto_stats *stats = &c->stats;
941
  static struct tbf rl_pipe = TBF_DEFAULT_LOG_LIMITS;
942
  rte *before_old = NULL;
943
  rte *old_best = net->routes;
944
  rte *old = NULL;
945
  rte **k;
946

    
947
  k = &net->routes;                        /* Find and remove original route from the same protocol */
948
  while (old = *k)
949
    {
950
      if (old->attrs->src == src)
951
        {
952
          /* If there is the same route in the routing table but from
953
           * a different sender, then there are two paths from the
954
           * source protocol to this routing table through transparent
955
           * pipes, which is not allowed.
956
           *
957
           * We log that and ignore the route. If it is withdraw, we
958
           * ignore it completely (there might be 'spurious withdraws',
959
           * see FIXME in do_rte_announce())
960
           */
961
          if (old->sender->proto != p)
962
            {
963
              if (new)
964
                {
965
                  log_rl(&rl_pipe, L_ERR "Pipe collision detected when sending %N to table %s",
966
                      net->n.addr, table->name);
967
                  rte_free_quick(new);
968
                }
969
              return;
970
            }
971

    
972
          if (new && rte_same(old, new))
973
            {
974
              /* No changes, ignore the new route */
975

    
976
              if (!rte_is_filtered(new))
977
                {
978
                  stats->imp_updates_ignored++;
979
                  rte_trace_in(D_ROUTES, p, new, "ignored");
980
                }
981

    
982
              rte_free_quick(new);
983
              return;
984
            }
985
          *k = old->next;
986
          break;
987
        }
988
      k = &old->next;
989
      before_old = old;
990
    }
991

    
992
  if (!old)
993
    before_old = NULL;
994

    
995
  if (!old && !new)
996
    {
997
      stats->imp_withdraws_ignored++;
998
      return;
999
    }
1000

    
1001
  int new_ok = rte_is_ok(new);
1002
  int old_ok = rte_is_ok(old);
1003

    
1004
  struct channel_limit *l = &c->rx_limit;
1005
  if (l->action && !old && new)
1006
    {
1007
      u32 all_routes = stats->imp_routes + stats->filt_routes;
1008

    
1009
      if (all_routes >= l->limit)
1010
        channel_notify_limit(c, l, PLD_RX, all_routes);
1011

    
1012
      if (l->state == PLS_BLOCKED)
1013
        {
1014
          /* In receive limit the situation is simple, old is NULL so
1015
             we just free new and exit like nothing happened */
1016

    
1017
          stats->imp_updates_ignored++;
1018
          rte_trace_in(D_FILTERS, p, new, "ignored [limit]");
1019
          rte_free_quick(new);
1020
          return;
1021
        }
1022
    }
1023

    
1024
  l = &c->in_limit;
1025
  if (l->action && !old_ok && new_ok)
1026
    {
1027
      if (stats->imp_routes >= l->limit)
1028
        channel_notify_limit(c, l, PLD_IN, stats->imp_routes);
1029

    
1030
      if (l->state == PLS_BLOCKED)
1031
        {
1032
          /* In import limit the situation is more complicated. We
1033
             shouldn't just drop the route, we should handle it like
1034
             it was filtered. We also have to continue the route
1035
             processing if old or new is non-NULL, but we should exit
1036
             if both are NULL as this case is probably assumed to be
1037
             already handled. */
1038

    
1039
          stats->imp_updates_ignored++;
1040
          rte_trace_in(D_FILTERS, p, new, "ignored [limit]");
1041

    
1042
          if (c->in_keep_filtered)
1043
            new->flags |= REF_FILTERED;
1044
          else
1045
            { rte_free_quick(new); new = NULL; }
1046

    
1047
          /* Note that old && !new could be possible when
1048
             c->in_keep_filtered changed in the recent past. */
1049

    
1050
          if (!old && !new)
1051
            return;
1052

    
1053
          new_ok = 0;
1054
          goto skip_stats1;
1055
        }
1056
    }
1057

    
1058
  if (new_ok)
1059
    stats->imp_updates_accepted++;
1060
  else if (old_ok)
1061
    stats->imp_withdraws_accepted++;
1062
  else
1063
    stats->imp_withdraws_ignored++;
1064

    
1065
 skip_stats1:
1066

    
1067
  if (new)
1068
    rte_is_filtered(new) ? stats->filt_routes++ : stats->imp_routes++;
1069
  if (old)
1070
    rte_is_filtered(old) ? stats->filt_routes-- : stats->imp_routes--;
1071

    
1072
  if (table->config->sorted)
1073
    {
1074
      /* If routes are sorted, just insert new route to appropriate position */
1075
      if (new)
1076
        {
1077
          if (before_old && !rte_better(new, before_old))
1078
            k = &before_old->next;
1079
          else
1080
            k = &net->routes;
1081

    
1082
          for (; *k; k=&(*k)->next)
1083
            if (rte_better(new, *k))
1084
              break;
1085

    
1086
          new->next = *k;
1087
          *k = new;
1088
        }
1089
    }
1090
  else
1091
    {
1092
      /* If routes are not sorted, find the best route and move it on
1093
         the first position. There are several optimized cases. */
1094

    
1095
      if (src->proto->rte_recalculate && src->proto->rte_recalculate(table, net, new, old, old_best))
1096
        goto do_recalculate;
1097

    
1098
      if (new && rte_better(new, old_best))
1099
        {
1100
          /* The first case - the new route is cleary optimal,
1101
             we link it at the first position */
1102

    
1103
          new->next = net->routes;
1104
          net->routes = new;
1105
        }
1106
      else if (old == old_best)
1107
        {
1108
          /* The second case - the old best route disappeared, we add the
1109
             new route (if we have any) to the list (we don't care about
1110
             position) and then we elect the new optimal route and relink
1111
             that route at the first position and announce it. New optimal
1112
             route might be NULL if there is no more routes */
1113

    
1114
        do_recalculate:
1115
          /* Add the new route to the list */
1116
          if (new)
1117
            {
1118
              new->next = net->routes;
1119
              net->routes = new;
1120
            }
1121

    
1122
          /* Find a new optimal route (if there is any) */
1123
          if (net->routes)
1124
            {
1125
              rte **bp = &net->routes;
1126
              for (k=&(*bp)->next; *k; k=&(*k)->next)
1127
                if (rte_better(*k, *bp))
1128
                  bp = k;
1129

    
1130
              /* And relink it */
1131
              rte *best = *bp;
1132
              *bp = best->next;
1133
              best->next = net->routes;
1134
              net->routes = best;
1135
            }
1136
        }
1137
      else if (new)
1138
        {
1139
          /* The third case - the new route is not better than the old
1140
             best route (therefore old_best != NULL) and the old best
1141
             route was not removed (therefore old_best == net->routes).
1142
             We just link the new route after the old best route. */
1143

    
1144
          ASSERT(net->routes != NULL);
1145
          new->next = net->routes->next;
1146
          net->routes->next = new;
1147
        }
1148
      /* The fourth (empty) case - suboptimal route was removed, nothing to do */
1149
    }
1150

    
1151
  if (new)
1152
    new->lastmod = now;
1153

    
1154
  /* Log the route change */
1155
  if (p->debug & D_ROUTES)
1156
    {
1157
      if (new_ok)
1158
        rte_trace(p, new, '>', new == net->routes ? "added [best]" : "added");
1159
      else if (old_ok)
1160
        {
1161
          if (old != old_best)
1162
            rte_trace(p, old, '>', "removed");
1163
          else if (rte_is_ok(net->routes))
1164
            rte_trace(p, old, '>', "removed [replaced]");
1165
          else
1166
            rte_trace(p, old, '>', "removed [sole]");
1167
        }
1168
    }
1169

    
1170
  /* Propagate the route change */
1171
  rte_announce(table, RA_ANY, net, new, old, NULL, NULL, NULL);
1172
  if (net->routes != old_best)
1173
    rte_announce(table, RA_OPTIMAL, net, net->routes, old_best, NULL, NULL, NULL);
1174
  if (table->config->sorted)
1175
    rte_announce(table, RA_ACCEPTED, net, new, old, NULL, NULL, before_old);
1176
  rte_announce(table, RA_MERGED, net, new, old, net->routes, old_best, NULL);
1177

    
1178
  if (!net->routes &&
1179
      (table->gc_counter++ >= table->config->gc_max_ops) &&
1180
      (table->gc_time + table->config->gc_min_time <= now))
1181
    rt_schedule_prune(table);
1182

    
1183
  if (old_ok && p->rte_remove)
1184
    p->rte_remove(net, old);
1185
  if (new_ok && p->rte_insert)
1186
    p->rte_insert(net, new);
1187

    
1188
  if (old)
1189
    rte_free_quick(old);
1190
}
1191

    
1192
static int rte_update_nest_cnt;                /* Nesting counter to allow recursive updates */
1193

    
1194
static inline void
1195
rte_update_lock(void)
1196
{
1197
  rte_update_nest_cnt++;
1198
}
1199

    
1200
static inline void
1201
rte_update_unlock(void)
1202
{
1203
  if (!--rte_update_nest_cnt)
1204
    lp_flush(rte_update_pool);
1205
}
1206

    
1207
static inline void
1208
rte_hide_dummy_routes(net *net, rte **dummy)
1209
{
1210
  if (net->routes && net->routes->attrs->source == RTS_DUMMY)
1211
  {
1212
    *dummy = net->routes;
1213
    net->routes = (*dummy)->next;
1214
  }
1215
}
1216

    
1217
static inline void
1218
rte_unhide_dummy_routes(net *net, rte **dummy)
1219
{
1220
  if (*dummy)
1221
  {
1222
    (*dummy)->next = net->routes;
1223
    net->routes = *dummy;
1224
  }
1225
}
1226

    
1227
/**
1228
 * rte_update - enter a new update to a routing table
1229
 * @table: table to be updated
1230
 * @c: channel doing the update
1231
 * @net: network node
1232
 * @p: protocol submitting the update
1233
 * @src: protocol originating the update
1234
 * @new: a &rte representing the new route or %NULL for route removal.
1235
 *
1236
 * This function is called by the routing protocols whenever they discover
1237
 * a new route or wish to update/remove an existing route. The right announcement
1238
 * sequence is to build route attributes first (either un-cached with @aflags set
1239
 * to zero or a cached one using rta_lookup(); in this case please note that
1240
 * you need to increase the use count of the attributes yourself by calling
1241
 * rta_clone()), call rte_get_temp() to obtain a temporary &rte, fill in all
1242
 * the appropriate data and finally submit the new &rte by calling rte_update().
1243
 *
1244
 * @src specifies the protocol that originally created the route and the meaning
1245
 * of protocol-dependent data of @new. If @new is not %NULL, @src have to be the
1246
 * same value as @new->attrs->proto. @p specifies the protocol that called
1247
 * rte_update(). In most cases it is the same protocol as @src. rte_update()
1248
 * stores @p in @new->sender;
1249
 *
1250
 * When rte_update() gets any route, it automatically validates it (checks,
1251
 * whether the network and next hop address are valid IP addresses and also
1252
 * whether a normal routing protocol doesn't try to smuggle a host or link
1253
 * scope route to the table), converts all protocol dependent attributes stored
1254
 * in the &rte to temporary extended attributes, consults import filters of the
1255
 * protocol to see if the route should be accepted and/or its attributes modified,
1256
 * stores the temporary attributes back to the &rte.
1257
 *
1258
 * Now, having a "public" version of the route, we
1259
 * automatically find any old route defined by the protocol @src
1260
 * for network @n, replace it by the new one (or removing it if @new is %NULL),
1261
 * recalculate the optimal route for this destination and finally broadcast
1262
 * the change (if any) to all routing protocols by calling rte_announce().
1263
 *
1264
 * All memory used for attribute lists and other temporary allocations is taken
1265
 * from a special linear pool @rte_update_pool and freed when rte_update()
1266
 * finishes.
1267
 */
1268

    
1269
void
1270
rte_update2(struct channel *c, net *net, rte *new, struct rte_src *src)
1271
{
1272
  struct proto *p = c->proto;
1273
  struct proto_stats *stats = &c->stats;
1274
  struct filter *filter = c->in_filter;
1275
  ea_list *tmpa = NULL;
1276
  rte *dummy = NULL;
1277

    
1278
  ASSERT(c->channel_state == CS_UP);
1279

    
1280
  rte_update_lock();
1281
  if (new)
1282
    {
1283
      new->sender = c;
1284

    
1285
      if (!new->pref)
1286
        new->pref = c->preference;
1287

    
1288
      stats->imp_updates_received++;
1289
      if (!rte_validate(new))
1290
        {
1291
          rte_trace_in(D_FILTERS, p, new, "invalid");
1292
          stats->imp_updates_invalid++;
1293
          goto drop;
1294
        }
1295

    
1296
      if (filter == FILTER_REJECT)
1297
        {
1298
          stats->imp_updates_filtered++;
1299
          rte_trace_in(D_FILTERS, p, new, "filtered out");
1300

    
1301
          if (! c->in_keep_filtered)
1302
            goto drop;
1303

    
1304
          /* new is a private copy, i could modify it */
1305
          new->flags |= REF_FILTERED;
1306
        }
1307
      else
1308
        {
1309
          tmpa = make_tmp_attrs(new, rte_update_pool);
1310
          if (filter && (filter != FILTER_REJECT))
1311
            {
1312
              ea_list *old_tmpa = tmpa;
1313
              int fr = f_run(filter, &new, &tmpa, rte_update_pool, 0);
1314
              if (fr > F_ACCEPT)
1315
                {
1316
                  stats->imp_updates_filtered++;
1317
                  rte_trace_in(D_FILTERS, p, new, "filtered out");
1318

    
1319
                  if (! c->in_keep_filtered)
1320
                    goto drop;
1321

    
1322
                  new->flags |= REF_FILTERED;
1323
                }
1324
              if (tmpa != old_tmpa && src->proto->store_tmp_attrs)
1325
                src->proto->store_tmp_attrs(new, tmpa);
1326
            }
1327
        }
1328
      if (!rta_is_cached(new->attrs)) /* Need to copy attributes */
1329
        new->attrs = rta_lookup(new->attrs);
1330
      new->flags |= REF_COW;
1331
    }
1332
  else
1333
    {
1334
      stats->imp_withdraws_received++;
1335

    
1336
      if (!net || !src)
1337
        {
1338
          stats->imp_withdraws_ignored++;
1339
          rte_update_unlock();
1340
          return;
1341
        }
1342
    }
1343

    
1344
 recalc:
1345
  rte_hide_dummy_routes(net, &dummy);
1346
  rte_recalculate(c, net, new, src);
1347
  rte_unhide_dummy_routes(net, &dummy);
1348
  rte_update_unlock();
1349
  return;
1350

    
1351
 drop:
1352
  rte_free(new);
1353
  new = NULL;
1354
  goto recalc;
1355
}
1356

    
1357
/* Independent call to rte_announce(), used from next hop
1358
   recalculation, outside of rte_update(). new must be non-NULL */
1359
static inline void 
1360
rte_announce_i(rtable *tab, unsigned type, net *net, rte *new, rte *old,
1361
               rte *new_best, rte *old_best)
1362
{
1363
  rte_update_lock();
1364
  rte_announce(tab, type, net, new, old, new_best, old_best, NULL);
1365
  rte_update_unlock();
1366
}
1367

    
1368
void
1369
rte_discard(rtable *t, rte *old)        /* Non-filtered route deletion, used during garbage collection */
1370
{
1371
  rte_update_lock();
1372
  rte_recalculate(old->sender, old->net, NULL, old->attrs->src);
1373
  rte_update_unlock();
1374
}
1375

    
1376
/* Check rtable for best route to given net whether it would be exported do p */
1377
int
1378
rt_examine(rtable *t, net_addr *a, struct proto *p, struct filter *filter)
1379
{
1380
  net *n = net_find(t, a);
1381
  rte *rt = n ? n->routes : NULL;
1382

    
1383
  if (!rte_is_valid(rt))
1384
    return 0;
1385

    
1386
  rte_update_lock();
1387

    
1388
  /* Rest is stripped down export_filter() */
1389
  ea_list *tmpa = make_tmp_attrs(rt, rte_update_pool);
1390
  int v = p->import_control ? p->import_control(p, &rt, &tmpa, rte_update_pool) : 0;
1391
  if (v == RIC_PROCESS)
1392
    v = (f_run(filter, &rt, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) <= F_ACCEPT);
1393

    
1394
   /* Discard temporary rte */
1395
  if (rt != n->routes)
1396
    rte_free(rt);
1397

    
1398
  rte_update_unlock();
1399

    
1400
  return v > 0;
1401
}
1402

    
1403

    
1404
/**
1405
 * rt_refresh_begin - start a refresh cycle
1406
 * @t: related routing table
1407
 * @c related channel
1408
 *
1409
 * This function starts a refresh cycle for given routing table and announce
1410
 * hook. The refresh cycle is a sequence where the protocol sends all its valid
1411
 * routes to the routing table (by rte_update()). After that, all protocol
1412
 * routes (more precisely routes with @c as @sender) not sent during the
1413
 * refresh cycle but still in the table from the past are pruned. This is
1414
 * implemented by marking all related routes as stale by REF_STALE flag in
1415
 * rt_refresh_begin(), then marking all related stale routes with REF_DISCARD
1416
 * flag in rt_refresh_end() and then removing such routes in the prune loop.
1417
 */
1418
void
1419
rt_refresh_begin(rtable *t, struct channel *c)
1420
{
1421
  FIB_WALK(&t->fib, net, n)
1422
    {
1423
      rte *e;
1424
      for (e = n->routes; e; e = e->next)
1425
        if (e->sender == c)
1426
          e->flags |= REF_STALE;
1427
    }
1428
  FIB_WALK_END;
1429
}
1430

    
1431
/**
1432
 * rt_refresh_end - end a refresh cycle
1433
 * @t: related routing table
1434
 * @c: related channel
1435
 *
1436
 * This function ends a refresh cycle for given routing table and announce
1437
 * hook. See rt_refresh_begin() for description of refresh cycles.
1438
 */
1439
void
1440
rt_refresh_end(rtable *t, struct channel *c)
1441
{
1442
  int prune = 0;
1443

    
1444
  FIB_WALK(&t->fib, net, n)
1445
    {
1446
      rte *e;
1447
      for (e = n->routes; e; e = e->next)
1448
        if ((e->sender == c) && (e->flags & REF_STALE))
1449
          {
1450
            e->flags |= REF_DISCARD;
1451
            prune = 1;
1452
          }
1453
    }
1454
  FIB_WALK_END;
1455

    
1456
  if (prune)
1457
    rt_schedule_prune(t);
1458
}
1459

    
1460

    
1461
/**
1462
 * rte_dump - dump a route
1463
 * @e: &rte to be dumped
1464
 *
1465
 * This functions dumps contents of a &rte to debug output.
1466
 */
1467
void
1468
rte_dump(rte *e)
1469
{
1470
  net *n = e->net;
1471
  debug("%-1N ", n->n.addr);
1472
  debug("KF=%02x PF=%02x pref=%d lm=%d ", n->n.flags, e->pflags, e->pref, now-e->lastmod);
1473
  rta_dump(e->attrs);
1474
  if (e->attrs->src->proto->proto->dump_attrs)
1475
    e->attrs->src->proto->proto->dump_attrs(e);
1476
  debug("\n");
1477
}
1478

    
1479
/**
1480
 * rt_dump - dump a routing table
1481
 * @t: routing table to be dumped
1482
 *
1483
 * This function dumps contents of a given routing table to debug output.
1484
 */
1485
void
1486
rt_dump(rtable *t)
1487
{
1488
  debug("Dump of routing table <%s>\n", t->name);
1489
#ifdef DEBUGGING
1490
  fib_check(&t->fib);
1491
#endif
1492
  FIB_WALK(&t->fib, net, n)
1493
    {
1494
      rte *e;
1495
      for(e=n->routes; e; e=e->next)
1496
        rte_dump(e);
1497
    }
1498
  FIB_WALK_END;
1499
  debug("\n");
1500
}
1501

    
1502
/**
1503
 * rt_dump_all - dump all routing tables
1504
 *
1505
 * This function dumps contents of all routing tables to debug output.
1506
 */
1507
void
1508
rt_dump_all(void)
1509
{
1510
  rtable *t;
1511

    
1512
  WALK_LIST(t, routing_tables)
1513
    rt_dump(t);
1514
}
1515

    
1516
static inline void
1517
rt_schedule_hcu(rtable *tab)
1518
{
1519
  if (tab->hcu_scheduled)
1520
    return;
1521

    
1522
  tab->hcu_scheduled = 1;
1523
  ev_schedule(tab->rt_event);
1524
}
1525

    
1526
static inline void
1527
rt_schedule_nhu(rtable *tab)
1528
{
1529
  if (tab->nhu_state == 0)
1530
    ev_schedule(tab->rt_event);
1531

    
1532
  /* state change 0->1, 2->3 */
1533
  tab->nhu_state |= 1;
1534
}
1535

    
1536
void
1537
rt_schedule_prune(rtable *tab)
1538
{
1539
  if (tab->prune_state == 0)
1540
    ev_schedule(tab->rt_event);
1541

    
1542
  /* state change 0->1, 2->3 */
1543
  tab->prune_state |= 1;
1544
}
1545

    
1546

    
1547
static void
1548
rt_event(void *ptr)
1549
{
1550
  rtable *tab = ptr;
1551

    
1552
  if (tab->hcu_scheduled)
1553
    rt_update_hostcache(tab);
1554

    
1555
  if (tab->nhu_state)
1556
    rt_next_hop_update(tab);
1557

    
1558
  if (tab->prune_state)
1559
    rt_prune_table(tab);
1560
}
1561

    
1562
void
1563
rt_setup(pool *p, rtable *t, char *name, struct rtable_config *cf)
1564
{
1565
  bzero(t, sizeof(*t));
1566
  t->name = name;
1567
  t->config = cf;
1568
  t->addr_type = cf ? cf->addr_type : NET_IP4;
1569
  fib_init(&t->fib, p, t->addr_type, sizeof(net), OFFSETOF(net, n), 0, NULL);
1570
  init_list(&t->channels);
1571

    
1572
  if (cf)
1573
    {
1574
      t->rt_event = ev_new(p);
1575
      t->rt_event->hook = rt_event;
1576
      t->rt_event->data = t;
1577
      t->gc_time = now;
1578
    }
1579
}
1580

    
1581
/**
1582
 * rt_init - initialize routing tables
1583
 *
1584
 * This function is called during BIRD startup. It initializes the
1585
 * routing table module.
1586
 */
1587
void
1588
rt_init(void)
1589
{
1590
  rta_init();
1591
  rt_table_pool = rp_new(&root_pool, "Routing tables");
1592
  rte_update_pool = lp_new(rt_table_pool, 4080);
1593
  rte_slab = sl_new(rt_table_pool, sizeof(rte));
1594
  init_list(&routing_tables);
1595
}
1596

    
1597

    
1598
/**
1599
 * rt_prune_table - prune a routing table
1600
 *
1601
 * The prune loop scans routing tables and removes routes belonging to flushing
1602
 * protocols, discarded routes and also stale network entries. It is called from
1603
 * rt_event(). The event is rescheduled if the current iteration do not finish
1604
 * the table. The pruning is directed by the prune state (@prune_state),
1605
 * specifying whether the prune cycle is scheduled or running, and there
1606
 * is also a persistent pruning iterator (@prune_fit).
1607
 *
1608
 * The prune loop is used also for channel flushing. For this purpose, the
1609
 * channels to flush are marked before the iteration and notified after the
1610
 * iteration.
1611
 */
1612
static void
1613
rt_prune_table(rtable *tab)
1614
{
1615
  struct fib_iterator *fit = &tab->prune_fit;
1616
  int limit = 512;
1617

    
1618
  struct channel *c;
1619
  node *n, *x;
1620

    
1621
  DBG("Pruning route table %s\n", tab->name);
1622
#ifdef DEBUGGING
1623
  fib_check(&tab->fib);
1624
#endif
1625

    
1626
  if (tab->prune_state == 0)
1627
    return;
1628

    
1629
  if (tab->prune_state == 1)
1630
  {
1631
    /* Mark channels to flush */
1632
    WALK_LIST2(c, n, tab->channels, table_node)
1633
      if (c->channel_state == CS_FLUSHING)
1634
        c->flush_active = 1;
1635

    
1636
    FIB_ITERATE_INIT(fit, &tab->fib);
1637
    tab->prune_state = 2;
1638
  }
1639

    
1640
again:
1641
  FIB_ITERATE_START(&tab->fib, fit, net, n)
1642
    {
1643
      rte *e;
1644

    
1645
    rescan:
1646
      for (e=n->routes; e; e=e->next)
1647
        if (e->sender->flush_active || (e->flags & REF_DISCARD))
1648
          {
1649
            if (limit <= 0)
1650
              {
1651
                FIB_ITERATE_PUT(fit);
1652
                ev_schedule(tab->rt_event);
1653
                return;
1654
              }
1655

    
1656
            rte_discard(tab, e);
1657
            limit--;
1658

    
1659
            goto rescan;
1660
          }
1661

    
1662
      if (!n->routes)                /* Orphaned FIB entry */
1663
        {
1664
          FIB_ITERATE_PUT(fit);
1665
          fib_delete(&tab->fib, n);
1666
          goto again;
1667
        }
1668
    }
1669
  FIB_ITERATE_END;
1670

    
1671
#ifdef DEBUGGING
1672
  fib_check(&tab->fib);
1673
#endif
1674

    
1675
  tab->gc_counter = 0;
1676
  tab->gc_time = now;
1677

    
1678
  /* state change 2->0, 3->1 */
1679
  tab->prune_state &= 1;
1680

    
1681
  if (tab->prune_state > 0)
1682
    ev_schedule(tab->rt_event);
1683

    
1684
  /* FIXME: This should be handled in a better way */
1685
  rt_prune_sources();
1686

    
1687
  /* Close flushed channels */
1688
  WALK_LIST2_DELSAFE(c, n, x, tab->channels, table_node)
1689
    if (c->flush_active)
1690
      {
1691
        c->flush_active = 0;
1692
        channel_set_state(c, CS_DOWN);
1693
      }
1694

    
1695
  return;
1696
}
1697

    
1698
void
1699
rt_preconfig(struct config *c)
1700
{
1701
  init_list(&c->tables);
1702

    
1703
  rt_new_table(cf_get_symbol("master4"), NET_IP4);
1704
  rt_new_table(cf_get_symbol("master6"), NET_IP6);
1705
}
1706

    
1707

    
1708
/*
 * Some functions for handling internal next hop updates
 * triggered by rt_schedule_nhu().
 */
1712

    
1713
static inline int
1714
rta_next_hop_outdated(rta *a)
1715
{
1716
  struct hostentry *he = a->hostentry;
1717

    
1718
  if (!he)
1719
    return 0;
1720

    
1721
  if (!he->src)
1722
    return a->dest != RTD_UNREACHABLE;
1723

    
1724
  return (a->iface != he->src->iface) || !ipa_equal(a->gw, he->gw) ||
1725
    (a->dest != he->dest) || (a->igp_metric != he->igp_metric) ||
1726
    !mpnh_same(a->nexthops, he->src->nexthops);
1727
}
1728

    
1729
static inline void
1730
rta_apply_hostentry(rta *a, struct hostentry *he)
1731
{
1732
  a->hostentry = he;
1733
  a->iface = he->src ? he->src->iface : NULL;
1734
  a->gw = he->gw;
1735
  a->dest = he->dest;
1736
  a->igp_metric = he->igp_metric;
1737
  a->nexthops = he->src ? he->src->nexthops : NULL;
1738
}
1739

    
1740
static inline rte *
1741
rt_next_hop_update_rte(rtable *tab, rte *old)
1742
{
1743
  rta a;
1744
  memcpy(&a, old->attrs, sizeof(rta));
1745
  rta_apply_hostentry(&a, old->attrs->hostentry);
1746
  a.aflags = 0;
1747

    
1748
  rte *e = sl_alloc(rte_slab);
1749
  memcpy(e, old, sizeof(rte));
1750
  e->attrs = rta_lookup(&a);
1751

    
1752
  return e;
1753
}
1754

    
1755
static inline int
1756
rt_next_hop_update_net(rtable *tab, net *n)
1757
{
1758
  rte **k, *e, *new, *old_best, **new_best;
1759
  int count = 0;
1760
  int free_old_best = 0;
1761

    
1762
  old_best = n->routes;
1763
  if (!old_best)
1764
    return 0;
1765

    
1766
  for (k = &n->routes; e = *k; k = &e->next)
1767
    if (rta_next_hop_outdated(e->attrs))
1768
      {
1769
        new = rt_next_hop_update_rte(tab, e);
1770
        *k = new;
1771

    
1772
        rte_announce_i(tab, RA_ANY, n, new, e, NULL, NULL);
1773
        rte_trace_in(D_ROUTES, new->sender->proto, new, "updated");
1774

    
1775
        /* Call a pre-comparison hook */
1776
        /* Not really an efficient way to compute this */
1777
        if (e->attrs->src->proto->rte_recalculate)
1778
          e->attrs->src->proto->rte_recalculate(tab, n, new, e, NULL);
1779

    
1780
        if (e != old_best)
1781
          rte_free_quick(e);
1782
        else /* Freeing of the old best rte is postponed */
1783
          free_old_best = 1;
1784

    
1785
        e = new;
1786
        count++;
1787
      }
1788

    
1789
  if (!count)
1790
    return 0;
1791

    
1792
  /* Find the new best route */
1793
  new_best = NULL;
1794
  for (k = &n->routes; e = *k; k = &e->next)
1795
    {
1796
      if (!new_best || rte_better(e, *new_best))
1797
        new_best = k;
1798
    }
1799

    
1800
  /* Relink the new best route to the first position */
1801
  new = *new_best;
1802
  if (new != n->routes)
1803
    {
1804
      *new_best = new->next;
1805
      new->next = n->routes;
1806
      n->routes = new;
1807
    }
1808

    
1809
  /* Announce the new best route */
1810
  if (new != old_best)
1811
    {
1812
      rte_announce_i(tab, RA_OPTIMAL, n, new, old_best, NULL, NULL);
1813
      rte_trace_in(D_ROUTES, new->sender->proto, new, "updated [best]");
1814
    }
1815

    
1816
  /* FIXME: Better announcement of merged routes */
1817
  rte_announce_i(tab, RA_MERGED, n, new, old_best, new, old_best);
1818

    
1819
   if (free_old_best)
1820
    rte_free_quick(old_best);
1821

    
1822
  return count;
1823
}
1824

    
1825
static void
1826
rt_next_hop_update(rtable *tab)
1827
{
1828
  struct fib_iterator *fit = &tab->nhu_fit;
1829
  int max_feed = 32;
1830

    
1831
  if (tab->nhu_state == 0)
1832
    return;
1833

    
1834
  if (tab->nhu_state == 1)
1835
    {
1836
      FIB_ITERATE_INIT(fit, &tab->fib);
1837
      tab->nhu_state = 2;
1838
    }
1839

    
1840
  FIB_ITERATE_START(&tab->fib, fit, net, n)
1841
    {
1842
      if (max_feed <= 0)
1843
        {
1844
          FIB_ITERATE_PUT(fit);
1845
          ev_schedule(tab->rt_event);
1846
          return;
1847
        }
1848
      max_feed -= rt_next_hop_update_net(tab, n);
1849
    }
1850
  FIB_ITERATE_END;
1851

    
1852
  /* state change 2->0, 3->1 */
1853
  tab->nhu_state &= 1;
1854

    
1855
  if (tab->nhu_state > 0)
1856
    ev_schedule(tab->rt_event);
1857
}
1858

    
1859

    
1860
struct rtable_config *
1861
rt_new_table(struct symbol *s, uint addr_type)
1862
{
1863
  /* Hack that allows to 'redefine' the master table */
1864
  if ((s->class == SYM_TABLE) &&
1865
      (s->def == new_config->def_tables[addr_type]) &&
1866
      ((addr_type == NET_IP4) || (addr_type == NET_IP6)))
1867
    return s->def;
1868

    
1869
  struct rtable_config *c = cfg_allocz(sizeof(struct rtable_config));
1870

    
1871
  cf_define_symbol(s, SYM_TABLE, c);
1872
  c->name = s->name;
1873
  c->addr_type = addr_type;
1874
  c->gc_max_ops = 1000;
1875
  c->gc_min_time = 5;
1876

    
1877
  add_tail(&new_config->tables, &c->n);
1878

    
1879
  /* First table of each type is kept as default */
1880
  if (! new_config->def_tables[addr_type])
1881
    new_config->def_tables[addr_type] = c;
1882

    
1883
  return c;
1884
}
1885

    
1886
/**
1887
 * rt_lock_table - lock a routing table
1888
 * @r: routing table to be locked
1889
 *
1890
 * Lock a routing table, because it's in use by a protocol,
1891
 * preventing it from being freed when it gets undefined in a new
1892
 * configuration.
1893
 */
1894
void
1895
rt_lock_table(rtable *r)
1896
{
1897
  r->use_count++;
1898
}
1899

    
1900
/**
1901
 * rt_unlock_table - unlock a routing table
1902
 * @r: routing table to be unlocked
1903
 *
1904
 * Unlock a routing table formerly locked by rt_lock_table(),
1905
 * that is decrease its use count and delete it if it's scheduled
1906
 * for deletion by configuration changes.
1907
 */
1908
void
1909
rt_unlock_table(rtable *r)
1910
{
1911
  if (!--r->use_count && r->deleted)
1912
    {
1913
      struct config *conf = r->deleted;
1914
      DBG("Deleting routing table %s\n", r->name);
1915
      r->config->table = NULL;
1916
      if (r->hostcache)
1917
        rt_free_hostcache(r);
1918
      rem_node(&r->n);
1919
      fib_free(&r->fib);
1920
      rfree(r->rt_event);
1921
      mb_free(r);
1922
      config_del_obstacle(conf);
1923
    }
1924
}
1925

    
1926
/**
1927
 * rt_commit - commit new routing table configuration
1928
 * @new: new configuration
1929
 * @old: original configuration or %NULL if it's boot time config
1930
 *
1931
 * Scan differences between @old and @new configuration and modify
1932
 * the routing tables according to these changes. If @new defines a
1933
 * previously unknown table, create it, if it omits a table existing
1934
 * in @old, schedule it for deletion (it gets deleted when all protocols
1935
 * disconnect from it by calling rt_unlock_table()), if it exists
1936
 * in both configurations, leave it unchanged.
1937
 */
1938
void
1939
rt_commit(struct config *new, struct config *old)
1940
{
1941
  struct rtable_config *o, *r;
1942

    
1943
  DBG("rt_commit:\n");
1944
  if (old)
1945
    {
1946
      WALK_LIST(o, old->tables)
1947
        {
1948
          rtable *ot = o->table;
1949
          if (!ot->deleted)
1950
            {
1951
              struct symbol *sym = cf_find_symbol(new, o->name);
1952
              if (sym && sym->class == SYM_TABLE && !new->shutdown)
1953
                {
1954
                  DBG("\t%s: same\n", o->name);
1955
                  r = sym->def;
1956
                  r->table = ot;
1957
                  ot->name = r->name;
1958
                  ot->config = r;
1959
                  if (o->sorted != r->sorted)
1960
                    log(L_WARN "Reconfiguration of rtable sorted flag not implemented");
1961
                }
1962
              else
1963
                {
1964
                  DBG("\t%s: deleted\n", o->name);
1965
                  ot->deleted = old;
1966
                  config_add_obstacle(old);
1967
                  rt_lock_table(ot);
1968
                  rt_unlock_table(ot);
1969
                }
1970
            }
1971
        }
1972
    }
1973

    
1974
  WALK_LIST(r, new->tables)
1975
    if (!r->table)
1976
      {
1977
        rtable *t = mb_alloc(rt_table_pool, sizeof(struct rtable));
1978
        DBG("\t%s: created\n", r->name);
1979
        rt_setup(rt_table_pool, t, r->name, r);
1980
        add_tail(&routing_tables, &t->n);
1981
        r->table = t;
1982
      }
1983
  DBG("\tdone\n");
1984
}
1985

    
1986
static inline void
1987
do_feed_channel(struct channel *c, net *n, rte *e)
1988
{
1989
  rte_update_lock();
1990
  if (c->ra_mode == RA_ACCEPTED)
1991
    rt_notify_accepted(c, n, e, NULL, NULL, c->refeeding ? 2 : 1);
1992
  else if (c->ra_mode == RA_MERGED)
1993
    rt_notify_merged(c, n, NULL, NULL, e, c->refeeding ? e : NULL, c->refeeding);
1994
  else /* RA_BASIC */
1995
    rt_notify_basic(c, n, e, c->refeeding ? e : NULL, c->refeeding);
1996
  rte_update_unlock();
1997
}
1998

    
1999
/**
 * rt_feed_channel - advertise all routes to a channel
 * @c: channel to be fed
 *
 * This function performs one pass of advertisement of routes to a channel that
 * is in the ES_FEEDING state. It is called by the protocol code as long as it
 * has something to do. (We avoid transferring all the routes in single pass in
 * order not to monopolize CPU time.)
 *
 * Returns 0 when interrupted with more work pending, 1 when the feed
 * finished (or the channel left the feeding state).
 */
int
rt_feed_channel(struct channel *c)
{
  struct fib_iterator *fit = &c->feed_fit;
  int max_feed = 256;	/* Per-pass budget of advertised routes */

  ASSERT(c->export_state == ES_FEEDING);

  /* First pass for this feed -- attach the iterator to the table FIB */
  if (!c->feed_active)
    {
      FIB_ITERATE_INIT(fit, &c->table->fib);
      c->feed_active = 1;
    }

  FIB_ITERATE_START(&c->table->fib, fit, net, n)
    {
      rte *e = n->routes;
      if (max_feed <= 0)
	{
	  /* Budget exhausted -- park the iterator and ask to be called again */
	  FIB_ITERATE_PUT(fit);
	  return 0;
	}

      /* FIXME: perhaps we should change feed for RA_ACCEPTED to not use 'new' */

      if ((c->ra_mode == RA_OPTIMAL) ||
	  (c->ra_mode == RA_ACCEPTED) ||
	  (c->ra_mode == RA_MERGED))
	if (rte_is_valid(e))
	  {
	    /* In the meantime, the protocol may have gone down */
	    if (c->export_state != ES_FEEDING)
	      goto done;

	    do_feed_channel(c, n, e);
	    max_feed--;
	  }

      if (c->ra_mode == RA_ANY)
	for(e = n->routes; e; e = e->next)
	  {
	    /* In the meantime, the protocol may have gone down */
	    if (c->export_state != ES_FEEDING)
	      goto done;

	    if (!rte_is_valid(e))
	      continue;

	    do_feed_channel(c, n, e);
	    max_feed--;
	  }
    }
  FIB_ITERATE_END;

done:
  c->feed_active = 0;
  return 1;
}
2066

    
2067
/**
2068
 * rt_feed_baby_abort - abort protocol feeding
2069
 * @c: channel
2070
 *
2071
 * This function is called by the protocol code when the protocol stops or
2072
 * ceases to exist during the feeding.
2073
 */
2074
void
2075
rt_feed_channel_abort(struct channel *c)
2076
{
2077
  if (c->feed_active)
2078
    {
2079
      /* Unlink the iterator */
2080
      fit_get(&c->table->fib, &c->feed_fit);
2081
      c->feed_active = 0;
2082
    }
2083
}
2084

    
2085
/* Mix a pointer value into an unsigned hash by folding shifted copies */
static inline unsigned
ptr_hash(void *ptr)
{
  const uintptr_t v = (uintptr_t) ptr;
  unsigned h = (unsigned) v;

  h ^= (unsigned) (v << 8);
  h ^= (unsigned) (v >> 16);
  return h;
}
2091

    
2092
static inline u32
2093
hc_hash(ip_addr a, rtable *dep)
2094
{
2095
  return ipa_hash(a) ^ ptr_hash(dep);
2096
}
2097

    
2098
static inline void
2099
hc_insert(struct hostcache *hc, struct hostentry *he)
2100
{
2101
  uint k = he->hash_key >> hc->hash_shift;
2102
  he->next = hc->hash_table[k];
2103
  hc->hash_table[k] = he;
2104
}
2105

    
2106
static inline void
2107
hc_remove(struct hostcache *hc, struct hostentry *he)
2108
{
2109
  struct hostentry **hep;
2110
  uint k = he->hash_key >> hc->hash_shift;
2111

    
2112
  for (hep = &hc->hash_table[k]; *hep != he; hep = &(*hep)->next);
2113
  *hep = he->next;
2114
}
2115

    
2116
/* Hostcache hash-table sizing; the *_MARK macros are textual operator
   suffixes applied to the table size in hc_alloc_table() */
#define HC_DEF_ORDER 10			/* Initial order (1 << 10 buckets) */
#define HC_HI_MARK *4			/* Grow when items exceed size*4 */
#define HC_HI_STEP 2			/* Grow by two orders at a time */
#define HC_HI_ORDER 16			/* Must be at most 16 */
#define HC_LO_MARK /5			/* Shrink when items drop below size/5 */
#define HC_LO_STEP 2			/* Shrink by two orders at a time */
#define HC_LO_ORDER 10			/* Never shrink below this order */
2123

    
2124
static void
2125
hc_alloc_table(struct hostcache *hc, unsigned order)
2126
{
2127
  unsigned hsize = 1 << order;
2128
  hc->hash_order = order;
2129
  hc->hash_shift = 32 - order;
2130
  hc->hash_max = (order >= HC_HI_ORDER) ? ~0 : (hsize HC_HI_MARK);
2131
  hc->hash_min = (order <= HC_LO_ORDER) ?  0 : (hsize HC_LO_MARK);
2132

    
2133
  hc->hash_table = mb_allocz(rt_table_pool, hsize * sizeof(struct hostentry *));
2134
}
2135

    
2136
static void
2137
hc_resize(struct hostcache *hc, unsigned new_order)
2138
{
2139
  unsigned old_size = 1 << hc->hash_order;
2140
  struct hostentry **old_table = hc->hash_table;
2141
  struct hostentry *he, *hen;
2142
  int i;
2143

    
2144
  hc_alloc_table(hc, new_order);
2145
  for (i = 0; i < old_size; i++)
2146
    for (he = old_table[i]; he != NULL; he=hen)
2147
      {
2148
        hen = he->next;
2149
        hc_insert(hc, he);
2150
      }
2151
  mb_free(old_table);
2152
}
2153

    
2154
/*
 * Allocate a new hostentry for address @a (link-local @ll) depending on
 * table @dep, with precomputed hash key @k, and insert it into the cache.
 * The entry starts unresolved (src == NULL) and unreferenced (uc == 0).
 */
static struct hostentry *
hc_new_hostentry(struct hostcache *hc, ip_addr a, ip_addr ll, rtable *dep, unsigned k)
{
  struct hostentry *he = sl_alloc(hc->slab);

  he->addr = a;
  he->link = ll;
  he->tab = dep;
  he->hash_key = k;
  he->uc = 0;		/* No users yet */
  he->src = NULL;	/* Not resolved yet */

  add_tail(&hc->hostentries, &he->ln);
  hc_insert(hc, he);

  /* Grow the hash table when it gets too crowded */
  hc->hash_items++;
  if (hc->hash_items > hc->hash_max)
    hc_resize(hc, hc->hash_order + HC_HI_STEP);

  return he;
}
2175

    
2176
/* Remove @he from the cache and free it, releasing its cached attributes */
static void
hc_delete_hostentry(struct hostcache *hc, struct hostentry *he)
{
  rta_free(he->src);

  /* Unlink from both the entry list and the hash table */
  rem_node(&he->ln);
  hc_remove(hc, he);
  sl_free(hc->slab, he);

  /* Shrink the hash table when it gets too sparse */
  hc->hash_items--;
  if (hc->hash_items < hc->hash_min)
    hc_resize(hc, hc->hash_order - HC_LO_STEP);
}
2189

    
2190
/* Create and attach an empty hostcache to table @tab */
static void
rt_init_hostcache(rtable *tab)
{
  struct hostcache *hc = mb_allocz(rt_table_pool, sizeof(struct hostcache));
  init_list(&hc->hostentries);

  hc->hash_items = 0;
  hc_alloc_table(hc, HC_DEF_ORDER);
  hc->slab = sl_new(rt_table_pool, sizeof(struct hostentry));

  /* Linpool backing the prefix trie; 1008 is the chunk size
     (presumably tuned to the allocator -- TODO confirm) */
  hc->lp = lp_new(rt_table_pool, 1008);
  hc->trie = f_new_trie(hc->lp, sizeof(struct f_trie_node));

  tab->hostcache = hc;
}
2205

    
2206
/* Tear down the hostcache of @tab; all entries should be unreferenced */
static void
rt_free_hostcache(rtable *tab)
{
  struct hostcache *hc = tab->hostcache;

  node *n;
  WALK_LIST(n, hc->hostentries)
    {
      struct hostentry *he = SKIP_BACK(struct hostentry, ln, n);
      rta_free(he->src);

      /* A nonzero use count here means somebody still holds a reference */
      if (he->uc)
	log(L_ERR "Hostcache is not empty in table %s", tab->name);
    }

  /* Entries themselves live in the slab; freeing it frees them all */
  rfree(hc->slab);
  rfree(hc->lp);
  mb_free(hc->hash_table);
  mb_free(hc);
}
2226

    
2227
static void
2228
rt_notify_hostcache(rtable *tab, net *net)
2229
{
2230
  if (tab->hcu_scheduled)
2231
    return;
2232

    
2233
  if (trie_match_net(tab->hostcache->trie, net->n.addr))
2234
    rt_schedule_hcu(tab);
2235
}
2236

    
2237
static int
2238
if_local_addr(ip_addr a, struct iface *i)
2239
{
2240
  struct ifa *b;
2241

    
2242
  WALK_LIST(b, i->addrs)
2243
    if (ipa_equal(a, b->ip))
2244
      return 1;
2245

    
2246
  return 0;
2247
}
2248

    
2249
/*
 * Return the IGP metric of route @rt for recursive next-hop resolution.
 * Prefers the explicit EA_GEN_IGP_METRIC attribute, then protocol-specific
 * metrics, then 0 for device routes, IGP_METRIC_UNKNOWN otherwise.
 */
static u32 
rt_get_igp_metric(rte *rt)
{
  eattr *ea = ea_find(rt->attrs->eattrs, EA_GEN_IGP_METRIC);

  if (ea)
    return ea->u.data;

  rta *a = rt->attrs;

#ifdef CONFIG_OSPF
  if ((a->source == RTS_OSPF) ||
      (a->source == RTS_OSPF_IA) ||
      (a->source == RTS_OSPF_EXT1))
    return rt->u.ospf.metric1;
#endif

#ifdef CONFIG_RIP
  if (a->source == RTS_RIP)
    return rt->u.rip.metric;
#endif

  /* Device routes */
  if ((a->dest != RTD_ROUTER) && (a->dest != RTD_MULTIPATH))
    return 0;

  return IGP_METRIC_UNKNOWN;
}
2277

    
2278
/*
 * Re-resolve hostentry @he against table @tab. Returns nonzero when the
 * cached route attributes changed (the dependent table then needs a
 * next-hop update).
 */
static int
rt_update_hostentry(rtable *tab, struct hostentry *he)
{
  rta *old_src = he->src;
  int pxlen = 0;

  /* Reset the hostentry */
  he->src = NULL;
  he->gw = IPA_NONE;
  he->dest = RTD_UNREACHABLE;
  he->igp_metric = 0;

  /* Longest-prefix match for the host address */
  net_addr he_addr;
  net_fill_ip_host(&he_addr, he->addr);
  net *n = net_route(tab, &he_addr);
  if (n)
    {
      rte *e = n->routes;
      rta *a = e->attrs;
      pxlen = n->n.addr->pxlen;

      if (a->hostentry)
	{
	  /* Recursive route should not depend on another recursive route */
	  log(L_WARN "Next hop address %I resolvable through recursive route for %N",
	      he->addr, n->n.addr);
	  goto done;
	}

      if (a->dest == RTD_DEVICE)
	{
	  if (if_local_addr(he->addr, a->iface))
	    {
	      /* The host address is a local address, this is not valid */
	      log(L_WARN "Next hop address %I is a local address of iface %s",
		  he->addr, a->iface->name);
	      goto done;
	    }

	  /* The host is directly reachable, use link as a gateway */
	  he->gw = he->link;
	  he->dest = RTD_ROUTER;
	}
      else
	{
	  /* The host is reachable through some route entry */
	  he->gw = a->gw;
	  he->dest = a->dest;
	}

      he->src = rta_clone(a);
      he->igp_metric = rt_get_igp_metric(e);
    }

 done:
  /* Add a prefix range to the trie, so changes to any prefix covering
     the host address re-trigger a hostcache update */
  trie_add_prefix(tab->hostcache->trie, &he_addr, pxlen, he_addr.pxlen);

  rta_free(old_src);
  return old_src != he->src;
}
2339

    
2340
/* Recompute every hostentry of @tab after the table changed */
static void
rt_update_hostcache(rtable *tab)
{
  struct hostcache *hc = tab->hostcache;
  struct hostentry *he;
  node *n, *x;

  /* Reset the trie; rt_update_hostentry() repopulates it */
  lp_flush(hc->lp);
  hc->trie = f_new_trie(hc->lp, sizeof(struct f_trie_node));

  WALK_LIST_DELSAFE(n, x, hc->hostentries)
    {
      he = SKIP_BACK(struct hostentry, ln, n);
      /* Garbage-collect entries nobody references any more */
      if (!he->uc)
	{
	  hc_delete_hostentry(hc, he);
	  continue;
	}

      /* Re-resolve; on change, schedule a next-hop update
	 of the dependent table */
      if (rt_update_hostentry(tab, he))
	rt_schedule_nhu(he->tab);
    }

  tab->hcu_scheduled = 0;
}
2366

    
2367
/*
 * Find or create the hostentry for (@a, @dep) in table @tab.
 * @ll is the link-local/immediate address used when @a is directly
 * reachable. A freshly created entry is resolved before returning.
 */
static struct hostentry *
rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep)
{
  struct hostentry *he;

  /* The hostcache is created lazily on first use */
  if (!tab->hostcache)
    rt_init_hostcache(tab);

  u32 k = hc_hash(a, dep);
  struct hostcache *hc = tab->hostcache;
  for (he = hc->hash_table[k >> hc->hash_shift]; he != NULL; he = he->next)
    if (ipa_equal(he->addr, a) && (he->tab == dep))
      return he;

  he = hc_new_hostentry(hc, a, ll, dep, k);
  rt_update_hostentry(tab, he);
  return he;
}
2385

    
2386
/* Attach to @a a hostentry resolving *@gw (link address *@ll) through
   table @tab; @dep is the table depending on the resolution. */
void
rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr *gw, ip_addr *ll)
{
  rta_apply_hostentry(a, rt_get_hostentry(tab, *gw, *ll, dep));
}
2391

    
2392

    
2393
/*
2394
 *  CLI commands
2395
 */
2396

    
2397
/* Format a human-readable next-hop description of @e into buffer @via */
static void
rt_format_via(rte *e, byte *via)
{
  rta *a = e->attrs;

  switch (a->dest)
    {
    case RTD_ROUTER:	bsprintf(via, "via %I on %s", a->gw, a->iface->name); break;
    case RTD_DEVICE:	bsprintf(via, "dev %s", a->iface->name); break;
    case RTD_BLACKHOLE:	bsprintf(via, "blackhole"); break;
    case RTD_UNREACHABLE:	bsprintf(via, "unreachable"); break;
    case RTD_PROHIBIT:	bsprintf(via, "prohibited"); break;
    case RTD_MULTIPATH:	bsprintf(via, "multipath"); break;
    default:		bsprintf(via, "???");
    }
}
2413

    
2414
/*
 * Print one route @e (with network address text @ia) to CLI @c,
 * according to the show options in @d. @tmpa carries the temporary
 * attributes produced by filtering in rt_show_net().
 */
static void
rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, ea_list *tmpa)
{
  byte via[IPA_MAX_TEXT_LENGTH+32];
  byte from[IPA_MAX_TEXT_LENGTH+8];
  byte tm[TM_DATETIME_BUFFER_SIZE], info[256];
  rta *a = e->attrs;
  int primary = (e->net->routes == e);	/* First route in the list is the best one */
  int sync_error = (e->net->n.flags & KRF_SYNC_ERROR);
  void (*get_route_info)(struct rte *, byte *buf, struct ea_list *attrs);
  struct mpnh *nh;

  rt_format_via(e, via);
  tm_format_datetime(tm, &config->tf_route, e->lastmod);
  if (ipa_nonzero(a->from) && !ipa_equal(a->from, a->gw))
    bsprintf(from, " from %I", a->from);
  else
    from[0] = 0;

  /* Protocol-specific info formatter, if the protocol provides one */
  get_route_info = a->src->proto->proto->get_route_info;
  if (get_route_info || d->verbose)
    {
      /* Need to normalize the extended attributes */
      ea_list *t = tmpa;
      t = ea_append(t, a->eattrs);
      tmpa = alloca(ea_scan(t));
      ea_merge(t, tmpa);
      ea_sort(tmpa);
    }
  if (get_route_info)
    get_route_info(e, info, tmpa);
  else
    bsprintf(info, " (%d)", e->pref);
  cli_printf(c, -1007, "%-18s %s [%s %s%s]%s%s", ia, via, a->src->proto->name,
	     tm, from, primary ? (sync_error ? " !" : " *") : "", info);
  for (nh = a->nexthops; nh; nh = nh->next)
    cli_printf(c, -1007, "\tvia %I on %s weight %d", nh->gw, nh->iface->name, nh->weight + 1);
  if (d->verbose)
    rta_show(c, a, tmpa);
}
2454

    
2455
/*
 * Show all matching routes of network @n on CLI @c, applying the
 * filtering / export-simulation options from @d and updating its
 * route/network counters.
 */
static void
rt_show_net(struct cli *c, net *n, struct rt_show_data *d)
{
  rte *e, *ee;
  byte ia[NET_MAX_TEXT_LENGTH+1];
  struct ea_list *tmpa;
  struct channel *ec = d->export_channel;
  int first = 1;	/* Cleared once the first route of this net is counted */
  int pass = 0;		/* Set once the remaining routes need no processing */

  bsprintf(ia, "%N", n->n.addr);

  for (e = n->routes; e; e = e->next)
    {
      if (rte_is_filtered(e) != d->filtered)
	continue;

      d->rt_counter++;
      d->net_counter += first;
      first = 0;

      if (pass)
	continue;

      /* Keep the original; filters may substitute a private copy into @e */
      ee = e;
      rte_update_lock();		/* We use the update buffer for filtering */
      tmpa = make_tmp_attrs(e, rte_update_pool);

      /* Special case for merged export */
      if ((d->export_mode == RSEM_EXPORT) && (ec->ra_mode == RA_MERGED))
	{
	  rte *rt_free;
	  e = rt_export_merged(ec, n, &rt_free, &tmpa, 1);
	  pass = 1;

	  if (!e)
	  { e = ee; goto skip; }
	}
      else if (d->export_mode)
	{
	  struct proto *ep = d->export_protocol;
	  int ic = ep->import_control ? ep->import_control(ep, &e, &tmpa, rte_update_pool) : 0;

	  /* Per-net export modes only care about the best route */
	  if (ec->ra_mode == RA_OPTIMAL || ec->ra_mode == RA_MERGED)
	    pass = 1;

	  if (ic < 0)
	    goto skip;

	  if (d->export_mode > RSEM_PREEXPORT)
	    {
	      /*
	       * FIXME - This shows what should be exported according to current
	       * filters, but not what was really exported. 'configure soft'
	       * command may change the export filter and do not update routes.
	       */
	      int do_export = (ic > 0) ||
		(f_run(ec->out_filter, &e, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) <= F_ACCEPT);

	      if (do_export != (d->export_mode == RSEM_EXPORT))
		goto skip;

	      if ((d->export_mode == RSEM_EXPORT) && (ec->ra_mode == RA_ACCEPTED))
		pass = 1;
	    }
	}

      if (d->show_protocol && (d->show_protocol != e->attrs->src->proto))
	goto skip;

      if (f_run(d->filter, &e, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) > F_ACCEPT)
	goto skip;

      d->show_counter++;
      if (d->stats < 2)
	rt_show_rte(c, ia, e, d, tmpa);
      ia[0] = 0;	/* Print the network address only for the first route */

    skip:
      if (e != ee)
      {
	/* Drop the private copy made by filtering/merging */
	rte_free(e);
	e = ee;
      }
      rte_update_unlock();

      if (d->primary_only)
	break;
    }
}
2546

    
2547
static struct channel *
2548
rt_show_export_channel(struct rt_show_data *d)
2549
{
2550
  if (! d->export_protocol->rt_notify)
2551
    return NULL;
2552

    
2553
  return proto_find_channel_by_table(d->export_protocol, d->table);
2554
}
2555

    
2556
/*
 * CLI continuation hook for 'show route': prints a batch of networks
 * per call and parks the FIB iterator between batches.
 */
static void
rt_show_cont(struct cli *c)
{
  struct rt_show_data *d = c->rover;
#ifdef DEBUGGING
  unsigned max = 4;	/* Tiny batches to exercise the continuation path */
#else
  unsigned max = 64;	/* Networks printed per continuation step */
#endif
  struct fib *fib = &d->table->fib;
  struct fib_iterator *it = &d->fit;

  if (d->export_mode)
    {
      /* Ensure we have current export channel */
      d->export_channel = rt_show_export_channel(d);
      if (!d->export_channel || (d->export_channel->export_state == ES_DOWN))
	{
	  cli_printf(c, 8005, "Channel is down");
	  goto done;
	}
    }

  FIB_ITERATE_START(fib, it, net, n)
    {
      if (!max--)
	{
	  /* Batch exhausted -- park the iterator, resume on next poll */
	  FIB_ITERATE_PUT(it);
	  return;
	}
      rt_show_net(c, n, d);
    }
  FIB_ITERATE_END;
  if (d->stats)
    cli_printf(c, 14, "%d of %d routes for %d networks", d->show_counter, d->rt_counter, d->net_counter);
  else
    cli_printf(c, 0, "");
done:
  /* Dump finished (or failed) -- detach the hooks from the session */
  c->cont = c->cleanup = NULL;
}
2596

    
2597
/* CLI cleanup hook (registered by rt_show()) -- runs when the session
   ends before the table dump has finished */
static void
rt_show_cleanup(struct cli *c)
{
  struct rt_show_data *d = c->rover;

  /* Unlink the iterator */
  fit_get(&d->table->fib, &d->fit);
}
2605

    
2606
static inline rtable *
2607
rt_show_get_table(struct proto *p)
2608
{
2609
  /* FIXME: Use a better way to handle multi-channel protocols */
2610

    
2611
  if (p->main_channel)
2612
    return p->main_channel->table;
2613

    
2614
  if (!EMPTY_LIST(p->channels))
2615
    return ((struct channel *) HEAD(p->channels))->table;
2616

    
2617
  return NULL;
2618
}
2619

    
2620
/*
 * Entry point of the 'show route' CLI command. Either dumps a whole
 * table asynchronously (via rt_show_cont continuations) or looks up a
 * single network synchronously when an address was given.
 */
void
rt_show(struct rt_show_data *d)
{
  net *n;

  /* Default is either a master table or a table related to a respective protocol */
  if (!d->table && d->export_protocol) d->table = rt_show_get_table(d->export_protocol);
  if (!d->table && d->show_protocol) d->table = rt_show_get_table(d->show_protocol);
  if (!d->table) d->table = config->def_tables[NET_IP4]->table; /* FIXME: iterate through all tables ? */

  /* Filtered routes are neither exported nor have sensible ordering */
  if (d->filtered && (d->export_mode || d->primary_only))
    cli_msg(0, "");

  if (!d->addr)
    {
      /* Whole-table dump -- hand control to the CLI continuation machinery */
      FIB_ITERATE_INIT(&d->fit, &d->table->fib);
      this_cli->cont = rt_show_cont;
      this_cli->cleanup = rt_show_cleanup;
      this_cli->rover = d;
    }
  else
    {
      if (d->export_mode)
	{
	  /* Find channel associated with the export protocol */
	  d->export_channel = rt_show_export_channel(d);
	  if (!d->export_channel || (d->export_channel->export_state == ES_DOWN))
	    {
	      cli_msg(8005, "Channel is down");
	      return;
	    }
	}

      /* 'show route for' uses longest-prefix match, plain lookup otherwise */
      if (d->show_for)
	n = net_route(d->table, d->addr);
      else
	n = net_find(d->table, d->addr);

      if (n)
	rt_show_net(this_cli, n, d);

      if (d->rt_counter)
	cli_msg(0, "");
      else
	cli_msg(8001, "Network not in table");
    }
}
2668

    
2669
/*
2670
 *  Documentation for functions declared inline in route.h
2671
 */
2672
#if 0
2673

2674
/**
2675
 * net_find - find a network entry
2676
 * @tab: a routing table
2677
 * @addr: address of the network
2678
 *
2679
 * net_find() looks up the given network in routing table @tab and
2680
 * returns a pointer to its &net entry or %NULL if no such network
2681
 * exists.
2682
 */
2683
static inline net *net_find(rtable *tab, net_addr *addr)
2684
{ DUMMY; }
2685

2686
/**
2687
 * net_get - obtain a network entry
2688
 * @tab: a routing table
2689
 * @addr: address of the network
2690
 *
2691
 * net_get() looks up the given network in routing table @tab and
2692
 * returns a pointer to its &net entry. If no such entry exists, it's
2693
 * created.
2694
 */
2695
static inline net *net_get(rtable *tab, net_addr *addr)
2696
{ DUMMY; }
2697

2698
/**
2699
 * rte_cow - copy a route for writing
2700
 * @r: a route entry to be copied
2701
 *
2702
 * rte_cow() takes a &rte and prepares it for modification. The exact action
2703
 * taken depends on the flags of the &rte -- if it's a temporary entry, it's
2704
 * just returned unchanged, else a new temporary entry with the same contents
2705
 * is created.
2706
 *
2707
 * The primary use of this function is inside the filter machinery -- when
2708
 * a filter wants to modify &rte contents (to change the preference or to
2709
 * attach another set of attributes), it must ensure that the &rte is not
2710
 * shared with anyone else (and especially that it isn't stored in any routing
2711
 * table).
2712
 *
2713
 * Result: a pointer to the new writable &rte.
2714
 */
2715
static inline rte * rte_cow(rte *r)
2716
{ DUMMY; }
2717

2718
#endif