Statistics
| Branch: | Revision:

iof-bird-daemon / nest / rt-table.c @ d14f8c3c

History | View | Annotate | Download (67.6 KB)

1
/*
2
 *        BIRD -- Routing Tables
3
 *
4
 *        (c) 1998--2000 Martin Mares <mj@ucw.cz>
5
 *
6
 *        Can be freely distributed and used under the terms of the GNU GPL.
7
 */
8

    
9
/**
10
 * DOC: Routing tables
11
 *
12
 * Routing tables are probably the most important structures BIRD uses. They
13
 * hold all the information about known networks, the associated routes and
14
 * their attributes.
15
 *
16
 * There are multiple routing tables (a primary one together with any
17
 * number of secondary ones if requested by the configuration). Each table
18
 * is basically a FIB containing entries describing the individual
19
 * destination networks. For each network (represented by structure &net),
20
 * there is a one-way linked list of route entries (&rte), the first entry
21
 * on the list being the best one (i.e., the one we currently use
22
 * for routing), the order of the other ones is undetermined.
23
 *
24
 * The &rte contains information specific to the route (preference, protocol
25
 * metrics, time of last modification etc.) and a pointer to a &rta structure
26
 * (see the route attribute module for a precise explanation) holding the
27
 * remaining route attributes which are expected to be shared by multiple
28
 * routes in order to conserve memory.
29
 */
30

    
31
#undef LOCAL_DEBUG
32

    
33
#include "nest/bird.h"
34
#include "nest/route.h"
35
#include "nest/protocol.h"
36
#include "nest/cli.h"
37
#include "nest/iface.h"
38
#include "lib/resource.h"
39
#include "lib/event.h"
40
#include "lib/string.h"
41
#include "conf/conf.h"
42
#include "filter/filter.h"
43
#include "lib/string.h"
44
#include "lib/alloca.h"
45

    
46
pool *rt_table_pool;
47

    
48
static slab *rte_slab;
49
static linpool *rte_update_pool;
50

    
51
static list routing_tables;
52

    
53
static byte *rt_format_via(rte *e);
54
static void rt_free_hostcache(rtable *tab);
55
static void rt_notify_hostcache(rtable *tab, net *net);
56
static void rt_update_hostcache(rtable *tab);
57
static void rt_next_hop_update(rtable *tab);
58
static inline void rt_prune_table(rtable *tab);
59

    
60

    
61
static inline struct ea_list *
62
make_tmp_attrs(struct rte *rt, struct linpool *pool)
63
{
64
  struct ea_list *(*mta)(struct rte *rt, struct linpool *pool);
65
  mta = rt->attrs->src->proto->make_tmp_attrs;
66
  return mta ? mta(rt, pool) : NULL;
67
}
68

    
69

    
70
/* Like fib_route(), but skips empty net entries */
71
static inline void *
72
net_route_ip4(struct fib *f, net_addr_ip4 *n)
73
{
74
  net *r;
75

    
76
  while (r = fib_find(f, (net_addr *) n),
77
         !(r && rte_is_valid(r->routes)) && (n->pxlen > 0))
78
  {
79
    n->pxlen--;
80
    ip4_clrbit(&n->prefix, n->pxlen);
81
  }
82

    
83
  return r;
84
}
85

    
86
static inline void *
87
net_route_ip6(struct fib *f, net_addr_ip6 *n)
88
{
89
  net *r;
90

    
91
  while (r = fib_find(f, (net_addr *) n),
92
         !(r && rte_is_valid(r->routes)) && (n->pxlen > 0))
93
  {
94
    n->pxlen--;
95
    ip6_clrbit(&n->prefix, n->pxlen);
96
  }
97

    
98
  return r;
99
}
100

    
101
void *
102
net_route(rtable *tab, const net_addr *n)
103
{
104
  ASSERT(tab->addr_type == n->type);
105

    
106
  net_addr *n0 = alloca(n->length);
107
  net_copy(n0, n);
108

    
109
  switch (n->type)
110
  {
111
  case NET_IP4:
112
  case NET_VPN4:
113
  case NET_ROA4:
114
    return net_route_ip4(&tab->fib, (net_addr_ip4 *) n0);
115

    
116
  case NET_IP6:
117
  case NET_VPN6:
118
  case NET_ROA6:
119
    return net_route_ip6(&tab->fib, (net_addr_ip6 *) n0);
120

    
121
  default:
122
    return NULL;
123
  }
124
}
125

    
126

    
127
static int
128
net_roa_check_ip4(rtable *tab, const net_addr_ip4 *px, u32 asn)
129
{
130
  struct net_addr_roa4 n = NET_ADDR_ROA4(px->prefix, px->pxlen, 0, 0);
131
  struct fib_node *fn;
132
  int anything = 0;
133

    
134
  while (1)
135
  {
136
    for (fn = fib_get_chain(&tab->fib, (net_addr *) &n); fn; fn = fn->next)
137
    {
138
      net_addr_roa4 *roa = (void *) fn->addr;
139
      net *r = fib_node_to_user(&tab->fib, fn);
140

    
141
      if (net_equal_prefix_roa4(roa, &n) && rte_is_valid(r->routes))
142
      {
143
        anything = 1;
144
        if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen))
145
          return ROA_VALID;
146
      }
147
    }
148

    
149
    if (n.pxlen == 0)
150
      break;
151

    
152
    n.pxlen--;
153
    ip4_clrbit(&n.prefix, n.pxlen);
154
  }
155

    
156
  return anything ? ROA_INVALID : ROA_UNKNOWN;
157
}
158

    
159
static int
160
net_roa_check_ip6(rtable *tab, const net_addr_ip6 *px, u32 asn)
161
{
162
  struct net_addr_roa6 n = NET_ADDR_ROA6(px->prefix, px->pxlen, 0, 0);
163
  struct fib_node *fn;
164
  int anything = 0;
165

    
166
  while (1)
167
  {
168
    for (fn = fib_get_chain(&tab->fib, (net_addr *) &n); fn; fn = fn->next)
169
    {
170
      net_addr_roa6 *roa = (void *) fn->addr;
171
      net *r = fib_node_to_user(&tab->fib, fn);
172

    
173
      if (net_equal_prefix_roa6(roa, &n) && rte_is_valid(r->routes))
174
      {
175
        anything = 1;
176
        if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen))
177
          return ROA_VALID;
178
      }
179
    }
180

    
181
    if (n.pxlen == 0)
182
      break;
183

    
184
    n.pxlen--;
185
    ip6_clrbit(&n.prefix, n.pxlen);
186
  }
187

    
188
  return anything ? ROA_INVALID : ROA_UNKNOWN;
189
}
190

    
191
/**
192
 * roa_check - check validity of route origination in a ROA table
193
 * @tab: ROA table
194
 * @n: network prefix to check
195
 * @asn: AS number of network prefix
196
 *
197
 * Implements RFC 6483 route validation for the given network prefix. The
198
 * procedure is to find all candidate ROAs - ROAs whose prefixes cover the given
199
 * network prefix. If there is no candidate ROA, return ROA_UNKNOWN. If there is
200
 * a candidate ROA with matching ASN and maxlen field greater than or equal to
201
 * the given prefix length, return ROA_VALID. Otherwise, return ROA_INVALID. If
202
 * caller cannot determine origin AS, 0 could be used (in that case ROA_VALID
203
 * cannot happen). Table @tab must have type NET_ROA4 or NET_ROA6, network @n
204
 * must have type NET_IP4 or NET_IP6, respectively.
205
 */
206
int
207
net_roa_check(rtable *tab, const net_addr *n, u32 asn)
208
{
209
  if ((tab->addr_type == NET_ROA4) && (n->type == NET_IP4))
210
    return net_roa_check_ip4(tab, (const net_addr_ip4 *) n, asn);
211
  else if ((tab->addr_type == NET_ROA6) && (n->type == NET_IP6))
212
    return net_roa_check_ip6(tab, (const net_addr_ip6 *) n, asn);
213
  else
214
    return ROA_UNKNOWN;        /* Should not happen */
215
}
216

    
217
/**
218
 * rte_find - find a route
219
 * @net: network node
220
 * @src: route source
221
 *
222
 * The rte_find() function returns a route for destination @net
223
 * which is from route source @src.
224
 */
225
rte *
226
rte_find(net *net, struct rte_src *src)
227
{
228
  rte *e = net->routes;
229

    
230
  while (e && e->attrs->src != src)
231
    e = e->next;
232
  return e;
233
}
234

    
235
/**
236
 * rte_get_temp - get a temporary &rte
237
 * @a: attributes to assign to the new route (a &rta; in case it's
238
 * un-cached, rte_update() will create a cached copy automatically)
239
 *
240
 * Create a temporary &rte and bind it with the attributes @a.
241
 * Also set route preference to the default preference set for
242
 * the protocol.
243
 */
244
rte *
245
rte_get_temp(rta *a)
246
{
247
  rte *e = sl_alloc(rte_slab);
248

    
249
  e->attrs = a;
250
  e->flags = 0;
251
  e->pref = 0;
252
  return e;
253
}
254

    
255
rte *
rte_do_cow(rte *r)
{
  rte *copy = sl_alloc(rte_slab);

  /* Bitwise copy of the route, then take our own reference to the
     shared attribute block; the copy starts with clean flags. */
  memcpy(copy, r, sizeof(rte));
  copy->attrs = rta_clone(r->attrs);
  copy->flags = 0;

  return copy;
}
265

    
266
/**
267
 * rte_cow_rta - get a private writable copy of &rte with writable &rta
268
 * @r: a route entry to be copied
269
 * @lp: a linpool from which to allocate &rta
270
 *
271
 * rte_cow_rta() takes a &rte and prepares it and associated &rta for
272
 * modification. There are three possibilities: First, both &rte and &rta are
273
 * private copies, in that case they are returned unchanged.  Second, &rte is
274
 * private copy, but &rta is cached, in that case &rta is duplicated using
275
 * rta_do_cow(). Third, both &rte is shared and &rta is cached, in that case
276
 * both structures are duplicated by rte_do_cow() and rta_do_cow().
277
 *
278
 * Note that in the second case, cached &rta loses one reference, while private
279
 * copy created by rta_do_cow() is a shallow copy sharing indirect data (eattrs,
280
 * nexthops, ...) with it. To work properly, original shared &rta should have
281
 * another reference during the life of created private copy.
282
 *
283
 * Result: a pointer to the new writable &rte with writable &rta.
284
 */
285
rte *
286
rte_cow_rta(rte *r, linpool *lp)
287
{
288
  if (!rta_is_cached(r->attrs))
289
    return r;
290

    
291
  rte *e = rte_cow(r);
292
  rta *a = rta_do_cow(r->attrs, lp);
293
  rta_free(e->attrs);
294
  e->attrs = a;
295
  return e;
296
}
297

    
298
static int                                /* Actually better or at least as good as */
299
rte_better(rte *new, rte *old)
300
{
301
  int (*better)(rte *, rte *);
302

    
303
  if (!rte_is_valid(old))
304
    return 1;
305
  if (!rte_is_valid(new))
306
    return 0;
307

    
308
  if (new->pref > old->pref)
309
    return 1;
310
  if (new->pref < old->pref)
311
    return 0;
312
  if (new->attrs->src->proto->proto != old->attrs->src->proto->proto)
313
    {
314
      /*
315
       *  If the user has configured protocol preferences, so that two different protocols
316
       *  have the same preference, try to break the tie by comparing addresses. Not too
317
       *  useful, but keeps the ordering of routes unambiguous.
318
       */
319
      return new->attrs->src->proto->proto > old->attrs->src->proto->proto;
320
    }
321
  if (better = new->attrs->src->proto->rte_better)
322
    return better(new, old);
323
  return 0;
324
}
325

    
326
static int
327
rte_mergable(rte *pri, rte *sec)
328
{
329
  int (*mergable)(rte *, rte *);
330

    
331
  if (!rte_is_valid(pri) || !rte_is_valid(sec))
332
    return 0;
333

    
334
  if (pri->pref != sec->pref)
335
    return 0;
336

    
337
  if (pri->attrs->src->proto->proto != sec->attrs->src->proto->proto)
338
    return 0;
339

    
340
  if (mergable = pri->attrs->src->proto->rte_mergable)
341
    return mergable(pri, sec);
342

    
343
  return 0;
344
}
345

    
346
/*
 * Log one route trace line: protocol name, direction marker
 * ('>' = inbound, '<' = outbound, set by the callers below),
 * message, prefix and next-hop description.
 */
static void
rte_trace(struct proto *p, rte *e, int dir, char *msg)
{
  log(L_TRACE "%s %c %s %N %s", p->name, dir, msg, e->net->n.addr, rt_format_via(e));
}
351

    
352
static inline void
353
rte_trace_in(uint flag, struct proto *p, rte *e, char *msg)
354
{
355
  if (p->debug & flag)
356
    rte_trace(p, e, '>', msg);
357
}
358

    
359
static inline void
360
rte_trace_out(uint flag, struct proto *p, rte *e, char *msg)
361
{
362
  if (p->debug & flag)
363
    rte_trace(p, e, '<', msg);
364
}
365

    
366
/*
 * Run the export path on @rt0: first the protocol's import_control()
 * hook (which may veto, force-accept, or modify the route), then the
 * channel's export filter.  On accept the (possibly replaced) route is
 * returned and any temporary copy is stored in *rt_free for the caller
 * to free; on reject NULL is returned and the temporary copy is freed
 * here.  With @silent set, no statistics or traces are produced (used
 * when probing whether the old route would have been exported).
 * If @tmpa is non-NULL, *tmpa receives the route's temporary attributes.
 */
static rte *
export_filter_(struct channel *c, rte *rt0, rte **rt_free, ea_list **tmpa, linpool *pool, int silent)
{
  struct proto *p = c->proto;
  struct filter *filter = c->out_filter;
  struct proto_stats *stats = &c->stats;
  ea_list *tmpb = NULL;
  rte *rt;
  int v;

  rt = rt0;
  *rt_free = NULL;

  /* Caller not interested in tmp attrs -- keep them in a local slot,
     the hooks below still need somewhere to store them. */
  if (!tmpa)
    tmpa = &tmpb;

  *tmpa = make_tmp_attrs(rt, pool);

  /* v < 0: protocol veto; v > 0: forced accept; v == 0: ask the filter */
  v = p->import_control ? p->import_control(p, &rt, tmpa, pool) : 0;
  if (v < 0)
    {
      if (silent)
        goto reject;

      stats->exp_updates_rejected++;
      if (v == RIC_REJECT)
        rte_trace_out(D_FILTERS, p, rt, "rejected by protocol");
      goto reject;
    }
  if (v > 0)
    {
      if (!silent)
        rte_trace_out(D_FILTERS, p, rt, "forced accept by protocol");
      goto accept;
    }

  v = filter && ((filter == FILTER_REJECT) ||
                 (f_run(filter, &rt, tmpa, pool, FF_FORCE_TMPATTR) > F_ACCEPT));
  if (v)
    {
      if (silent)
        goto reject;

      stats->exp_updates_filtered++;
      rte_trace_out(D_FILTERS, p, rt, "filtered out");
      goto reject;
    }

 accept:
  /* import_control() or f_run() may have replaced rt by a private copy */
  if (rt != rt0)
    *rt_free = rt;
  return rt;

 reject:
  /* Discard temporary rte */
  if (rt != rt0)
    rte_free(rt);
  return NULL;
}
425

    
426
/* Convenience wrapper for export_filter_() bound to the shared
   rte_update_pool linpool. */
static inline rte *
export_filter(struct channel *c, rte *rt0, rte **rt_free, ea_list **tmpa, int silent)
{
  return export_filter_(c, rt0, rt_free, tmpa, rte_update_pool, silent);
}
431

    
432
/*
 * Final step of route export: apply the channel's export limit, keep
 * the export statistics consistent, trace the change, and finally call
 * the protocol's rt_notify() hook with the new/old routes.  @tmpa, if
 * present, is temporarily spliced in front of the route's extended
 * attributes for the duration of the hook call.  @refeed marks calls
 * made while refeeding the channel (affects limit and counter logic).
 */
static void
do_rt_notify(struct channel *c, net *net, rte *new, rte *old, ea_list *tmpa, int refeed)
{
  struct proto *p = c->proto;
  struct proto_stats *stats = &c->stats;

  /*
   * First, apply export limit.
   *
   * Export route limits has several problems. Because exp_routes
   * counter is reset before refeed, we don't really know whether
   * limit is breached and whether the update is new or not. Therefore
   * the number of really exported routes may exceed the limit
   * temporarily (routes exported before and new routes in refeed).
   *
   * Minor advantage is that if the limit is decreased and refeed is
   * requested, the number of exported routes really decrease.
   *
   * Second problem is that with export limits, we don't know whether
   * old was really exported (it might be blocked by limit). When a
   * withdraw is exported, we announce it even when the previous
   * update was blocked. This is not a big issue, but the same problem
   * is in updating exp_routes counter. Therefore, to be consistent in
   * increases and decreases of exp_routes, we count exported routes
   * regardless of blocking by limits.
   *
   * Similar problem is in handling updates - when a new route is
   * received and blocking is active, the route would be blocked, but
   * when an update for the route will be received later, the update
   * would be propagated (as old != NULL). Therefore, we have to block
   * also non-new updates (contrary to import blocking).
   */

  struct channel_limit *l = &c->out_limit;
  if (l->action && new)
    {
      if ((!old || refeed) && (stats->exp_routes >= l->limit))
        channel_notify_limit(c, l, PLD_OUT, stats->exp_routes);

      if (l->state == PLS_BLOCKED)
        {
          stats->exp_routes++;        /* see note above */
          stats->exp_updates_rejected++;
          rte_trace_out(D_FILTERS, p, new, "rejected [limit]");
          new = NULL;

          /* Blocked update with no old route: nothing to announce */
          if (!old)
            return;
        }
    }

  if (new)
    stats->exp_updates_accepted++;
  else
    stats->exp_withdraws_accepted++;

  /* Hack: We do not decrease exp_routes during refeed, we instead
     reset exp_routes at the start of refeed. */
  if (new)
    stats->exp_routes++;
  if (old && !refeed)
    stats->exp_routes--;

  if (p->debug & D_ROUTES)
    {
      if (new && old)
        rte_trace_out(D_ROUTES, p, new, "replaced");
      else if (new)
        rte_trace_out(D_ROUTES, p, new, "added");
      else if (old)
        rte_trace_out(D_ROUTES, p, old, "removed");
    }
  if (!new)
    p->rt_notify(p, c, net, NULL, old, NULL);
  else if (tmpa)
    {
      /* Temporarily append the route's eattrs behind tmpa, then undo
         the splice so tmpa stays reusable. */
      ea_list *t = tmpa;
      while (t->next)
        t = t->next;
      t->next = new->attrs->eattrs;
      p->rt_notify(p, c, net, new, old, tmpa);
      t->next = NULL;
    }
  else
    p->rt_notify(p, c, net, new, old, new->attrs->eattrs);
}
520

    
521
/*
 * Export a single route change (RA_ANY / RA_OPTIMAL mode) to channel @c:
 * run both new and old routes through the export filter, then hand the
 * result to do_rt_notify().  @refeed suppresses filtering of the old
 * route (see the FIXME below for why).
 */
static void
rt_notify_basic(struct channel *c, net *net, rte *new0, rte *old0, int refeed)
{
  struct proto *p = c->proto;

  rte *new = new0;
  rte *old = old0;
  rte *new_free = NULL;
  rte *old_free = NULL;
  ea_list *tmpa = NULL;

  if (new)
    c->stats.exp_updates_received++;
  else
    c->stats.exp_withdraws_received++;

  /*
   * This is a tricky part - we don't know whether route 'old' was
   * exported to protocol 'p' or was filtered by the export filter.
   * We try to run the export filter to know this to have a correct
   * value in 'old' argument of rte_update (and proper filter value)
   *
   * FIXME - this is broken because 'configure soft' may change
   * filters but keep routes. Refeed is expected to be called after
   * change of the filters and with old == new, therefore we do not
   * even try to run the filter on an old route, This may lead to
   * 'spurious withdraws' but ensure that there are no 'missing
   * withdraws'.
   *
   * This is not completely safe as there is a window between
   * reconfiguration and the end of refeed - if a newly filtered
   * route disappears during this period, proper withdraw is not
   * sent (because old would be also filtered) and the route is
   * not refeeded (because it disappeared before that).
   */

  if (new)
    new = export_filter(c, new, &new_free, &tmpa, 0);

  /* Old route is filtered silently -- no stats, no traces */
  if (old && !refeed)
    old = export_filter(c, old, &old_free, NULL, 1);

  if (!new && !old)
  {
    /*
     * As mentioned above, 'old' value may be incorrect in some race conditions.
     * We generally ignore it with the exception of withdraw to pipe protocol.
     * In that case we rather propagate unfiltered withdraws regardless of
     * export filters to ensure that when a protocol is flushed, its routes are
     * removed from all tables. Possible spurious unfiltered withdraws are not
     * problem here as they are ignored if there is no corresponding route at
     * the other end of the pipe. We directly call rt_notify() hook instead of
     * do_rt_notify() to avoid logging and stat counters.
     */

#ifdef CONFIG_PIPE
    if ((p->proto == &proto_pipe) && !new0 && (p != old0->sender->proto))
      p->rt_notify(p, c, net, NULL, old0, NULL);
#endif

    return;
  }

  do_rt_notify(c, net, new, old, tmpa, refeed);

  /* Discard temporary rte's */
  if (new_free)
    rte_free(new_free);
  if (old_free)
    rte_free(old_free);
}
592

    
593
/*
 * Export logic for RA_ACCEPTED mode: the channel announces the first
 * route on the net's list accepted by its export filter, so on every
 * change we must recompute both the new and the old "first accepted"
 * route and notify only if they differ.  @feed: 0 = regular update,
 * 1 = initial feed, 2 = refeed.
 */
static void
rt_notify_accepted(struct channel *c, net *net, rte *new_changed, rte *old_changed, rte *before_old, int feed)
{
  // struct proto *p = c->proto;

  rte *r;
  rte *new_best = NULL;
  rte *old_best = NULL;
  rte *new_free = NULL;
  rte *old_free = NULL;
  ea_list *tmpa = NULL;

  /* Used to track whether we met old_changed position. If before_old is NULL
     old_changed was the first and we met it implicitly before current best route. */
  int old_meet = old_changed && !before_old;

  /* Note that before_old is either NULL or valid (not rejected) route.
     If old_changed is valid, before_old have to be too. If old changed route
     was not valid, caller must use NULL for both old_changed and before_old. */

  if (new_changed)
    c->stats.exp_updates_received++;
  else
    c->stats.exp_withdraws_received++;

  /* First, find the new_best route - first accepted by filters */
  for (r=net->routes; rte_is_valid(r); r=r->next)
    {
      if (new_best = export_filter(c, r, &new_free, &tmpa, 0))
        break;

      /* Note if we walked around the position of old_changed route */
      if (r == before_old)
        old_meet = 1;
    }

  /* 
   * Second, handle the feed case. That means we do not care for
   * old_best. It is NULL for feed, and the new_best for refeed. 
   * For refeed, there is a hack similar to one in rt_notify_basic()
   * to ensure withdraws in case of changed filters
   */
  if (feed)
    {
      if (feed == 2)        /* refeed */
        old_best = new_best ? new_best :
          (rte_is_valid(net->routes) ? net->routes : NULL);
      else
        old_best = NULL;

      if (!new_best && !old_best)
        return;

      goto found;
    }

  /*
   * Now, we find the old_best route. Generally, it is the same as the
   * new_best, unless new_best is the same as new_changed or
   * old_changed is accepted before new_best.
   *
   * There are four cases:
   *
   * - We would find and accept old_changed before new_best, therefore
   *   old_changed is old_best. In remaining cases we suppose this
   *   is not true.
   *
   * - We found no new_best, therefore there is also no old_best and
   *   we ignore this withdraw.
   *
   * - We found new_best different than new_changed, therefore
   *   old_best is the same as new_best and we ignore this update.
   *
   * - We found new_best the same as new_changed, therefore it cannot
   *   be old_best and we have to continue search for old_best.
   */

  /* First case */
  if (old_meet)
    if (old_best = export_filter(c, old_changed, &old_free, NULL, 1))
      goto found;

  /* Second case */
  if (!new_best)
    return;

  /* Third case, we use r instead of new_best, because export_filter() could change it */
  if (r != new_changed)
    {
      if (new_free)
        rte_free(new_free);
      return;
    }

  /* Fourth case */
  for (r=r->next; rte_is_valid(r); r=r->next)
    {
      if (old_best = export_filter(c, r, &old_free, NULL, 1))
        goto found;

      if (r == before_old)
        if (old_best = export_filter(c, old_changed, &old_free, NULL, 1))
          goto found;
    }

  /* Implicitly, old_best is NULL and new_best is non-NULL */

 found:
  do_rt_notify(c, net, new_best, old_best, tmpa, (feed == 2));

  /* Discard temporary rte's */
  if (new_free)
    rte_free(new_free);
  if (old_free)
    rte_free(old_free);
}
709

    
710

    
711
/* Merge the single next hop of attribute block @a into the accumulated
   next-hop list @nhs, bounded by @max entries. */
static struct nexthop *
nexthop_merge_rta(struct nexthop *nhs, rta *a, linpool *pool, int max)
{
  return nexthop_merge(nhs, &(a->nh), 1, 0, max, pool);
}
716

    
717
/*
 * Build the merged (ECMP) export route for @net: export-filter the best
 * route, then fold the next hops of every other mergable route that
 * also passes the filter into it.  Returns the merged route (possibly a
 * private copy stored in *rt_free), the plain best route when no merge
 * happened, or NULL when the best route is invalid or filtered out.
 */
rte *
rt_export_merged(struct channel *c, net *net, rte **rt_free, ea_list **tmpa, linpool *pool, int silent)
{
  // struct proto *p = c->proto;
  struct nexthop *nhs = NULL;
  rte *best0, *best, *rt0, *rt, *tmp;

  best0 = net->routes;
  *rt_free = NULL;

  if (!rte_is_valid(best0))
    return NULL;

  best = export_filter_(c, best0, rt_free, tmpa, pool, silent);

  /* Unreachable best route: nothing to merge next hops into */
  if (!best || !rte_is_reachable(best))
    return best;

  for (rt0 = best0->next; rt0; rt0 = rt0->next)
  {
    if (!rte_mergable(best0, rt0))
      continue;

    /* Secondary routes are filtered silently; their tmp attrs are unused */
    rt = export_filter_(c, rt0, &tmp, NULL, pool, 1);

    if (!rt)
      continue;

    if (rte_is_reachable(rt))
      nhs = nexthop_merge_rta(nhs, rt->attrs, pool, c->merge_limit);

    if (tmp)
      rte_free(tmp);
  }

  if (nhs)
  {
    /* Finally merge in the best route's own next hop */
    nhs = nexthop_merge_rta(nhs, best->attrs, pool, c->merge_limit);

    if (nhs->next)
    {
      /* More than one next hop survived -- attach the merged list to a
         writable copy of the best route's attributes. */
      best = rte_cow_rta(best, pool);
      nexthop_link(best->attrs, nhs);
    }
  }

  if (best != best0)
    *rt_free = best;

  return best;
}
768

    
769

    
770
/*
 * Export logic for RA_MERGED mode: when a route change may affect the
 * merged (ECMP) route of @net, rebuild it via rt_export_merged() and
 * announce the new/old merged routes through do_rt_notify().
 * @refeed marks a channel refeed (old route is not re-filtered then).
 */
static void
rt_notify_merged(struct channel *c, net *net, rte *new_changed, rte *old_changed,
                 rte *new_best, rte*old_best, int refeed)
{
  // struct proto *p = c->proto;

  rte *new_best_free = NULL;
  rte *old_best_free = NULL;
  rte *new_changed_free = NULL;
  rte *old_changed_free = NULL;
  ea_list *tmpa = NULL;

  /* We assume that all rte arguments are either NULL or rte_is_valid() */

  /* This check should be done by the caller */
  if (!new_best && !old_best)
    return;

  /* Check whether the change is relevant to the merged route */
  if ((new_best == old_best) && !refeed)
  {
    new_changed = rte_mergable(new_best, new_changed) ?
      export_filter(c, new_changed, &new_changed_free, NULL, 1) : NULL;

    old_changed = rte_mergable(old_best, old_changed) ?
      export_filter(c, old_changed, &old_changed_free, NULL, 1) : NULL;

    /* Neither the new nor the old changed route takes part in the merge */
    if (!new_changed && !old_changed)
      return;
  }

  if (new_best)
    c->stats.exp_updates_received++;
  else
    c->stats.exp_withdraws_received++;

  /* Prepare new merged route */
  if (new_best)
    new_best = rt_export_merged(c, net, &new_best_free, &tmpa, rte_update_pool, 0);

  /* Prepare old merged route (without proper merged next hops) */
  /* There are some issues with running filter on old route - see rt_notify_basic() */
  if (old_best && !refeed)
    old_best = export_filter(c, old_best, &old_best_free, NULL, 1);

  if (new_best || old_best)
    do_rt_notify(c, net, new_best, old_best, tmpa, refeed);

  /* Discard temporary rte's */
  if (new_best_free)
    rte_free(new_best_free);
  if (old_best_free)
    rte_free(old_best_free);
  if (new_changed_free)
    rte_free(new_changed_free);
  if (old_changed_free)
    rte_free(old_changed_free);
}
828

    
829

    
830
/**
831
 * rte_announce - announce a routing table change
832
 * @tab: table the route has been added to
833
 * @type: type of route announcement (RA_OPTIMAL or RA_ANY)
834
 * @net: network in question
835
 * @new: the new route to be announced
836
 * @old: the previous route for the same network
837
 * @new_best: the new best route for the same network
838
 * @old_best: the previous best route for the same network
839
 * @before_old: The previous route before @old for the same network.
840
 *                 If @before_old is NULL @old was the first.
841
 *
842
 * This function gets a routing table update and announces it
843
 * to all protocols that acccepts given type of route announcement
844
 * and are connected to the same table by their announcement hooks.
845
 *
846
 * Route announcement of type %RA_OPTIMAL si generated when optimal
847
 * route (in routing table @tab) changes. In that case @old stores the
848
 * old optimal route.
849
 *
850
 * Route announcement of type %RA_ANY si generated when any route (in
851
 * routing table @tab) changes In that case @old stores the old route
852
 * from the same protocol.
853
 *
854
 * For each appropriate protocol, we first call its import_control()
855
 * hook which performs basic checks on the route (each protocol has a
856
 * right to veto or force accept of the route before any filter is
857
 * asked) and adds default values of attributes specific to the new
858
 * protocol (metrics, tags etc.).  Then it consults the protocol's
859
 * export filter and if it accepts the route, the rt_notify() hook of
860
 * the protocol gets called.
861
 */
862
static void
rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old,
             rte *new_best, rte *old_best, rte *before_old)
{
  /* Normalize arguments: invalid routes are treated as absent */
  if (!rte_is_valid(new))
    new = NULL;

  if (!rte_is_valid(old))
    old = before_old = NULL;

  if (!rte_is_valid(new_best))
    new_best = NULL;

  if (!rte_is_valid(old_best))
    old_best = NULL;

  /* Nothing changed -- nothing to announce */
  if (!old && !new)
    return;

  if ((type == RA_OPTIMAL) && tab->hostcache)
    rt_notify_hostcache(tab, net);

  struct channel *c; node *n;
  WALK_LIST2(c, n, tab->channels, table_node)
    {
      if (c->export_state == ES_DOWN)
        continue;

      /* Dispatch to the notifier matching the channel's announce mode;
         the whole if/else-if chain below binds to the inner if. */
      if (c->ra_mode == type)
        if (type == RA_ACCEPTED)
          rt_notify_accepted(c, net, new, old, before_old, 0);
        else if (type == RA_MERGED)
          rt_notify_merged(c, net, new, old, new_best, old_best, 0);
        else
          rt_notify_basic(c, net, new, old, 0);
    }
}
899

    
900
static inline int
901
rte_validate(rte *e)
902
{
903
  int c;
904
  net *n = e->net;
905

    
906
  // (n->n.pxlen > BITS_PER_IP_ADDRESS) || !ip_is_prefix(n->n.prefix,n->n.pxlen))
907
  if (!net_validate(n->n.addr))
908
  {
909
    log(L_WARN "Ignoring bogus prefix %N received via %s",
910
        n->n.addr, e->sender->proto->name);
911
    return 0;
912
  }
913

    
914
  c = net_classify(n->n.addr);
915
  if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
916
  {
917
    log(L_WARN "Ignoring bogus route %N received via %s",
918
        n->n.addr, e->sender->proto->name);
919
    return 0;
920
  }
921

    
922
  if ((e->attrs->dest == RTD_UNICAST) && !nexthop_is_sorted(&(e->attrs->nh)))
923
    {
924
      log(L_WARN "Ignoring unsorted multipath route %N received via %s",
925
          n->n.addr, e->sender->proto->name);
926
      return 0;
927
    }
928

    
929
  return 1;
930
}
931

    
932
/**
933
 * rte_free - delete a &rte
934
 * @e: &rte to be deleted
935
 *
936
 * rte_free() deletes the given &rte from the routing table it's linked to.
937
 */
938
void
rte_free(rte *e)
{
  /* Only drop the rta reference when it is a cached (shared) block;
     uncached attrs are owned elsewhere (contrast rte_free_quick()). */
  if (rta_is_cached(e->attrs))
    rta_free(e->attrs);
  sl_free(rte_slab, e);
}
945

    
946
/* Fast-path rte_free(): caller guarantees e->attrs is cached, so the
   rta_is_cached() test is skipped. */
static inline void
rte_free_quick(rte *e)
{
  rta_free(e->attrs);
  sl_free(rte_slab, e);
}
952

    
953
static int
954
rte_same(rte *x, rte *y)
955
{
956
  return
957
    x->attrs == y->attrs &&
958
    x->flags == y->flags &&
959
    x->pflags == y->pflags &&
960
    x->pref == y->pref &&
961
    (!x->attrs->src->proto->rte_same || x->attrs->src->proto->rte_same(x, y));
962
}
963

    
964
/* A route is "ok" when it exists and is not in the filtered state */
static inline int rte_is_ok(rte *e) { return e && !rte_is_filtered(e); }
965

    
966
/*
 * rte_recalculate - replace the route from @src in @net by @new (or withdraw
 * it if @new is NULL), update import statistics and channel limits, re-elect
 * the best route and announce all resulting changes.
 */
static void
rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src)
{
  struct proto *p = c->proto;
  struct rtable *table = c->table;
  struct proto_stats *stats = &c->stats;
  static struct tbf rl_pipe = TBF_DEFAULT_LOG_LIMITS;
  rte *before_old = NULL;
  rte *old_best = net->routes;
  rte *old = NULL;
  rte **k;

  k = &net->routes;                        /* Find and remove original route from the same protocol */
  while (old = *k)
    {
      if (old->attrs->src == src)
        {
          /* If there is the same route in the routing table but from
           * a different sender, then there are two paths from the
           * source protocol to this routing table through transparent
           * pipes, which is not allowed.
           *
           * We log that and ignore the route. If it is withdraw, we
           * ignore it completely (there might be 'spurious withdraws',
           * see FIXME in do_rte_announce())
           */
          if (old->sender->proto != p)
            {
              if (new)
                {
                  log_rl(&rl_pipe, L_ERR "Pipe collision detected when sending %N to table %s",
                      net->n.addr, table->name);
                  rte_free_quick(new);
                }
              return;
            }

          if (new && rte_same(old, new))
            {
              /* No changes, ignore the new route */

              if (!rte_is_filtered(new))
                {
                  stats->imp_updates_ignored++;
                  rte_trace_in(D_ROUTES, p, new, "ignored");
                }

              rte_free_quick(new);
              return;
            }
          *k = old->next;
          break;
        }
      k = &old->next;
      before_old = old;
    }

  /* before_old is meaningful only when old was actually found and unlinked */
  if (!old)
    before_old = NULL;

  if (!old && !new)
    {
      stats->imp_withdraws_ignored++;
      return;
    }

  int new_ok = rte_is_ok(new);
  int old_ok = rte_is_ok(old);

  /* Receive limit counts all routes, imported and filtered alike */
  struct channel_limit *l = &c->rx_limit;
  if (l->action && !old && new)
    {
      u32 all_routes = stats->imp_routes + stats->filt_routes;

      if (all_routes >= l->limit)
        channel_notify_limit(c, l, PLD_RX, all_routes);

      if (l->state == PLS_BLOCKED)
        {
          /* In receive limit the situation is simple, old is NULL so
             we just free new and exit like nothing happened */

          stats->imp_updates_ignored++;
          rte_trace_in(D_FILTERS, p, new, "ignored [limit]");
          rte_free_quick(new);
          return;
        }
    }

  l = &c->in_limit;
  if (l->action && !old_ok && new_ok)
    {
      if (stats->imp_routes >= l->limit)
        channel_notify_limit(c, l, PLD_IN, stats->imp_routes);

      if (l->state == PLS_BLOCKED)
        {
          /* In import limit the situation is more complicated. We
             shouldn't just drop the route, we should handle it like
             it was filtered. We also have to continue the route
             processing if old or new is non-NULL, but we should exit
             if both are NULL as this case is probably assumed to be
             already handled. */

          stats->imp_updates_ignored++;
          rte_trace_in(D_FILTERS, p, new, "ignored [limit]");

          if (c->in_keep_filtered)
            new->flags |= REF_FILTERED;
          else
            { rte_free_quick(new); new = NULL; }

          /* Note that old && !new could be possible when
             c->in_keep_filtered changed in the recent past. */

          if (!old && !new)
            return;

          new_ok = 0;
          goto skip_stats1;
        }
    }

  if (new_ok)
    stats->imp_updates_accepted++;
  else if (old_ok)
    stats->imp_withdraws_accepted++;
  else
    stats->imp_withdraws_ignored++;

 skip_stats1:

  /* Keep the imported/filtered route counters in sync with the list change */
  if (new)
    rte_is_filtered(new) ? stats->filt_routes++ : stats->imp_routes++;
  if (old)
    rte_is_filtered(old) ? stats->filt_routes-- : stats->imp_routes--;

  if (table->config->sorted)
    {
      /* If routes are sorted, just insert new route to appropriate position */
      if (new)
        {
          if (before_old && !rte_better(new, before_old))
            k = &before_old->next;
          else
            k = &net->routes;

          for (; *k; k=&(*k)->next)
            if (rte_better(new, *k))
              break;

          new->next = *k;
          *k = new;
        }
    }
  else
    {
      /* If routes are not sorted, find the best route and move it on
         the first position. There are several optimized cases. */

      if (src->proto->rte_recalculate && src->proto->rte_recalculate(table, net, new, old, old_best))
        goto do_recalculate;

      if (new && rte_better(new, old_best))
        {
          /* The first case - the new route is cleary optimal,
             we link it at the first position */

          new->next = net->routes;
          net->routes = new;
        }
      else if (old == old_best)
        {
          /* The second case - the old best route disappeared, we add the
             new route (if we have any) to the list (we don't care about
             position) and then we elect the new optimal route and relink
             that route at the first position and announce it. New optimal
             route might be NULL if there is no more routes */

        do_recalculate:
          /* Add the new route to the list */
          if (new)
            {
              new->next = net->routes;
              net->routes = new;
            }

          /* Find a new optimal route (if there is any) */
          if (net->routes)
            {
              rte **bp = &net->routes;
              for (k=&(*bp)->next; *k; k=&(*k)->next)
                if (rte_better(*k, *bp))
                  bp = k;

              /* And relink it */
              rte *best = *bp;
              *bp = best->next;
              best->next = net->routes;
              net->routes = best;
            }
        }
      else if (new)
        {
          /* The third case - the new route is not better than the old
             best route (therefore old_best != NULL) and the old best
             route was not removed (therefore old_best == net->routes).
             We just link the new route after the old best route. */

          ASSERT(net->routes != NULL);
          new->next = net->routes->next;
          net->routes->next = new;
        }
      /* The fourth (empty) case - suboptimal route was removed, nothing to do */
    }

  if (new)
    new->lastmod = now;

  /* Log the route change */
  if (p->debug & D_ROUTES)
    {
      if (new_ok)
        rte_trace(p, new, '>', new == net->routes ? "added [best]" : "added");
      else if (old_ok)
        {
          if (old != old_best)
            rte_trace(p, old, '>', "removed");
          else if (rte_is_ok(net->routes))
            rte_trace(p, old, '>', "removed [replaced]");
          else
            rte_trace(p, old, '>', "removed [sole]");
        }
    }

  /* Propagate the route change */
  rte_announce(table, RA_ANY, net, new, old, NULL, NULL, NULL);
  if (net->routes != old_best)
    rte_announce(table, RA_OPTIMAL, net, net->routes, old_best, NULL, NULL, NULL);
  if (table->config->sorted)
    rte_announce(table, RA_ACCEPTED, net, new, old, NULL, NULL, before_old);
  rte_announce(table, RA_MERGED, net, new, old, net->routes, old_best, NULL);

  /* Schedule garbage collection when the net became empty and enough
     operations / time elapsed since the last GC */
  if (!net->routes &&
      (table->gc_counter++ >= table->config->gc_max_ops) &&
      (table->gc_time + table->config->gc_min_time <= now))
    rt_schedule_prune(table);

  if (old_ok && p->rte_remove)
    p->rte_remove(net, old);
  if (new_ok && p->rte_insert)
    p->rte_insert(net, new);

  if (old)
    rte_free_quick(old);
}
1222

    
1223
static int rte_update_nest_cnt;                /* Nesting counter to allow recursive updates */

/* Enter a (possibly nested) route update section; paired with rte_update_unlock() */
static inline void
rte_update_lock(void)
{
  rte_update_nest_cnt++;
}
1230

    
1231
/* Leave a route update section; the temporary linpool is flushed only
   when the outermost section closes */
static inline void
rte_update_unlock(void)
{
  if (!--rte_update_nest_cnt)
    lp_flush(rte_update_pool);
}
1237

    
1238
static inline void
1239
rte_hide_dummy_routes(net *net, rte **dummy)
1240
{
1241
  if (net->routes && net->routes->attrs->source == RTS_DUMMY)
1242
  {
1243
    *dummy = net->routes;
1244
    net->routes = (*dummy)->next;
1245
  }
1246
}
1247

    
1248
static inline void
1249
rte_unhide_dummy_routes(net *net, rte **dummy)
1250
{
1251
  if (*dummy)
1252
  {
1253
    (*dummy)->next = net->routes;
1254
    net->routes = *dummy;
1255
  }
1256
}
1257

    
1258
/**
 * rte_update - enter a new update to a routing table
 * @table: table to be updated
 * @c: channel doing the update
 * @net: network node
 * @p: protocol submitting the update
 * @src: protocol originating the update
 * @new: a &rte representing the new route or %NULL for route removal.
 *
 * This function is called by the routing protocols whenever they discover
 * a new route or wish to update/remove an existing route. The right announcement
 * sequence is to build route attributes first (either un-cached with @aflags set
 * to zero or a cached one using rta_lookup(); in this case please note that
 * you need to increase the use count of the attributes yourself by calling
 * rta_clone()), call rte_get_temp() to obtain a temporary &rte, fill in all
 * the appropriate data and finally submit the new &rte by calling rte_update().
 *
 * @src specifies the protocol that originally created the route and the meaning
 * of protocol-dependent data of @new. If @new is not %NULL, @src have to be the
 * same value as @new->attrs->proto. @p specifies the protocol that called
 * rte_update(). In most cases it is the same protocol as @src. rte_update()
 * stores @p in @new->sender;
 *
 * When rte_update() gets any route, it automatically validates it (checks,
 * whether the network and next hop address are valid IP addresses and also
 * whether a normal routing protocol doesn't try to smuggle a host or link
 * scope route to the table), converts all protocol dependent attributes stored
 * in the &rte to temporary extended attributes, consults import filters of the
 * protocol to see if the route should be accepted and/or its attributes modified,
 * stores the temporary attributes back to the &rte.
 *
 * Now, having a "public" version of the route, we
 * automatically find any old route defined by the protocol @src
 * for network @n, replace it by the new one (or removing it if @new is %NULL),
 * recalculate the optimal route for this destination and finally broadcast
 * the change (if any) to all routing protocols by calling rte_announce().
 *
 * All memory used for attribute lists and other temporary allocations is taken
 * from a special linear pool @rte_update_pool and freed when rte_update()
 * finishes.
 */

void
rte_update2(struct channel *c, const net_addr *n, rte *new, struct rte_src *src)
{
  struct proto *p = c->proto;
  struct proto_stats *stats = &c->stats;
  struct filter *filter = c->in_filter;
  ea_list *tmpa = NULL;
  rte *dummy = NULL;
  net *nn;

  ASSERT(c->channel_state == CS_UP);

  rte_update_lock();
  if (new)
    {
      /* An update — create the net node if it does not exist yet */
      nn = net_get(c->table, n);

      new->net = nn;
      new->sender = c;

      if (!new->pref)
        new->pref = c->preference;

      stats->imp_updates_received++;
      if (!rte_validate(new))
        {
          rte_trace_in(D_FILTERS, p, new, "invalid");
          stats->imp_updates_invalid++;
          goto drop;
        }

      if (filter == FILTER_REJECT)
        {
          stats->imp_updates_filtered++;
          rte_trace_in(D_FILTERS, p, new, "filtered out");

          if (! c->in_keep_filtered)
            goto drop;

          /* new is a private copy, i could modify it */
          new->flags |= REF_FILTERED;
        }
      else
        {
          tmpa = make_tmp_attrs(new, rte_update_pool);
          if (filter && (filter != FILTER_REJECT))
            {
              ea_list *old_tmpa = tmpa;
              int fr = f_run(filter, &new, &tmpa, rte_update_pool, 0);
              if (fr > F_ACCEPT)
                {
                  stats->imp_updates_filtered++;
                  rte_trace_in(D_FILTERS, p, new, "filtered out");

                  if (! c->in_keep_filtered)
                    goto drop;

                  new->flags |= REF_FILTERED;
                }
              /* The filter may have modified temporary attributes;
                 let the originating protocol store them back */
              if (tmpa != old_tmpa && src->proto->store_tmp_attrs)
                src->proto->store_tmp_attrs(new, tmpa);
            }
        }
      if (!rta_is_cached(new->attrs)) /* Need to copy attributes */
        new->attrs = rta_lookup(new->attrs);
      new->flags |= REF_COW;
    }
  else
    {
      /* A withdraw — nothing to do if the net or the source is unknown */
      stats->imp_withdraws_received++;

      if (!(nn = net_find(c->table, n)) || !src)
        {
          stats->imp_withdraws_ignored++;
          rte_update_unlock();
          return;
        }
    }

 recalc:
  rte_hide_dummy_routes(nn, &dummy);
  rte_recalculate(c, nn, new, src);
  rte_unhide_dummy_routes(nn, &dummy);
  rte_update_unlock();
  return;

 drop:
  /* Dropped updates are converted to withdraws (new = NULL) so that any
     previous route from @src is still removed by rte_recalculate() */
  rte_free(new);
  new = NULL;
  goto recalc;
}
1391

    
1392
/* Independent call to rte_announce(), used from next hop
   recalculation, outside of rte_update(). new must be non-NULL */
static inline void
rte_announce_i(rtable *tab, unsigned type, net *net, rte *new, rte *old,
               rte *new_best, rte *old_best)
{
  /* Wrap in an update section so temporary allocations made during the
     announcement are flushed from rte_update_pool afterwards */
  rte_update_lock();
  rte_announce(tab, type, net, new, old, new_best, old_best, NULL);
  rte_update_unlock();
}
1402

    
1403
static inline void
rte_discard(rte *old)        /* Non-filtered route deletion, used during garbage collection */
{
  /* Recalculating with new == NULL withdraws the route from its original source */
  rte_update_lock();
  rte_recalculate(old->sender, old->net, NULL, old->attrs->src);
  rte_update_unlock();
}
1410

    
1411
/* Check rtable for best route to given net whether it would be exported to p */
int
rt_examine(rtable *t, net_addr *a, struct proto *p, struct filter *filter)
{
  net *n = net_find(t, a);
  rte *rt = n ? n->routes : NULL;

  if (!rte_is_valid(rt))
    return 0;

  rte_update_lock();

  /* Rest is stripped down export_filter() */
  ea_list *tmpa = make_tmp_attrs(rt, rte_update_pool);
  int v = p->import_control ? p->import_control(p, &rt, &tmpa, rte_update_pool) : 0;
  if (v == RIC_PROCESS)
    v = (f_run(filter, &rt, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) <= F_ACCEPT);

   /* Discard temporary rte — the hooks may have replaced rt with a private copy */
  if (rt != n->routes)
    rte_free(rt);

  rte_update_unlock();

  return v > 0;
}
1437

    
1438

    
1439
/**
 * rt_refresh_begin - start a refresh cycle
 * @t: related routing table
 * @c related channel
 *
 * This function starts a refresh cycle for given routing table and announce
 * hook. The refresh cycle is a sequence where the protocol sends all its valid
 * routes to the routing table (by rte_update()). After that, all protocol
 * routes (more precisely routes with @c as @sender) not sent during the
 * refresh cycle but still in the table from the past are pruned. This is
 * implemented by marking all related routes as stale by REF_STALE flag in
 * rt_refresh_begin(), then marking all related stale routes with REF_DISCARD
 * flag in rt_refresh_end() and then removing such routes in the prune loop.
 */
void
rt_refresh_begin(rtable *t, struct channel *c)
{
  /* Mark every route sent by this channel as stale; fresh updates clear it */
  FIB_WALK(&t->fib, net, n)
    {
      rte *e;
      for (e = n->routes; e; e = e->next)
        if (e->sender == c)
          e->flags |= REF_STALE;
    }
  FIB_WALK_END;
}
1465

    
1466
/**
 * rt_refresh_end - end a refresh cycle
 * @t: related routing table
 * @c: related channel
 *
 * This function ends a refresh cycle for given routing table and announce
 * hook. See rt_refresh_begin() for description of refresh cycles.
 */
void
rt_refresh_end(rtable *t, struct channel *c)
{
  int prune = 0;

  /* Routes still marked stale were not re-announced during the cycle;
     flag them for removal by the prune loop */
  FIB_WALK(&t->fib, net, n)
    {
      rte *e;
      for (e = n->routes; e; e = e->next)
        if ((e->sender == c) && (e->flags & REF_STALE))
          {
            e->flags |= REF_DISCARD;
            prune = 1;
          }
    }
  FIB_WALK_END;

  if (prune)
    rt_schedule_prune(t);
}
1494

    
1495

    
1496
/**
 * rte_dump - dump a route
 * @e: &rte to be dumped
 *
 * This functions dumps contents of a &rte to debug output.
 */
void
rte_dump(rte *e)
{
  net *n = e->net;
  debug("%-1N ", n->n.addr);
  /* Net flags, protocol flags, preference and age (seconds since last change) */
  debug("KF=%02x PF=%02x pref=%d lm=%d ", n->n.flags, e->pflags, e->pref, now-e->lastmod);
  rta_dump(e->attrs);
  /* Let the originating protocol dump its private attributes, if it can */
  if (e->attrs->src->proto->proto->dump_attrs)
    e->attrs->src->proto->proto->dump_attrs(e);
  debug("\n");
}
1513

    
1514
/**
 * rt_dump - dump a routing table
 * @t: routing table to be dumped
 *
 * This function dumps contents of a given routing table to debug output.
 */
void
rt_dump(rtable *t)
{
  debug("Dump of routing table <%s>\n", t->name);
#ifdef DEBUGGING
  fib_check(&t->fib);
#endif
  /* Dump every route of every net in the FIB */
  FIB_WALK(&t->fib, net, n)
    {
      rte *e;
      for(e=n->routes; e; e=e->next)
        rte_dump(e);
    }
  FIB_WALK_END;
  debug("\n");
}
1536

    
1537
/**
 * rt_dump_all - dump all routing tables
 *
 * This function dumps contents of all routing tables to debug output.
 */
void
rt_dump_all(void)
{
  rtable *t;

  WALK_LIST(t, routing_tables)
    rt_dump(t);
}
1550

    
1551
static inline void
1552
rt_schedule_hcu(rtable *tab)
1553
{
1554
  if (tab->hcu_scheduled)
1555
    return;
1556

    
1557
  tab->hcu_scheduled = 1;
1558
  ev_schedule(tab->rt_event);
1559
}
1560

    
1561
/* Schedule a next hop update. nhu_state is a 2-bit machine: bit 0 means
   "requested", bit 1 means "running"; a request during a run (2->3) makes
   the update restart after it finishes */
static inline void
rt_schedule_nhu(rtable *tab)
{
  if (tab->nhu_state == 0)
    ev_schedule(tab->rt_event);

  /* state change 0->1, 2->3 */
  tab->nhu_state |= 1;
}
1570

    
1571
/* Schedule table pruning. prune_state follows the same 2-bit scheme as
   nhu_state: bit 0 = requested, bit 1 = running */
void
rt_schedule_prune(rtable *tab)
{
  if (tab->prune_state == 0)
    ev_schedule(tab->rt_event);

  /* state change 0->1, 2->3 */
  tab->prune_state |= 1;
}
1580

    
1581

    
1582
/* Event hook serving all deferred per-table work: host cache updates,
   next hop updates and pruning. The table is locked for the duration so
   it cannot be deleted under our hands */
static void
rt_event(void *ptr)
{
  rtable *tab = ptr;

  rt_lock_table(tab);

  if (tab->hcu_scheduled)
    rt_update_hostcache(tab);

  if (tab->nhu_state)
    rt_next_hop_update(tab);

  if (tab->prune_state)
    rt_prune_table(tab);

  rt_unlock_table(tab);
}
1600

    
1601
void
1602
rt_setup(pool *p, rtable *t, char *name, struct rtable_config *cf)
1603
{
1604
  bzero(t, sizeof(*t));
1605
  t->name = name;
1606
  t->config = cf;
1607
  t->addr_type = cf ? cf->addr_type : NET_IP4;
1608
  fib_init(&t->fib, p, t->addr_type, sizeof(net), OFFSETOF(net, n), 0, NULL);
1609
  init_list(&t->channels);
1610

    
1611
  if (cf)
1612
    {
1613
      t->rt_event = ev_new(p);
1614
      t->rt_event->hook = rt_event;
1615
      t->rt_event->data = t;
1616
      t->gc_time = now;
1617
    }
1618
}
1619

    
1620
/**
 * rt_init - initialize routing tables
 *
 * This function is called during BIRD startup. It initializes the
 * routing table module.
 */
void
rt_init(void)
{
  rta_init();
  rt_table_pool = rp_new(&root_pool, "Routing tables");
  /* Linear pool for temporary data used during route updates */
  rte_update_pool = lp_new(rt_table_pool, 4080);
  /* Slab for fixed-size &rte allocations */
  rte_slab = sl_new(rt_table_pool, sizeof(rte));
  init_list(&routing_tables);
}
1635

    
1636

    
1637
/**
 * rt_prune_table - prune a routing table
 *
 * The prune loop scans routing tables and removes routes belonging to flushing
 * protocols, discarded routes and also stale network entries. It is called from
 * rt_event(). The event is rescheduled if the current iteration do not finish
 * the table. The pruning is directed by the prune state (@prune_state),
 * specifying whether the prune cycle is scheduled or running, and there
 * is also a persistent pruning iterator (@prune_fit).
 *
 * The prune loop is used also for channel flushing. For this purpose, the
 * channels to flush are marked before the iteration and notified after the
 * iteration.
 */
static void
rt_prune_table(rtable *tab)
{
  struct fib_iterator *fit = &tab->prune_fit;
  int limit = 512;                /* Bounds work done per event invocation */

  struct channel *c;
  node *n, *x;

  DBG("Pruning route table %s\n", tab->name);
#ifdef DEBUGGING
  fib_check(&tab->fib);
#endif

  if (tab->prune_state == 0)
    return;

  if (tab->prune_state == 1)
  {
    /* Mark channels to flush */
    WALK_LIST2(c, n, tab->channels, table_node)
      if (c->channel_state == CS_FLUSHING)
        c->flush_active = 1;

    FIB_ITERATE_INIT(fit, &tab->fib);
    tab->prune_state = 2;
  }

again:
  FIB_ITERATE_START(&tab->fib, fit, net, n)
    {
      rte *e;

    rescan:
      /* rte_discard() rewrites the route list, so restart the scan after each removal */
      for (e=n->routes; e; e=e->next)
        if (e->sender->flush_active || (e->flags & REF_DISCARD))
          {
            if (limit <= 0)
              {
                /* Out of budget — park the iterator and continue in the next event */
                FIB_ITERATE_PUT(fit);
                ev_schedule(tab->rt_event);
                return;
              }

            rte_discard(e);
            limit--;

            goto rescan;
          }

      if (!n->routes)                /* Orphaned FIB entry */
        {
          FIB_ITERATE_PUT(fit);
          fib_delete(&tab->fib, n);
          goto again;
        }
    }
  FIB_ITERATE_END;

#ifdef DEBUGGING
  fib_check(&tab->fib);
#endif

  tab->gc_counter = 0;
  tab->gc_time = now;

  /* state change 2->0, 3->1 */
  tab->prune_state &= 1;

  if (tab->prune_state > 0)
    ev_schedule(tab->rt_event);

  /* FIXME: This should be handled in a better way */
  rt_prune_sources();

  /* Close flushed channels */
  WALK_LIST2_DELSAFE(c, n, x, tab->channels, table_node)
    if (c->flush_active)
      {
        c->flush_active = 0;
        channel_set_state(c, CS_DOWN);
      }

  return;
}
1736

    
1737
/* Pre-configuration hook: set up the table list and create the default
   IPv4 and IPv6 master tables */
void
rt_preconfig(struct config *c)
{
  init_list(&c->tables);

  rt_new_table(cf_get_symbol("master4"), NET_IP4);
  rt_new_table(cf_get_symbol("master6"), NET_IP6);
}
1745

    
1746

    
1747
/*
 * Some functions for handling internal next hop updates
 * triggered by rt_schedule_nhu().
 */
1751

    
1752
/* Decide whether @a's next hop data diverged from its hostentry and
   needs to be recomputed */
static inline int
rta_next_hop_outdated(rta *a)
{
  struct hostentry *he = a->hostentry;

  /* Routes with no hostentry do not depend on recursive resolution */
  if (!he)
    return 0;

  /* Hostentry lost its source route — the route should be unreachable now */
  if (!he->src)
    return a->dest != RTD_UNREACHABLE;

  return (a->dest != he->dest) || (a->igp_metric != he->igp_metric) ||
    !nexthop_same(&(a->nh), he->nh);
}
1766

    
1767
/* Copy resolved next hop data from hostentry @he into attributes @a */
static inline void
rta_apply_hostentry(rta *a, struct hostentry *he)
{
  a->hostentry = he;

  /* Use the hostentry's gateway if set, otherwise fall back to he->link */
  a->nh.gw = ipa_nonzero(he->nh->gw) ? he->nh->gw : he->link;
  a->nh.iface = he->nh->iface;
  a->nh.weight = he->nh->weight;
  a->nh.next = he->nh->next;

  a->dest = he->dest;
  a->igp_metric = he->igp_metric;
}
1780

    
1781
/* Build a copy of @old with next hop data refreshed from its hostentry;
   the copy carries a freshly cached rta */
static inline rte *
rt_next_hop_update_rte(rtable *tab UNUSED, rte *old)
{
  rta a;
  memcpy(&a, old->attrs, rta_size(old->attrs));
  rta_apply_hostentry(&a, old->attrs->hostentry);
  a.aflags = 0;                /* The modified copy is no longer a cached rta */

  rte *e = sl_alloc(rte_slab);
  memcpy(e, old, sizeof(rte));
  e->attrs = rta_lookup(&a);

  return e;
}
1795

    
1796
/*
 * rt_next_hop_update_net - refresh all routes of @n whose next hop data is
 * outdated, re-elect and announce the best route. Returns the number of
 * routes updated (used by the caller as a work budget).
 */
static inline int
rt_next_hop_update_net(rtable *tab, net *n)
{
  rte **k, *e, *new, *old_best, **new_best;
  int count = 0;
  int free_old_best = 0;

  old_best = n->routes;
  if (!old_best)
    return 0;

  /* Replace each outdated route in place by an updated copy */
  for (k = &n->routes; e = *k; k = &e->next)
    if (rta_next_hop_outdated(e->attrs))
      {
        new = rt_next_hop_update_rte(tab, e);
        *k = new;

        rte_announce_i(tab, RA_ANY, n, new, e, NULL, NULL);
        rte_trace_in(D_ROUTES, new->sender->proto, new, "updated");

        /* Call a pre-comparison hook */
        /* Not really an efficient way to compute this */
        if (e->attrs->src->proto->rte_recalculate)
          e->attrs->src->proto->rte_recalculate(tab, n, new, e, NULL);

        if (e != old_best)
          rte_free_quick(e);
        else /* Freeing of the old best rte is postponed */
          free_old_best = 1;

        e = new;
        count++;
      }

  if (!count)
    return 0;

  /* Find the new best route */
  new_best = NULL;
  for (k = &n->routes; e = *k; k = &e->next)
    {
      if (!new_best || rte_better(e, *new_best))
        new_best = k;
    }

  /* Relink the new best route to the first position */
  new = *new_best;
  if (new != n->routes)
    {
      *new_best = new->next;
      new->next = n->routes;
      n->routes = new;
    }

  /* Announce the new best route */
  if (new != old_best)
    {
      rte_announce_i(tab, RA_OPTIMAL, n, new, old_best, NULL, NULL);
      rte_trace_in(D_ROUTES, new->sender->proto, new, "updated [best]");
    }

  /* FIXME: Better announcement of merged routes */
  rte_announce_i(tab, RA_MERGED, n, new, old_best, new, old_best);

  /* old_best stayed referenced by announcements above; safe to free now */
  if (free_old_best)
    rte_free_quick(old_best);

  return count;
}
1865

    
1866
/*
 * rt_next_hop_update - walk @tab and refresh routes with outdated next hops.
 * Work is bounded per invocation (max_feed); an unfinished walk parks the
 * persistent iterator and reschedules the table event.
 */
static void
rt_next_hop_update(rtable *tab)
{
  struct fib_iterator *fit = &tab->nhu_fit;
  int max_feed = 32;

  if (tab->nhu_state == 0)
    return;

  if (tab->nhu_state == 1)
    {
      FIB_ITERATE_INIT(fit, &tab->fib);
      tab->nhu_state = 2;
    }

  FIB_ITERATE_START(&tab->fib, fit, net, n)
    {
      if (max_feed <= 0)
        {
          FIB_ITERATE_PUT(fit);
          ev_schedule(tab->rt_event);
          return;
        }
      max_feed -= rt_next_hop_update_net(tab, n);
    }
  FIB_ITERATE_END;

  /* state change 2->0, 3->1 */
  tab->nhu_state &= 1;

  /* A request arrived while running — restart the update */
  if (tab->nhu_state > 0)
    ev_schedule(tab->rt_event);
}
1899

    
1900

    
1901
/*
 * rt_new_table - define a new table in the configuration being parsed.
 * Returns the existing config when the symbol already names the default
 * master table of that address family (allows 'redefining' it).
 */
struct rtable_config *
rt_new_table(struct symbol *s, uint addr_type)
{
  /* Hack that allows to 'redefine' the master table */
  if ((s->class == SYM_TABLE) &&
      (s->def == new_config->def_tables[addr_type]) &&
      ((addr_type == NET_IP4) || (addr_type == NET_IP6)))
    return s->def;

  struct rtable_config *c = cfg_allocz(sizeof(struct rtable_config));

  cf_define_symbol(s, SYM_TABLE, c);
  c->name = s->name;
  c->addr_type = addr_type;
  c->gc_max_ops = 1000;        /* Default GC thresholds, see rte_recalculate() */
  c->gc_min_time = 5;

  add_tail(&new_config->tables, &c->n);

  /* First table of each type is kept as default */
  if (! new_config->def_tables[addr_type])
    new_config->def_tables[addr_type] = c;

  return c;
}
1926

    
1927
/**
 * rt_lock_table - lock a routing table
 * @r: routing table to be locked
 *
 * Lock a routing table, because it's in use by a protocol,
 * preventing it from being freed when it gets undefined in a new
 * configuration.
 */
void
rt_lock_table(rtable *r)
{
  /* Simple reference count; release is done in rt_unlock_table() */
  r->use_count++;
}
1940

    
1941
/**
 * rt_unlock_table - unlock a routing table
 * @r: routing table to be unlocked
 *
 * Unlock a routing table formerly locked by rt_lock_table(),
 * that is decrease its use count and delete it if it's scheduled
 * for deletion by configuration changes.
 */
void
rt_unlock_table(rtable *r)
{
  if (!--r->use_count && r->deleted)
    {
      /* Last reference gone and the table was marked for deletion —
         tear it down and release the obstacle on the old config */
      struct config *conf = r->deleted;
      DBG("Deleting routing table %s\n", r->name);
      r->config->table = NULL;
      if (r->hostcache)
        rt_free_hostcache(r);
      rem_node(&r->n);
      fib_free(&r->fib);
      rfree(r->rt_event);
      mb_free(r);
      config_del_obstacle(conf);
    }
}
1966

    
1967
/**
 * rt_commit - commit new routing table configuration
 * @new: new configuration
 * @old: original configuration or %NULL if it's boot time config
 *
 * Scan differences between @old and @new configuration and modify
 * the routing tables according to these changes. If @new defines a
 * previously unknown table, create it, if it omits a table existing
 * in @old, schedule it for deletion (it gets deleted when all protocols
 * disconnect from it by calling rt_unlock_table()), if it exists
 * in both configurations, leave it unchanged.
 */
void
rt_commit(struct config *new, struct config *old)
{
  struct rtable_config *o, *r;

  DBG("rt_commit:\n");
  if (old)
    {
      WALK_LIST(o, old->tables)
        {
          rtable *ot = o->table;
          if (!ot->deleted)
            {
              struct symbol *sym = cf_find_symbol(new, o->name);
              if (sym && sym->class == SYM_TABLE && !new->shutdown)
                {
                  /* Table survives — relink it to the new config */
                  DBG("\t%s: same\n", o->name);
                  r = sym->def;
                  r->table = ot;
                  ot->name = r->name;
                  ot->config = r;
                  if (o->sorted != r->sorted)
                    log(L_WARN "Reconfiguration of rtable sorted flag not implemented");
                }
              else
                {
                  /* Table vanished from config — mark for deletion; the
                     lock/unlock pair triggers teardown once unused */
                  DBG("\t%s: deleted\n", o->name);
                  ot->deleted = old;
                  config_add_obstacle(old);
                  rt_lock_table(ot);
                  rt_unlock_table(ot);
                }
            }
        }
    }

  /* Create tables that are new in this configuration */
  WALK_LIST(r, new->tables)
    if (!r->table)
      {
        rtable *t = mb_alloc(rt_table_pool, sizeof(struct rtable));
        DBG("\t%s: created\n", r->name);
        rt_setup(rt_table_pool, t, r->name, r);
        add_tail(&routing_tables, &t->n);
        r->table = t;
      }
  DBG("\tdone\n");
}
2026

    
2027
/* Feed a single route @e for network @n to channel @c, dispatching on the
 * channel's route announcement mode.  When the channel is refeeding, the
 * route is also passed in the 'old' position so the export side can treat
 * it as a replacement rather than a fresh announcement. */
static inline void
do_feed_channel(struct channel *c, net *n, rte *e)
{
  rte_update_lock();
  if (c->ra_mode == RA_ACCEPTED)
    rt_notify_accepted(c, n, e, NULL, NULL, c->refeeding ? 2 : 1);
  else if (c->ra_mode == RA_MERGED)
    rt_notify_merged(c, n, NULL, NULL, e, c->refeeding ? e : NULL, c->refeeding);
  else /* RA_BASIC */
    rt_notify_basic(c, n, e, c->refeeding ? e : NULL, c->refeeding);
  rte_update_unlock();
}
2039

    
2040
/**
 * rt_feed_channel - advertise all routes to a channel
 * @c: channel to be fed
 *
 * This function performs one pass of advertisement of routes to a channel that
 * is in the ES_FEEDING state. It is called by the protocol code as long as it
 * has something to do. (We avoid transferring all the routes in single pass in
 * order not to monopolize CPU time.)
 *
 * Returns 0 when the pass was interrupted by the per-pass budget (call again
 * later), 1 when the whole table has been fed (or feeding was cut short
 * because the channel left the ES_FEEDING state).
 */
int
rt_feed_channel(struct channel *c)
{
  struct fib_iterator *fit = &c->feed_fit;
  int max_feed = 256;	/* Per-pass budget of routes to announce */

  ASSERT(c->export_state == ES_FEEDING);

  /* First pass: start iterating the table from the beginning */
  if (!c->feed_active)
    {
      FIB_ITERATE_INIT(fit, &c->table->fib);
      c->feed_active = 1;
    }

  FIB_ITERATE_START(&c->table->fib, fit, net, n)
    {
      rte *e = n->routes;
      if (max_feed <= 0)
	{
	  /* Budget exhausted; park the iterator and resume in the next call */
	  FIB_ITERATE_PUT(fit);
	  return 0;
	}

      /* FIXME: perhaps we should change feed for RA_ACCEPTED to not use 'new' */

      /* Single-route modes: announce only the best route of the network */
      if ((c->ra_mode == RA_OPTIMAL) ||
	  (c->ra_mode == RA_ACCEPTED) ||
	  (c->ra_mode == RA_MERGED))
	if (rte_is_valid(e))
	  {
	    /* The protocol may have gone down in the meantime */
	    if (c->export_state != ES_FEEDING)
	      goto done;

	    do_feed_channel(c, n, e);
	    max_feed--;
	  }

      /* RA_ANY: announce every valid route of the network */
      if (c->ra_mode == RA_ANY)
	for(e = n->routes; e; e = e->next)
	  {
	    /* The protocol may have gone down in the meantime */
	    if (c->export_state != ES_FEEDING)
	      goto done;

	    if (!rte_is_valid(e))
	      continue;

	    do_feed_channel(c, n, e);
	    max_feed--;
	  }
    }
  FIB_ITERATE_END;

done:
  c->feed_active = 0;
  return 1;
}
2107

    
2108
/**
 * rt_feed_channel_abort - abort protocol feeding
 * @c: channel
 *
 * This function is called by the protocol code when the protocol stops or
 * ceases to exist during the feeding.
 */
void
rt_feed_channel_abort(struct channel *c)
{
  if (c->feed_active)
    {
      /* Unlink the iterator so the FIB does not keep a dangling reference */
      fit_get(&c->table->fib, &c->feed_fit);
      c->feed_active = 0;
    }
}
2125

    
2126
/* Fold a pointer into an unsigned hash value by XOR-mixing shifted copies
 * of its bits, so pointers close in memory still spread across buckets. */
static inline unsigned
ptr_hash(void *ptr)
{
  uintptr_t v = (uintptr_t) ptr;
  unsigned mixed = (unsigned) (v ^ (v << 8) ^ (v >> 16));
  return mixed;
}
2132

    
2133
static inline u32
2134
hc_hash(ip_addr a, rtable *dep)
2135
{
2136
  return ipa_hash(a) ^ ptr_hash(dep);
2137
}
2138

    
2139
static inline void
2140
hc_insert(struct hostcache *hc, struct hostentry *he)
2141
{
2142
  uint k = he->hash_key >> hc->hash_shift;
2143
  he->next = hc->hash_table[k];
2144
  hc->hash_table[k] = he;
2145
}
2146

    
2147
static inline void
2148
hc_remove(struct hostcache *hc, struct hostentry *he)
2149
{
2150
  struct hostentry **hep;
2151
  uint k = he->hash_key >> hc->hash_shift;
2152

    
2153
  for (hep = &hc->hash_table[k]; *hep != he; hep = &(*hep)->next);
2154
  *hep = he->next;
2155
}
2156

    
2157
/* Host cache hash table sizing.  A table of order N has 1 << N buckets;
 * the *_MARK macros are textually pasted arithmetic suffixes used in
 * hc_alloc_table() to compute grow/shrink thresholds. */
#define HC_DEF_ORDER 10			/* Initial hash table order */
#define HC_HI_MARK *4			/* Grow when items exceed size*4 */
#define HC_HI_STEP 2			/* Grow by two orders (4x buckets) */
#define HC_HI_ORDER 16                        /* Must be at most 16 */
#define HC_LO_MARK /5			/* Shrink when items drop below size/5 */
#define HC_LO_STEP 2			/* Shrink by two orders (4x fewer buckets) */
#define HC_LO_ORDER 10			/* Never shrink below this order */
2164

    
2165
/* Allocate a zeroed hash table of the given order and set the derived
 * fields (shift and resize thresholds).  Does not free any old table. */
static void
hc_alloc_table(struct hostcache *hc, unsigned order)
{
  uint hsize = 1 << order;
  hc->hash_order = order;
  /* Hash keys are 32-bit; bucket index is taken from the top @order bits */
  hc->hash_shift = 32 - order;
  /* At the order limits, disable further growth/shrinkage respectively */
  hc->hash_max = (order >= HC_HI_ORDER) ? ~0U : (hsize HC_HI_MARK);
  hc->hash_min = (order <= HC_LO_ORDER) ?  0U : (hsize HC_LO_MARK);

  hc->hash_table = mb_allocz(rt_table_pool, hsize * sizeof(struct hostentry *));
}
2176

    
2177
static void
2178
hc_resize(struct hostcache *hc, unsigned new_order)
2179
{
2180
  struct hostentry **old_table = hc->hash_table;
2181
  struct hostentry *he, *hen;
2182
  uint old_size = 1 << hc->hash_order;
2183
  uint i;
2184

    
2185
  hc_alloc_table(hc, new_order);
2186
  for (i = 0; i < old_size; i++)
2187
    for (he = old_table[i]; he != NULL; he=hen)
2188
      {
2189
        hen = he->next;
2190
        hc_insert(hc, he);
2191
      }
2192
  mb_free(old_table);
2193
}
2194

    
2195
/* Allocate and register a new host cache entry for address @a (link-local
 * address @ll) depended on by table @dep, with precomputed hash key @k.
 * The entry starts unresolved (src == NULL) and unused (uc == 0). */
static struct hostentry *
hc_new_hostentry(struct hostcache *hc, ip_addr a, ip_addr ll, rtable *dep, unsigned k)
{
  struct hostentry *he = sl_alloc(hc->slab);

  he->addr = a;
  he->link = ll;
  he->tab = dep;
  he->hash_key = k;
  he->uc = 0;		/* No users yet */
  he->src = NULL;	/* Not resolved yet */

  add_tail(&hc->hostentries, &he->ln);
  hc_insert(hc, he);

  /* Grow the hash table when it gets too crowded */
  hc->hash_items++;
  if (hc->hash_items > hc->hash_max)
    hc_resize(hc, hc->hash_order + HC_HI_STEP);

  return he;
}
2216

    
2217
/* Unregister and free host cache entry @he, dropping its rta reference. */
static void
hc_delete_hostentry(struct hostcache *hc, struct hostentry *he)
{
  rta_free(he->src);

  rem_node(&he->ln);
  hc_remove(hc, he);
  sl_free(hc->slab, he);

  /* Shrink the hash table when it gets too sparse */
  hc->hash_items--;
  if (hc->hash_items < hc->hash_min)
    hc_resize(hc, hc->hash_order - HC_LO_STEP);
}
2230

    
2231
/* Create the host cache of table @tab.  Called lazily from
 * rt_get_hostentry() on the first recursive next hop lookup. */
static void
rt_init_hostcache(rtable *tab)
{
  struct hostcache *hc = mb_allocz(rt_table_pool, sizeof(struct hostcache));
  init_list(&hc->hostentries);

  hc->hash_items = 0;
  hc_alloc_table(hc, HC_DEF_ORDER);
  hc->slab = sl_new(rt_table_pool, sizeof(struct hostentry));

  /* The linpool backs the trie of prefixes covering cached hosts;
   * it is flushed and the trie rebuilt on every host cache update. */
  hc->lp = lp_new(rt_table_pool, 1008);
  hc->trie = f_new_trie(hc->lp, sizeof(struct f_trie_node));

  tab->hostcache = hc;
}
2246

    
2247
/* Free the host cache of table @tab together with all its entries. */
static void
rt_free_hostcache(rtable *tab)
{
  struct hostcache *hc = tab->hostcache;

  /* Release rta references held by the entries; an entry still in use
   * (uc != 0) at this point is a reference leak — logged, not fatal. */
  node *n;
  WALK_LIST(n, hc->hostentries)
    {
      struct hostentry *he = SKIP_BACK(struct hostentry, ln, n);
      rta_free(he->src);

      if (he->uc)
	log(L_ERR "Hostcache is not empty in table %s", tab->name);
    }

  /* The entries themselves live in the slab, so freeing it frees them all */
  rfree(hc->slab);
  rfree(hc->lp);
  mb_free(hc->hash_table);
  mb_free(hc);
}
2267

    
2268
static void
2269
rt_notify_hostcache(rtable *tab, net *net)
2270
{
2271
  if (tab->hcu_scheduled)
2272
    return;
2273

    
2274
  if (trie_match_net(tab->hostcache->trie, net->n.addr))
2275
    rt_schedule_hcu(tab);
2276
}
2277

    
2278
static int
2279
if_local_addr(ip_addr a, struct iface *i)
2280
{
2281
  struct ifa *b;
2282

    
2283
  WALK_LIST(b, i->addrs)
2284
    if (ipa_equal(a, b->ip))
2285
      return 1;
2286

    
2287
  return 0;
2288
}
2289

    
2290
/* Return the IGP metric of route @rt, used for recursive next hop
 * resolution (e.g. BGP comparing metrics to resolved next hops). */
static u32 
rt_get_igp_metric(rte *rt)
{
  eattr *ea = ea_find(rt->attrs->eattrs, EA_GEN_IGP_METRIC);

  /* An explicit generic IGP metric attribute takes precedence */
  if (ea)
    return ea->u.data;

  rta *a = rt->attrs;

  /* Otherwise fall back to protocol-specific metrics */
#ifdef CONFIG_OSPF
  if ((a->source == RTS_OSPF) ||
      (a->source == RTS_OSPF_IA) ||
      (a->source == RTS_OSPF_EXT1))
    return rt->u.ospf.metric1;
#endif

#ifdef CONFIG_RIP
  if (a->source == RTS_RIP)
    return rt->u.rip.metric;
#endif

  /* Directly connected networks cost nothing */
  if (a->source == RTS_DEVICE)
    return 0;

  return IGP_METRIC_UNKNOWN;
}
2317

    
2318
/* Re-resolve host entry @he against table @tab by longest-prefix-match
 * on the host address.  Returns nonzero when the resolved attributes
 * changed, in which case dependent tables need a next hop update. */
static int
rt_update_hostentry(rtable *tab, struct hostentry *he)
{
  rta *old_src = he->src;
  int pxlen = 0;

  /* Reset the hostentry */
  he->src = NULL;
  he->dest = RTD_UNREACHABLE;
  he->igp_metric = 0;

  net_addr he_addr;
  net_fill_ip_host(&he_addr, he->addr);
  net *n = net_route(tab, &he_addr);
  if (n)
    {
      rte *e = n->routes;
      rta *a = e->attrs;
      pxlen = n->n.addr->pxlen;

      if (a->hostentry)
	{
	  /* Recursive route should not depend on another recursive route */
	  log(L_WARN "Next hop address %I resolvable through recursive route for %N",
	      he->addr, n->n.addr);
	  goto done;
	}

      if ((a->dest == RTD_UNICAST) && ipa_zero(a->nh.gw) && !a->next)
	{ /* We have singlepath device route */
	  if (if_local_addr(he->addr, a->nh.iface))
	    {
	      /* The host address is a local address, this is not valid */
	      log(L_WARN "Next hop address %I is a local address of iface %s",
		  he->addr, a->nh.iface->name);
	      goto done;
	    }

	  /* The host is directly reachable, use link as a gateway */
	  he->nh = NULL;
	  he->dest = RTD_UNICAST;
	}
      else
	{
	  /* The host is reachable through some route entry */
	  he->nh = (&a->nh);
	  he->dest = a->dest;
	}

      he->src = rta_clone(a);
      he->igp_metric = rt_get_igp_metric(e);
    }

 done:
  /* Record the matched prefix range in the trie so that later changes to
   * covering prefixes trigger a cache update (see rt_notify_hostcache()).
   * On resolution failure, pxlen stays 0 and the whole range is covered. */
  trie_add_prefix(tab->hostcache->trie, &he_addr, pxlen, he_addr.pxlen);

  rta_free(old_src);
  return old_src != he->src;
}
2378

    
2379
/* Process a scheduled host cache update of table @tab: drop unused
 * entries, re-resolve the rest and rebuild the covering-prefix trie. */
static void
rt_update_hostcache(rtable *tab)
{
  struct hostcache *hc = tab->hostcache;
  struct hostentry *he;
  node *n, *x;

  /* Reset the trie; it is rebuilt from surviving entries by
   * rt_update_hostentry() below */
  lp_flush(hc->lp);
  hc->trie = f_new_trie(hc->lp, sizeof(struct f_trie_node));

  WALK_LIST_DELSAFE(n, x, hc->hostentries)
    {
      he = SKIP_BACK(struct hostentry, ln, n);
      /* Garbage-collect entries nobody references any more */
      if (!he->uc)
	{
	  hc_delete_hostentry(hc, he);
	  continue;
	}

      /* Re-resolve; on change, schedule next hop update of the dependent table */
      if (rt_update_hostentry(tab, he))
	rt_schedule_nhu(he->tab);
    }

  tab->hcu_scheduled = 0;
}
2405

    
2406
/* Find or create the host cache entry for address @a in table @tab,
 * keyed also by the dependent table @dep (the table whose routes use
 * this address as a recursive next hop). */
static struct hostentry *
rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep)
{
  struct hostentry *he;

  /* The host cache is created lazily on first use */
  if (!tab->hostcache)
    rt_init_hostcache(tab);

  u32 k = hc_hash(a, dep);
  struct hostcache *hc = tab->hostcache;
  for (he = hc->hash_table[k >> hc->hash_shift]; he != NULL; he = he->next)
    if (ipa_equal(he->addr, a) && (he->tab == dep))
      return he;

  /* Not cached yet - create and resolve a fresh entry */
  he = hc_new_hostentry(hc, a, ll, dep, k);
  rt_update_hostentry(tab, he);
  return he;
}
2424

    
2425
/* Bind rta @a (used by routes in table @dep) to the host entry resolving
 * gateway @gw in table @tab; @ll is the link-local address, defaulting
 * to @gw when zero. */
void
rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr gw, ip_addr ll)
{
  rta_apply_hostentry(a, rt_get_hostentry(tab, gw, ipa_zero(ll) ? gw : ll, dep));
}
2430

    
2431

    
2432
/*
2433
 *  CLI commands
2434
 */
2435

    
2436
/* Format the destination type of route @e for CLI output.
 * NOTE: returns a pointer to a static buffer — the result is only valid
 * until the next call and the function is not reentrant. */
static byte *
rt_format_via(rte *e)
{
  rta *a = e->attrs;

  /* Max text length w/o IP addr and interface name is 16 */
  static byte via[IPA_MAX_TEXT_LENGTH+sizeof(a->nh.iface->name)+16];

  switch (a->dest)
    {
    case RTD_UNICAST:	bsprintf(via, "unicast"); break;
    case RTD_BLACKHOLE:	bsprintf(via, "blackhole"); break;
    case RTD_UNREACHABLE:	bsprintf(via, "unreachable"); break;
    case RTD_PROHIBIT:	bsprintf(via, "prohibited"); break;
    default:		bsprintf(via, "???");
    }
  return via;
}
2454

    
2455
/* Print one route @e (with network prefix text @ia, already-built temporary
 * attributes @tmpa) to CLI connection @c according to show options @d. */
static void
rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, ea_list *tmpa)
{
  byte from[IPA_MAX_TEXT_LENGTH+8];
  byte tm[TM_DATETIME_BUFFER_SIZE], info[256];
  rta *a = e->attrs;
  int primary = (e->net->routes == e);	/* First route in the list is the best one */
  int sync_error = (e->net->n.flags & KRF_SYNC_ERROR);
  void (*get_route_info)(struct rte *, byte *buf, struct ea_list *attrs);
  struct nexthop *nh;

  tm_format_datetime(tm, &config->tf_route, e->lastmod);
  /* Show " from X" only when the route came from elsewhere than its gateway */
  if (ipa_nonzero(a->from) && !ipa_equal(a->from, a->nh.gw))
    bsprintf(from, " from %I", a->from);
  else
    from[0] = 0;

  get_route_info = a->src->proto->proto->get_route_info;
  if (get_route_info || d->verbose)
    {
      /* Need to normalize the extended attributes: merge @tmpa with the
       * stored eattrs into a sorted list on the stack (alloca) */
      ea_list *t = tmpa;
      t = ea_append(t, a->eattrs);
      tmpa = alloca(ea_scan(t));
      ea_merge(t, tmpa);
      ea_sort(tmpa);
    }
  if (get_route_info)
    get_route_info(e, info, tmpa);
  else
    bsprintf(info, " (%d)", e->pref);
  cli_printf(c, -1007, "%-18s %s [%s %s%s]%s%s", ia, rt_format_via(e), a->src->proto->name,
	     tm, from, primary ? (sync_error ? " !" : " *") : "", info);
  /* One "via" line per next hop; multipath next hops get a weight suffix */
  for (nh = &(a->nh); nh; nh = nh->next)
    {
      char ls[MPLS_MAX_LABEL_STACK*8 + 5]; char *lsp = ls;
      if (nh->labels)
	{
	  /* Render the MPLS label stack as " mpls l0/l1/..." */
	  lsp += bsprintf(lsp, " mpls %d", nh->label[0]);
	  for (int i=1;i<nh->labels; i++)
	    lsp += bsprintf(lsp, "/%d", nh->label[i]);
	  *lsp++ = '\0';
	}
      if (a->nh.next)
	cli_printf(c, -1007, "\tvia %I%s on %s weight %d", nh->gw, (nh->labels ? ls : ""), nh->iface->name, nh->weight + 1);
      else
	cli_printf(c, -1007, "\tvia %I%s on %s", nh->gw, (nh->labels ? ls : ""), nh->iface->name);
    }
  if (d->verbose)
    rta_show(c, a, tmpa);

}
2507

    
2508
/* Show the routes of one network @n on CLI connection @c, applying the
 * export-mode simulation and display filters described by @d.  Counters
 * in @d (rt/net/show) are updated even for routes that are not printed. */
static void
rt_show_net(struct cli *c, net *n, struct rt_show_data *d)
{
  rte *e, *ee;
  byte ia[NET_MAX_TEXT_LENGTH+1];
  struct ea_list *tmpa;
  struct channel *ec = d->export_channel;
  int first = 1;	/* Nonzero until the first matching route is counted */
  int pass = 0;		/* Set once a single-route export mode consumed the net */

  bsnprintf(ia, sizeof(ia), "%N", n->n.addr);

  for (e = n->routes; e; e = e->next)
    {
      if (rte_is_filtered(e) != d->filtered)
	continue;

      d->rt_counter++;
      d->net_counter += first;
      first = 0;

      /* Keep counting remaining routes, but print at most one per net
       * once a single-route export mode was handled */
      if (pass)
	continue;

      ee = e;	/* Remember the original; @e may be replaced by a filtered copy */
      rte_update_lock();		/* We use the update buffer for filtering */
      tmpa = make_tmp_attrs(e, rte_update_pool);

      /* Special case for merged export */
      if ((d->export_mode == RSEM_EXPORT) && (ec->ra_mode == RA_MERGED))
	{
	  rte *rt_free;
	  e = rt_export_merged(ec, n, &rt_free, &tmpa, rte_update_pool, 1);
	  pass = 1;

	  if (!e)
	  { e = ee; goto skip; }
	}
      else if (d->export_mode)
	{
	  /* Simulate the export decision the protocol would make */
	  struct proto *ep = d->export_protocol;
	  int ic = ep->import_control ? ep->import_control(ep, &e, &tmpa, rte_update_pool) : 0;

	  if (ec->ra_mode == RA_OPTIMAL || ec->ra_mode == RA_MERGED)
	    pass = 1;

	  if (ic < 0)
	    goto skip;

	  if (d->export_mode > RSEM_PREEXPORT)
	    {
	      /*
	       * FIXME - This shows what should be exported according to current
	       * filters, but not what was really exported. 'configure soft'
	       * command may change the export filter and do not update routes.
	       */
	      int do_export = (ic > 0) ||
		(f_run(ec->out_filter, &e, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) <= F_ACCEPT);

	      /* RSEM_EXPORT wants accepted routes, RSEM_NOEXPORT the rejected ones */
	      if (do_export != (d->export_mode == RSEM_EXPORT))
		goto skip;

	      if ((d->export_mode == RSEM_EXPORT) && (ec->ra_mode == RA_ACCEPTED))
		pass = 1;
	    }
	}

      if (d->show_protocol && (d->show_protocol != e->attrs->src->proto))
	goto skip;

      /* Apply the user-supplied display filter */
      if (f_run(d->filter, &e, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) > F_ACCEPT)
	goto skip;

      d->show_counter++;
      if (d->stats < 2)
	rt_show_rte(c, ia, e, d, tmpa);
      ia[0] = 0;	/* Print the prefix only before the first shown route */

    skip:
      /* Free a temporary copy created by filtering/merging, if any */
      if (e != ee)
      {
	rte_free(e);
	e = ee;
      }
      rte_update_unlock();

      if (d->primary_only)
	break;
    }
}
2598

    
2599
static struct channel *
2600
rt_show_export_channel(struct rt_show_data *d)
2601
{
2602
  if (! d->export_protocol->rt_notify)
2603
    return NULL;
2604

    
2605
  return proto_find_channel_by_table(d->export_protocol, d->table);
2606
}
2607

    
2608
/* CLI continuation hook for 'show route' over a whole table: dump up to
 * @max networks per invocation, then yield back to the CLI event loop. */
static void
rt_show_cont(struct cli *c)
{
  struct rt_show_data *d = c->rover;
#ifdef DEBUGGING
  unsigned max = 4;
#else
  unsigned max = 64;
#endif
  struct fib *fib = &d->table->fib;
  struct fib_iterator *it = &d->fit;

  if (d->export_mode)
    {
      /* Ensure we have current export channel */
      d->export_channel = rt_show_export_channel(d);
      if (!d->export_channel || (d->export_channel->export_state == ES_DOWN))
	{
	  cli_printf(c, 8005, "Channel is down");
	  goto done;
	}
    }

  FIB_ITERATE_START(fib, it, net, n)
    {
      if (!max--)
	{
	  /* Budget for this continuation used up; park the iterator */
	  FIB_ITERATE_PUT(it);
	  return;
	}
      rt_show_net(c, n, d);
    }
  FIB_ITERATE_END;
  if (d->stats)
    cli_printf(c, 14, "%d of %d routes for %d networks", d->show_counter, d->rt_counter, d->net_counter);
  else
    cli_printf(c, 0, "");
done:
  /* Dump finished (or failed) — detach the continuation hooks */
  c->cont = c->cleanup = NULL;
}
2648

    
2649
/* CLI cleanup hook: called when the session ends while a 'show route'
 * table dump is still in progress. */
static void
rt_show_cleanup(struct cli *c)
{
  struct rt_show_data *d = c->rover;

  /* Unlink the iterator so the FIB does not keep a dangling reference */
  fit_get(&d->table->fib, &d->fit);
}
2657

    
2658
static inline rtable *
2659
rt_show_get_table(struct proto *p)
2660
{
2661
  /* FIXME: Use a better way to handle multi-channel protocols */
2662

    
2663
  if (p->main_channel)
2664
    return p->main_channel->table;
2665

    
2666
  if (!EMPTY_LIST(p->channels))
2667
    return ((struct channel *) HEAD(p->channels))->table;
2668

    
2669
  return NULL;
2670
}
2671

    
2672
/* Entry point of the 'show route' CLI command described by @d.  A whole-table
 * dump is scheduled asynchronously via rt_show_cont(); a single-address query
 * is answered synchronously. */
void
rt_show(struct rt_show_data *d)
{
  net *n;

  /* Default is either a master table or a table related to a respective protocol */
  if (!d->table && d->export_protocol) d->table = rt_show_get_table(d->export_protocol);
  if (!d->table && d->show_protocol) d->table = rt_show_get_table(d->show_protocol);
  if (!d->table) d->table = config->def_tables[NET_IP4]->table; /* FIXME: iterate through all tables ? */

  /* Filtered routes are neither exported nor have sensible ordering */
  if (d->filtered && (d->export_mode || d->primary_only))
    cli_msg(0, "");

  if (!d->addr)
    {
      /* Whole table: install continuation/cleanup hooks and let the CLI
       * event loop drive rt_show_cont() until the dump is complete */
      FIB_ITERATE_INIT(&d->fit, &d->table->fib);
      this_cli->cont = rt_show_cont;
      this_cli->cleanup = rt_show_cleanup;
      this_cli->rover = d;
    }
  else
    {
      if (d->export_mode)
	{
	  /* Find channel associated with the export protocol */
	  d->export_channel = rt_show_export_channel(d);
	  if (!d->export_channel || (d->export_channel->export_state == ES_DOWN))
	    {
	      cli_msg(8005, "Channel is down");
	      return;
	    }
	}

      if (d->table->addr_type != d->addr->type)
      {
	cli_msg(8001, "Incompatible type of prefix/ip with table");
	return;
      }

      /* 'show route for <addr>' does longest match, plain lookup is exact */
      if (d->show_for)
	n = net_route(d->table, d->addr);
      else
	n = net_find(d->table, d->addr);

      if (n)
	rt_show_net(this_cli, n, d);

      if (d->rt_counter)
	cli_msg(0, "");
      else
	cli_msg(8001, "Network not in table");
    }
}
2726

    
2727
/*
2728
 *  Documentation for functions declared inline in route.h
2729
 */
2730
#if 0
2731

2732
/**
2733
 * net_find - find a network entry
2734
 * @tab: a routing table
2735
 * @addr: address of the network
2736
 *
2737
 * net_find() looks up the given network in routing table @tab and
2738
 * returns a pointer to its &net entry or %NULL if no such network
2739
 * exists.
2740
 */
2741
static inline net *net_find(rtable *tab, net_addr *addr)
2742
{ DUMMY; }
2743

2744
/**
2745
 * net_get - obtain a network entry
2746
 * @tab: a routing table
2747
 * @addr: address of the network
2748
 *
2749
 * net_get() looks up the given network in routing table @tab and
2750
 * returns a pointer to its &net entry. If no such entry exists, it's
2751
 * created.
2752
 */
2753
static inline net *net_get(rtable *tab, net_addr *addr)
2754
{ DUMMY; }
2755

2756
/**
2757
 * rte_cow - copy a route for writing
2758
 * @r: a route entry to be copied
2759
 *
2760
 * rte_cow() takes a &rte and prepares it for modification. The exact action
2761
 * taken depends on the flags of the &rte -- if it's a temporary entry, it's
2762
 * just returned unchanged, else a new temporary entry with the same contents
2763
 * is created.
2764
 *
2765
 * The primary use of this function is inside the filter machinery -- when
2766
 * a filter wants to modify &rte contents (to change the preference or to
2767
 * attach another set of attributes), it must ensure that the &rte is not
2768
 * shared with anyone else (and especially that it isn't stored in any routing
2769
 * table).
2770
 *
2771
 * Result: a pointer to the new writable &rte.
2772
 */
2773
static inline rte * rte_cow(rte *r)
2774
{ DUMMY; }
2775

2776
#endif