Statistics
| Branch: | Revision:

iof-bird-daemon / nest / rt-table.c @ d47c3d64

History | View | Annotate | Download (68.6 KB)

1
/*
2
 *        BIRD -- Routing Tables
3
 *
4
 *        (c) 1998--2000 Martin Mares <mj@ucw.cz>
5
 *
6
 *        Can be freely distributed and used under the terms of the GNU GPL.
7
 */
8

    
9
/**
10
 * DOC: Routing tables
11
 *
12
 * Routing tables are probably the most important structures BIRD uses. They
13
 * hold all the information about known networks, the associated routes and
14
 * their attributes.
15
 *
16
 * There are multiple routing tables (a primary one together with any
17
 * number of secondary ones if requested by the configuration). Each table
18
 * is basically a FIB containing entries describing the individual
19
 * destination networks. For each network (represented by structure &net),
20
 * there is a one-way linked list of route entries (&rte), the first entry
21
 * on the list being the best one (i.e., the one we currently use
22
 * for routing), the order of the other ones is undetermined.
23
 *
24
 * The &rte contains information specific to the route (preference, protocol
25
 * metrics, time of last modification etc.) and a pointer to a &rta structure
26
 * (see the route attribute module for a precise explanation) holding the
27
 * remaining route attributes which are expected to be shared by multiple
28
 * routes in order to conserve memory.
29
 */
30

    
31
#undef LOCAL_DEBUG
32

    
33
#include "nest/bird.h"
34
#include "nest/route.h"
35
#include "nest/protocol.h"
36
#include "nest/cli.h"
37
#include "nest/iface.h"
38
#include "lib/resource.h"
39
#include "lib/event.h"
40
#include "lib/string.h"
41
#include "conf/conf.h"
42
#include "filter/filter.h"
43
#include "lib/string.h"
44
#include "lib/alloca.h"
45

    
46
pool *rt_table_pool;
47

    
48
static slab *rte_slab;
49
static linpool *rte_update_pool;
50

    
51
static list routing_tables;
52

    
53
static byte *rt_format_via(rte *e);
54
static void rt_free_hostcache(rtable *tab);
55
static void rt_notify_hostcache(rtable *tab, net *net);
56
static void rt_update_hostcache(rtable *tab);
57
static void rt_next_hop_update(rtable *tab);
58
static inline void rt_prune_table(rtable *tab);
59

    
60

    
61
static inline struct ea_list *
62
make_tmp_attrs(struct rte *rt, struct linpool *pool)
63
{
64
  struct ea_list *(*mta)(struct rte *rt, struct linpool *pool);
65
  mta = rt->attrs->src->proto->make_tmp_attrs;
66
  return mta ? mta(rt, pool) : NULL;
67
}
68

    
69

    
70
/* Like fib_route(), but skips empty net entries */
71
static inline void *
72
net_route_ip4(struct fib *f, net_addr_ip4 *n)
73
{
74
  net *r;
75

    
76
  while (r = fib_find(f, (net_addr *) n),
77
         !(r && rte_is_valid(r->routes)) && (n->pxlen > 0))
78
  {
79
    n->pxlen--;
80
    ip4_clrbit(&n->prefix, n->pxlen);
81
  }
82

    
83
  return r;
84
}
85

    
86
static inline void *
87
net_route_ip6(struct fib *f, net_addr_ip6 *n)
88
{
89
  net *r;
90

    
91
  while (r = fib_find(f, (net_addr *) n),
92
         !(r && rte_is_valid(r->routes)) && (n->pxlen > 0))
93
  {
94
    n->pxlen--;
95
    ip6_clrbit(&n->prefix, n->pxlen);
96
  }
97

    
98
  return r;
99
}
100

    
101
void *
102
net_route(rtable *tab, const net_addr *n)
103
{
104
  ASSERT(tab->addr_type == n->type);
105

    
106
  net_addr *n0 = alloca(n->length);
107
  net_copy(n0, n);
108

    
109
  switch (n->type)
110
  {
111
  case NET_IP4:
112
  case NET_VPN4:
113
  case NET_ROA4:
114
    return net_route_ip4(&tab->fib, (net_addr_ip4 *) n0);
115

    
116
  case NET_IP6:
117
  case NET_VPN6:
118
  case NET_ROA6:
119
    return net_route_ip6(&tab->fib, (net_addr_ip6 *) n0);
120

    
121
  default:
122
    return NULL;
123
  }
124
}
125

    
126

    
127
/*
 * IPv4 worker for net_roa_check(): scan @tab for ROA entries covering
 * prefix @px, from @px->pxlen down to /0. Returns ROA_VALID on a match of
 * @asn with sufficient max_pxlen, ROA_INVALID when covering ROAs exist but
 * none matches, ROA_UNKNOWN when no covering ROA with a valid route exists.
 */
static int
net_roa_check_ip4(rtable *tab, const net_addr_ip4 *px, u32 asn)
{
  /* Lookup key: the prefix of @px with max_pxlen/asn zeroed */
  struct net_addr_roa4 n = NET_ADDR_ROA4(px->prefix, px->pxlen, 0, 0);
  struct fib_node *fn;
  int anything = 0;

  while (1)
  {
    /* Walk the fib hash chain for this key; entries in the chain are
       filtered by net_equal_prefix_roa4() to select ROAs with this exact
       prefix (any asn / max_pxlen). */
    for (fn = fib_get_chain(&tab->fib, (net_addr *) &n); fn; fn = fn->next)
    {
      net_addr_roa4 *roa = (void *) fn->addr;
      net *r = fib_node_to_user(&tab->fib, fn);

      if (net_equal_prefix_roa4(roa, &n) && rte_is_valid(r->routes))
      {
        anything = 1;                /* at least one covering ROA exists */
        if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen))
          return ROA_VALID;
      }
    }

    if (n.pxlen == 0)
      break;

    /* Move to the next shorter covering prefix */
    n.pxlen--;
    ip4_clrbit(&n.prefix, n.pxlen);
  }

  return anything ? ROA_INVALID : ROA_UNKNOWN;
}
158

    
159
/*
 * IPv6 worker for net_roa_check(); mirrors net_roa_check_ip4(). Scans @tab
 * for ROA entries covering prefix @px from @px->pxlen down to /0 and
 * classifies the origination as ROA_VALID / ROA_INVALID / ROA_UNKNOWN.
 */
static int
net_roa_check_ip6(rtable *tab, const net_addr_ip6 *px, u32 asn)
{
  /* Lookup key: the prefix of @px with max_pxlen/asn zeroed */
  struct net_addr_roa6 n = NET_ADDR_ROA6(px->prefix, px->pxlen, 0, 0);
  struct fib_node *fn;
  int anything = 0;

  while (1)
  {
    /* Walk the fib hash chain for this key, selecting ROAs with this exact
       prefix (any asn / max_pxlen) that have a valid route. */
    for (fn = fib_get_chain(&tab->fib, (net_addr *) &n); fn; fn = fn->next)
    {
      net_addr_roa6 *roa = (void *) fn->addr;
      net *r = fib_node_to_user(&tab->fib, fn);

      if (net_equal_prefix_roa6(roa, &n) && rte_is_valid(r->routes))
      {
        anything = 1;                /* at least one covering ROA exists */
        if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen))
          return ROA_VALID;
      }
    }

    if (n.pxlen == 0)
      break;

    /* Move to the next shorter covering prefix */
    n.pxlen--;
    ip6_clrbit(&n.prefix, n.pxlen);
  }

  return anything ? ROA_INVALID : ROA_UNKNOWN;
}
190

    
191
/**
192
 * roa_check - check validity of route origination in a ROA table
193
 * @tab: ROA table
194
 * @n: network prefix to check
195
 * @asn: AS number of network prefix
196
 *
197
 * Implements RFC 6483 route validation for the given network prefix. The
198
 * procedure is to find all candidate ROAs - ROAs whose prefixes cover the given
199
 * network prefix. If there is no candidate ROA, return ROA_UNKNOWN. If there is
200
 * a candidate ROA with matching ASN and maxlen field greater than or equal to
201
 * the given prefix length, return ROA_VALID. Otherwise, return ROA_INVALID. If
202
 * caller cannot determine origin AS, 0 could be used (in that case ROA_VALID
203
 * cannot happen). Table @tab must have type NET_ROA4 or NET_ROA6, network @n
204
 * must have type NET_IP4 or NET_IP6, respectively.
205
 */
206
int
207
net_roa_check(rtable *tab, const net_addr *n, u32 asn)
208
{
209
  if ((tab->addr_type == NET_ROA4) && (n->type == NET_IP4))
210
    return net_roa_check_ip4(tab, (const net_addr_ip4 *) n, asn);
211
  else if ((tab->addr_type == NET_ROA6) && (n->type == NET_IP6))
212
    return net_roa_check_ip6(tab, (const net_addr_ip6 *) n, asn);
213
  else
214
    return ROA_UNKNOWN;        /* Should not happen */
215
}
216

    
217
/**
218
 * rte_find - find a route
219
 * @net: network node
220
 * @src: route source
221
 *
222
 * The rte_find() function returns a route for destination @net
223
 * which is from route source @src.
224
 */
225
rte *
226
rte_find(net *net, struct rte_src *src)
227
{
228
  rte *e = net->routes;
229

    
230
  while (e && e->attrs->src != src)
231
    e = e->next;
232
  return e;
233
}
234

    
235
/**
236
 * rte_get_temp - get a temporary &rte
237
 * @a: attributes to assign to the new route (a &rta; in case it's
238
 * un-cached, rte_update() will create a cached copy automatically)
239
 *
240
 * Create a temporary &rte and bind it with the attributes @a.
241
 * Also set route preference to the default preference set for
242
 * the protocol.
243
 */
244
rte *
245
rte_get_temp(rta *a)
246
{
247
  rte *e = sl_alloc(rte_slab);
248

    
249
  e->attrs = a;
250
  e->flags = 0;
251
  e->pref = 0;
252
  return e;
253
}
254

    
255
/* Make a private copy of route @r with its own reference to the rta */
rte *
rte_do_cow(rte *r)
{
  rte *copy = sl_alloc(rte_slab);

  *copy = *r;                        /* struct copy of the whole rte */
  copy->attrs = rta_clone(r->attrs); /* take our own rta reference */
  copy->flags = 0;

  return copy;
}
265

    
266
/**
267
 * rte_cow_rta - get a private writable copy of &rte with writable &rta
268
 * @r: a route entry to be copied
269
 * @lp: a linpool from which to allocate &rta
270
 *
271
 * rte_cow_rta() takes a &rte and prepares it and associated &rta for
272
 * modification. There are three possibilities: First, both &rte and &rta are
273
 * private copies, in that case they are returned unchanged.  Second, &rte is
274
 * private copy, but &rta is cached, in that case &rta is duplicated using
275
 * rta_do_cow(). Third, both &rte is shared and &rta is cached, in that case
276
 * both structures are duplicated by rte_do_cow() and rta_do_cow().
277
 *
278
 * Note that in the second case, cached &rta loses one reference, while private
279
 * copy created by rta_do_cow() is a shallow copy sharing indirect data (eattrs,
280
 * nexthops, ...) with it. To work properly, original shared &rta should have
281
 * another reference during the life of created private copy.
282
 *
283
 * Result: a pointer to the new writable &rte with writable &rta.
284
 */
285
rte *
286
rte_cow_rta(rte *r, linpool *lp)
287
{
288
  if (!rta_is_cached(r->attrs))
289
    return r;
290

    
291
  rte *e = rte_cow(r);
292
  rta *a = rta_do_cow(r->attrs, lp);
293
  rta_free(e->attrs);
294
  e->attrs = a;
295
  return e;
296
}
297

    
298
static int                                /* Actually better or at least as good as */
299
rte_better(rte *new, rte *old)
300
{
301
  int (*better)(rte *, rte *);
302

    
303
  if (!rte_is_valid(old))
304
    return 1;
305
  if (!rte_is_valid(new))
306
    return 0;
307

    
308
  if (new->pref > old->pref)
309
    return 1;
310
  if (new->pref < old->pref)
311
    return 0;
312
  if (new->attrs->src->proto->proto != old->attrs->src->proto->proto)
313
    {
314
      /*
315
       *  If the user has configured protocol preferences, so that two different protocols
316
       *  have the same preference, try to break the tie by comparing addresses. Not too
317
       *  useful, but keeps the ordering of routes unambiguous.
318
       */
319
      return new->attrs->src->proto->proto > old->attrs->src->proto->proto;
320
    }
321
  if (better = new->attrs->src->proto->rte_better)
322
    return better(new, old);
323
  return 0;
324
}
325

    
326
static int
327
rte_mergable(rte *pri, rte *sec)
328
{
329
  int (*mergable)(rte *, rte *);
330

    
331
  if (!rte_is_valid(pri) || !rte_is_valid(sec))
332
    return 0;
333

    
334
  if (pri->pref != sec->pref)
335
    return 0;
336

    
337
  if (pri->attrs->src->proto->proto != sec->attrs->src->proto->proto)
338
    return 0;
339

    
340
  if (mergable = pri->attrs->src->proto->rte_mergable)
341
    return mergable(pri, sec);
342

    
343
  return 0;
344
}
345

    
346
/* Log one route trace line; @dir is '>' for import events, '<' for export */
static void
rte_trace(struct proto *p, rte *e, int dir, char *msg)
{
  log(L_TRACE "%s %c %s %N %s", p->name, dir, msg, e->net->n.addr, rt_format_via(e));
}
351

    
352
/* Trace an import ('>') event for @e when debug flag @flag is enabled on @p */
static inline void
rte_trace_in(uint flag, struct proto *p, rte *e, char *msg)
{
  if (p->debug & flag)
    rte_trace(p, e, '>', msg);
}
358

    
359
/* Trace an export ('<') event for @e when debug flag @flag is enabled on @p */
static inline void
rte_trace_out(uint flag, struct proto *p, rte *e, char *msg)
{
  if (p->debug & flag)
    rte_trace(p, e, '<', msg);
}
365

    
366
/*
 * Run export checks for @rt0 on channel @c: first the protocol's
 * import_control() hook, then the channel's export filter.
 *
 * Returns the route to export (possibly a modified private copy), or NULL
 * when rejected. If a private copy was created, it is also stored in
 * *@rt_free so the caller can free it later. If @tmpa is non-NULL, it
 * receives the temporary attribute list built for the route. With @silent
 * set, rejections update no statistics and emit no trace messages (used
 * when probing whether an old route would have been exported).
 */
static rte *
export_filter_(struct channel *c, rte *rt0, rte **rt_free, ea_list **tmpa, linpool *pool, int silent)
{
  struct proto *p = c->proto;
  struct filter *filter = c->out_filter;
  struct proto_stats *stats = &c->stats;
  ea_list *tmpb = NULL;        /* local tmpa storage when caller passed NULL */
  rte *rt;
  int v;

  rt = rt0;
  *rt_free = NULL;

  if (!tmpa)
    tmpa = &tmpb;

  *tmpa = make_tmp_attrs(rt, pool);

  /* Protocol veto / force-accept: v < 0 rejects, v > 0 skips the filter */
  v = p->import_control ? p->import_control(p, &rt, tmpa, pool) : 0;
  if (v < 0)
    {
      if (silent)
        goto reject;

      stats->exp_updates_rejected++;
      if (v == RIC_REJECT)
        rte_trace_out(D_FILTERS, p, rt, "rejected by protocol");
      goto reject;
    }
  if (v > 0)
    {
      if (!silent)
        rte_trace_out(D_FILTERS, p, rt, "forced accept by protocol");
      goto accept;
    }

  /* Run the export filter; FILTER_REJECT short-circuits without executing */
  v = filter && ((filter == FILTER_REJECT) ||
                 (f_run(filter, &rt, tmpa, pool, FF_FORCE_TMPATTR) > F_ACCEPT));
  if (v)
    {
      if (silent)
        goto reject;

      stats->exp_updates_filtered++;
      rte_trace_out(D_FILTERS, p, rt, "filtered out");
      goto reject;
    }

 accept:
  /* Hand ownership of any private copy to the caller via *rt_free */
  if (rt != rt0)
    *rt_free = rt;
  return rt;

 reject:
  /* Discard temporary rte */
  if (rt != rt0)
    rte_free(rt);
  return NULL;
}
425

    
426
/* Convenience wrapper for export_filter_() using the shared rte_update_pool */
static inline rte *
export_filter(struct channel *c, rte *rt0, rte **rt_free, ea_list **tmpa, int silent)
{
  return export_filter_(c, rt0, rt_free, tmpa, rte_update_pool, silent);
}
431

    
432
/*
 * Deliver one export update/withdraw (@new / @old, already filtered) to the
 * protocol behind channel @c by calling its rt_notify() hook. Applies the
 * channel's export limit, updates export statistics and optionally traces
 * the change. @tmpa, when given, is temporarily spliced in front of the
 * route's cached extended attributes for the duration of the callback.
 */
static void
do_rt_notify(struct channel *c, net *net, rte *new, rte *old, ea_list *tmpa, int refeed)
{
  struct proto *p = c->proto;
  struct proto_stats *stats = &c->stats;


  /*
   * First, apply export limit.
   *
   * Export route limits has several problems. Because exp_routes
   * counter is reset before refeed, we don't really know whether
   * limit is breached and whether the update is new or not. Therefore
   * the number of really exported routes may exceed the limit
   * temporarily (routes exported before and new routes in refeed).
   *
   * Minor advantage is that if the limit is decreased and refeed is
   * requested, the number of exported routes really decrease.
   *
   * Second problem is that with export limits, we don't know whether
   * old was really exported (it might be blocked by limit). When a
   * withdraw is exported, we announce it even when the previous
   * update was blocked. This is not a big issue, but the same problem
   * is in updating exp_routes counter. Therefore, to be consistent in
   * increases and decreases of exp_routes, we count exported routes
   * regardless of blocking by limits.
   *
   * Similar problem is in handling updates - when a new route is
   * received and blocking is active, the route would be blocked, but
   * when an update for the route will be received later, the update
   * would be propagated (as old != NULL). Therefore, we have to block
   * also non-new updates (contrary to import blocking).
   */

  struct channel_limit *l = &c->out_limit;
  if (l->action && new)
    {
      if ((!old || refeed) && (stats->exp_routes >= l->limit))
        channel_notify_limit(c, l, PLD_OUT, stats->exp_routes);

      if (l->state == PLS_BLOCKED)
        {
          stats->exp_routes++;        /* see note above */
          stats->exp_updates_rejected++;
          rte_trace_out(D_FILTERS, p, new, "rejected [limit]");
          new = NULL;

          /* Blocked update with no old route: nothing left to announce */
          if (!old)
            return;
        }
    }


  if (new)
    stats->exp_updates_accepted++;
  else
    stats->exp_withdraws_accepted++;

  /* Hack: We do not decrease exp_routes during refeed, we instead
     reset exp_routes at the start of refeed. */
  if (new)
    stats->exp_routes++;
  if (old && !refeed)
    stats->exp_routes--;

  if (p->debug & D_ROUTES)
    {
      if (new && old)
        rte_trace_out(D_ROUTES, p, new, "replaced");
      else if (new)
        rte_trace_out(D_ROUTES, p, new, "added");
      else if (old)
        rte_trace_out(D_ROUTES, p, old, "removed");
    }
  if (!new)
    p->rt_notify(p, c, net, NULL, old, NULL);
  else if (tmpa)
    {
      /* Temporarily link the route's cached eattrs after the temporary
         list so the callback sees both; unlink again afterwards. */
      ea_list *t = tmpa;
      while (t->next)
        t = t->next;
      t->next = new->attrs->eattrs;
      p->rt_notify(p, c, net, new, old, tmpa);
      t->next = NULL;
    }
  else
    p->rt_notify(p, c, net, new, old, new->attrs->eattrs);
}
520

    
521
/*
 * Export handler for RA_ANY / RA_OPTIMAL channels: run the export filter
 * on the changed routes @new0 / @old0 and forward the result to
 * do_rt_notify(). With @refeed set, the old route is not re-filtered
 * (see the FIXME comment below).
 */
static void
rt_notify_basic(struct channel *c, net *net, rte *new0, rte *old0, int refeed)
{
  struct proto *p = c->proto;

  rte *new = new0;
  rte *old = old0;
  rte *new_free = NULL;        /* private copies created by the filter */
  rte *old_free = NULL;
  ea_list *tmpa = NULL;

  if (new)
    c->stats.exp_updates_received++;
  else
    c->stats.exp_withdraws_received++;

  /*
   * This is a tricky part - we don't know whether route 'old' was
   * exported to protocol 'p' or was filtered by the export filter.
   * We try to run the export filter to know this to have a correct
   * value in 'old' argument of rte_update (and proper filter value)
   *
   * FIXME - this is broken because 'configure soft' may change
   * filters but keep routes. Refeed is expected to be called after
   * change of the filters and with old == new, therefore we do not
   * even try to run the filter on an old route, This may lead to
   * 'spurious withdraws' but ensure that there are no 'missing
   * withdraws'.
   *
   * This is not completely safe as there is a window between
   * reconfiguration and the end of refeed - if a newly filtered
   * route disappears during this period, proper withdraw is not
   * sent (because old would be also filtered) and the route is
   * not refeeded (because it disappeared before that).
   */

  if (new)
    new = export_filter(c, new, &new_free, &tmpa, 0);

  /* Probe the old route silently: no stats, no trace */
  if (old && !refeed)
    old = export_filter(c, old, &old_free, NULL, 1);

  if (!new && !old)
  {
    /*
     * As mentioned above, 'old' value may be incorrect in some race conditions.
     * We generally ignore it with the exception of withdraw to pipe protocol.
     * In that case we rather propagate unfiltered withdraws regardless of
     * export filters to ensure that when a protocol is flushed, its routes are
     * removed from all tables. Possible spurious unfiltered withdraws are not
     * problem here as they are ignored if there is no corresponding route at
     * the other end of the pipe. We directly call rt_notify() hook instead of
     * do_rt_notify() to avoid logging and stat counters.
     */

#ifdef CONFIG_PIPE
    if ((p->proto == &proto_pipe) && !new0 && (p != old0->sender->proto))
      p->rt_notify(p, c, net, NULL, old0, NULL);
#endif

    return;
  }

  do_rt_notify(c, net, new, old, tmpa, refeed);

  /* Discard temporary rte's */
  if (new_free)
    rte_free(new_free);
  if (old_free)
    rte_free(old_free);
}
592

    
593
/*
 * Export handler for RA_ACCEPTED channels: announce the first route in
 * net->routes accepted by the export filter (instead of the table-best
 * route). Determines both the new and old accepted route and passes them
 * to do_rt_notify(). @feed is 0 for a regular update, 1 for initial feed,
 * 2 for refeed.
 */
static void
rt_notify_accepted(struct channel *c, net *net, rte *new_changed, rte *old_changed, rte *before_old, int feed)
{
  // struct proto *p = c->proto;

  rte *r;
  rte *new_best = NULL;
  rte *old_best = NULL;
  rte *new_free = NULL;        /* private copies created by the filter */
  rte *old_free = NULL;
  ea_list *tmpa = NULL;

  /* Used to track whether we met old_changed position. If before_old is NULL
     old_changed was the first and we met it implicitly before current best route. */
  int old_meet = old_changed && !before_old;

  /* Note that before_old is either NULL or valid (not rejected) route.
     If old_changed is valid, before_old have to be too. If old changed route
     was not valid, caller must use NULL for both old_changed and before_old. */

  if (new_changed)
    c->stats.exp_updates_received++;
  else
    c->stats.exp_withdraws_received++;

  /* First, find the new_best route - first accepted by filters */
  for (r=net->routes; rte_is_valid(r); r=r->next)
    {
      if (new_best = export_filter(c, r, &new_free, &tmpa, 0))
        break;

      /* Note if we walked around the position of old_changed route */
      if (r == before_old)
        old_meet = 1;
    }

  /* 
   * Second, handle the feed case. That means we do not care for
   * old_best. It is NULL for feed, and the new_best for refeed. 
   * For refeed, there is a hack similar to one in rt_notify_basic()
   * to ensure withdraws in case of changed filters
   */
  if (feed)
    {
      if (feed == 2)        /* refeed */
        old_best = new_best ? new_best :
          (rte_is_valid(net->routes) ? net->routes : NULL);
      else
        old_best = NULL;

      if (!new_best && !old_best)
        return;

      goto found;
    }

  /*
   * Now, we find the old_best route. Generally, it is the same as the
   * new_best, unless new_best is the same as new_changed or
   * old_changed is accepted before new_best.
   *
   * There are four cases:
   *
   * - We would find and accept old_changed before new_best, therefore
   *   old_changed is old_best. In remaining cases we suppose this
   *   is not true.
   *
   * - We found no new_best, therefore there is also no old_best and
   *   we ignore this withdraw.
   *
   * - We found new_best different than new_changed, therefore
   *   old_best is the same as new_best and we ignore this update.
   *
   * - We found new_best the same as new_changed, therefore it cannot
   *   be old_best and we have to continue search for old_best.
   */

  /* First case */
  if (old_meet)
    if (old_best = export_filter(c, old_changed, &old_free, NULL, 1))
      goto found;

  /* Second case */
  if (!new_best)
    return;

  /* Third case, we use r instead of new_best, because export_filter() could change it */
  if (r != new_changed)
    {
      if (new_free)
        rte_free(new_free);
      return;
    }

  /* Fourth case */
  for (r=r->next; rte_is_valid(r); r=r->next)
    {
      if (old_best = export_filter(c, r, &old_free, NULL, 1))
        goto found;

      if (r == before_old)
        if (old_best = export_filter(c, old_changed, &old_free, NULL, 1))
          goto found;
    }

  /* Implicitly, old_best is NULL and new_best is non-NULL */

 found:
  do_rt_notify(c, net, new_best, old_best, tmpa, (feed == 2));

  /* Discard temporary rte's */
  if (new_free)
    rte_free(new_free);
  if (old_free)
    rte_free(old_free);
}
709

    
710

    
711
static struct nexthop *
712
nexthop_merge_rta(struct nexthop *nhs, rta *a, linpool *pool, int max)
713
{
714
  return nexthop_merge(nhs, &(a->nh), 1, 0, max, pool);
715
}
716

    
717
/*
 * Build the merged export route for @net on channel @c: export-filter the
 * best route, then merge next hops of all further mergable routes that
 * pass the filter (silently), up to c->merge_limit. Returns the route to
 * export or NULL; a private copy, if created, is stored in *@rt_free for
 * the caller to free. @tmpa receives the temporary attributes of the best
 * route.
 */
rte *
rt_export_merged(struct channel *c, net *net, rte **rt_free, ea_list **tmpa, linpool *pool, int silent)
{
  // struct proto *p = c->proto;
  struct nexthop *nhs = NULL;        /* accumulated merged next hop list */
  rte *best0, *best, *rt0, *rt, *tmp;

  best0 = net->routes;
  *rt_free = NULL;

  if (!rte_is_valid(best0))
    return NULL;

  best = export_filter_(c, best0, rt_free, tmpa, pool, silent);

  /* Unreachable (or rejected) best route: nothing to merge into */
  if (!best || !rte_is_reachable(best))
    return best;

  for (rt0 = best0->next; rt0; rt0 = rt0->next)
  {
    if (!rte_mergable(best0, rt0))
      continue;

    /* Silent filter run - secondary routes do not affect stats/logs */
    rt = export_filter_(c, rt0, &tmp, NULL, pool, 1);

    if (!rt)
      continue;

    if (rte_is_reachable(rt))
      nhs = nexthop_merge_rta(nhs, rt->attrs, pool, c->merge_limit);

    if (tmp)
      rte_free(tmp);
  }

  if (nhs)
  {
    /* Fold the best route's own next hop into the merged list */
    nhs = nexthop_merge_rta(nhs, best->attrs, pool, c->merge_limit);

    if (nhs->next)
    {
      /* More than one next hop: attach the merged list to a writable rta */
      best = rte_cow_rta(best, pool);
      nexthop_link(best->attrs, nhs);
    }
  }

  if (best != best0)
    *rt_free = best;

  return best;
}
768

    
769

    
770
/*
 * Export handler for RA_MERGED channels: announce a merged route built by
 * rt_export_merged() whenever the change is relevant to it, and forward
 * the result to do_rt_notify().
 */
static void
rt_notify_merged(struct channel *c, net *net, rte *new_changed, rte *old_changed,
                 rte *new_best, rte*old_best, int refeed)
{
  // struct proto *p = c->proto;

  rte *new_best_free = NULL;        /* private copies created by filters */
  rte *old_best_free = NULL;
  rte *new_changed_free = NULL;
  rte *old_changed_free = NULL;
  ea_list *tmpa = NULL;

  /* We assume that all rte arguments are either NULL or rte_is_valid() */

  /* This check should be done by the caller */
  if (!new_best && !old_best)
    return;

  /* Check whether the change is relevant to the merged route */
  if ((new_best == old_best) && !refeed)
  {
    /* Best route unchanged: the update matters only if a changed route
       is (or was) mergable with it and passes the export filter */
    new_changed = rte_mergable(new_best, new_changed) ?
      export_filter(c, new_changed, &new_changed_free, NULL, 1) : NULL;

    old_changed = rte_mergable(old_best, old_changed) ?
      export_filter(c, old_changed, &old_changed_free, NULL, 1) : NULL;

    if (!new_changed && !old_changed)
      return;
  }

  if (new_best)
    c->stats.exp_updates_received++;
  else
    c->stats.exp_withdraws_received++;

  /* Prepare new merged route */
  if (new_best)
    new_best = rt_export_merged(c, net, &new_best_free, &tmpa, rte_update_pool, 0);

  /* Prepare old merged route (without proper merged next hops) */
  /* There are some issues with running filter on old route - see rt_notify_basic() */
  if (old_best && !refeed)
    old_best = export_filter(c, old_best, &old_best_free, NULL, 1);

  if (new_best || old_best)
    do_rt_notify(c, net, new_best, old_best, tmpa, refeed);

  /* Discard temporary rte's */
  if (new_best_free)
    rte_free(new_best_free);
  if (old_best_free)
    rte_free(old_best_free);
  if (new_changed_free)
    rte_free(new_changed_free);
  if (old_changed_free)
    rte_free(old_changed_free);
}
828

    
829

    
830
/**
831
 * rte_announce - announce a routing table change
832
 * @tab: table the route has been added to
833
 * @type: type of route announcement (RA_OPTIMAL or RA_ANY)
834
 * @net: network in question
835
 * @new: the new route to be announced
836
 * @old: the previous route for the same network
837
 * @new_best: the new best route for the same network
838
 * @old_best: the previous best route for the same network
839
 * @before_old: The previous route before @old for the same network.
840
 *                 If @before_old is NULL @old was the first.
841
 *
842
 * This function gets a routing table update and announces it
843
 * to all protocols that accept the given type of route announcement
844
 * and are connected to the same table by their announcement hooks.
845
 *
846
 * Route announcement of type %RA_OPTIMAL is generated when optimal
847
 * route (in routing table @tab) changes. In that case @old stores the
848
 * old optimal route.
849
 *
850
 * Route announcement of type %RA_ANY is generated when any route (in
851
 * routing table @tab) changes. In that case @old stores the old route
852
 * from the same protocol.
853
 *
854
 * For each appropriate protocol, we first call its import_control()
855
 * hook which performs basic checks on the route (each protocol has a
856
 * right to veto or force accept of the route before any filter is
857
 * asked) and adds default values of attributes specific to the new
858
 * protocol (metrics, tags etc.).  Then it consults the protocol's
859
 * export filter and if it accepts the route, the rt_notify() hook of
860
 * the protocol gets called.
861
 */
862
static void
863
rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old,
864
             rte *new_best, rte *old_best, rte *before_old)
865
{
866
  if (!rte_is_valid(new))
867
    new = NULL;
868

    
869
  if (!rte_is_valid(old))
870
    old = before_old = NULL;
871

    
872
  if (!rte_is_valid(new_best))
873
    new_best = NULL;
874

    
875
  if (!rte_is_valid(old_best))
876
    old_best = NULL;
877

    
878
  if (!old && !new)
879
    return;
880

    
881
  if ((type == RA_OPTIMAL) && tab->hostcache)
882
    rt_notify_hostcache(tab, net);
883

    
884
  struct channel *c; node *n;
885
  WALK_LIST2(c, n, tab->channels, table_node)
886
    {
887
      if (c->export_state == ES_DOWN)
888
        continue;
889

    
890
      if (c->ra_mode == type)
891
        if (type == RA_ACCEPTED)
892
          rt_notify_accepted(c, net, new, old, before_old, 0);
893
        else if (type == RA_MERGED)
894
          rt_notify_merged(c, net, new, old, new_best, old_best, 0);
895
        else
896
          rt_notify_basic(c, net, new, old, 0);
897
    }
898
}
899

    
900
static inline int
901
rte_validate(rte *e)
902
{
903
  int c;
904
  net *n = e->net;
905

    
906
  // (n->n.pxlen > BITS_PER_IP_ADDRESS) || !ip_is_prefix(n->n.prefix,n->n.pxlen))
907
  if (!net_validate(n->n.addr))
908
  {
909
    log(L_WARN "Ignoring bogus prefix %N received via %s",
910
        n->n.addr, e->sender->proto->name);
911
    return 0;
912
  }
913

    
914
  c = net_classify(n->n.addr);
915
  if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
916
  {
917
    log(L_WARN "Ignoring bogus route %N received via %s",
918
        n->n.addr, e->sender->proto->name);
919
    return 0;
920
  }
921

    
922
  if ((e->attrs->dest == RTD_UNICAST) && !nexthop_is_sorted(&(e->attrs->nh)))
923
    {
924
      log(L_WARN "Ignoring unsorted multipath route %N received via %s",
925
          n->n.addr, e->sender->proto->name);
926
      return 0;
927
    }
928

    
929
  return 1;
930
}
931

    
932
/**
933
 * rte_free - delete a &rte
934
 * @e: &rte to be deleted
935
 *
936
 * rte_free() deletes the given &rte from the routing table it's linked to.
937
 */
938
void
rte_free(rte *e)
{
  /* Release the attribute reference only when the rta is a cached
     (refcounted) one; an uncached rta is not owned by this rte */
  if (rta_is_cached(e->attrs))
    rta_free(e->attrs);
  sl_free(rte_slab, e);
}
945

    
946
/* Free a &rte whose attributes are known to be cached; skips the
   rta_is_cached() test done by rte_free() */
static inline void
rte_free_quick(rte *e)
{
  rta_free(e->attrs);
  sl_free(rte_slab, e);
}
952

    
953
static int
954
rte_same(rte *x, rte *y)
955
{
956
  return
957
    x->attrs == y->attrs &&
958
    x->flags == y->flags &&
959
    x->pflags == y->pflags &&
960
    x->pref == y->pref &&
961
    (!x->attrs->src->proto->rte_same || x->attrs->src->proto->rte_same(x, y));
962
}
963

    
964
/* Route exists and is not marked as filtered */
static inline int rte_is_ok(rte *e) { return e && !rte_is_filtered(e); }
965

    
966
/*
 * rte_recalculate - replace the route from @src in @net by @new (or withdraw
 * it when @new is NULL), enforce channel receive/import limits, keep the
 * route list ordered (or elect a new best route), announce the change and
 * update statistics. Consumes @new; frees the replaced route.
 */
static void
rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src)
{
  struct proto *p = c->proto;
  struct rtable *table = c->table;
  struct proto_stats *stats = &c->stats;
  static struct tbf rl_pipe = TBF_DEFAULT_LOG_LIMITS;
  rte *before_old = NULL;
  rte *old_best = net->routes;
  rte *old = NULL;
  rte **k;

  k = &net->routes;                        /* Find and remove original route from the same protocol */
  while (old = *k)
    {
      if (old->attrs->src == src)
        {
          /* If there is the same route in the routing table but from
           * a different sender, then there are two paths from the
           * source protocol to this routing table through transparent
           * pipes, which is not allowed.
           *
           * We log that and ignore the route. If it is withdraw, we
           * ignore it completely (there might be 'spurious withdraws',
           * see FIXME in do_rte_announce())
           */
          if (old->sender->proto != p)
            {
              if (new)
                {
                  log_rl(&rl_pipe, L_ERR "Pipe collision detected when sending %N to table %s",
                      net->n.addr, table->name);
                  rte_free_quick(new);
                }
              return;
            }

          if (new && rte_same(old, new))
            {
              /* No changes, ignore the new route */

              if (!rte_is_filtered(new))
                {
                  stats->imp_updates_ignored++;
                  rte_trace_in(D_ROUTES, p, new, "ignored");
                }

              rte_free_quick(new);
              return;
            }
          *k = old->next;
          break;
        }
      k = &old->next;
      before_old = old;
    }

  /* before_old is only meaningful (position hint for sorted tables)
     when a route from @src was actually found */
  if (!old)
    before_old = NULL;

  if (!old && !new)
    {
      stats->imp_withdraws_ignored++;
      return;
    }

  int new_ok = rte_is_ok(new);
  int old_ok = rte_is_ok(old);

  /* Receive limit counts all routes (accepted + filtered) */
  struct channel_limit *l = &c->rx_limit;
  if (l->action && !old && new)
    {
      u32 all_routes = stats->imp_routes + stats->filt_routes;

      if (all_routes >= l->limit)
        channel_notify_limit(c, l, PLD_RX, all_routes);

      if (l->state == PLS_BLOCKED)
        {
          /* In receive limit the situation is simple, old is NULL so
             we just free new and exit like nothing happened */

          stats->imp_updates_ignored++;
          rte_trace_in(D_FILTERS, p, new, "ignored [limit]");
          rte_free_quick(new);
          return;
        }
    }

  /* Import limit counts only routes accepted by filters */
  l = &c->in_limit;
  if (l->action && !old_ok && new_ok)
    {
      if (stats->imp_routes >= l->limit)
        channel_notify_limit(c, l, PLD_IN, stats->imp_routes);

      if (l->state == PLS_BLOCKED)
        {
          /* In import limit the situation is more complicated. We
             shouldn't just drop the route, we should handle it like
             it was filtered. We also have to continue the route
             processing if old or new is non-NULL, but we should exit
             if both are NULL as this case is probably assumed to be
             already handled. */

          stats->imp_updates_ignored++;
          rte_trace_in(D_FILTERS, p, new, "ignored [limit]");

          if (c->in_keep_filtered)
            new->flags |= REF_FILTERED;
          else
            { rte_free_quick(new); new = NULL; }

          /* Note that old && !new could be possible when
             c->in_keep_filtered changed in the recent past. */

          if (!old && !new)
            return;

          new_ok = 0;
          goto skip_stats1;
        }
    }

  if (new_ok)
    stats->imp_updates_accepted++;
  else if (old_ok)
    stats->imp_withdraws_accepted++;
  else
    stats->imp_withdraws_ignored++;

 skip_stats1:

  if (new)
    rte_is_filtered(new) ? stats->filt_routes++ : stats->imp_routes++;
  if (old)
    rte_is_filtered(old) ? stats->filt_routes-- : stats->imp_routes--;

  if (table->config->sorted)
    {
      /* If routes are sorted, just insert new route to appropriate position */
      if (new)
        {
          if (before_old && !rte_better(new, before_old))
            k = &before_old->next;
          else
            k = &net->routes;

          for (; *k; k=&(*k)->next)
            if (rte_better(new, *k))
              break;

          new->next = *k;
          *k = new;
        }
    }
  else
    {
      /* If routes are not sorted, find the best route and move it on
         the first position. There are several optimized cases. */

      if (src->proto->rte_recalculate && src->proto->rte_recalculate(table, net, new, old, old_best))
        goto do_recalculate;

      if (new && rte_better(new, old_best))
        {
          /* The first case - the new route is clearly optimal,
             we link it at the first position */

          new->next = net->routes;
          net->routes = new;
        }
      else if (old == old_best)
        {
          /* The second case - the old best route disappeared, we add the
             new route (if we have any) to the list (we don't care about
             position) and then we elect the new optimal route and relink
             that route at the first position and announce it. New optimal
             route might be NULL if there is no more routes */

        do_recalculate:
          /* Add the new route to the list */
          if (new)
            {
              new->next = net->routes;
              net->routes = new;
            }

          /* Find a new optimal route (if there is any) */
          if (net->routes)
            {
              rte **bp = &net->routes;
              for (k=&(*bp)->next; *k; k=&(*k)->next)
                if (rte_better(*k, *bp))
                  bp = k;

              /* And relink it */
              rte *best = *bp;
              *bp = best->next;
              best->next = net->routes;
              net->routes = best;
            }
        }
      else if (new)
        {
          /* The third case - the new route is not better than the old
             best route (therefore old_best != NULL) and the old best
             route was not removed (therefore old_best == net->routes).
             We just link the new route after the old best route. */

          ASSERT(net->routes != NULL);
          new->next = net->routes->next;
          net->routes->next = new;
        }
      /* The fourth (empty) case - suboptimal route was removed, nothing to do */
    }

  if (new)
    new->lastmod = now;

  /* Log the route change */
  if (p->debug & D_ROUTES)
    {
      if (new_ok)
        rte_trace(p, new, '>', new == net->routes ? "added [best]" : "added");
      else if (old_ok)
        {
          if (old != old_best)
            rte_trace(p, old, '>', "removed");
          else if (rte_is_ok(net->routes))
            rte_trace(p, old, '>', "removed [replaced]");
          else
            rte_trace(p, old, '>', "removed [sole]");
        }
    }

  /* Propagate the route change */
  rte_announce(table, RA_ANY, net, new, old, NULL, NULL, NULL);
  if (net->routes != old_best)
    rte_announce(table, RA_OPTIMAL, net, net->routes, old_best, NULL, NULL, NULL);
  if (table->config->sorted)
    rte_announce(table, RA_ACCEPTED, net, new, old, NULL, NULL, before_old);
  rte_announce(table, RA_MERGED, net, new, old, net->routes, old_best, NULL);

  /* A withdrawn net may leave an empty FIB node behind; schedule GC
     when enough operations accumulated and enough time passed */
  if (!net->routes &&
      (table->gc_counter++ >= table->config->gc_max_ops) &&
      (table->gc_time + table->config->gc_min_time <= now))
    rt_schedule_prune(table);

  if (old_ok && p->rte_remove)
    p->rte_remove(net, old);
  if (new_ok && p->rte_insert)
    p->rte_insert(net, new);

  if (old)
    rte_free_quick(old);
}
1222

    
1223
static int rte_update_nest_cnt;                /* Nesting counter to allow recursive updates */

/* Enter a (possibly nested) route update section; paired with
   rte_update_unlock(), which flushes rte_update_pool at the outermost exit. */
static inline void
rte_update_lock(void)
{
  rte_update_nest_cnt++;
}
1230

    
1231
/* Leave a route update section; when the outermost nesting level is left,
   all temporary data (tmp attrs, filter scratch) in rte_update_pool is freed. */
static inline void
rte_update_unlock(void)
{
  if (!--rte_update_nest_cnt)
    lp_flush(rte_update_pool);
}
1237

    
1238
static inline void
1239
rte_hide_dummy_routes(net *net, rte **dummy)
1240
{
1241
  if (net->routes && net->routes->attrs->source == RTS_DUMMY)
1242
  {
1243
    *dummy = net->routes;
1244
    net->routes = (*dummy)->next;
1245
  }
1246
}
1247

    
1248
static inline void
1249
rte_unhide_dummy_routes(net *net, rte **dummy)
1250
{
1251
  if (*dummy)
1252
  {
1253
    (*dummy)->next = net->routes;
1254
    net->routes = *dummy;
1255
  }
1256
}
1257

    
1258
/**
1259
 * rte_update - enter a new update to a routing table
1260
 * @table: table to be updated
1261
 * @c: channel doing the update
1262
 * @net: network node
1263
 * @p: protocol submitting the update
1264
 * @src: protocol originating the update
1265
 * @new: a &rte representing the new route or %NULL for route removal.
1266
 *
1267
 * This function is called by the routing protocols whenever they discover
1268
 * a new route or wish to update/remove an existing route. The right announcement
1269
 * sequence is to build route attributes first (either un-cached with @aflags set
1270
 * to zero or a cached one using rta_lookup(); in this case please note that
1271
 * you need to increase the use count of the attributes yourself by calling
1272
 * rta_clone()), call rte_get_temp() to obtain a temporary &rte, fill in all
1273
 * the appropriate data and finally submit the new &rte by calling rte_update().
1274
 *
1275
 * @src specifies the protocol that originally created the route and the meaning
1276
 * of protocol-dependent data of @new. If @new is not %NULL, @src have to be the
1277
 * same value as @new->attrs->proto. @p specifies the protocol that called
1278
 * rte_update(). In most cases it is the same protocol as @src. rte_update()
1279
 * stores @p in @new->sender;
1280
 *
1281
 * When rte_update() gets any route, it automatically validates it (checks,
1282
 * whether the network and next hop address are valid IP addresses and also
1283
 * whether a normal routing protocol doesn't try to smuggle a host or link
1284
 * scope route to the table), converts all protocol dependent attributes stored
1285
 * in the &rte to temporary extended attributes, consults import filters of the
1286
 * protocol to see if the route should be accepted and/or its attributes modified,
1287
 * stores the temporary attributes back to the &rte.
1288
 *
1289
 * Now, having a "public" version of the route, we
1290
 * automatically find any old route defined by the protocol @src
1291
 * for network @n, replace it by the new one (or removing it if @new is %NULL),
1292
 * recalculate the optimal route for this destination and finally broadcast
1293
 * the change (if any) to all routing protocols by calling rte_announce().
1294
 *
1295
 * All memory used for attribute lists and other temporary allocations is taken
1296
 * from a special linear pool @rte_update_pool and freed when rte_update()
1297
 * finishes.
1298
 */
1299

    
1300
void
1301
rte_update2(struct channel *c, const net_addr *n, rte *new, struct rte_src *src)
1302
{
1303
  struct proto *p = c->proto;
1304
  struct proto_stats *stats = &c->stats;
1305
  struct filter *filter = c->in_filter;
1306
  ea_list *tmpa = NULL;
1307
  rte *dummy = NULL;
1308
  net *nn;
1309

    
1310
  ASSERT(c->channel_state == CS_UP);
1311

    
1312
  rte_update_lock();
1313
  if (new)
1314
    {
1315
      nn = net_get(c->table, n);
1316

    
1317
      new->net = nn;
1318
      new->sender = c;
1319

    
1320
      if (!new->pref)
1321
        new->pref = c->preference;
1322

    
1323
      stats->imp_updates_received++;
1324
      if (!rte_validate(new))
1325
        {
1326
          rte_trace_in(D_FILTERS, p, new, "invalid");
1327
          stats->imp_updates_invalid++;
1328
          goto drop;
1329
        }
1330

    
1331
      if (filter == FILTER_REJECT)
1332
        {
1333
          stats->imp_updates_filtered++;
1334
          rte_trace_in(D_FILTERS, p, new, "filtered out");
1335

    
1336
          if (! c->in_keep_filtered)
1337
            goto drop;
1338

    
1339
          /* new is a private copy, i could modify it */
1340
          new->flags |= REF_FILTERED;
1341
        }
1342
      else
1343
        {
1344
          tmpa = make_tmp_attrs(new, rte_update_pool);
1345
          if (filter && (filter != FILTER_REJECT))
1346
            {
1347
              ea_list *old_tmpa = tmpa;
1348
              int fr = f_run(filter, &new, &tmpa, rte_update_pool, 0);
1349
              if (fr > F_ACCEPT)
1350
                {
1351
                  stats->imp_updates_filtered++;
1352
                  rte_trace_in(D_FILTERS, p, new, "filtered out");
1353

    
1354
                  if (! c->in_keep_filtered)
1355
                    goto drop;
1356

    
1357
                  new->flags |= REF_FILTERED;
1358
                }
1359
              if (tmpa != old_tmpa && src->proto->store_tmp_attrs)
1360
                src->proto->store_tmp_attrs(new, tmpa);
1361
            }
1362
        }
1363
      if (!rta_is_cached(new->attrs)) /* Need to copy attributes */
1364
        new->attrs = rta_lookup(new->attrs);
1365
      new->flags |= REF_COW;
1366
    }
1367
  else
1368
    {
1369
      stats->imp_withdraws_received++;
1370

    
1371
      if (!(nn = net_find(c->table, n)) || !src)
1372
        {
1373
          stats->imp_withdraws_ignored++;
1374
          rte_update_unlock();
1375
          return;
1376
        }
1377
    }
1378

    
1379
 recalc:
1380
  rte_hide_dummy_routes(nn, &dummy);
1381
  rte_recalculate(c, nn, new, src);
1382
  rte_unhide_dummy_routes(nn, &dummy);
1383
  rte_update_unlock();
1384
  return;
1385

    
1386
 drop:
1387
  rte_free(new);
1388
  new = NULL;
1389
  goto recalc;
1390
}
1391

    
1392
/* Independent call to rte_announce(), used from next hop
   recalculation, outside of rte_update(). new must be non-NULL */
static inline void
rte_announce_i(rtable *tab, unsigned type, net *net, rte *new, rte *old,
               rte *new_best, rte *old_best)
{
  rte_update_lock();
  rte_announce(tab, type, net, new, old, new_best, old_best, NULL);
  rte_update_unlock();
}
1402

    
1403
static inline void
rte_discard(rte *old)        /* Non-filtered route deletion, used during garbage collection */
{
  rte_update_lock();
  /* Passing NULL as the new route withdraws @old from its original sender/source */
  rte_recalculate(old->sender, old->net, NULL, old->attrs->src);
  rte_update_unlock();
}
1410

    
1411
/* Check rtable for best route to given net, whether it would be exported to @p
   (i.e. whether it passes @p's import control and the given export @filter) */
int
rt_examine(rtable *t, net_addr *a, struct proto *p, struct filter *filter)
{
  net *n = net_find(t, a);
  rte *rt = n ? n->routes : NULL;

  if (!rte_is_valid(rt))
    return 0;

  rte_update_lock();

  /* Rest is stripped down export_filter() */
  ea_list *tmpa = make_tmp_attrs(rt, rte_update_pool);
  int v = p->import_control ? p->import_control(p, &rt, &tmpa, rte_update_pool) : 0;
  if (v == RIC_PROCESS)
    v = (f_run(filter, &rt, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) <= F_ACCEPT);

  /* Discard temporary rte (import_control/filter may have COW-ed it) */
  if (rt != n->routes)
    rte_free(rt);

  rte_update_unlock();

  return v > 0;
}
1437

    
1438

    
1439
/**
 * rt_refresh_begin - start a refresh cycle
 * @t: related routing table
 * @c: related channel
 *
 * This function starts a refresh cycle for given routing table and announce
 * hook. The refresh cycle is a sequence where the protocol sends all its valid
 * routes to the routing table (by rte_update()). After that, all protocol
 * routes (more precisely routes with @c as @sender) not sent during the
 * refresh cycle but still in the table from the past are pruned. This is
 * implemented by marking all related routes as stale by REF_STALE flag in
 * rt_refresh_begin(), then marking all related stale routes with REF_DISCARD
 * flag in rt_refresh_end() and then removing such routes in the prune loop.
 */
void
rt_refresh_begin(rtable *t, struct channel *c)
{
  FIB_WALK(&t->fib, net, n)
    {
      rte *e;
      for (e = n->routes; e; e = e->next)
        if (e->sender == c)
          e->flags |= REF_STALE;
    }
  FIB_WALK_END;
}
1465

    
1466
/**
 * rt_refresh_end - end a refresh cycle
 * @t: related routing table
 * @c: related channel
 *
 * This function ends a refresh cycle for given routing table and announce
 * hook. See rt_refresh_begin() for description of refresh cycles.
 */
void
rt_refresh_end(rtable *t, struct channel *c)
{
  int prune = 0;

  /* Routes still stale at the end of the cycle were not re-announced
     by the protocol and are scheduled for removal */
  FIB_WALK(&t->fib, net, n)
    {
      rte *e;
      for (e = n->routes; e; e = e->next)
        if ((e->sender == c) && (e->flags & REF_STALE))
          {
            e->flags |= REF_DISCARD;
            prune = 1;
          }
    }
  FIB_WALK_END;

  if (prune)
    rt_schedule_prune(t);
}
1494

    
1495

    
1496
/**
 * rte_dump - dump a route
 * @e: &rte to be dumped
 *
 * This function dumps contents of a &rte to debug output.
 */
void
rte_dump(rte *e)
{
  net *n = e->net;
  debug("%-1N ", n->n.addr);
  debug("KF=%02x PF=%02x pref=%d lm=%d ", n->n.flags, e->pflags, e->pref, now-e->lastmod);
  rta_dump(e->attrs);
  /* Let the owning protocol dump its private attributes, if it can */
  if (e->attrs->src->proto->proto->dump_attrs)
    e->attrs->src->proto->proto->dump_attrs(e);
  debug("\n");
}
1513

    
1514
/**
 * rt_dump - dump a routing table
 * @t: routing table to be dumped
 *
 * This function dumps contents of a given routing table to debug output.
 */
void
rt_dump(rtable *t)
{
  debug("Dump of routing table <%s>\n", t->name);
#ifdef DEBUGGING
  fib_check(&t->fib);
#endif
  FIB_WALK(&t->fib, net, n)
    {
      rte *e;
      for(e=n->routes; e; e=e->next)
        rte_dump(e);
    }
  FIB_WALK_END;
  debug("\n");
}
1536

    
1537
/**
 * rt_dump_all - dump all routing tables
 *
 * This function dumps contents of all routing tables to debug output.
 */
void
rt_dump_all(void)
{
  rtable *t;

  WALK_LIST(t, routing_tables)
    rt_dump(t);
}
1550

    
1551
static inline void
1552
rt_schedule_hcu(rtable *tab)
1553
{
1554
  if (tab->hcu_scheduled)
1555
    return;
1556

    
1557
  tab->hcu_scheduled = 1;
1558
  ev_schedule(tab->rt_event);
1559
}
1560

    
1561
static inline void
1562
rt_schedule_nhu(rtable *tab)
1563
{
1564
  if (tab->nhu_state == 0)
1565
    ev_schedule(tab->rt_event);
1566

    
1567
  /* state change 0->1, 2->3 */
1568
  tab->nhu_state |= 1;
1569
}
1570

    
1571
void
1572
rt_schedule_prune(rtable *tab)
1573
{
1574
  if (tab->prune_state == 0)
1575
    ev_schedule(tab->rt_event);
1576

    
1577
  /* state change 0->1, 2->3 */
1578
  tab->prune_state |= 1;
1579
}
1580

    
1581

    
1582
/* Deferred-work handler for a routing table: runs whatever of hostcache
   update, next hop update and pruning has been scheduled, in that order. */
static void
rt_event(void *ptr)
{
  rtable *tab = ptr;

  /* Hold the table over the whole run so it cannot be freed under us */
  rt_lock_table(tab);

  if (tab->hcu_scheduled)
    rt_update_hostcache(tab);

  if (tab->nhu_state)
    rt_next_hop_update(tab);

  if (tab->prune_state)
    rt_prune_table(tab);

  rt_unlock_table(tab);
}
1600

    
1601
/* Initialize the routing table structure @t in pool @p.
   NOTE(review): @cf may be NULL — such tables get no rt_event and default
   to NET_IP4; presumably used for internal/auxiliary tables — confirm callers. */
void
rt_setup(pool *p, rtable *t, char *name, struct rtable_config *cf)
{
  bzero(t, sizeof(*t));
  t->name = name;
  t->config = cf;
  t->addr_type = cf ? cf->addr_type : NET_IP4;
  fib_init(&t->fib, p, t->addr_type, sizeof(net), OFFSETOF(net, n), 0, NULL);
  init_list(&t->channels);

  if (cf)
    {
      t->rt_event = ev_new(p);
      t->rt_event->hook = rt_event;
      t->rt_event->data = t;
      t->gc_time = now;
    }
}
1619

    
1620
/**
 * rt_init - initialize routing tables
 *
 * This function is called during BIRD startup. It initializes the
 * routing table module.
 */
void
rt_init(void)
{
  /* rta module first, then our own pools/slabs on top of the root pool */
  rta_init();
  rt_table_pool = rp_new(&root_pool, "Routing tables");
  rte_update_pool = lp_new(rt_table_pool, 4080);
  rte_slab = sl_new(rt_table_pool, sizeof(rte));
  init_list(&routing_tables);
}
1635

    
1636

    
1637
/**
 * rt_prune_table - prune a routing table
 *
 * The prune loop scans routing tables and removes routes belonging to flushing
 * protocols, discarded routes and also stale network entries. It is called from
 * rt_event(). The event is rescheduled if the current iteration do not finish
 * the table. The pruning is directed by the prune state (@prune_state),
 * specifying whether the prune cycle is scheduled or running, and there
 * is also a persistent pruning iterator (@prune_fit).
 *
 * The prune loop is used also for channel flushing. For this purpose, the
 * channels to flush are marked before the iteration and notified after the
 * iteration.
 */
static void
rt_prune_table(rtable *tab)
{
  struct fib_iterator *fit = &tab->prune_fit;
  int limit = 512;        /* Max routes discarded per invocation, to bound latency */

  struct channel *c;
  node *n, *x;

  DBG("Pruning route table %s\n", tab->name);
#ifdef DEBUGGING
  fib_check(&tab->fib);
#endif

  if (tab->prune_state == 0)
    return;

  if (tab->prune_state == 1)
  {
    /* Mark channels to flush */
    WALK_LIST2(c, n, tab->channels, table_node)
      if (c->channel_state == CS_FLUSHING)
        c->flush_active = 1;

    FIB_ITERATE_INIT(fit, &tab->fib);
    tab->prune_state = 2;
  }

again:
  FIB_ITERATE_START(&tab->fib, fit, net, n)
    {
      rte *e;

    rescan:
      for (e=n->routes; e; e=e->next)
        if (e->sender->flush_active || (e->flags & REF_DISCARD))
          {
            if (limit <= 0)
              {
                /* Out of budget: park the iterator and continue later */
                FIB_ITERATE_PUT(fit);
                ev_schedule(tab->rt_event);
                return;
              }

            rte_discard(e);
            limit--;

            /* rte_discard() relinked the list; restart the scan of this net */
            goto rescan;
          }

      if (!n->routes)                /* Orphaned FIB entry */
        {
          FIB_ITERATE_PUT(fit);
          fib_delete(&tab->fib, n);
          goto again;
        }
    }
  FIB_ITERATE_END;

#ifdef DEBUGGING
  fib_check(&tab->fib);
#endif

  tab->gc_counter = 0;
  tab->gc_time = now;

  /* state change 2->0, 3->1 */
  tab->prune_state &= 1;

  /* A rerun was requested while we were running */
  if (tab->prune_state > 0)
    ev_schedule(tab->rt_event);

  /* FIXME: This should be handled in a better way */
  rt_prune_sources();

  /* Close flushed channels */
  WALK_LIST2_DELSAFE(c, n, x, tab->channels, table_node)
    if (c->flush_active)
      {
        c->flush_active = 0;
        channel_set_state(c, CS_DOWN);
      }

  return;
}
1736

    
1737
/* Pre-configuration hook: create the two implicit master tables,
   one per address family. */
void
rt_preconfig(struct config *c)
{
  init_list(&c->tables);

  rt_new_table(cf_get_symbol("master4"), NET_IP4);
  rt_new_table(cf_get_symbol("master6"), NET_IP6);
}
1745

    
1746

    
1747
/*
 * Some functions for handling internal next hop updates
 * triggered by rt_schedule_nhu().
 */
1751

    
1752
static inline int
1753
rta_next_hop_outdated(rta *a)
1754
{
1755
  struct hostentry *he = a->hostentry;
1756

    
1757
  if (!he)
1758
    return 0;
1759

    
1760
  if (!he->src)
1761
    return a->dest != RTD_UNREACHABLE;
1762

    
1763
  return (a->dest != he->dest) || (a->igp_metric != he->igp_metric) ||
1764
    !nexthop_same(&(a->nh), he->nh);
1765
}
1766

    
1767
static inline void
1768
rta_apply_hostentry(rta *a, struct hostentry *he)
1769
{
1770
  a->hostentry = he;
1771
  
1772
  a->dest = he->dest;
1773
  a->igp_metric = he->igp_metric;
1774

    
1775
  if (a->nh.labels_append == 0)
1776
  {
1777
    a->nh = *(he->nh);
1778
    a->nh.labels_append = 0;
1779
    return;
1780
  }
1781

    
1782
  int labels_append = a->nh.labels_append;
1783
  u32 label_stack[MPLS_MAX_LABEL_STACK];
1784
  memcpy(label_stack, a->nh.label, labels_append * sizeof(u32));
1785

    
1786
  struct nexthop *nhp = NULL;
1787
  for (struct nexthop *nh = he->nh; nh; nh = nh->next)
1788
  {
1789
    nhp = nhp ? (nhp->next = lp_alloc(rte_update_pool, NEXTHOP_MAX_SIZE)) : &(a->nh);
1790
    nhp->gw = ipa_nonzero(nh->gw) ? nh->gw : he->link;
1791
    nhp->iface = nh->iface; /* FIXME: This is at least strange, if not utter nonsense. */
1792
    nhp->weight = nh->weight;
1793
    nhp->labels = nh->labels + labels_append;
1794
    nhp->labels_append = labels_append;
1795
    if (nhp->labels <= MPLS_MAX_LABEL_STACK)
1796
    {
1797
      memcpy(nhp->label, nh->label, nh->labels * sizeof(u32));
1798
      memcpy(&(nhp->label[nh->labels]), label_stack, labels_append * sizeof(u32));
1799
    }
1800
    else
1801
    {
1802
      log(L_WARN "Sum of label stack sizes %d + %d = %d exceedes allowed maximum (%d)",
1803
          nh->labels, labels_append, nhp->labels, MPLS_MAX_LABEL_STACK);
1804
      a->dest = RTD_UNREACHABLE;
1805
      break;
1806
    }
1807
  }
1808
}
1809

    
1810
/* Create an updated copy of @old with its host entry re-applied to the
   attributes. The copy gets freshly cached attrs; the caller is responsible
   for freeing @old. */
static inline rte *
rt_next_hop_update_rte(rtable *tab UNUSED, rte *old)
{
  /* Work on a stack copy of the attrs so the cached original stays intact */
  rta *ap = alloca(RTA_MAX_SIZE);
  memcpy(ap, old->attrs, rta_size(old->attrs));
  rta_apply_hostentry(ap, old->attrs->hostentry);
  ap->aflags = 0;        /* Force rta_lookup() to treat it as uncached */

  rte *e = sl_alloc(rte_slab);
  memcpy(e, old, sizeof(rte));
  e->attrs = rta_lookup(ap);

  return e;
}
1824

    
1825
/* Recompute next hops for all outdated routes in @n, re-elect the best
   route and announce the changes. Returns the number of updated routes
   (used by the caller as work budget). */
static inline int
rt_next_hop_update_net(rtable *tab, net *n)
{
  rte **k, *e, *new, *old_best, **new_best;
  int count = 0;
  int free_old_best = 0;

  old_best = n->routes;
  if (!old_best)
    return 0;

  /* Replace each outdated route in place by an updated copy */
  for (k = &n->routes; e = *k; k = &e->next)
    if (rta_next_hop_outdated(e->attrs))
      {
        new = rt_next_hop_update_rte(tab, e);
        *k = new;

        rte_announce_i(tab, RA_ANY, n, new, e, NULL, NULL);
        rte_trace_in(D_ROUTES, new->sender->proto, new, "updated");

        /* Call a pre-comparison hook */
        /* Not really an efficient way to compute this */
        if (e->attrs->src->proto->rte_recalculate)
          e->attrs->src->proto->rte_recalculate(tab, n, new, e, NULL);

        if (e != old_best)
          rte_free_quick(e);
        else /* Freeing of the old best rte is postponed */
          free_old_best = 1;

        e = new;
        count++;
      }

  if (!count)
    return 0;

  /* Find the new best route */
  new_best = NULL;
  for (k = &n->routes; e = *k; k = &e->next)
    {
      if (!new_best || rte_better(e, *new_best))
        new_best = k;
    }

  /* Relink the new best route to the first position */
  new = *new_best;
  if (new != n->routes)
    {
      *new_best = new->next;
      new->next = n->routes;
      n->routes = new;
    }

  /* Announce the new best route */
  if (new != old_best)
    {
      rte_announce_i(tab, RA_OPTIMAL, n, new, old_best, NULL, NULL);
      rte_trace_in(D_ROUTES, new->sender->proto, new, "updated [best]");
    }

  /* FIXME: Better announcement of merged routes */
  rte_announce_i(tab, RA_MERGED, n, new, old_best, new, old_best);

  if (free_old_best)
    rte_free_quick(old_best);

  return count;
}
1894

    
1895
/* Incremental next hop update walk over @tab, driven by nhu_state
   (0 idle, 1 scheduled, 2 running, 3 running + rerun requested).
   Processes a bounded amount of work and reschedules itself when needed. */
static void
rt_next_hop_update(rtable *tab)
{
  struct fib_iterator *fit = &tab->nhu_fit;
  int max_feed = 32;        /* Work budget per invocation */

  if (tab->nhu_state == 0)
    return;

  if (tab->nhu_state == 1)
    {
      FIB_ITERATE_INIT(fit, &tab->fib);
      tab->nhu_state = 2;
    }

  FIB_ITERATE_START(&tab->fib, fit, net, n)
    {
      if (max_feed <= 0)
        {
          /* Out of budget: park the iterator and continue later */
          FIB_ITERATE_PUT(fit);
          ev_schedule(tab->rt_event);
          return;
        }
      max_feed -= rt_next_hop_update_net(tab, n);
    }
  FIB_ITERATE_END;

  /* state change 2->0, 3->1 */
  tab->nhu_state &= 1;

  if (tab->nhu_state > 0)
    ev_schedule(tab->rt_event);
}
1928

    
1929

    
1930
struct rtable_config *
1931
rt_new_table(struct symbol *s, uint addr_type)
1932
{
1933
  /* Hack that allows to 'redefine' the master table */
1934
  if ((s->class == SYM_TABLE) &&
1935
      (s->def == new_config->def_tables[addr_type]) &&
1936
      ((addr_type == NET_IP4) || (addr_type == NET_IP6)))
1937
    return s->def;
1938

    
1939
  struct rtable_config *c = cfg_allocz(sizeof(struct rtable_config));
1940

    
1941
  cf_define_symbol(s, SYM_TABLE, c);
1942
  c->name = s->name;
1943
  c->addr_type = addr_type;
1944
  c->gc_max_ops = 1000;
1945
  c->gc_min_time = 5;
1946

    
1947
  add_tail(&new_config->tables, &c->n);
1948

    
1949
  /* First table of each type is kept as default */
1950
  if (! new_config->def_tables[addr_type])
1951
    new_config->def_tables[addr_type] = c;
1952

    
1953
  return c;
1954
}
1955

    
1956
/**
 * rt_lock_table - lock a routing table
 * @r: routing table to be locked
 *
 * Lock a routing table, because it's in use by a protocol,
 * preventing it from being freed when it gets undefined in a new
 * configuration.
 */
void
rt_lock_table(rtable *r)
{
  /* Plain reference count; released by rt_unlock_table() */
  r->use_count++;
}
1969

    
1970
/**
 * rt_unlock_table - unlock a routing table
 * @r: routing table to be unlocked
 *
 * Unlock a routing table formerly locked by rt_lock_table(),
 * that is decrease its use count and delete it if it's scheduled
 * for deletion by configuration changes.
 */
void
rt_unlock_table(rtable *r)
{
  if (!--r->use_count && r->deleted)
    {
      struct config *conf = r->deleted;
      DBG("Deleting routing table %s\n", r->name);
      /* Detach from the config, free the hostcache (if any), then
       * tear down the FIB and the table structure itself. */
      r->config->table = NULL;
      if (r->hostcache)
        rt_free_hostcache(r);
      rem_node(&r->n);
      fib_free(&r->fib);
      rfree(r->rt_event);
      mb_free(r);
      /* The old config was kept alive as an obstacle; release it last */
      config_del_obstacle(conf);
    }
}
1995

    
1996
/**
 * rt_commit - commit new routing table configuration
 * @new: new configuration
 * @old: original configuration or %NULL if it's boot time config
 *
 * Scan differences between @old and @new configuration and modify
 * the routing tables according to these changes. If @new defines a
 * previously unknown table, create it, if it omits a table existing
 * in @old, schedule it for deletion (it gets deleted when all protocols
 * disconnect from it by calling rt_unlock_table()), if it exists
 * in both configurations, leave it unchanged.
 */
void
rt_commit(struct config *new, struct config *old)
{
  struct rtable_config *o, *r;

  DBG("rt_commit:\n");
  if (old)
    {
      WALK_LIST(o, old->tables)
        {
          rtable *ot = o->table;
          if (!ot->deleted)
            {
              /* Does the new config still define a table of this name? */
              struct symbol *sym = cf_find_symbol(new, o->name);
              if (sym && sym->class == SYM_TABLE && !new->shutdown)
                {
                  /* Keep the existing table, just rebind it to the new config */
                  DBG("\t%s: same\n", o->name);
                  r = sym->def;
                  r->table = ot;
                  ot->name = r->name;
                  ot->config = r;
                  if (o->sorted != r->sorted)
                    log(L_WARN "Reconfiguration of rtable sorted flag not implemented");
                }
              else
                {
                  /* Table disappeared -- schedule deletion; the old config
                   * stays alive as an obstacle until the table is freed.
                   * The lock/unlock pair frees it right away if unused. */
                  DBG("\t%s: deleted\n", o->name);
                  ot->deleted = old;
                  config_add_obstacle(old);
                  rt_lock_table(ot);
                  rt_unlock_table(ot);
                }
            }
        }
    }

  /* Create tables that appear in @new but have no live instance yet */
  WALK_LIST(r, new->tables)
    if (!r->table)
      {
        rtable *t = mb_alloc(rt_table_pool, sizeof(struct rtable));
        DBG("\t%s: created\n", r->name);
        rt_setup(rt_table_pool, t, r->name, r);
        add_tail(&routing_tables, &t->n);
        r->table = t;
      }
  DBG("\tdone\n");
}
2055

    
2056
static inline void
2057
do_feed_channel(struct channel *c, net *n, rte *e)
2058
{
2059
  rte_update_lock();
2060
  if (c->ra_mode == RA_ACCEPTED)
2061
    rt_notify_accepted(c, n, e, NULL, NULL, c->refeeding ? 2 : 1);
2062
  else if (c->ra_mode == RA_MERGED)
2063
    rt_notify_merged(c, n, NULL, NULL, e, c->refeeding ? e : NULL, c->refeeding);
2064
  else /* RA_BASIC */
2065
    rt_notify_basic(c, n, e, c->refeeding ? e : NULL, c->refeeding);
2066
  rte_update_unlock();
2067
}
2068

    
2069
/**
 * rt_feed_channel - advertise all routes to a channel
 * @c: channel to be fed
 *
 * This function performs one pass of advertisement of routes to a channel that
 * is in the ES_FEEDING state. It is called by the protocol code as long as it
 * has something to do. (We avoid transferring all the routes in single pass in
 * order not to monopolize CPU time.)
 *
 * Returns 1 when the feed is complete, 0 when more passes are needed.
 */
int
rt_feed_channel(struct channel *c)
{
  struct fib_iterator *fit = &c->feed_fit;
  int max_feed = 256;        /* Per-pass work limit */

  ASSERT(c->export_state == ES_FEEDING);

  /* First pass -- attach the iterator to the table FIB */
  if (!c->feed_active)
    {
      FIB_ITERATE_INIT(fit, &c->table->fib);
      c->feed_active = 1;
    }

  FIB_ITERATE_START(&c->table->fib, fit, net, n)
    {
      rte *e = n->routes;
      if (max_feed <= 0)
        {
          /* Budget exhausted -- park the iterator, resume next call */
          FIB_ITERATE_PUT(fit);
          return 0;
        }

      /* FIXME: perhaps we should change feed for RA_ACCEPTED to not use 'new' */

      /* Single-announcement modes feed only the first (best) route,
       * and only if it is valid */
      if ((c->ra_mode == RA_OPTIMAL) ||
          (c->ra_mode == RA_ACCEPTED) ||
          (c->ra_mode == RA_MERGED))
        if (rte_is_valid(e))
          {
            /* In the meantime, the protocol may fell down */
            if (c->export_state != ES_FEEDING)
              goto done;

            do_feed_channel(c, n, e);
            max_feed--;
          }

      /* RA_ANY feeds every valid route of the network */
      if (c->ra_mode == RA_ANY)
        for(e = n->routes; e; e = e->next)
          {
            /* In the meantime, the protocol may fell down */
            if (c->export_state != ES_FEEDING)
              goto done;

            if (!rte_is_valid(e))
              continue;

            do_feed_channel(c, n, e);
            max_feed--;
          }
    }
  FIB_ITERATE_END;

done:
  c->feed_active = 0;
  return 1;
}
2136

    
2137
/**
2138
 * rt_feed_baby_abort - abort protocol feeding
2139
 * @c: channel
2140
 *
2141
 * This function is called by the protocol code when the protocol stops or
2142
 * ceases to exist during the feeding.
2143
 */
2144
void
2145
rt_feed_channel_abort(struct channel *c)
2146
{
2147
  if (c->feed_active)
2148
    {
2149
      /* Unlink the iterator */
2150
      fit_get(&c->table->fib, &c->feed_fit);
2151
      c->feed_active = 0;
2152
    }
2153
}
2154

    
2155
/* Fold a pointer value into a hash by XOR-mixing shifted copies of it. */
static inline unsigned
ptr_hash(void *ptr)
{
  uintptr_t v = (uintptr_t) ptr;
  uintptr_t mixed = v ^ (v << 8);
  mixed ^= v >> 16;
  return mixed;
}
2161

    
2162
static inline u32
2163
hc_hash(ip_addr a, rtable *dep)
2164
{
2165
  return ipa_hash(a) ^ ptr_hash(dep);
2166
}
2167

    
2168
static inline void
2169
hc_insert(struct hostcache *hc, struct hostentry *he)
2170
{
2171
  uint k = he->hash_key >> hc->hash_shift;
2172
  he->next = hc->hash_table[k];
2173
  hc->hash_table[k] = he;
2174
}
2175

    
2176
static inline void
2177
hc_remove(struct hostcache *hc, struct hostentry *he)
2178
{
2179
  struct hostentry **hep;
2180
  uint k = he->hash_key >> hc->hash_shift;
2181

    
2182
  for (hep = &hc->hash_table[k]; *hep != he; hep = &(*hep)->next);
2183
  *hep = he->next;
2184
}
2185

    
2186
/* Hostcache hash table tuning.
 *
 * HC_HI_MARK and HC_LO_MARK are operator-style macros: they are spliced
 * directly after the table size expression in hc_alloc_table()
 * ("hsize HC_HI_MARK" expands to "hsize *4"), giving the grow threshold
 * (4x entries per size) and shrink threshold (size/5 entries). */
#define HC_DEF_ORDER 10
#define HC_HI_MARK *4
#define HC_HI_STEP 2
#define HC_HI_ORDER 16                        /* Must be at most 16 */
#define HC_LO_MARK /5
#define HC_LO_STEP 2
#define HC_LO_ORDER 10
2193

    
2194
/*
 * hc_alloc_table - allocate a zeroed hash table of 2^order buckets for @hc
 * and set the resize watermarks (hash_max / hash_min) accordingly.
 * Note HC_HI_MARK / HC_LO_MARK expand to "*4" and "/5" (see above).
 */
static void
hc_alloc_table(struct hostcache *hc, unsigned order)
{
  uint hsize = 1 << order;
  hc->hash_order = order;
  /* Keys are 32-bit; bucket index uses the top @order bits */
  hc->hash_shift = 32 - order;
  hc->hash_max = (order >= HC_HI_ORDER) ? ~0U : (hsize HC_HI_MARK);
  hc->hash_min = (order <= HC_LO_ORDER) ?  0U : (hsize HC_LO_MARK);

  hc->hash_table = mb_allocz(rt_table_pool, hsize * sizeof(struct hostentry *));
}
2205

    
2206
static void
2207
hc_resize(struct hostcache *hc, unsigned new_order)
2208
{
2209
  struct hostentry **old_table = hc->hash_table;
2210
  struct hostentry *he, *hen;
2211
  uint old_size = 1 << hc->hash_order;
2212
  uint i;
2213

    
2214
  hc_alloc_table(hc, new_order);
2215
  for (i = 0; i < old_size; i++)
2216
    for (he = old_table[i]; he != NULL; he=hen)
2217
      {
2218
        hen = he->next;
2219
        hc_insert(hc, he);
2220
      }
2221
  mb_free(old_table);
2222
}
2223

    
2224
static struct hostentry *
2225
hc_new_hostentry(struct hostcache *hc, ip_addr a, ip_addr ll, rtable *dep, unsigned k)
2226
{
2227
  struct hostentry *he = sl_alloc(hc->slab);
2228

    
2229
  he->addr = a;
2230
  he->link = ll;
2231
  he->tab = dep;
2232
  he->hash_key = k;
2233
  he->uc = 0;
2234
  he->src = NULL;
2235

    
2236
  add_tail(&hc->hostentries, &he->ln);
2237
  hc_insert(hc, he);
2238

    
2239
  hc->hash_items++;
2240
  if (hc->hash_items > hc->hash_max)
2241
    hc_resize(hc, hc->hash_order + HC_HI_STEP);
2242

    
2243
  return he;
2244
}
2245

    
2246
static void
2247
hc_delete_hostentry(struct hostcache *hc, struct hostentry *he)
2248
{
2249
  rta_free(he->src);
2250

    
2251
  rem_node(&he->ln);
2252
  hc_remove(hc, he);
2253
  sl_free(hc->slab, he);
2254

    
2255
  hc->hash_items--;
2256
  if (hc->hash_items < hc->hash_min)
2257
    hc_resize(hc, hc->hash_order - HC_LO_STEP);
2258
}
2259

    
2260
static void
2261
rt_init_hostcache(rtable *tab)
2262
{
2263
  struct hostcache *hc = mb_allocz(rt_table_pool, sizeof(struct hostcache));
2264
  init_list(&hc->hostentries);
2265

    
2266
  hc->hash_items = 0;
2267
  hc_alloc_table(hc, HC_DEF_ORDER);
2268
  hc->slab = sl_new(rt_table_pool, sizeof(struct hostentry));
2269

    
2270
  hc->lp = lp_new(rt_table_pool, 1008);
2271
  hc->trie = f_new_trie(hc->lp, sizeof(struct f_trie_node));
2272

    
2273
  tab->hostcache = hc;
2274
}
2275

    
2276
static void
2277
rt_free_hostcache(rtable *tab)
2278
{
2279
  struct hostcache *hc = tab->hostcache;
2280

    
2281
  node *n;
2282
  WALK_LIST(n, hc->hostentries)
2283
    {
2284
      struct hostentry *he = SKIP_BACK(struct hostentry, ln, n);
2285
      rta_free(he->src);
2286

    
2287
      if (he->uc)
2288
        log(L_ERR "Hostcache is not empty in table %s", tab->name);
2289
    }
2290

    
2291
  rfree(hc->slab);
2292
  rfree(hc->lp);
2293
  mb_free(hc->hash_table);
2294
  mb_free(hc);
2295
}
2296

    
2297
static void
2298
rt_notify_hostcache(rtable *tab, net *net)
2299
{
2300
  if (tab->hcu_scheduled)
2301
    return;
2302

    
2303
  if (trie_match_net(tab->hostcache->trie, net->n.addr))
2304
    rt_schedule_hcu(tab);
2305
}
2306

    
2307
static int
2308
if_local_addr(ip_addr a, struct iface *i)
2309
{
2310
  struct ifa *b;
2311

    
2312
  WALK_LIST(b, i->addrs)
2313
    if (ipa_equal(a, b->ip))
2314
      return 1;
2315

    
2316
  return 0;
2317
}
2318

    
2319
static u32 
2320
rt_get_igp_metric(rte *rt)
2321
{
2322
  eattr *ea = ea_find(rt->attrs->eattrs, EA_GEN_IGP_METRIC);
2323

    
2324
  if (ea)
2325
    return ea->u.data;
2326

    
2327
  rta *a = rt->attrs;
2328

    
2329
#ifdef CONFIG_OSPF
2330
  if ((a->source == RTS_OSPF) ||
2331
      (a->source == RTS_OSPF_IA) ||
2332
      (a->source == RTS_OSPF_EXT1))
2333
    return rt->u.ospf.metric1;
2334
#endif
2335

    
2336
#ifdef CONFIG_RIP
2337
  if (a->source == RTS_RIP)
2338
    return rt->u.rip.metric;
2339
#endif
2340

    
2341
  if (a->source == RTS_DEVICE)
2342
    return 0;
2343

    
2344
  return IGP_METRIC_UNKNOWN;
2345
}
2346

    
2347
/*
 * rt_update_hostentry - re-resolve the cached host address @he->addr
 * against table @tab. Returns nonzero when the resolved source rta
 * changed, in which case dependent tables need a next hop update.
 */
static int
rt_update_hostentry(rtable *tab, struct hostentry *he)
{
  rta *old_src = he->src;
  int pxlen = 0;

  /* Reset the hostentry */
  he->src = NULL;
  he->dest = RTD_UNREACHABLE;
  he->igp_metric = 0;

  /* Longest-prefix-match lookup for the host address */
  net_addr he_addr;
  net_fill_ip_host(&he_addr, he->addr);
  net *n = net_route(tab, &he_addr);
  if (n)
    {
      rte *e = n->routes;        /* Best route for the covering prefix */
      rta *a = e->attrs;
      pxlen = n->n.addr->pxlen;

      if (a->hostentry)
        {
          /* Recursive route should not depend on another recursive route */
          log(L_WARN "Next hop address %I resolvable through recursive route for %N",
              he->addr, n->n.addr);
          goto done;
        }

      /* NOTE(review): '!a->next' -- verify against the rta declaration
       * whether this (rather than a->nh.next) is the intended singlepath
       * test in this branch */
      if ((a->dest == RTD_UNICAST) && ipa_zero(a->nh.gw) && !a->next)
        { /* We have singlepath device route */
          if (if_local_addr(he->addr, a->nh.iface))
            {
              /* The host address is a local address, this is not valid */
              log(L_WARN "Next hop address %I is a local address of iface %s",
                  he->addr, a->nh.iface->name);
              goto done;
            }

          /* The host is directly reachable, use link as a gateway */
          he->nh = NULL;
          he->dest = RTD_UNICAST;
        }
      else
        {
          /* The host is reachable through some route entry */
          he->nh = (&a->nh);
          he->dest = a->dest;
        }

      he->src = rta_clone(a);
      he->igp_metric = rt_get_igp_metric(e);
    }

 done:
  /* Add a prefix range to the trie, so changes inside it trigger
   * rt_notify_hostcache() */
  trie_add_prefix(tab->hostcache->trie, &he_addr, pxlen, he_addr.pxlen);

  rta_free(old_src);
  return old_src != he->src;
}
2407

    
2408
static void
2409
rt_update_hostcache(rtable *tab)
2410
{
2411
  struct hostcache *hc = tab->hostcache;
2412
  struct hostentry *he;
2413
  node *n, *x;
2414

    
2415
  /* Reset the trie */
2416
  lp_flush(hc->lp);
2417
  hc->trie = f_new_trie(hc->lp, sizeof(struct f_trie_node));
2418

    
2419
  WALK_LIST_DELSAFE(n, x, hc->hostentries)
2420
    {
2421
      he = SKIP_BACK(struct hostentry, ln, n);
2422
      if (!he->uc)
2423
        {
2424
          hc_delete_hostentry(hc, he);
2425
          continue;
2426
        }
2427

    
2428
      if (rt_update_hostentry(tab, he))
2429
        rt_schedule_nhu(he->tab);
2430
    }
2431

    
2432
  tab->hcu_scheduled = 0;
2433
}
2434

    
2435
static struct hostentry *
2436
rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep)
2437
{
2438
  struct hostentry *he;
2439

    
2440
  if (!tab->hostcache)
2441
    rt_init_hostcache(tab);
2442

    
2443
  u32 k = hc_hash(a, dep);
2444
  struct hostcache *hc = tab->hostcache;
2445
  for (he = hc->hash_table[k >> hc->hash_shift]; he != NULL; he = he->next)
2446
    if (ipa_equal(he->addr, a) && (he->tab == dep))
2447
      return he;
2448

    
2449
  he = hc_new_hostentry(hc, a, ll, dep, k);
2450
  rt_update_hostentry(tab, he);
2451
  return he;
2452
}
2453

    
2454
void
2455
rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr gw, ip_addr ll)
2456
{
2457
  rta_apply_hostentry(a, rt_get_hostentry(tab, gw, ipa_zero(ll) ? gw : ll, dep));
2458
}
2459

    
2460

    
2461
/*
2462
 *  CLI commands
2463
 */
2464

    
2465
static byte *
2466
rt_format_via(rte *e)
2467
{
2468
  rta *a = e->attrs;
2469

    
2470
  /* Max text length w/o IP addr and interface name is 16 */
2471
  static byte via[IPA_MAX_TEXT_LENGTH+sizeof(a->nh.iface->name)+16];
2472

    
2473
  switch (a->dest)
2474
    {
2475
    case RTD_UNICAST:        bsprintf(via, "unicast"); break;
2476
    case RTD_BLACKHOLE:        bsprintf(via, "blackhole"); break;
2477
    case RTD_UNREACHABLE:        bsprintf(via, "unreachable"); break;
2478
    case RTD_PROHIBIT:        bsprintf(via, "prohibited"); break;
2479
    default:                bsprintf(via, "???");
2480
    }
2481
  return via;
2482
}
2483

    
2484
static void
2485
rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, ea_list *tmpa)
2486
{
2487
  byte from[IPA_MAX_TEXT_LENGTH+8];
2488
  byte tm[TM_DATETIME_BUFFER_SIZE], info[256];
2489
  rta *a = e->attrs;
2490
  int primary = (e->net->routes == e);
2491
  int sync_error = (e->net->n.flags & KRF_SYNC_ERROR);
2492
  void (*get_route_info)(struct rte *, byte *buf, struct ea_list *attrs);
2493
  struct nexthop *nh;
2494

    
2495
  tm_format_datetime(tm, &config->tf_route, e->lastmod);
2496
  if (ipa_nonzero(a->from) && !ipa_equal(a->from, a->nh.gw))
2497
    bsprintf(from, " from %I", a->from);
2498
  else
2499
    from[0] = 0;
2500

    
2501
  get_route_info = a->src->proto->proto->get_route_info;
2502
  if (get_route_info || d->verbose)
2503
    {
2504
      /* Need to normalize the extended attributes */
2505
      ea_list *t = tmpa;
2506
      t = ea_append(t, a->eattrs);
2507
      tmpa = alloca(ea_scan(t));
2508
      ea_merge(t, tmpa);
2509
      ea_sort(tmpa);
2510
    }
2511
  if (get_route_info)
2512
    get_route_info(e, info, tmpa);
2513
  else
2514
    bsprintf(info, " (%d)", e->pref);
2515
  cli_printf(c, -1007, "%-18s %s [%s %s%s]%s%s", ia, rt_format_via(e), a->src->proto->name,
2516
             tm, from, primary ? (sync_error ? " !" : " *") : "", info);
2517
  for (nh = &(a->nh); nh; nh = nh->next)
2518
    {
2519
      char ls[MPLS_MAX_LABEL_STACK*8 + 5]; char *lsp = ls;
2520
      if (nh->labels)
2521
        {
2522
          lsp += bsprintf(lsp, " mpls %d", nh->label[0]);
2523
          for (int i=1;i<nh->labels; i++)
2524
            lsp += bsprintf(lsp, "/%d", nh->label[i]);
2525
          *lsp++ = '\0';
2526
        }
2527
      if (a->nh.next)
2528
        cli_printf(c, -1007, "\tvia %I%s on %s weight %d", nh->gw, (nh->labels ? ls : ""), nh->iface->name, nh->weight + 1);
2529
      else
2530
        cli_printf(c, -1007, "\tvia %I%s on %s", nh->gw, (nh->labels ? ls : ""), nh->iface->name);
2531
    }
2532
  if (d->verbose)
2533
    rta_show(c, a, tmpa);
2534

    
2535
}
2536

    
2537
/*
 * rt_show_net - print the routes of network @n to the CLI according to
 * the options in @d (filtering, export simulation, primary-only, stats).
 */
static void
rt_show_net(struct cli *c, net *n, struct rt_show_data *d)
{
  rte *e, *ee;
  byte ia[NET_MAX_TEXT_LENGTH+1];
  struct ea_list *tmpa;
  struct channel *ec = d->export_channel;
  int first = 1;        /* Count the network only once in net_counter */
  int pass = 0;         /* Set once a single-route export mode has consumed
                           its route; remaining routes are only counted */

  bsnprintf(ia, sizeof(ia), "%N", n->n.addr);

  for (e = n->routes; e; e = e->next)
    {
      if (rte_is_filtered(e) != d->filtered)
        continue;

      d->rt_counter++;
      d->net_counter += first;
      first = 0;

      if (pass)
        continue;

      ee = e;        /* Remember the original; filters may replace @e */
      rte_update_lock();                /* We use the update buffer for filtering */
      tmpa = make_tmp_attrs(e, rte_update_pool);

      /* Special case for merged export */
      if ((d->export_mode == RSEM_EXPORT) && (ec->ra_mode == RA_MERGED))
        {
          rte *rt_free;
          e = rt_export_merged(ec, n, &rt_free, &tmpa, rte_update_pool, 1);
          pass = 1;

          if (!e)
          { e = ee; goto skip; }
        }
      else if (d->export_mode)
        {
          /* Simulate the export pipeline: import control first */
          struct proto *ep = d->export_protocol;
          int ic = ep->import_control ? ep->import_control(ep, &e, &tmpa, rte_update_pool) : 0;

          if (ec->ra_mode == RA_OPTIMAL || ec->ra_mode == RA_MERGED)
            pass = 1;

          if (ic < 0)
            goto skip;

          if (d->export_mode > RSEM_PREEXPORT)
            {
              /*
               * FIXME - This shows what should be exported according to current
               * filters, but not what was really exported. 'configure soft'
               * command may change the export filter and do not update routes.
               */
              int do_export = (ic > 0) ||
                (f_run(ec->out_filter, &e, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) <= F_ACCEPT);

              /* 'export' wants accepted routes, 'noexport' the rejected ones */
              if (do_export != (d->export_mode == RSEM_EXPORT))
                goto skip;

              if ((d->export_mode == RSEM_EXPORT) && (ec->ra_mode == RA_ACCEPTED))
                pass = 1;
            }
        }

      if (d->show_protocol && (d->show_protocol != e->attrs->src->proto))
        goto skip;

      /* Apply the user-supplied 'where' filter */
      if (f_run(d->filter, &e, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) > F_ACCEPT)
        goto skip;

      d->show_counter++;
      if (d->stats < 2)
        rt_show_rte(c, ia, e, d, tmpa);
      ia[0] = 0;        /* Print the network address only once */

    skip:
      /* Free a filter/export-produced copy and restore the original rte */
      if (e != ee)
      {
        rte_free(e);
        e = ee;
      }
      rte_update_unlock();

      if (d->primary_only)
        break;
    }
}
2627

    
2628
static struct channel *
2629
rt_show_export_channel(struct rt_show_data *d)
2630
{
2631
  if (! d->export_protocol->rt_notify)
2632
    return NULL;
2633

    
2634
  return proto_find_channel_by_table(d->export_protocol, d->table);
2635
}
2636

    
2637
/*
 * rt_show_cont - CLI continuation hook for 'show route': dumps the table
 * in batches of @max networks per call, parking the FIB iterator between
 * calls so the CLI stays responsive during large dumps.
 */
static void
rt_show_cont(struct cli *c)
{
  struct rt_show_data *d = c->rover;
#ifdef DEBUGGING
  unsigned max = 4;        /* Tiny batches make iterator bugs surface */
#else
  unsigned max = 64;
#endif
  struct fib *fib = &d->table->fib;
  struct fib_iterator *it = &d->fit;

  if (d->export_mode)
    {
      /* Ensure we have current export channel */
      d->export_channel = rt_show_export_channel(d);
      if (!d->export_channel || (d->export_channel->export_state == ES_DOWN))
        {
          cli_printf(c, 8005, "Channel is down");
          goto done;
        }
    }

  FIB_ITERATE_START(fib, it, net, n)
    {
      if (!max--)
        {
          /* Batch done -- keep the iterator parked, we will be called again */
          FIB_ITERATE_PUT(it);
          return;
        }
      rt_show_net(c, n, d);
    }
  FIB_ITERATE_END;
  if (d->stats)
    cli_printf(c, 14, "%d of %d routes for %d networks", d->show_counter, d->rt_counter, d->net_counter);
  else
    cli_printf(c, 0, "");
done:
  /* Finished (or failed) -- detach the continuation hooks */
  c->cont = c->cleanup = NULL;
}
2677

    
2678
static void
2679
rt_show_cleanup(struct cli *c)
2680
{
2681
  struct rt_show_data *d = c->rover;
2682

    
2683
  /* Unlink the iterator */
2684
  fit_get(&d->table->fib, &d->fit);
2685
}
2686

    
2687
static inline rtable *
2688
rt_show_get_table(struct proto *p)
2689
{
2690
  /* FIXME: Use a better way to handle multi-channel protocols */
2691

    
2692
  if (p->main_channel)
2693
    return p->main_channel->table;
2694

    
2695
  if (!EMPTY_LIST(p->channels))
2696
    return ((struct channel *) HEAD(p->channels))->table;
2697

    
2698
  return NULL;
2699
}
2700

    
2701
/*
 * rt_show - entry point of the 'show route' CLI command. Either dumps a
 * whole table asynchronously via the rt_show_cont continuation, or looks
 * up a single address/prefix and prints it synchronously.
 */
void
rt_show(struct rt_show_data *d)
{
  net *n;

  /* Default is either a master table or a table related to a respective protocol */
  if (!d->table && d->export_protocol) d->table = rt_show_get_table(d->export_protocol);
  if (!d->table && d->show_protocol) d->table = rt_show_get_table(d->show_protocol);
  if (!d->table) d->table = config->def_tables[NET_IP4]->table; /* FIXME: iterate through all tables ? */

  /* Filtered routes are neither exported nor have sensible ordering */
  if (d->filtered && (d->export_mode || d->primary_only))
    cli_msg(0, "");

  if (!d->addr)
    {
      /* Whole-table dump -- hand control to the CLI continuation machinery */
      FIB_ITERATE_INIT(&d->fit, &d->table->fib);
      this_cli->cont = rt_show_cont;
      this_cli->cleanup = rt_show_cleanup;
      this_cli->rover = d;
    }
  else
    {
      if (d->export_mode)
        {
          /* Find channel associated with the export protocol */
          d->export_channel = rt_show_export_channel(d);
          if (!d->export_channel || (d->export_channel->export_state == ES_DOWN))
            {
              cli_msg(8005, "Channel is down");
              return;
            }
        }

      if (d->table->addr_type != d->addr->type)
      {
        cli_msg(8001, "Incompatible type of prefix/ip with table");
        return;
      }

      /* 'show route for' does longest match, plain lookup is exact */
      if (d->show_for)
        n = net_route(d->table, d->addr);
      else
        n = net_find(d->table, d->addr);

      if (n)
        rt_show_net(this_cli, n, d);

      if (d->rt_counter)
        cli_msg(0, "");
      else
        cli_msg(8001, "Network not in table");
    }
}
2755

    
2756
/*
2757
 *  Documentation for functions declared inline in route.h
2758
 */
2759
#if 0
2760

2761
/**
2762
 * net_find - find a network entry
2763
 * @tab: a routing table
2764
 * @addr: address of the network
2765
 *
2766
 * net_find() looks up the given network in routing table @tab and
2767
 * returns a pointer to its &net entry or %NULL if no such network
2768
 * exists.
2769
 */
2770
static inline net *net_find(rtable *tab, net_addr *addr)
2771
{ DUMMY; }
2772

2773
/**
2774
 * net_get - obtain a network entry
2775
 * @tab: a routing table
2776
 * @addr: address of the network
2777
 *
2778
 * net_get() looks up the given network in routing table @tab and
2779
 * returns a pointer to its &net entry. If no such entry exists, it's
2780
 * created.
2781
 */
2782
static inline net *net_get(rtable *tab, net_addr *addr)
2783
{ DUMMY; }
2784

2785
/**
2786
 * rte_cow - copy a route for writing
2787
 * @r: a route entry to be copied
2788
 *
2789
 * rte_cow() takes a &rte and prepares it for modification. The exact action
2790
 * taken depends on the flags of the &rte -- if it's a temporary entry, it's
2791
 * just returned unchanged, else a new temporary entry with the same contents
2792
 * is created.
2793
 *
2794
 * The primary use of this function is inside the filter machinery -- when
2795
 * a filter wants to modify &rte contents (to change the preference or to
2796
 * attach another set of attributes), it must ensure that the &rte is not
2797
 * shared with anyone else (and especially that it isn't stored in any routing
2798
 * table).
2799
 *
2800
 * Result: a pointer to the new writable &rte.
2801
 */
2802
static inline rte * rte_cow(rte *r)
2803
{ DUMMY; }
2804

2805
#endif