Statistics
| Branch: | Revision:

iof-bird-daemon / nest / rt-table.c @ f8d44b01

History | View | Annotate | Download (62.1 KB)

1
/*
2
 *        BIRD -- Routing Tables
3
 *
4
 *        (c) 1998--2000 Martin Mares <mj@ucw.cz>
5
 *
6
 *        Can be freely distributed and used under the terms of the GNU GPL.
7
 */
8

    
9
/**
10
 * DOC: Routing tables
11
 *
12
 * Routing tables are probably the most important structures BIRD uses. They
13
 * hold all the information about known networks, the associated routes and
14
 * their attributes.
15
 *
16
 * There are multiple routing tables (a primary one together with any
17
 * number of secondary ones if requested by the configuration). Each table
18
 * is basically a FIB containing entries describing the individual
19
 * destination networks. For each network (represented by structure &net),
20
 * there is a one-way linked list of route entries (&rte), the first entry
21
 * on the list being the best one (i.e., the one we currently use
22
 * for routing), the order of the other ones is undetermined.
23
 *
24
 * The &rte contains information specific to the route (preference, protocol
25
 * metrics, time of last modification etc.) and a pointer to a &rta structure
26
 * (see the route attribute module for a precise explanation) holding the
27
 * remaining route attributes which are expected to be shared by multiple
28
 * routes in order to conserve memory.
29
 */
30

    
31
#undef LOCAL_DEBUG
32

    
33
#include "nest/bird.h"
34
#include "nest/route.h"
35
#include "nest/protocol.h"
36
#include "nest/iface.h"
37
#include "lib/resource.h"
38
#include "lib/event.h"
39
#include "lib/string.h"
40
#include "conf/conf.h"
41
#include "filter/filter.h"
42
#include "lib/string.h"
43
#include "lib/alloca.h"
44

    
45
pool *rt_table_pool;
46

    
47
static slab *rte_slab;
48
static linpool *rte_update_pool;
49

    
50
static list routing_tables;
51

    
52
static void rt_free_hostcache(rtable *tab);
53
static void rt_notify_hostcache(rtable *tab, net *net);
54
static void rt_update_hostcache(rtable *tab);
55
static void rt_next_hop_update(rtable *tab);
56
static inline void rt_prune_table(rtable *tab);
57

    
58

    
59
/* Like fib_route(), but skips empty net entries */
60
static inline void *
61
net_route_ip4(rtable *t, net_addr_ip4 *n)
62
{
63
  net *r;
64

    
65
  while (r = net_find_valid(t, (net_addr *) n), (!r) && (n->pxlen > 0))
66
  {
67
    n->pxlen--;
68
    ip4_clrbit(&n->prefix, n->pxlen);
69
  }
70

    
71
  return r;
72
}
73

    
74
static inline void *
75
net_route_ip6(rtable *t, net_addr_ip6 *n)
76
{
77
  net *r;
78

    
79
  while (r = net_find_valid(t, (net_addr *) n), (!r) && (n->pxlen > 0))
80
  {
81
    n->pxlen--;
82
    ip6_clrbit(&n->prefix, n->pxlen);
83
  }
84

    
85
  return r;
86
}
87

    
88
void *
89
net_route(rtable *tab, const net_addr *n)
90
{
91
  ASSERT(tab->addr_type == n->type);
92

    
93
  net_addr *n0 = alloca(n->length);
94
  net_copy(n0, n);
95

    
96
  switch (n->type)
97
  {
98
  case NET_IP4:
99
  case NET_VPN4:
100
  case NET_ROA4:
101
    return net_route_ip4(tab, (net_addr_ip4 *) n0);
102

    
103
  case NET_IP6:
104
  case NET_VPN6:
105
  case NET_ROA6:
106
    return net_route_ip6(tab, (net_addr_ip6 *) n0);
107

    
108
  default:
109
    return NULL;
110
  }
111
}
112

    
113

    
114
static int
115
net_roa_check_ip4(rtable *tab, const net_addr_ip4 *px, u32 asn)
116
{
117
  struct net_addr_roa4 n = NET_ADDR_ROA4(px->prefix, px->pxlen, 0, 0);
118
  struct fib_node *fn;
119
  int anything = 0;
120

    
121
  while (1)
122
  {
123
    for (fn = fib_get_chain(&tab->fib, (net_addr *) &n); fn; fn = fn->next)
124
    {
125
      net_addr_roa4 *roa = (void *) fn->addr;
126
      net *r = fib_node_to_user(&tab->fib, fn);
127

    
128
      if (net_equal_prefix_roa4(roa, &n) && rte_is_valid(r->routes))
129
      {
130
        anything = 1;
131
        if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen))
132
          return ROA_VALID;
133
      }
134
    }
135

    
136
    if (n.pxlen == 0)
137
      break;
138

    
139
    n.pxlen--;
140
    ip4_clrbit(&n.prefix, n.pxlen);
141
  }
142

    
143
  return anything ? ROA_INVALID : ROA_UNKNOWN;
144
}
145

    
146
static int
147
net_roa_check_ip6(rtable *tab, const net_addr_ip6 *px, u32 asn)
148
{
149
  struct net_addr_roa6 n = NET_ADDR_ROA6(px->prefix, px->pxlen, 0, 0);
150
  struct fib_node *fn;
151
  int anything = 0;
152

    
153
  while (1)
154
  {
155
    for (fn = fib_get_chain(&tab->fib, (net_addr *) &n); fn; fn = fn->next)
156
    {
157
      net_addr_roa6 *roa = (void *) fn->addr;
158
      net *r = fib_node_to_user(&tab->fib, fn);
159

    
160
      if (net_equal_prefix_roa6(roa, &n) && rte_is_valid(r->routes))
161
      {
162
        anything = 1;
163
        if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen))
164
          return ROA_VALID;
165
      }
166
    }
167

    
168
    if (n.pxlen == 0)
169
      break;
170

    
171
    n.pxlen--;
172
    ip6_clrbit(&n.prefix, n.pxlen);
173
  }
174

    
175
  return anything ? ROA_INVALID : ROA_UNKNOWN;
176
}
177

    
178
/**
179
 * roa_check - check validity of route origination in a ROA table
180
 * @tab: ROA table
181
 * @n: network prefix to check
182
 * @asn: AS number of network prefix
183
 *
184
 * Implements RFC 6483 route validation for the given network prefix. The
185
 * procedure is to find all candidate ROAs - ROAs whose prefixes cover the given
186
 * network prefix. If there is no candidate ROA, return ROA_UNKNOWN. If there is
187
 * a candidate ROA with matching ASN and maxlen field greater than or equal to
188
 * the given prefix length, return ROA_VALID. Otherwise, return ROA_INVALID. If
189
 * caller cannot determine origin AS, 0 could be used (in that case ROA_VALID
190
 * cannot happen). Table @tab must have type NET_ROA4 or NET_ROA6, network @n
191
 * must have type NET_IP4 or NET_IP6, respectively.
192
 */
193
int
194
net_roa_check(rtable *tab, const net_addr *n, u32 asn)
195
{
196
  if ((tab->addr_type == NET_ROA4) && (n->type == NET_IP4))
197
    return net_roa_check_ip4(tab, (const net_addr_ip4 *) n, asn);
198
  else if ((tab->addr_type == NET_ROA6) && (n->type == NET_IP6))
199
    return net_roa_check_ip6(tab, (const net_addr_ip6 *) n, asn);
200
  else
201
    return ROA_UNKNOWN;        /* Should not happen */
202
}
203

    
204
/**
205
 * rte_find - find a route
206
 * @net: network node
207
 * @src: route source
208
 *
209
 * The rte_find() function returns a route for destination @net
210
 * which is from route source @src.
211
 */
212
rte *
213
rte_find(net *net, struct rte_src *src)
214
{
215
  rte *e = net->routes;
216

    
217
  while (e && e->attrs->src != src)
218
    e = e->next;
219
  return e;
220
}
221

    
222
/**
223
 * rte_get_temp - get a temporary &rte
224
 * @a: attributes to assign to the new route (a &rta; in case it's
225
 * un-cached, rte_update() will create a cached copy automatically)
226
 *
227
 * Create a temporary &rte and bind it with the attributes @a.
228
 * Also set route preference to the default preference set for
229
 * the protocol.
230
 */
231
rte *
232
rte_get_temp(rta *a)
233
{
234
  rte *e = sl_alloc(rte_slab);
235

    
236
  e->attrs = a;
237
  e->flags = 0;
238
  e->pref = 0;
239
  return e;
240
}
241

    
242
/*
 * Make a private copy of route @r.  The attribute list is cloned
 * (reference-counted), flags are reset on the copy.
 */
rte *
rte_do_cow(rte *r)
{
  rte *copy = sl_alloc(rte_slab);

  memcpy(copy, r, sizeof(rte));
  copy->attrs = rta_clone(r->attrs);
  copy->flags = 0;
  return copy;
}
252

    
253
/**
254
 * rte_cow_rta - get a private writable copy of &rte with writable &rta
255
 * @r: a route entry to be copied
256
 * @lp: a linpool from which to allocate &rta
257
 *
258
 * rte_cow_rta() takes a &rte and prepares it and associated &rta for
259
 * modification. There are three possibilities: First, both &rte and &rta are
260
 * private copies, in that case they are returned unchanged.  Second, &rte is
261
 * private copy, but &rta is cached, in that case &rta is duplicated using
262
 * rta_do_cow(). Third, both &rte is shared and &rta is cached, in that case
263
 * both structures are duplicated by rte_do_cow() and rta_do_cow().
264
 *
265
 * Note that in the second case, cached &rta loses one reference, while private
266
 * copy created by rta_do_cow() is a shallow copy sharing indirect data (eattrs,
267
 * nexthops, ...) with it. To work properly, original shared &rta should have
268
 * another reference during the life of created private copy.
269
 *
270
 * Result: a pointer to the new writable &rte with writable &rta.
271
 */
272
rte *
273
rte_cow_rta(rte *r, linpool *lp)
274
{
275
  if (!rta_is_cached(r->attrs))
276
    return r;
277

    
278
  rte *e = rte_cow(r);
279
  rta *a = rta_do_cow(r->attrs, lp);
280
  rta_free(e->attrs);
281
  e->attrs = a;
282
  return e;
283
}
284

    
285
static int                                /* Actually better or at least as good as */
286
rte_better(rte *new, rte *old)
287
{
288
  int (*better)(rte *, rte *);
289

    
290
  if (!rte_is_valid(old))
291
    return 1;
292
  if (!rte_is_valid(new))
293
    return 0;
294

    
295
  if (new->pref > old->pref)
296
    return 1;
297
  if (new->pref < old->pref)
298
    return 0;
299
  if (new->attrs->src->proto->proto != old->attrs->src->proto->proto)
300
    {
301
      /*
302
       *  If the user has configured protocol preferences, so that two different protocols
303
       *  have the same preference, try to break the tie by comparing addresses. Not too
304
       *  useful, but keeps the ordering of routes unambiguous.
305
       */
306
      return new->attrs->src->proto->proto > old->attrs->src->proto->proto;
307
    }
308
  if (better = new->attrs->src->proto->rte_better)
309
    return better(new, old);
310
  return 0;
311
}
312

    
313
static int
314
rte_mergable(rte *pri, rte *sec)
315
{
316
  int (*mergable)(rte *, rte *);
317

    
318
  if (!rte_is_valid(pri) || !rte_is_valid(sec))
319
    return 0;
320

    
321
  if (pri->pref != sec->pref)
322
    return 0;
323

    
324
  if (pri->attrs->src->proto->proto != sec->attrs->src->proto->proto)
325
    return 0;
326

    
327
  if (mergable = pri->attrs->src->proto->rte_mergable)
328
    return mergable(pri, sec);
329

    
330
  return 0;
331
}
332

    
333
static void
334
rte_trace(struct proto *p, rte *e, int dir, char *msg)
335
{
336
  log(L_TRACE "%s %c %s %N %s", p->name, dir, msg, e->net->n.addr, rta_dest_name(e->attrs->dest));
337
}
338

    
339
static inline void
340
rte_trace_in(uint flag, struct proto *p, rte *e, char *msg)
341
{
342
  if (p->debug & flag)
343
    rte_trace(p, e, '>', msg);
344
}
345

    
346
static inline void
347
rte_trace_out(uint flag, struct proto *p, rte *e, char *msg)
348
{
349
  if (p->debug & flag)
350
    rte_trace(p, e, '<', msg);
351
}
352

    
353
/*
 * Run the export path of channel @c (protocol import_control() hook, then
 * the configured export filter) on route @rt0.
 *
 * Returns the (possibly replaced) route on acceptance, NULL on rejection.
 * If the hooks produced a private copy of the route, it is stored into
 * *@rt_free so the caller can release it after use.  @tmpa, if non-NULL,
 * receives the temporary attribute list; @pool is used for temporary
 * allocations.  With @silent set, statistics are not updated and nothing
 * is logged - used when only probing whether a route would be exported.
 */
static rte *
export_filter_(struct channel *c, rte *rt0, rte **rt_free, ea_list **tmpa, linpool *pool, int silent)
{
  struct proto *p = c->proto;
  struct filter *filter = c->out_filter;
  struct proto_stats *stats = &c->stats;
  ea_list *tmpb = NULL;        /* local fallback storage when the caller passed tmpa == NULL */
  rte *rt;
  int v;

  rt = rt0;
  *rt_free = NULL;

  if (!tmpa)
    tmpa = &tmpb;

  *tmpa = rte_make_tmp_attrs(rt, pool);

  /* Protocol-level veto/force hook; 0 means "ask the export filter" */
  v = p->import_control ? p->import_control(p, &rt, tmpa, pool) : 0;
  if (v < 0)
    {
      if (silent)
        goto reject;

      stats->exp_updates_rejected++;
      if (v == RIC_REJECT)
        rte_trace_out(D_FILTERS, p, rt, "rejected by protocol");
      goto reject;
    }
  if (v > 0)
    {
      /* Protocol forced acceptance - skip the export filter entirely */
      if (!silent)
        rte_trace_out(D_FILTERS, p, rt, "forced accept by protocol");
      goto accept;
    }

  /* Run the export filter; FILTER_REJECT short-circuits f_run() */
  v = filter && ((filter == FILTER_REJECT) ||
                 (f_run(filter, &rt, tmpa, pool, FF_FORCE_TMPATTR) > F_ACCEPT));
  if (v)
    {
      if (silent)
        goto reject;

      stats->exp_updates_filtered++;
      rte_trace_out(D_FILTERS, p, rt, "filtered out");
      goto reject;
    }

 accept:
  /* The hooks/filter may have replaced rt with a private copy - report it */
  if (rt != rt0)
    *rt_free = rt;
  return rt;

 reject:
  /* Discard temporary rte */
  if (rt != rt0)
    rte_free(rt);
  return NULL;
}
412

    
413
static inline rte *
414
export_filter(struct channel *c, rte *rt0, rte **rt_free, ea_list **tmpa, int silent)
415
{
416
  return export_filter_(c, rt0, rt_free, tmpa, rte_update_pool, silent);
417
}
418

    
419
/*
 * Deliver one route change on channel @c: apply the export limit, update
 * export statistics, optionally log the change and finally call the
 * protocol's rt_notify() hook.  @tmpa is a temporary attribute list that
 * is chained in front of the route's own eattrs for the duration of the
 * hook call; @refeed signals this announcement is part of a refeed run.
 */
static void
do_rt_notify(struct channel *c, net *net, rte *new, rte *old, ea_list *tmpa, int refeed)
{
  struct proto *p = c->proto;
  struct proto_stats *stats = &c->stats;


  /*
   * First, apply export limit.
   *
   * Export route limits has several problems. Because exp_routes
   * counter is reset before refeed, we don't really know whether
   * limit is breached and whether the update is new or not. Therefore
   * the number of really exported routes may exceed the limit
   * temporarily (routes exported before and new routes in refeed).
   *
   * Minor advantage is that if the limit is decreased and refeed is
   * requested, the number of exported routes really decrease.
   *
   * Second problem is that with export limits, we don't know whether
   * old was really exported (it might be blocked by limit). When a
   * withdraw is exported, we announce it even when the previous
   * update was blocked. This is not a big issue, but the same problem
   * is in updating exp_routes counter. Therefore, to be consistent in
   * increases and decreases of exp_routes, we count exported routes
   * regardless of blocking by limits.
   *
   * Similar problem is in handling updates - when a new route is
   * received and blocking is active, the route would be blocked, but
   * when an update for the route will be received later, the update
   * would be propagated (as old != NULL). Therefore, we have to block
   * also non-new updates (contrary to import blocking).
   */

  struct channel_limit *l = &c->out_limit;
  if (l->action && new)
    {
      /* Only genuinely new exports (or refed ones) can trip the limit */
      if ((!old || refeed) && (stats->exp_routes >= l->limit))
        channel_notify_limit(c, l, PLD_OUT, stats->exp_routes);

      if (l->state == PLS_BLOCKED)
        {
          stats->exp_routes++;        /* see note above */
          stats->exp_updates_rejected++;
          rte_trace_out(D_FILTERS, p, new, "rejected [limit]");
          /* Blocked update degrades to a withdraw of the old route, if any */
          new = NULL;

          if (!old)
            return;
        }
    }


  if (new)
    stats->exp_updates_accepted++;
  else
    stats->exp_withdraws_accepted++;

  /* Hack: We do not decrease exp_routes during refeed, we instead
     reset exp_routes at the start of refeed. */
  if (new)
    stats->exp_routes++;
  if (old && !refeed)
    stats->exp_routes--;

  if (p->debug & D_ROUTES)
    {
      if (new && old)
        rte_trace_out(D_ROUTES, p, new, "replaced");
      else if (new)
        rte_trace_out(D_ROUTES, p, new, "added");
      else if (old)
        rte_trace_out(D_ROUTES, p, old, "removed");
    }
  if (!new)
    p->rt_notify(p, c, net, NULL, old, NULL);
  else if (tmpa)
    {
      /* Temporarily splice tmpa in front of the route's eattrs so the
         protocol sees one combined list, then unchain again - the rta
         (and its eattrs) may be shared and must not keep the link */
      ea_list *t = tmpa;
      while (t->next)
        t = t->next;
      t->next = new->attrs->eattrs;
      p->rt_notify(p, c, net, new, old, tmpa);
      t->next = NULL;
    }
  else
    p->rt_notify(p, c, net, new, old, new->attrs->eattrs);
}
507

    
508
/*
 * Announce a single route change (RA_ANY/RA_OPTIMAL style) on channel @c:
 * run both @new0 and @old0 through the export filter, then hand the result
 * to do_rt_notify().  @refeed marks a refeed run, in which case the old
 * route is not filtered (see the FIXME below).
 */
static void
rt_notify_basic(struct channel *c, net *net, rte *new0, rte *old0, int refeed)
{
  struct proto *p = c->proto;

  rte *new = new0;
  rte *old = old0;
  rte *new_free = NULL;        /* private copies produced by the filters, freed at the end */
  rte *old_free = NULL;
  ea_list *tmpa = NULL;

  if (new)
    c->stats.exp_updates_received++;
  else
    c->stats.exp_withdraws_received++;

  /*
   * This is a tricky part - we don't know whether route 'old' was
   * exported to protocol 'p' or was filtered by the export filter.
   * We try to run the export filter to know this to have a correct
   * value in 'old' argument of rte_update (and proper filter value)
   *
   * FIXME - this is broken because 'configure soft' may change
   * filters but keep routes. Refeed is expected to be called after
   * change of the filters and with old == new, therefore we do not
   * even try to run the filter on an old route, This may lead to
   * 'spurious withdraws' but ensure that there are no 'missing
   * withdraws'.
   *
   * This is not completely safe as there is a window between
   * reconfiguration and the end of refeed - if a newly filtered
   * route disappears during this period, proper withdraw is not
   * sent (because old would be also filtered) and the route is
   * not refeeded (because it disappeared before that).
   */

  if (new)
    new = export_filter(c, new, &new_free, &tmpa, 0);

  /* Old route is filtered silently - no stats, no logging */
  if (old && !refeed)
    old = export_filter(c, old, &old_free, NULL, 1);

  if (!new && !old)
  {
    /*
     * As mentioned above, 'old' value may be incorrect in some race conditions.
     * We generally ignore it with the exception of withdraw to pipe protocol.
     * In that case we rather propagate unfiltered withdraws regardless of
     * export filters to ensure that when a protocol is flushed, its routes are
     * removed from all tables. Possible spurious unfiltered withdraws are not
     * problem here as they are ignored if there is no corresponding route at
     * the other end of the pipe. We directly call rt_notify() hook instead of
     * do_rt_notify() to avoid logging and stat counters.
     */

#ifdef CONFIG_PIPE
    if ((p->proto == &proto_pipe) && !new0 && (p != old0->sender->proto))
      p->rt_notify(p, c, net, NULL, old0, NULL);
#endif

    return;
  }

  do_rt_notify(c, net, new, old, tmpa, refeed);

  /* Discard temporary rte's */
  if (new_free)
    rte_free(new_free);
  if (old_free)
    rte_free(old_free);
}
579

    
580
/*
 * Announce a change in RA_ACCEPTED mode: the channel only cares about the
 * first route on the list accepted by its export filter.  This function
 * determines the new and old "first accepted" routes around the change
 * (@new_changed/@old_changed, with @before_old giving the old position)
 * and forwards the pair to do_rt_notify().  @feed: 0 = normal update,
 * 1 = initial feed, 2 = refeed.
 */
static void
rt_notify_accepted(struct channel *c, net *net, rte *new_changed, rte *old_changed, rte *before_old, int feed)
{
  // struct proto *p = c->proto;

  rte *r;
  rte *new_best = NULL;
  rte *old_best = NULL;
  rte *new_free = NULL;
  rte *old_free = NULL;
  ea_list *tmpa = NULL;

  /* Used to track whether we met old_changed position. If before_old is NULL
     old_changed was the first and we met it implicitly before current best route. */
  int old_meet = old_changed && !before_old;

  /* Note that before_old is either NULL or valid (not rejected) route.
     If old_changed is valid, before_old have to be too. If old changed route
     was not valid, caller must use NULL for both old_changed and before_old. */

  if (new_changed)
    c->stats.exp_updates_received++;
  else
    c->stats.exp_withdraws_received++;

  /* First, find the new_best route - first accepted by filters */
  for (r=net->routes; rte_is_valid(r); r=r->next)
    {
      if (new_best = export_filter(c, r, &new_free, &tmpa, 0))
        break;

      /* Note if we walked around the position of old_changed route */
      if (r == before_old)
        old_meet = 1;
    }

  /* 
   * Second, handle the feed case. That means we do not care for
   * old_best. It is NULL for feed, and the new_best for refeed. 
   * For refeed, there is a hack similar to one in rt_notify_basic()
   * to ensure withdraws in case of changed filters
   */
  if (feed)
    {
      if (feed == 2)        /* refeed */
        old_best = new_best ? new_best :
          (rte_is_valid(net->routes) ? net->routes : NULL);
      else
        old_best = NULL;

      if (!new_best && !old_best)
        return;

      goto found;
    }

  /*
   * Now, we find the old_best route. Generally, it is the same as the
   * new_best, unless new_best is the same as new_changed or
   * old_changed is accepted before new_best.
   *
   * There are four cases:
   *
   * - We would find and accept old_changed before new_best, therefore
   *   old_changed is old_best. In remaining cases we suppose this
   *   is not true.
   *
   * - We found no new_best, therefore there is also no old_best and
   *   we ignore this withdraw.
   *
   * - We found new_best different than new_changed, therefore
   *   old_best is the same as new_best and we ignore this update.
   *
   * - We found new_best the same as new_changed, therefore it cannot
   *   be old_best and we have to continue search for old_best.
   */

  /* First case */
  if (old_meet)
    if (old_best = export_filter(c, old_changed, &old_free, NULL, 1))
      goto found;

  /* Second case */
  if (!new_best)
    return;

  /* Third case, we use r instead of new_best, because export_filter() could change it */
  if (r != new_changed)
    {
      if (new_free)
        rte_free(new_free);
      return;
    }

  /* Fourth case */
  for (r=r->next; rte_is_valid(r); r=r->next)
    {
      if (old_best = export_filter(c, r, &old_free, NULL, 1))
        goto found;

      if (r == before_old)
        if (old_best = export_filter(c, old_changed, &old_free, NULL, 1))
          goto found;
    }

  /* Implicitly, old_best is NULL and new_best is non-NULL */

 found:
  do_rt_notify(c, net, new_best, old_best, tmpa, (feed == 2));

  /* Discard temporary rte's */
  if (new_free)
    rte_free(new_free);
  if (old_free)
    rte_free(old_free);
}
696

    
697

    
698
static struct nexthop *
699
nexthop_merge_rta(struct nexthop *nhs, rta *a, linpool *pool, int max)
700
{
701
  return nexthop_merge(nhs, &(a->nh), 1, 0, max, pool);
702
}
703

    
704
/*
 * Build the merged (multipath) export route for @net on channel @c: run the
 * best route through the export filter, then merge next hops of all other
 * mergable routes that also pass the filter (silently).  Returns the merged
 * route, or NULL/unreachable best as-is.  A private copy, if created, is
 * reported through *@rt_free for the caller to release.
 */
rte *
rt_export_merged(struct channel *c, net *net, rte **rt_free, ea_list **tmpa, linpool *pool, int silent)
{
  // struct proto *p = c->proto;
  struct nexthop *nhs = NULL;        /* accumulated next hops from secondary routes */
  rte *best0, *best, *rt0, *rt, *tmp;

  best0 = net->routes;
  *rt_free = NULL;

  if (!rte_is_valid(best0))
    return NULL;

  best = export_filter_(c, best0, rt_free, tmpa, pool, silent);

  /* Rejected or unreachable best route - nothing to merge into */
  if (!best || !rte_is_reachable(best))
    return best;

  for (rt0 = best0->next; rt0; rt0 = rt0->next)
  {
    if (!rte_mergable(best0, rt0))
      continue;

    /* Secondary routes are filtered silently - no stats/logging */
    rt = export_filter_(c, rt0, &tmp, NULL, pool, 1);

    if (!rt)
      continue;

    if (rte_is_reachable(rt))
      nhs = nexthop_merge_rta(nhs, rt->attrs, pool, c->merge_limit);

    if (tmp)
      rte_free(tmp);
  }

  if (nhs)
  {
    /* Add the best route's own next hop last, then attach the merged
       list to a private copy of the best route's attributes */
    nhs = nexthop_merge_rta(nhs, best->attrs, pool, c->merge_limit);

    if (nhs->next)
    {
      best = rte_cow_rta(best, pool);
      nexthop_link(best->attrs, nhs);
    }
  }

  if (best != best0)
    *rt_free = best;

  return best;
}
755

    
756

    
757
/*
 * Announce a change in RA_MERGED mode: the channel exports one merged
 * (multipath) route per network.  Decides whether the change affects the
 * merged route, rebuilds it via rt_export_merged() and forwards the
 * new/old pair to do_rt_notify().  All rte arguments are either NULL or
 * valid; @refeed marks a refeed run.
 */
static void
rt_notify_merged(struct channel *c, net *net, rte *new_changed, rte *old_changed,
                 rte *new_best, rte*old_best, int refeed)
{
  // struct proto *p = c->proto;

  rte *new_best_free = NULL;
  rte *old_best_free = NULL;
  rte *new_changed_free = NULL;
  rte *old_changed_free = NULL;
  ea_list *tmpa = NULL;

  /* We assume that all rte arguments are either NULL or rte_is_valid() */

  /* This check should be done by the caller */
  if (!new_best && !old_best)
    return;

  /* Check whether the change is relevant to the merged route */
  if ((new_best == old_best) && !refeed)
  {
    /* Best route unchanged - the update matters only if a mergable
       (and exported) secondary route was added or removed */
    new_changed = rte_mergable(new_best, new_changed) ?
      export_filter(c, new_changed, &new_changed_free, NULL, 1) : NULL;

    old_changed = rte_mergable(old_best, old_changed) ?
      export_filter(c, old_changed, &old_changed_free, NULL, 1) : NULL;

    if (!new_changed && !old_changed)
      return;
  }

  if (new_best)
    c->stats.exp_updates_received++;
  else
    c->stats.exp_withdraws_received++;

  /* Prepare new merged route */
  if (new_best)
    new_best = rt_export_merged(c, net, &new_best_free, &tmpa, rte_update_pool, 0);

  /* Prepare old merged route (without proper merged next hops) */
  /* There are some issues with running filter on old route - see rt_notify_basic() */
  if (old_best && !refeed)
    old_best = export_filter(c, old_best, &old_best_free, NULL, 1);

  if (new_best || old_best)
    do_rt_notify(c, net, new_best, old_best, tmpa, refeed);

  /* Discard temporary rte's */
  if (new_best_free)
    rte_free(new_best_free);
  if (old_best_free)
    rte_free(old_best_free);
  if (new_changed_free)
    rte_free(new_changed_free);
  if (old_changed_free)
    rte_free(old_changed_free);
}
815

    
816

    
817
/**
818
 * rte_announce - announce a routing table change
819
 * @tab: table the route has been added to
820
 * @type: type of route announcement (RA_OPTIMAL or RA_ANY)
821
 * @net: network in question
822
 * @new: the new route to be announced
823
 * @old: the previous route for the same network
824
 * @new_best: the new best route for the same network
825
 * @old_best: the previous best route for the same network
826
 * @before_old: The previous route before @old for the same network.
827
 *                 If @before_old is NULL @old was the first.
828
 *
829
 * This function gets a routing table update and announces it
830
 * to all protocols that acccepts given type of route announcement
831
 * and are connected to the same table by their announcement hooks.
832
 *
833
 * Route announcement of type %RA_OPTIMAL si generated when optimal
834
 * route (in routing table @tab) changes. In that case @old stores the
835
 * old optimal route.
836
 *
837
 * Route announcement of type %RA_ANY si generated when any route (in
838
 * routing table @tab) changes In that case @old stores the old route
839
 * from the same protocol.
840
 *
841
 * For each appropriate protocol, we first call its import_control()
842
 * hook which performs basic checks on the route (each protocol has a
843
 * right to veto or force accept of the route before any filter is
844
 * asked) and adds default values of attributes specific to the new
845
 * protocol (metrics, tags etc.).  Then it consults the protocol's
846
 * export filter and if it accepts the route, the rt_notify() hook of
847
 * the protocol gets called.
848
 */
849
static void
850
rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old,
851
             rte *new_best, rte *old_best, rte *before_old)
852
{
853
  if (!rte_is_valid(new))
854
    new = NULL;
855

    
856
  if (!rte_is_valid(old))
857
    old = before_old = NULL;
858

    
859
  if (!rte_is_valid(new_best))
860
    new_best = NULL;
861

    
862
  if (!rte_is_valid(old_best))
863
    old_best = NULL;
864

    
865
  if (!old && !new)
866
    return;
867

    
868
  if ((type == RA_OPTIMAL) && tab->hostcache)
869
    rt_notify_hostcache(tab, net);
870

    
871
  struct channel *c; node *n;
872
  WALK_LIST2(c, n, tab->channels, table_node)
873
    {
874
      if (c->export_state == ES_DOWN)
875
        continue;
876

    
877
      if (c->ra_mode == type)
878
        if (type == RA_ACCEPTED)
879
          rt_notify_accepted(c, net, new, old, before_old, 0);
880
        else if (type == RA_MERGED)
881
          rt_notify_merged(c, net, new, old, new_best, old_best, 0);
882
        else
883
          rt_notify_basic(c, net, new, old, 0);
884
    }
885
}
886

    
887
/*
 * Sanity-check a received route before it enters the table.  Returns 1 if
 * the route is acceptable, 0 (with a warning logged) otherwise.
 */
static inline int
rte_validate(rte *e)
{
  int c;
  net *n = e->net;

  /* Reject malformed network prefixes */
  if (!net_validate(n->n.addr))
  {
    log(L_WARN "Ignoring bogus prefix %N received via %s",
        n->n.addr, e->sender->proto->name);
    return 0;
  }

  /* Reject non-host or link-scope (and narrower) addresses */
  c = net_classify(n->n.addr);
  if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
  {
    log(L_WARN "Ignoring bogus route %N received via %s",
        n->n.addr, e->sender->proto->name);
    return 0;
  }

  /* Destination presence must match the network type: NB_DEST-type nets
     must have a dest, other net types must not (the comparison of the
     match flag against !dest catches both mismatches at once) */
  if (net_type_match(n->n.addr, NB_DEST) == !e->attrs->dest)
  {
    log(L_WARN "Ignoring route %N with invalid dest %d received via %s",
        n->n.addr, e->attrs->dest, e->sender->proto->name);
    return 0;
  }

  /* Multipath next hop lists are required to be kept sorted */
  if ((e->attrs->dest == RTD_UNICAST) && !nexthop_is_sorted(&(e->attrs->nh)))
  {
    log(L_WARN "Ignoring unsorted multipath route %N received via %s",
        n->n.addr, e->sender->proto->name);
    return 0;
  }

  return 1;
}
924

    
925
/**
926
 * rte_free - delete a &rte
927
 * @e: &rte to be deleted
928
 *
929
 * rte_free() deletes the given &rte from the routing table it's linked to.
930
 */
931
void
932
rte_free(rte *e)
933
{
934
  if (rta_is_cached(e->attrs))
935
    rta_free(e->attrs);
936
  sl_free(rte_slab, e);
937
}
938

    
939
static inline void
940
rte_free_quick(rte *e)
941
{
942
  rta_free(e->attrs);
943
  sl_free(rte_slab, e);
944
}
945

    
946
static int
947
rte_same(rte *x, rte *y)
948
{
949
  return
950
    x->attrs == y->attrs &&
951
    x->flags == y->flags &&
952
    x->pflags == y->pflags &&
953
    x->pref == y->pref &&
954
    (!x->attrs->src->proto->rte_same || x->attrs->src->proto->rte_same(x, y));
955
}
956

    
957
/* A route is "ok" when it exists and was not rejected by import filters */
static inline int
rte_is_ok(rte *e)
{
  return e && !rte_is_filtered(e);
}
958

    
959
/*
 * rte_recalculate - core route replacement and best-route election.
 * @c: channel the update arrived on
 * @net: network entry being updated
 * @new: new route (takes ownership; may be NULL for a withdraw)
 * @src: route source identifying which existing route to replace
 *
 * Removes the old route from @src (if any), enforces rx/import limits,
 * inserts @new, re-elects the best route (net->routes head), announces
 * all changes and frees the replaced route. All exits either consume or
 * free @new.
 */
static void
rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src)
{
  struct proto *p = c->proto;
  struct rtable *table = c->table;
  struct proto_stats *stats = &c->stats;
  static struct tbf rl_pipe = TBF_DEFAULT_LOG_LIMITS;
  rte *before_old = NULL;
  rte *old_best = net->routes;
  rte *old = NULL;
  rte **k;

  k = &net->routes;                        /* Find and remove original route from the same protocol */
  while (old = *k)
    {
      if (old->attrs->src == src)
        {
          /* If there is the same route in the routing table but from
           * a different sender, then there are two paths from the
           * source protocol to this routing table through transparent
           * pipes, which is not allowed.
           *
           * We log that and ignore the route. If it is withdraw, we
           * ignore it completely (there might be 'spurious withdraws',
           * see FIXME in do_rte_announce())
           */
          if (old->sender->proto != p)
            {
              if (new)
                {
                  log_rl(&rl_pipe, L_ERR "Pipe collision detected when sending %N to table %s",
                      net->n.addr, table->name);
                  rte_free_quick(new);
                }
              return;
            }

          if (new && rte_same(old, new))
            {
              /* No changes, ignore the new route */

              if (!rte_is_filtered(new))
                {
                  stats->imp_updates_ignored++;
                  rte_trace_in(D_ROUTES, p, new, "ignored");
                }

              rte_free_quick(new);
              return;
            }
          *k = old->next;
          break;
        }
      k = &old->next;
      before_old = old;
    }

  /* before_old is only meaningful when an old route was actually found */
  if (!old)
    before_old = NULL;

  if (!old && !new)
    {
      stats->imp_withdraws_ignored++;
      return;
    }

  int new_ok = rte_is_ok(new);
  int old_ok = rte_is_ok(old);

  /* Receive limit: counts all routes (accepted + filtered) from this channel */
  struct channel_limit *l = &c->rx_limit;
  if (l->action && !old && new)
    {
      u32 all_routes = stats->imp_routes + stats->filt_routes;

      if (all_routes >= l->limit)
        channel_notify_limit(c, l, PLD_RX, all_routes);

      if (l->state == PLS_BLOCKED)
        {
          /* In receive limit the situation is simple, old is NULL so
             we just free new and exit like nothing happened */

          stats->imp_updates_ignored++;
          rte_trace_in(D_FILTERS, p, new, "ignored [limit]");
          rte_free_quick(new);
          return;
        }
    }

  /* Import limit: counts only routes accepted by the import filter */
  l = &c->in_limit;
  if (l->action && !old_ok && new_ok)
    {
      if (stats->imp_routes >= l->limit)
        channel_notify_limit(c, l, PLD_IN, stats->imp_routes);

      if (l->state == PLS_BLOCKED)
        {
          /* In import limit the situation is more complicated. We
             shouldn't just drop the route, we should handle it like
             it was filtered. We also have to continue the route
             processing if old or new is non-NULL, but we should exit
             if both are NULL as this case is probably assumed to be
             already handled. */

          stats->imp_updates_ignored++;
          rte_trace_in(D_FILTERS, p, new, "ignored [limit]");

          if (c->in_keep_filtered)
            new->flags |= REF_FILTERED;
          else
            { rte_free_quick(new); new = NULL; }

          /* Note that old && !new could be possible when
             c->in_keep_filtered changed in the recent past. */

          if (!old && !new)
            return;

          new_ok = 0;
          goto skip_stats1;
        }
    }

  if (new_ok)
    stats->imp_updates_accepted++;
  else if (old_ok)
    stats->imp_withdraws_accepted++;
  else
    stats->imp_withdraws_ignored++;

 skip_stats1:

  if (new)
    rte_is_filtered(new) ? stats->filt_routes++ : stats->imp_routes++;
  if (old)
    rte_is_filtered(old) ? stats->filt_routes-- : stats->imp_routes--;

  if (table->config->sorted)
    {
      /* If routes are sorted, just insert new route to appropriate position */
      if (new)
        {
          if (before_old && !rte_better(new, before_old))
            k = &before_old->next;
          else
            k = &net->routes;

          for (; *k; k=&(*k)->next)
            if (rte_better(new, *k))
              break;

          new->next = *k;
          *k = new;
        }
    }
  else
    {
      /* If routes are not sorted, find the best route and move it on
         the first position. There are several optimized cases. */

      if (src->proto->rte_recalculate && src->proto->rte_recalculate(table, net, new, old, old_best))
        goto do_recalculate;

      if (new && rte_better(new, old_best))
        {
          /* The first case - the new route is cleary optimal,
             we link it at the first position */

          new->next = net->routes;
          net->routes = new;
        }
      else if (old == old_best)
        {
          /* The second case - the old best route disappeared, we add the
             new route (if we have any) to the list (we don't care about
             position) and then we elect the new optimal route and relink
             that route at the first position and announce it. New optimal
             route might be NULL if there is no more routes */

        do_recalculate:
          /* Add the new route to the list */
          if (new)
            {
              new->next = net->routes;
              net->routes = new;
            }

          /* Find a new optimal route (if there is any) */
          if (net->routes)
            {
              rte **bp = &net->routes;
              for (k=&(*bp)->next; *k; k=&(*k)->next)
                if (rte_better(*k, *bp))
                  bp = k;

              /* And relink it */
              rte *best = *bp;
              *bp = best->next;
              best->next = net->routes;
              net->routes = best;
            }
        }
      else if (new)
        {
          /* The third case - the new route is not better than the old
             best route (therefore old_best != NULL) and the old best
             route was not removed (therefore old_best == net->routes).
             We just link the new route after the old best route. */

          ASSERT(net->routes != NULL);
          new->next = net->routes->next;
          net->routes->next = new;
        }
      /* The fourth (empty) case - suboptimal route was removed, nothing to do */
    }

  if (new)
    new->lastmod = now;

  /* Log the route change */
  if (p->debug & D_ROUTES)
    {
      if (new_ok)
        rte_trace(p, new, '>', new == net->routes ? "added [best]" : "added");
      else if (old_ok)
        {
          if (old != old_best)
            rte_trace(p, old, '>', "removed");
          else if (rte_is_ok(net->routes))
            rte_trace(p, old, '>', "removed [replaced]");
          else
            rte_trace(p, old, '>', "removed [sole]");
        }
    }

  /* Propagate the route change */
  rte_announce(table, RA_ANY, net, new, old, NULL, NULL, NULL);
  if (net->routes != old_best)
    rte_announce(table, RA_OPTIMAL, net, net->routes, old_best, NULL, NULL, NULL);
  if (table->config->sorted)
    rte_announce(table, RA_ACCEPTED, net, new, old, NULL, NULL, before_old);
  rte_announce(table, RA_MERGED, net, new, old, net->routes, old_best, NULL);

  /* Schedule GC when a net just became empty and thresholds are reached */
  if (!net->routes &&
      (table->gc_counter++ >= table->config->gc_max_ops) &&
      (table->gc_time + table->config->gc_min_time <= now))
    rt_schedule_prune(table);

  if (old_ok && p->rte_remove)
    p->rte_remove(net, old);
  if (new_ok && p->rte_insert)
    p->rte_insert(net, new);

  if (old)
    rte_free_quick(old);
}
1215

    
1216
static int rte_update_nest_cnt;                /* Nesting counter to allow recursive updates */

/* Enter an update section; may nest (e.g. pipes re-entering rte_update). */
static inline void
rte_update_lock(void)
{
  rte_update_nest_cnt++;
}

/* Leave an update section; the shared linear pool for temporary route
 * data is flushed only when the outermost section ends. */
static inline void
rte_update_unlock(void)
{
  if (!--rte_update_nest_cnt)
    lp_flush(rte_update_pool);
}
1230

    
1231
static inline void
1232
rte_hide_dummy_routes(net *net, rte **dummy)
1233
{
1234
  if (net->routes && net->routes->attrs->source == RTS_DUMMY)
1235
  {
1236
    *dummy = net->routes;
1237
    net->routes = (*dummy)->next;
1238
  }
1239
}
1240

    
1241
static inline void
1242
rte_unhide_dummy_routes(net *net, rte **dummy)
1243
{
1244
  if (*dummy)
1245
  {
1246
    (*dummy)->next = net->routes;
1247
    net->routes = *dummy;
1248
  }
1249
}
1250

    
1251
/**
 * rte_update2 - enter a new update to a routing table
 * @c: channel doing the update (the table updated is @c->table)
 * @n: network address of the destination being updated
 * @new: a &rte representing the new route or %NULL for route removal.
 * @src: route source originating the update
 *
 * This function is called by the routing protocols whenever they discover
 * a new route or wish to update/remove an existing route. The right announcement
 * sequence is to build route attributes first (either un-cached with @aflags set
 * to zero or a cached one using rta_lookup(); in this case please note that
 * you need to increase the use count of the attributes yourself by calling
 * rta_clone()), call rte_get_temp() to obtain a temporary &rte, fill in all
 * the appropriate data and finally submit the new &rte by calling rte_update().
 *
 * @src specifies the route source that originally created the route and the
 * meaning of protocol-dependent data of @new. If @new is not %NULL, @src have
 * to be the same value as @new->attrs->src. @c specifies the channel through
 * which the update was submitted; rte_update2() stores it in @new->sender.
 *
 * When rte_update() gets any route, it automatically validates it (checks,
 * whether the network and next hop address are valid IP addresses and also
 * whether a normal routing protocol doesn't try to smuggle a host or link
 * scope route to the table), converts all protocol dependent attributes stored
 * in the &rte to temporary extended attributes, consults import filters of the
 * protocol to see if the route should be accepted and/or its attributes modified,
 * stores the temporary attributes back to the &rte.
 *
 * Now, having a "public" version of the route, we
 * automatically find any old route defined by the protocol @src
 * for network @n, replace it by the new one (or removing it if @new is %NULL),
 * recalculate the optimal route for this destination and finally broadcast
 * the change (if any) to all routing protocols by calling rte_announce().
 *
 * All memory used for attribute lists and other temporary allocations is taken
 * from a special linear pool @rte_update_pool and freed when rte_update()
 * finishes.
 */

void
rte_update2(struct channel *c, const net_addr *n, rte *new, struct rte_src *src)
{
  struct proto *p = c->proto;
  struct proto_stats *stats = &c->stats;
  struct filter *filter = c->in_filter;
  ea_list *tmpa = NULL;
  rte *dummy = NULL;
  net *nn;

  ASSERT(c->channel_state == CS_UP);

  rte_update_lock();
  if (new)
    {
      nn = net_get(c->table, n);

      new->net = nn;
      new->sender = c;

      if (!new->pref)
        new->pref = c->preference;

      stats->imp_updates_received++;
      if (!rte_validate(new))
        {
          rte_trace_in(D_FILTERS, p, new, "invalid");
          stats->imp_updates_invalid++;
          goto drop;
        }

      if (filter == FILTER_REJECT)
        {
          stats->imp_updates_filtered++;
          rte_trace_in(D_FILTERS, p, new, "filtered out");

          if (! c->in_keep_filtered)
            goto drop;

          /* new is a private copy, i could modify it */
          new->flags |= REF_FILTERED;
        }
      else
        {
          tmpa = rte_make_tmp_attrs(new, rte_update_pool);
          if (filter && (filter != FILTER_REJECT))
            {
              ea_list *old_tmpa = tmpa;
              int fr = f_run(filter, &new, &tmpa, rte_update_pool, 0);
              if (fr > F_ACCEPT)
                {
                  stats->imp_updates_filtered++;
                  rte_trace_in(D_FILTERS, p, new, "filtered out");

                  if (! c->in_keep_filtered)
                    goto drop;

                  new->flags |= REF_FILTERED;
                }
              /* The filter modified the temporary attrs; let the source
                 protocol persist them back into the route */
              if (tmpa != old_tmpa && src->proto->store_tmp_attrs)
                src->proto->store_tmp_attrs(new, tmpa);
            }
        }
      if (!rta_is_cached(new->attrs)) /* Need to copy attributes */
        new->attrs = rta_lookup(new->attrs);
      new->flags |= REF_COW;
    }
  else
    {
      stats->imp_withdraws_received++;

      /* Withdraw for an unknown network or without a source: nothing to do */
      if (!(nn = net_find(c->table, n)) || !src)
        {
          stats->imp_withdraws_ignored++;
          rte_update_unlock();
          return;
        }
    }

 recalc:
  rte_hide_dummy_routes(nn, &dummy);
  rte_recalculate(c, nn, new, src);
  rte_unhide_dummy_routes(nn, &dummy);
  rte_update_unlock();
  return;

 drop:
  /* Rejected update is processed as a withdraw of the old route */
  rte_free(new);
  new = NULL;
  goto recalc;
}
1384

    
1385
/* Independent call to rte_announce(), used from next hop
   recalculation, outside of rte_update(). new must be non-NULL.
   Wraps the call in the update lock so temporary allocations from
   rte_update_pool are flushed afterwards. */
static inline void 
rte_announce_i(rtable *tab, unsigned type, net *net, rte *new, rte *old,
               rte *new_best, rte *old_best)
{
  rte_update_lock();
  rte_announce(tab, type, net, new, old, new_best, old_best, NULL);
  rte_update_unlock();
}
1395

    
1396
/* Non-filtered route deletion, used during garbage collection:
 * withdraws @old from its table by running a NULL recalculation
 * for its source under the update lock. */
static inline void
rte_discard(rte *old)        /* Non-filtered route deletion, used during garbage collection */
{
  rte_update_lock();
  rte_recalculate(old->sender, old->net, NULL, old->attrs->src);
  rte_update_unlock();
}
1403

    
1404
/* Check rtable for best route to given net whether it would be exported do p */
/* Returns 1 when the best route for @a in table @t would pass @p's
 * import_control hook and @filter; 0 otherwise (including no/invalid route). */
int
rt_examine(rtable *t, net_addr *a, struct proto *p, struct filter *filter)
{
  net *n = net_find(t, a);
  rte *rt = n ? n->routes : NULL;

  if (!rte_is_valid(rt))
    return 0;

  rte_update_lock();

  /* Rest is stripped down export_filter() */
  ea_list *tmpa = rte_make_tmp_attrs(rt, rte_update_pool);
  int v = p->import_control ? p->import_control(p, &rt, &tmpa, rte_update_pool) : 0;
  if (v == RIC_PROCESS)
    v = (f_run(filter, &rt, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) <= F_ACCEPT);

   /* Discard temporary rte */
  if (rt != n->routes)
    rte_free(rt);

  rte_update_unlock();

  return v > 0;
}
1430

    
1431

    
1432
/**
 * rt_refresh_begin - start a refresh cycle
 * @t: related routing table
 * @c: related channel
 *
 * This function starts a refresh cycle for given routing table and announce
 * hook. The refresh cycle is a sequence where the protocol sends all its valid
 * routes to the routing table (by rte_update()). After that, all protocol
 * routes (more precisely routes with @c as @sender) not sent during the
 * refresh cycle but still in the table from the past are pruned. This is
 * implemented by marking all related routes as stale by REF_STALE flag in
 * rt_refresh_begin(), then marking all related stale routes with REF_DISCARD
 * flag in rt_refresh_end() and then removing such routes in the prune loop.
 */
void
rt_refresh_begin(rtable *t, struct channel *c)
{
  /* Mark every route from @c as stale; rte_update() clears the flag on
     routes that are re-announced during the refresh. */
  FIB_WALK(&t->fib, net, n)
    {
      rte *e;
      for (e = n->routes; e; e = e->next)
        if (e->sender == c)
          e->flags |= REF_STALE;
    }
  FIB_WALK_END;
}
1458

    
1459
/**
 * rt_refresh_end - end a refresh cycle
 * @t: related routing table
 * @c: related channel
 *
 * This function ends a refresh cycle for given routing table and announce
 * hook. See rt_refresh_begin() for description of refresh cycles.
 */
void
rt_refresh_end(rtable *t, struct channel *c)
{
  int prune = 0;

  /* Routes still stale at the end of the cycle were not re-announced;
     mark them for removal and schedule pruning if any were found. */
  FIB_WALK(&t->fib, net, n)
    {
      rte *e;
      for (e = n->routes; e; e = e->next)
        if ((e->sender == c) && (e->flags & REF_STALE))
          {
            e->flags |= REF_DISCARD;
            prune = 1;
          }
    }
  FIB_WALK_END;

  if (prune)
    rt_schedule_prune(t);
}
1487

    
1488

    
1489
/**
 * rte_dump - dump a route
 * @e: &rte to be dumped
 *
 * This functions dumps contents of a &rte to debug output: the prefix,
 * flags, preference, age, attributes and any protocol-specific attributes.
 */
void
rte_dump(rte *e)
{
  net *n = e->net;
  debug("%-1N ", n->n.addr);
  debug("KF=%02x PF=%02x pref=%d lm=%d ", n->n.flags, e->pflags, e->pref, now-e->lastmod);
  rta_dump(e->attrs);
  /* Let the originating protocol dump its private attribute data, if any */
  if (e->attrs->src->proto->proto->dump_attrs)
    e->attrs->src->proto->proto->dump_attrs(e);
  debug("\n");
}
1506

    
1507
/**
 * rt_dump - dump a routing table
 * @t: routing table to be dumped
 *
 * This function dumps contents of a given routing table to debug output.
 */
void
rt_dump(rtable *t)
{
  debug("Dump of routing table <%s>\n", t->name);
#ifdef DEBUGGING
  fib_check(&t->fib);
#endif
  FIB_WALK(&t->fib, net, n)
    {
      rte *e;
      for(e=n->routes; e; e=e->next)
        rte_dump(e);
    }
  FIB_WALK_END;
  debug("\n");
}
1529

    
1530
/**
 * rt_dump_all - dump all routing tables
 *
 * This function dumps contents of all routing tables to debug output.
 */
void
rt_dump_all(void)
{
  rtable *t;

  WALK_LIST(t, routing_tables)
    rt_dump(t);
}
1543

    
1544
static inline void
1545
rt_schedule_hcu(rtable *tab)
1546
{
1547
  if (tab->hcu_scheduled)
1548
    return;
1549

    
1550
  tab->hcu_scheduled = 1;
1551
  ev_schedule(tab->rt_event);
1552
}
1553

    
1554
/* Request a next hop update for @tab. The event only needs scheduling
 * when the state machine is idle; a running update picks up the dirty
 * bit by itself. */
static inline void
rt_schedule_nhu(rtable *tab)
{
  if (tab->nhu_state == NHU_CLEAN)
    ev_schedule(tab->rt_event);

  /* state change:
   *   NHU_CLEAN   -> NHU_SCHEDULED
   *   NHU_RUNNING -> NHU_DIRTY
   */
  tab->nhu_state |= NHU_SCHEDULED;
}
1566

    
1567
/* Request pruning of @tab. The event only needs scheduling from the
 * idle state; an already running prune picks up the re-request bit. */
void
rt_schedule_prune(rtable *tab)
{
  if (tab->prune_state == 0)
    ev_schedule(tab->rt_event);

  /* state change 0->1, 2->3 */
  tab->prune_state |= 1;
}
1576

    
1577

    
1578
/* Deferred-work event hook of a routing table: runs any pending host
 * cache update, next hop update and prune pass. The table is locked for
 * the duration so it cannot be freed from under the handlers. */
static void
rt_event(void *ptr)
{
  rtable *tab = ptr;

  rt_lock_table(tab);

  if (tab->hcu_scheduled)
    rt_update_hostcache(tab);

  if (tab->nhu_state)
    rt_next_hop_update(tab);

  if (tab->prune_state)
    rt_prune_table(tab);

  rt_unlock_table(tab);
}
1596

    
1597
/* Initialize routing table @t allocated from pool @p with name @name and
 * configuration @cf. A NULL @cf creates a bare table (defaults to NET_IP4,
 * no maintenance event) -- used for internal/auxiliary tables. */
void
rt_setup(pool *p, rtable *t, char *name, struct rtable_config *cf)
{
  bzero(t, sizeof(*t));
  t->name = name;
  t->config = cf;
  t->addr_type = cf ? cf->addr_type : NET_IP4;
  fib_init(&t->fib, p, t->addr_type, sizeof(net), OFFSETOF(net, n), 0, NULL);
  init_list(&t->channels);

  if (cf)
    {
      /* Configured tables get the deferred-work event (prune/nhu/hcu) */
      t->rt_event = ev_new(p);
      t->rt_event->hook = rt_event;
      t->rt_event->data = t;
      t->gc_time = now;
    }
}
1615

    
1616
/**
 * rt_init - initialize routing tables
 *
 * This function is called during BIRD startup. It initializes the
 * routing table module: the attribute cache, the table resource pool,
 * the shared linear pool for temporary update data, the &rte slab and
 * the global list of routing tables.
 */
void
rt_init(void)
{
  rta_init();
  rt_table_pool = rp_new(&root_pool, "Routing tables");
  rte_update_pool = lp_new(rt_table_pool, 4080);
  rte_slab = sl_new(rt_table_pool, sizeof(rte));
  init_list(&routing_tables);
}
1631

    
1632

    
1633
/**
 * rt_prune_table - prune a routing table
 * @tab: routing table to be pruned
 *
 * The prune loop scans routing tables and removes routes belonging to flushing
 * protocols, discarded routes and also stale network entries. It is called from
 * rt_event(). The event is rescheduled if the current iteration do not finish
 * the table. The pruning is directed by the prune state (@prune_state),
 * specifying whether the prune cycle is scheduled or running, and there
 * is also a persistent pruning iterator (@prune_fit).
 *
 * The prune loop is used also for channel flushing. For this purpose, the
 * channels to flush are marked before the iteration and notified after the
 * iteration.
 */
static void
rt_prune_table(rtable *tab)
{
  struct fib_iterator *fit = &tab->prune_fit;
  int limit = 512;                /* Max routes discarded per run before yielding */

  struct channel *c;
  node *n, *x;

  DBG("Pruning route table %s\n", tab->name);
#ifdef DEBUGGING
  fib_check(&tab->fib);
#endif

  if (tab->prune_state == 0)
    return;

  if (tab->prune_state == 1)
  {
    /* Mark channels to flush */
    WALK_LIST2(c, n, tab->channels, table_node)
      if (c->channel_state == CS_FLUSHING)
        c->flush_active = 1;

    FIB_ITERATE_INIT(fit, &tab->fib);
    tab->prune_state = 2;
  }

again:
  FIB_ITERATE_START(&tab->fib, fit, net, n)
    {
      rte *e;

    rescan:
      for (e=n->routes; e; e=e->next)
        if (e->sender->flush_active || (e->flags & REF_DISCARD))
          {
            if (limit <= 0)
              {
                /* Work quota exhausted -- park the iterator and resume later */
                FIB_ITERATE_PUT(fit);
                ev_schedule(tab->rt_event);
                return;
              }

            rte_discard(e);
            limit--;

            /* rte_discard() relinked the list; restart scan of this net */
            goto rescan;
          }

      if (!n->routes)                /* Orphaned FIB entry */
        {
          FIB_ITERATE_PUT(fit);
          fib_delete(&tab->fib, n);
          goto again;
        }
    }
  FIB_ITERATE_END;

#ifdef DEBUGGING
  fib_check(&tab->fib);
#endif

  tab->gc_counter = 0;
  tab->gc_time = now;

  /* state change 2->0, 3->1 */
  tab->prune_state &= 1;

  if (tab->prune_state > 0)
    ev_schedule(tab->rt_event);

  /* FIXME: This should be handled in a better way */
  rt_prune_sources();

  /* Close flushed channels */
  WALK_LIST2_DELSAFE(c, n, x, tab->channels, table_node)
    if (c->flush_active)
      {
        c->flush_active = 0;
        channel_set_state(c, CS_DOWN);
      }

  return;
}
1732

    
1733
/* Pre-configuration hook: reset the table list of config @c and create
 * the default "master4"/"master6" tables, one per address family. */
void
rt_preconfig(struct config *c)
{
  init_list(&c->tables);

  rt_new_table(cf_get_symbol("master4"), NET_IP4);
  rt_new_table(cf_get_symbol("master6"), NET_IP6);
}
1741

    
1742

    
1743
/*
 * Some functions for handling internal next hop updates
 * triggered by rt_schedule_nhu().
 */
1747

    
1748
static inline int
1749
rta_next_hop_outdated(rta *a)
1750
{
1751
  struct hostentry *he = a->hostentry;
1752

    
1753
  if (!he)
1754
    return 0;
1755

    
1756
  if (!he->src)
1757
    return a->dest != RTD_UNREACHABLE;
1758

    
1759
  return (a->dest != he->dest) || (a->igp_metric != he->igp_metric) ||
1760
    (!he->nexthop_linkable) || !nexthop_same(&(a->nh), &(he->src->nh));
1761
}
1762

    
1763
/*
 * rta_apply_hostentry - recompute the next hop of @a from host entry @he.
 * @a: route attributes to update (dest, metric and nexthop chain rewritten)
 * @he: resolved host entry providing the underlying route
 * @mls: optional MPLS label stack to append under the hostentry labels
 *
 * Next hops allocated for the rebuilt chain come from rte_update_pool,
 * so callers must hold the update lock.
 */
void
rta_apply_hostentry(rta *a, struct hostentry *he, mpls_label_stack *mls)
{
  a->hostentry = he;
  a->dest = he->dest;
  a->igp_metric = he->igp_metric;

  if (a->dest != RTD_UNICAST)
  {
    /* No nexthop */
no_nexthop:
    a->nh = (struct nexthop) {};
    if (mls)
    { /* Store the label stack for later changes */
      a->nh.labels_orig = a->nh.labels = mls->len;
      memcpy(a->nh.label, mls->stack, mls->len * sizeof(u32));
    }
    return;
  }

  if (((!mls) || (!mls->len)) && he->nexthop_linkable)
  { /* Just link the nexthop chain, no label append happens. */
    memcpy(&(a->nh), &(he->src->nh), nexthop_size(&(he->src->nh)));
    return;
  }

  struct nexthop *nhp = NULL, *nhr = NULL;
  int skip_nexthop = 0;

  /* Rebuild the nexthop chain, appending @mls labels to each hop;
     hops whose combined label stack would overflow are skipped. */
  for (struct nexthop *nh = &(he->src->nh); nh; nh = nh->next)
  {
    if (skip_nexthop)
      skip_nexthop--;
    else
    {
      nhr = nhp;
      nhp = (nhp ? (nhp->next = lp_allocz(rte_update_pool, NEXTHOP_MAX_SIZE)) : &(a->nh));
    }

    nhp->iface = nh->iface;
    nhp->weight = nh->weight;
    if (mls)
    {
      nhp->labels = nh->labels + mls->len;
      nhp->labels_orig = mls->len;
      if (nhp->labels <= MPLS_MAX_LABEL_STACK)
      {
        memcpy(nhp->label, nh->label, nh->labels * sizeof(u32)); /* First the hostentry labels */
        memcpy(&(nhp->label[nh->labels]), mls->stack, mls->len * sizeof(u32)); /* Then the bottom labels */
      }
      else
      {
        log(L_WARN "Sum of label stack sizes %d + %d = %d exceedes allowed maximum (%d)",
            nh->labels, mls->len, nhp->labels, MPLS_MAX_LABEL_STACK);
        skip_nexthop++;
        continue;
      }
    }
    if (ipa_nonzero(nh->gw))
      nhp->gw = nh->gw;                /* Router nexthop */
    else if (ipa_nonzero(he->link))
      nhp->gw = he->link;                /* Device nexthop with link-local address known */
    else
      nhp->gw = he->addr;                /* Device nexthop with link-local address unknown */
  }

  /* NOTE: the 'else' below binds to the inner 'if (nhr)' -- when the last
     hop was skipped: either truncate the chain after the last valid hop,
     or (no valid hop at all) declare the route unreachable. */
  if (skip_nexthop)
    if (nhr)
      nhr->next = NULL;
    else
    {
      a->dest = RTD_UNREACHABLE;
      log(L_WARN "No valid nexthop remaining, setting route unreachable");
      goto no_nexthop;
    }
}
1839

    
1840
/* Build a replacement for route @old with its next hop recomputed from
 * the (updated) host entry. Returns a freshly allocated &rte with cached
 * attributes; @old itself is left untouched (caller frees it). */
static inline rte *
rt_next_hop_update_rte(rtable *tab UNUSED, rte *old)
{
  /* Work on a stack copy of the attributes so the cached original stays intact */
  rta *a = alloca(RTA_MAX_SIZE);
  memcpy(a, old->attrs, rta_size(old->attrs));

  /* Recover the originally appended label stack so it can be re-applied */
  mpls_label_stack mls = { .len = a->nh.labels_orig };
  memcpy(mls.stack, &a->nh.label[a->nh.labels - mls.len], mls.len * sizeof(u32));

  rta_apply_hostentry(a, old->attrs->hostentry, &mls);
  a->aflags = 0;

  rte *e = sl_alloc(rte_slab);
  memcpy(e, old, sizeof(rte));
  e->attrs = rta_lookup(a);

  return e;
}
1858

    
1859
/* Recompute next hops for all outdated routes of network @n, re-elect
 * the best route and announce the changes. Returns the number of routes
 * updated (used as work quota by rt_next_hop_update()). */
static inline int
rt_next_hop_update_net(rtable *tab, net *n)
{
  rte **k, *e, *new, *old_best, **new_best;
  int count = 0;
  int free_old_best = 0;

  old_best = n->routes;
  if (!old_best)
    return 0;

  /* Replace each outdated route in place, announcing the swap */
  for (k = &n->routes; e = *k; k = &e->next)
    if (rta_next_hop_outdated(e->attrs))
      {
        new = rt_next_hop_update_rte(tab, e);
        *k = new;

        rte_announce_i(tab, RA_ANY, n, new, e, NULL, NULL);
        rte_trace_in(D_ROUTES, new->sender->proto, new, "updated");

        /* Call a pre-comparison hook */
        /* Not really an efficient way to compute this */
        if (e->attrs->src->proto->rte_recalculate)
          e->attrs->src->proto->rte_recalculate(tab, n, new, e, NULL);

        if (e != old_best)
          rte_free_quick(e);
        else /* Freeing of the old best rte is postponed */
          free_old_best = 1;

        e = new;
        count++;
      }

  if (!count)
    return 0;

  /* Find the new best route */
  new_best = NULL;
  for (k = &n->routes; e = *k; k = &e->next)
    {
      if (!new_best || rte_better(e, *new_best))
        new_best = k;
    }

  /* Relink the new best route to the first position */
  new = *new_best;
  if (new != n->routes)
    {
      *new_best = new->next;
      new->next = n->routes;
      n->routes = new;
    }

  /* Announce the new best route */
  if (new != old_best)
    {
      rte_announce_i(tab, RA_OPTIMAL, n, new, old_best, NULL, NULL);
      rte_trace_in(D_ROUTES, new->sender->proto, new, "updated [best]");
    }

  /* FIXME: Better announcement of merged routes */
  rte_announce_i(tab, RA_MERGED, n, new, old_best, new, old_best);

  if (free_old_best)
    rte_free_quick(old_best);

  return count;
}
1928

    
1929
/* Incremental next hop update pass over table @tab, driven by the
 * NHU_* state machine and a persistent FIB iterator. Processes up to
 * ~32 route updates per run, then reschedules itself. */
static void
rt_next_hop_update(rtable *tab)
{
  struct fib_iterator *fit = &tab->nhu_fit;
  int max_feed = 32;

  if (tab->nhu_state == NHU_CLEAN)
    return;

  if (tab->nhu_state == NHU_SCHEDULED)
    {
      FIB_ITERATE_INIT(fit, &tab->fib);
      tab->nhu_state = NHU_RUNNING;
    }

  FIB_ITERATE_START(&tab->fib, fit, net, n)
    {
      if (max_feed <= 0)
        {
          /* Work quota exhausted -- park the iterator and resume later */
          FIB_ITERATE_PUT(fit);
          ev_schedule(tab->rt_event);
          return;
        }
      max_feed -= rt_next_hop_update_net(tab, n);
    }
  FIB_ITERATE_END;

  /* State change:
   *   NHU_DIRTY   -> NHU_SCHEDULED
   *   NHU_RUNNING -> NHU_CLEAN
   */
  tab->nhu_state &= 1;

  if (tab->nhu_state != NHU_CLEAN)
    ev_schedule(tab->rt_event);
}
1965

    
1966

    
1967
struct rtable_config *
1968
rt_new_table(struct symbol *s, uint addr_type)
1969
{
1970
  /* Hack that allows to 'redefine' the master table */
1971
  if ((s->class == SYM_TABLE) &&
1972
      (s->def == new_config->def_tables[addr_type]) &&
1973
      ((addr_type == NET_IP4) || (addr_type == NET_IP6)))
1974
    return s->def;
1975

    
1976
  struct rtable_config *c = cfg_allocz(sizeof(struct rtable_config));
1977

    
1978
  cf_define_symbol(s, SYM_TABLE, c);
1979
  c->name = s->name;
1980
  c->addr_type = addr_type;
1981
  c->gc_max_ops = 1000;
1982
  c->gc_min_time = 5;
1983

    
1984
  add_tail(&new_config->tables, &c->n);
1985

    
1986
  /* First table of each type is kept as default */
1987
  if (! new_config->def_tables[addr_type])
1988
    new_config->def_tables[addr_type] = c;
1989

    
1990
  return c;
1991
}
1992

    
1993
/**
 * rt_lock_table - lock a routing table
 * @r: routing table to be locked
 *
 * Lock a routing table, because it's in use by a protocol,
 * preventing it from being freed when it gets undefined in a new
 * configuration. Each lock is a simple use-count increment; pair
 * every call with rt_unlock_table().
 */
void
rt_lock_table(rtable *r)
{
  r->use_count++;
}
2006

    
2007
/**
 * rt_unlock_table - unlock a routing table
 * @r: routing table to be unlocked
 *
 * Unlock a routing table formerly locked by rt_lock_table(),
 * that is decrease its use count and delete it if it's scheduled
 * for deletion by configuration changes.
 */
void
rt_unlock_table(rtable *r)
{
  if (!--r->use_count && r->deleted)
    {
      struct config *conf = r->deleted;
      DBG("Deleting routing table %s\n", r->name);
      /* Detach from the (old) config before tearing the table down */
      r->config->table = NULL;
      if (r->hostcache)
        rt_free_hostcache(r);
      rem_node(&r->n);
      fib_free(&r->fib);
      rfree(r->rt_event);
      mb_free(r);
      /* Drop the obstacle so the old configuration can be freed */
      config_del_obstacle(conf);
    }
}
2032

    
2033
/**
 * rt_commit - commit new routing table configuration
 * @new: new configuration
 * @old: original configuration or %NULL if it's boot time config
 *
 * Scan differences between @old and @new configuration and modify
 * the routing tables according to these changes. If @new defines a
 * previously unknown table, create it, if it omits a table existing
 * in @old, schedule it for deletion (it gets deleted when all protocols
 * disconnect from it by calling rt_unlock_table()), if it exists
 * in both configurations, leave it unchanged.
 */
void
rt_commit(struct config *new, struct config *old)
{
  struct rtable_config *o, *r;

  DBG("rt_commit:\n");
  if (old)
    {
      WALK_LIST(o, old->tables)
        {
          rtable *ot = o->table;
          if (!ot->deleted)
            {
              struct symbol *sym = cf_find_symbol(new, o->name);
              if (sym && sym->class == SYM_TABLE && !new->shutdown)
                {
                  /* Table survives: rebind it to the new config entry */
                  DBG("\t%s: same\n", o->name);
                  r = sym->def;
                  r->table = ot;
                  ot->name = r->name;
                  ot->config = r;
                  if (o->sorted != r->sorted)
                    log(L_WARN "Reconfiguration of rtable sorted flag not implemented");
                }
              else
                {
                  /* Table disappeared (or we are shutting down): mark it
                   * deleted and keep @old alive until it is really gone */
                  DBG("\t%s: deleted\n", o->name);
                  ot->deleted = old;
                  config_add_obstacle(old);
                  /* Lock+unlock deletes the table immediately when no
                   * protocol holds it, otherwise deletion is deferred */
                  rt_lock_table(ot);
                  rt_unlock_table(ot);
                }
            }
        }
    }

  /* Instantiate tables that are new in @new */
  WALK_LIST(r, new->tables)
    if (!r->table)
      {
        rtable *t = mb_alloc(rt_table_pool, sizeof(struct rtable));
        DBG("\t%s: created\n", r->name);
        rt_setup(rt_table_pool, t, r->name, r);
        add_tail(&routing_tables, &t->n);
        r->table = t;
      }
  DBG("\tdone\n");
}
2092

    
2093
static inline void
2094
do_feed_channel(struct channel *c, net *n, rte *e)
2095
{
2096
  rte_update_lock();
2097
  if (c->ra_mode == RA_ACCEPTED)
2098
    rt_notify_accepted(c, n, e, NULL, NULL, c->refeeding ? 2 : 1);
2099
  else if (c->ra_mode == RA_MERGED)
2100
    rt_notify_merged(c, n, NULL, NULL, e, c->refeeding ? e : NULL, c->refeeding);
2101
  else /* RA_BASIC */
2102
    rt_notify_basic(c, n, e, c->refeeding ? e : NULL, c->refeeding);
2103
  rte_update_unlock();
2104
}
2105

    
2106
/**
 * rt_feed_channel - advertise all routes to a channel
 * @c: channel to be fed
 *
 * This function performs one pass of advertisement of routes to a channel that
 * is in the ES_FEEDING state. It is called by the protocol code as long as it
 * has something to do. (We avoid transferring all the routes in single pass in
 * order not to monopolize CPU time.)
 *
 * Returns 1 when the feed is finished (or aborted because the channel left
 * the feeding state), 0 when interrupted by the work limit and another pass
 * is needed.
 */
int
rt_feed_channel(struct channel *c)
{
  struct fib_iterator *fit = &c->feed_fit;
  int max_feed = 256;  /* Work budget for this pass */

  ASSERT(c->export_state == ES_FEEDING);

  if (!c->feed_active)
    {
      /* First pass: start the FIB walk from the beginning */
      FIB_ITERATE_INIT(fit, &c->table->fib);
      c->feed_active = 1;
    }

  FIB_ITERATE_START(&c->table->fib, fit, net, n)
    {
      rte *e = n->routes;
      if (max_feed <= 0)
        {
          /* Budget exhausted -- park the iterator and come back later */
          FIB_ITERATE_PUT(fit);
          return 0;
        }

      /* FIXME: perhaps we should change feed for RA_ACCEPTED to not use 'new' */

      /* Single-route modes: announce only the best route, if valid */
      if ((c->ra_mode == RA_OPTIMAL) ||
          (c->ra_mode == RA_ACCEPTED) ||
          (c->ra_mode == RA_MERGED))
        if (rte_is_valid(e))
          {
            /* In the meantime, the protocol may have gone down */
            if (c->export_state != ES_FEEDING)
              goto done;

            do_feed_channel(c, n, e);
            max_feed--;
          }

      /* RA_ANY: announce every valid route for the network */
      if (c->ra_mode == RA_ANY)
        for(e = n->routes; e; e = e->next)
          {
            /* In the meantime, the protocol may have gone down */
            if (c->export_state != ES_FEEDING)
              goto done;

            if (!rte_is_valid(e))
              continue;

            do_feed_channel(c, n, e);
            max_feed--;
          }
    }
  FIB_ITERATE_END;

done:
  c->feed_active = 0;
  return 1;
}
2173

    
2174
/**
2175
 * rt_feed_baby_abort - abort protocol feeding
2176
 * @c: channel
2177
 *
2178
 * This function is called by the protocol code when the protocol stops or
2179
 * ceases to exist during the feeding.
2180
 */
2181
void
2182
rt_feed_channel_abort(struct channel *c)
2183
{
2184
  if (c->feed_active)
2185
    {
2186
      /* Unlink the iterator */
2187
      fit_get(&c->table->fib, &c->feed_fit);
2188
      c->feed_active = 0;
2189
    }
2190
}
2191

    
2192
/*
 * ptr_hash - mix a pointer value into a small hash
 * @ptr: pointer to hash
 *
 * Folds shifted copies of the pointer's bits together with XOR so that
 * nearby pointers map to different buckets.
 */
static inline unsigned
ptr_hash(void *ptr)
{
  uintptr_t v = (uintptr_t) ptr;
  uintptr_t mixed = v ^ (v << 8);

  return mixed ^ (v >> 16);
}
2198

    
2199
static inline u32
2200
hc_hash(ip_addr a, rtable *dep)
2201
{
2202
  return ipa_hash(a) ^ ptr_hash(dep);
2203
}
2204

    
2205
static inline void
2206
hc_insert(struct hostcache *hc, struct hostentry *he)
2207
{
2208
  uint k = he->hash_key >> hc->hash_shift;
2209
  he->next = hc->hash_table[k];
2210
  hc->hash_table[k] = he;
2211
}
2212

    
2213
static inline void
2214
hc_remove(struct hostcache *hc, struct hostentry *he)
2215
{
2216
  struct hostentry **hep;
2217
  uint k = he->hash_key >> hc->hash_shift;
2218

    
2219
  for (hep = &hc->hash_table[k]; *hep != he; hep = &(*hep)->next);
2220
  *hep = he->next;
2221
}
2222

    
2223
#define HC_DEF_ORDER 10
2224
#define HC_HI_MARK *4
2225
#define HC_HI_STEP 2
2226
#define HC_HI_ORDER 16                        /* Must be at most 16 */
2227
#define HC_LO_MARK /5
2228
#define HC_LO_STEP 2
2229
#define HC_LO_ORDER 10
2230

    
2231
static void
2232
hc_alloc_table(struct hostcache *hc, unsigned order)
2233
{
2234
  uint hsize = 1 << order;
2235
  hc->hash_order = order;
2236
  hc->hash_shift = 32 - order;
2237
  hc->hash_max = (order >= HC_HI_ORDER) ? ~0U : (hsize HC_HI_MARK);
2238
  hc->hash_min = (order <= HC_LO_ORDER) ?  0U : (hsize HC_LO_MARK);
2239

    
2240
  hc->hash_table = mb_allocz(rt_table_pool, hsize * sizeof(struct hostentry *));
2241
}
2242

    
2243
static void
2244
hc_resize(struct hostcache *hc, unsigned new_order)
2245
{
2246
  struct hostentry **old_table = hc->hash_table;
2247
  struct hostentry *he, *hen;
2248
  uint old_size = 1 << hc->hash_order;
2249
  uint i;
2250

    
2251
  hc_alloc_table(hc, new_order);
2252
  for (i = 0; i < old_size; i++)
2253
    for (he = old_table[i]; he != NULL; he=hen)
2254
      {
2255
        hen = he->next;
2256
        hc_insert(hc, he);
2257
      }
2258
  mb_free(old_table);
2259
}
2260

    
2261
static struct hostentry *
2262
hc_new_hostentry(struct hostcache *hc, ip_addr a, ip_addr ll, rtable *dep, unsigned k)
2263
{
2264
  struct hostentry *he = sl_alloc(hc->slab);
2265

    
2266
  *he = (struct hostentry) {
2267
    .addr = a,
2268
    .link = ll,
2269
    .tab = dep,
2270
    .hash_key = k,
2271
  };
2272

    
2273
  add_tail(&hc->hostentries, &he->ln);
2274
  hc_insert(hc, he);
2275

    
2276
  hc->hash_items++;
2277
  if (hc->hash_items > hc->hash_max)
2278
    hc_resize(hc, hc->hash_order + HC_HI_STEP);
2279

    
2280
  return he;
2281
}
2282

    
2283
static void
2284
hc_delete_hostentry(struct hostcache *hc, struct hostentry *he)
2285
{
2286
  rta_free(he->src);
2287

    
2288
  rem_node(&he->ln);
2289
  hc_remove(hc, he);
2290
  sl_free(hc->slab, he);
2291

    
2292
  hc->hash_items--;
2293
  if (hc->hash_items < hc->hash_min)
2294
    hc_resize(hc, hc->hash_order - HC_LO_STEP);
2295
}
2296

    
2297
static void
2298
rt_init_hostcache(rtable *tab)
2299
{
2300
  struct hostcache *hc = mb_allocz(rt_table_pool, sizeof(struct hostcache));
2301
  init_list(&hc->hostentries);
2302

    
2303
  hc->hash_items = 0;
2304
  hc_alloc_table(hc, HC_DEF_ORDER);
2305
  hc->slab = sl_new(rt_table_pool, sizeof(struct hostentry));
2306

    
2307
  hc->lp = lp_new(rt_table_pool, 1008);
2308
  hc->trie = f_new_trie(hc->lp, sizeof(struct f_trie_node));
2309

    
2310
  tab->hostcache = hc;
2311
}
2312

    
2313
static void
2314
rt_free_hostcache(rtable *tab)
2315
{
2316
  struct hostcache *hc = tab->hostcache;
2317

    
2318
  node *n;
2319
  WALK_LIST(n, hc->hostentries)
2320
    {
2321
      struct hostentry *he = SKIP_BACK(struct hostentry, ln, n);
2322
      rta_free(he->src);
2323

    
2324
      if (he->uc)
2325
        log(L_ERR "Hostcache is not empty in table %s", tab->name);
2326
    }
2327

    
2328
  rfree(hc->slab);
2329
  rfree(hc->lp);
2330
  mb_free(hc->hash_table);
2331
  mb_free(hc);
2332
}
2333

    
2334
static void
2335
rt_notify_hostcache(rtable *tab, net *net)
2336
{
2337
  if (tab->hcu_scheduled)
2338
    return;
2339

    
2340
  if (trie_match_net(tab->hostcache->trie, net->n.addr))
2341
    rt_schedule_hcu(tab);
2342
}
2343

    
2344
static int
2345
if_local_addr(ip_addr a, struct iface *i)
2346
{
2347
  struct ifa *b;
2348

    
2349
  WALK_LIST(b, i->addrs)
2350
    if (ipa_equal(a, b->ip))
2351
      return 1;
2352

    
2353
  return 0;
2354
}
2355

    
2356
/*
 * rt_get_igp_metric - obtain the IGP metric of a route
 * @rt: route to examine
 *
 * Prefers an explicit EA_GEN_IGP_METRIC attribute; otherwise falls back to
 * the protocol-specific metric (OSPF/RIP, when compiled in), 0 for device
 * routes, and IGP_METRIC_UNKNOWN for everything else.
 */
static u32
rt_get_igp_metric(rte *rt)
{
  eattr *ea = ea_find(rt->attrs->eattrs, EA_GEN_IGP_METRIC);

  if (ea)
    return ea->u.data;

  rta *a = rt->attrs;

#ifdef CONFIG_OSPF
  if ((a->source == RTS_OSPF) ||
      (a->source == RTS_OSPF_IA) ||
      (a->source == RTS_OSPF_EXT1))
    return rt->u.ospf.metric1;
#endif

#ifdef CONFIG_RIP
  if (a->source == RTS_RIP)
    return rt->u.rip.metric;
#endif

  /* Directly connected networks have zero cost */
  if (a->source == RTS_DEVICE)
    return 0;

  return IGP_METRIC_UNKNOWN;
}
2383

    
2384
/*
 * rt_update_hostentry - re-resolve one hostentry against table @tab
 * @tab: table used for resolution
 * @he: hostentry to refresh
 *
 * Looks up the best route covering the host address, rejects resolution
 * through another recursive route or through a local address, and records
 * the resulting rta and IGP metric. Also registers the matched prefix range
 * in the hostcache trie so future table changes can be filtered.
 *
 * Returns nonzero when the cached rta actually changed (callers use this to
 * schedule next hop updates in the dependent table).
 */
static int
rt_update_hostentry(rtable *tab, struct hostentry *he)
{
  rta *old_src = he->src;
  int pxlen = 0;

  /* Reset the hostentry */
  he->src = NULL;
  he->nexthop_linkable = 0;
  he->dest = RTD_UNREACHABLE;
  he->igp_metric = 0;

  net_addr he_addr;
  net_fill_ip_host(&he_addr, he->addr);
  net *n = net_route(tab, &he_addr);
  if (n)
    {
      rte *e = n->routes;  /* best route for the host address */
      rta *a = e->attrs;
      pxlen = n->n.addr->pxlen;

      if (a->hostentry)
        {
          /* Recursive route should not depend on another recursive route */
          log(L_WARN "Next hop address %I resolvable through recursive route for %N",
              he->addr, n->n.addr);
          goto done;
        }

      he->dest = a->dest;
      he->nexthop_linkable = 1;
      if (he->dest == RTD_UNICAST)
        {
          /* A zero gateway means a device route; check it is usable */
          for (struct nexthop *nh = &(a->nh); nh; nh = nh->next)
            if (ipa_zero(nh->gw))
              {
                if (if_local_addr(he->addr, nh->iface))
                  {
                    /* The host address is a local address, this is not valid */
                    log(L_WARN "Next hop address %I is a local address of iface %s",
                        he->addr, nh->iface->name);
                    goto done;
                  }

                he->nexthop_linkable = 0;
                break;
              }
        }

      he->src = rta_clone(a);
      he->igp_metric = rt_get_igp_metric(e);
    }

done:
  /* Add a prefix range to the trie */
  trie_add_prefix(tab->hostcache->trie, &he_addr, pxlen, he_addr.pxlen);

  rta_free(old_src);
  return old_src != he->src;
}
2444

    
2445
static void
2446
rt_update_hostcache(rtable *tab)
2447
{
2448
  struct hostcache *hc = tab->hostcache;
2449
  struct hostentry *he;
2450
  node *n, *x;
2451

    
2452
  /* Reset the trie */
2453
  lp_flush(hc->lp);
2454
  hc->trie = f_new_trie(hc->lp, sizeof(struct f_trie_node));
2455

    
2456
  WALK_LIST_DELSAFE(n, x, hc->hostentries)
2457
    {
2458
      he = SKIP_BACK(struct hostentry, ln, n);
2459
      if (!he->uc)
2460
        {
2461
          hc_delete_hostentry(hc, he);
2462
          continue;
2463
        }
2464

    
2465
      if (rt_update_hostentry(tab, he))
2466
        rt_schedule_nhu(he->tab);
2467
    }
2468

    
2469
  tab->hcu_scheduled = 0;
2470
}
2471

    
2472
/*
 * rt_get_hostentry - look up or create a hostentry for recursive resolution
 * @tab: table used to resolve the host address
 * @a: host (next hop) address to track
 * @ll: link address; when zero, @a itself is stored as the link
 * @dep: dependent table to be updated when the resolution changes
 *
 * Initializes the table's hostcache on first use. Returns an existing entry
 * matching (@a, @dep) if present, otherwise creates one and resolves it
 * immediately via rt_update_hostentry().
 */
struct hostentry *
rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep)
{
  struct hostentry *he;

  if (!tab->hostcache)
    rt_init_hostcache(tab);

  u32 k = hc_hash(a, dep);
  struct hostcache *hc = tab->hostcache;
  /* Top bits of the hash select the bucket */
  for (he = hc->hash_table[k >> hc->hash_shift]; he != NULL; he = he->next)
    if (ipa_equal(he->addr, a) && (he->tab == dep))
      return he;

  he = hc_new_hostentry(hc, a, ipa_zero(ll) ? a : ll, dep, k);
  rt_update_hostentry(tab, he);
  return he;
}
2490

    
2491

    
2492
/*
2493
 *  Documentation for functions declared inline in route.h
2494
 */
2495
#if 0
2496

2497
/**
2498
 * net_find - find a network entry
2499
 * @tab: a routing table
2500
 * @addr: address of the network
2501
 *
2502
 * net_find() looks up the given network in routing table @tab and
2503
 * returns a pointer to its &net entry or %NULL if no such network
2504
 * exists.
2505
 */
2506
static inline net *net_find(rtable *tab, net_addr *addr)
2507
{ DUMMY; }
2508

2509
/**
2510
 * net_get - obtain a network entry
2511
 * @tab: a routing table
2512
 * @addr: address of the network
2513
 *
2514
 * net_get() looks up the given network in routing table @tab and
2515
 * returns a pointer to its &net entry. If no such entry exists, it's
2516
 * created.
2517
 */
2518
static inline net *net_get(rtable *tab, net_addr *addr)
2519
{ DUMMY; }
2520

2521
/**
2522
 * rte_cow - copy a route for writing
2523
 * @r: a route entry to be copied
2524
 *
2525
 * rte_cow() takes a &rte and prepares it for modification. The exact action
2526
 * taken depends on the flags of the &rte -- if it's a temporary entry, it's
2527
 * just returned unchanged, else a new temporary entry with the same contents
2528
 * is created.
2529
 *
2530
 * The primary use of this function is inside the filter machinery -- when
2531
 * a filter wants to modify &rte contents (to change the preference or to
2532
 * attach another set of attributes), it must ensure that the &rte is not
2533
 * shared with anyone else (and especially that it isn't stored in any routing
2534
 * table).
2535
 *
2536
 * Result: a pointer to the new writable &rte.
2537
 */
2538
static inline rte * rte_cow(rte *r)
2539
{ DUMMY; }
2540

2541
#endif