Statistics
| Branch: | Revision:

iof-bird-daemon / sysdep / unix / krt.c @ d14f8c3c

History | View | Annotate | Download (27.3 KB)

1
/*
2
 *        BIRD -- UNIX Kernel Synchronization
3
 *
4
 *        (c) 1998--2000 Martin Mares <mj@ucw.cz>
5
 *
6
 *        Can be freely distributed and used under the terms of the GNU GPL.
7
 */
8

    
9
/**
 * DOC: Kernel synchronization
 *
 * This system dependent module implements the Kernel and Device protocol,
 * that is synchronization of interface lists and routing tables with the
 * OS kernel.
 *
 * The whole kernel synchronization is a bit messy and touches some internals
 * of the routing table engine, because routing table maintenance is a typical
 * example of the proverbial compatibility between different Unices and we want
 * to keep the overhead of our KRT business as low as possible and avoid maintaining
 * a local routing table copy.
 *
 * The kernel syncer can work in three different modes (according to system config header):
 * Either with a single routing table and single KRT protocol [traditional UNIX]
 * or with many routing tables and separate KRT protocols for all of them
 * or with many routing tables, but every scan including all tables, so we start
 * separate KRT protocols which cooperate with each other [Linux].
 * In this case, we keep only a single scan timer.
 *
 * We use FIB node flags in the routing table to keep track of route
 * synchronization status. We also attach temporary &rte's to the routing table,
 * but it cannot do any harm to the rest of BIRD since table synchronization is
 * an atomic process.
 *
 * When starting up, we cheat by looking if there is another
 * KRT instance to be initialized later and performing table scan
 * only once for all the instances.
 *
 * The code uses OS-dependent parts for kernel updates and scans. These parts are
 * in more specific sysdep directories (e.g. sysdep/linux) in functions krt_sys_*
 * and kif_sys_* (and some others like krt_replace_rte()) and krt-sys.h header file.
 * This is also used for platform specific protocol options and route attributes.
 *
 * There was also old code that used traditional UNIX ioctls for these tasks.
 * It was unmaintained and later removed. For reference, see sysdep/krt-* files
 * in commit 396dfa9042305f62da1f56589c4b98fac57fc2f6
 */
47

    
48
/*
49
 *  If you are brave enough, continue now.  You cannot say you haven't been warned.
50
 */
51

    
52
#undef LOCAL_DEBUG
53

    
54
#include "nest/bird.h"
55
#include "nest/iface.h"
56
#include "nest/route.h"
57
#include "nest/protocol.h"
58
#include "filter/filter.h"
59
#include "sysdep/unix/timer.h"
60
#include "conf/conf.h"
61
#include "lib/string.h"
62

    
63
#include "unix.h"
64
#include "krt.h"
65

    
66
/*
67
 *        Global resources
68
 */
69

    
70
pool *krt_pool;
71
static linpool *krt_filter_lp;
72
static list krt_proto_list;
73

    
74
void
75
krt_io_init(void)
76
{
77
  krt_pool = rp_new(&root_pool, "Kernel Syncer");
78
  krt_filter_lp = lp_new(krt_pool, 4080);
79
  init_list(&krt_proto_list);
80
  krt_sys_io_init();
81
}
82

    
83
/*
84
 *        Interfaces
85
 */
86

    
87
struct kif_proto *kif_proto;
88
static struct kif_config *kif_cf;
89
static timer *kif_scan_timer;
90
static bird_clock_t kif_last_shot;
91

    
92
static void
93
kif_scan(timer *t)
94
{
95
  struct kif_proto *p = t->data;
96

    
97
  KRT_TRACE(p, D_EVENTS, "Scanning interfaces");
98
  kif_last_shot = now;
99
  kif_do_scan(p);
100
}
101

    
102
static void
103
kif_force_scan(void)
104
{
105
  if (kif_proto && kif_last_shot + 2 < now)
106
    {
107
      kif_scan(kif_scan_timer);
108
      tm_start(kif_scan_timer, ((struct kif_config *) kif_proto->p.cf)->scan_time);
109
    }
110
}
111

    
112
void
113
kif_request_scan(void)
114
{
115
  if (kif_proto && kif_scan_timer->expires > now)
116
    tm_start(kif_scan_timer, 1);
117
}
118

    
119
static inline int
120
prefer_addr(struct ifa *a, struct ifa *b)
121
{
122
  int sa = a->scope > SCOPE_LINK;
123
  int sb = b->scope > SCOPE_LINK;
124

    
125
  if (sa < sb)
126
    return 0;
127
  else if (sa > sb)
128
    return 1;
129
  else
130
    return ipa_compare(a->ip, b->ip) < 0;
131
}
132

    
133
static inline struct ifa *
134
find_preferred_ifa(struct iface *i, const net_addr *n)
135
{
136
  struct ifa *a, *b = NULL;
137

    
138
  WALK_LIST(a, i->addrs)
139
    {
140
      if (!(a->flags & IA_SECONDARY) &&
141
          (!n || ipa_in_netX(a->ip, n)) &&
142
          (!b || prefer_addr(a, b)))
143
        b = a;
144
    }
145

    
146
  return b;
147
}
148

    
149
struct ifa *
150
kif_choose_primary(struct iface *i)
151
{
152
  struct kif_config *cf = (struct kif_config *) (kif_proto->p.cf);
153
  struct kif_primary_item *it;
154
  struct ifa *a;
155

    
156
  WALK_LIST(it, cf->primary)
157
    {
158
      if (!it->pattern || patmatch(it->pattern, i->name))
159
        if (a = find_preferred_ifa(i, &it->addr))
160
          return a;
161
    }
162

    
163
  if (a = kif_get_primary_ip(i))
164
    return a;
165

    
166
  return find_preferred_ifa(i, NULL);
167
}
168

    
169

    
170
static struct proto *
171
kif_init(struct proto_config *c)
172
{
173
  struct kif_proto *p = proto_new(c);
174

    
175
  kif_sys_init(p);
176
  return &p->p;
177
}
178

    
179
static int
180
kif_start(struct proto *P)
181
{
182
  struct kif_proto *p = (struct kif_proto *) P;
183

    
184
  kif_proto = p;
185
  kif_sys_start(p);
186

    
187
  /* Start periodic interface scanning */
188
  kif_scan_timer = tm_new(P->pool);
189
  kif_scan_timer->hook = kif_scan;
190
  kif_scan_timer->data = p;
191
  kif_scan_timer->recurrent = KIF_CF->scan_time;
192
  kif_scan(kif_scan_timer);
193
  tm_start(kif_scan_timer, KIF_CF->scan_time);
194

    
195
  return PS_UP;
196
}
197

    
198
static int
199
kif_shutdown(struct proto *P)
200
{
201
  struct kif_proto *p = (struct kif_proto *) P;
202

    
203
  tm_stop(kif_scan_timer);
204
  kif_sys_shutdown(p);
205
  kif_proto = NULL;
206

    
207
  return PS_DOWN;
208
}
209

    
210
static int
211
kif_reconfigure(struct proto *p, struct proto_config *new)
212
{
213
  struct kif_config *o = (struct kif_config *) p->cf;
214
  struct kif_config *n = (struct kif_config *) new;
215

    
216
  if (!kif_sys_reconfigure((struct kif_proto *) p, n, o))
217
    return 0;
218

    
219
  if (o->scan_time != n->scan_time)
220
    {
221
      tm_stop(kif_scan_timer);
222
      kif_scan_timer->recurrent = n->scan_time;
223
      kif_scan(kif_scan_timer);
224
      tm_start(kif_scan_timer, n->scan_time);
225
    }
226

    
227
  if (!EMPTY_LIST(o->primary) || !EMPTY_LIST(n->primary))
228
    {
229
      /* This is hack, we have to update a configuration
230
       * to the new value just now, because it is used
231
       * for recalculation of primary addresses.
232
       */
233
      p->cf = new;
234

    
235
      ifa_recalc_all_primary_addresses();
236
    }
237

    
238
  return 1;
239
}
240

    
241

    
242
static void
243
kif_preconfig(struct protocol *P UNUSED, struct config *c)
244
{
245
  kif_cf = NULL;
246
  kif_sys_preconfig(c);
247
}
248

    
249
struct proto_config *
250
kif_init_config(int class)
251
{
252
  if (kif_cf)
253
    cf_error("Kernel device protocol already defined");
254

    
255
  kif_cf = (struct kif_config *) proto_config_new(&proto_unix_iface, class);
256
  kif_cf->scan_time = 60;
257
  init_list(&kif_cf->primary);
258

    
259
  kif_sys_init_config(kif_cf);
260
  return (struct proto_config *) kif_cf;
261
}
262

    
263
static void
264
kif_copy_config(struct proto_config *dest, struct proto_config *src)
265
{
266
  struct kif_config *d = (struct kif_config *) dest;
267
  struct kif_config *s = (struct kif_config *) src;
268

    
269
  /* Copy primary addr list */
270
  cfg_copy_list(&d->primary, &s->primary, sizeof(struct kif_primary_item));
271

    
272
  /* Fix sysdep parts */
273
  kif_sys_copy_config(d, s);
274
}
275

    
276

    
277
struct protocol proto_unix_iface = {
278
  .name =                 "Device",
279
  .template =                 "device%d",
280
  .proto_size =                sizeof(struct kif_proto),
281
  .config_size =        sizeof(struct kif_config),
282
  .preconfig =                kif_preconfig,
283
  .init =                kif_init,
284
  .start =                kif_start,
285
  .shutdown =                kif_shutdown,
286
  .reconfigure =        kif_reconfigure,
287
  .copy_config =        kif_copy_config
288
};
289

    
290
/*
291
 *        Tracing of routes
292
 */
293

    
294
static inline void
295
krt_trace_in(struct krt_proto *p, rte *e, char *msg)
296
{
297
  if (p->p.debug & D_PACKETS)
298
    log(L_TRACE "%s: %N: %s", p->p.name, e->net->n.addr, msg);
299
}
300

    
301
static inline void
302
krt_trace_in_rl(struct tbf *f, struct krt_proto *p, rte *e, char *msg)
303
{
304
  if (p->p.debug & D_PACKETS)
305
    log_rl(f, L_TRACE "%s: %N: %s", p->p.name, e->net->n.addr, msg);
306
}
307

    
308
/*
309
 *        Inherited Routes
310
 */
311

    
312
#ifdef KRT_ALLOW_LEARN
313

    
314
static struct tbf rl_alien = TBF_DEFAULT_LOG_LIMITS;
315

    
316
/*
317
 * krt_same_key() specifies what (aside from the net) is the key in
318
 * kernel routing tables. It should be OS-dependent, this is for
319
 * Linux. It is important for asynchronous alien updates, because a
320
 * positive update is implicitly a negative one for any old route with
321
 * the same key.
322
 */
323

    
324
static inline int
325
krt_same_key(rte *a, rte *b)
326
{
327
  return a->u.krt.metric == b->u.krt.metric;
328
}
329

    
330
static inline int
331
krt_uptodate(rte *a, rte *b)
332
{
333
  if (a->attrs != b->attrs)
334
    return 0;
335

    
336
  if (a->u.krt.proto != b->u.krt.proto)
337
    return 0;
338

    
339
  return 1;
340
}
341

    
342
static void
343
krt_learn_announce_update(struct krt_proto *p, rte *e)
344
{
345
  net *n = e->net;
346
  rta *aa = rta_clone(e->attrs);
347
  rte *ee = rte_get_temp(aa);
348
  ee->pflags = 0;
349
  ee->u.krt = e->u.krt;
350
  rte_update(&p->p, n->n.addr, ee);
351
}
352

    
353
static void
354
krt_learn_announce_delete(struct krt_proto *p, net *n)
355
{
356
  rte_update(&p->p, n->n.addr, NULL);
357
}
358

    
359
/* Called when alien route is discovered during scan */
360
static void
361
krt_learn_scan(struct krt_proto *p, rte *e)
362
{
363
  net *n0 = e->net;
364
  net *n = net_get(&p->krt_table, n0->n.addr);
365
  rte *m, **mm;
366

    
367
  e->attrs = rta_lookup(e->attrs);
368

    
369
  for(mm=&n->routes; m = *mm; mm=&m->next)
370
    if (krt_same_key(m, e))
371
      break;
372
  if (m)
373
    {
374
      if (krt_uptodate(m, e))
375
        {
376
          krt_trace_in_rl(&rl_alien, p, e, "[alien] seen");
377
          rte_free(e);
378
          m->u.krt.seen = 1;
379
        }
380
      else
381
        {
382
          krt_trace_in(p, e, "[alien] updated");
383
          *mm = m->next;
384
          rte_free(m);
385
          m = NULL;
386
        }
387
    }
388
  else
389
    krt_trace_in(p, e, "[alien] created");
390
  if (!m)
391
    {
392
      e->next = n->routes;
393
      n->routes = e;
394
      e->u.krt.seen = 1;
395
    }
396
}
397

    
398
static void
399
krt_learn_prune(struct krt_proto *p)
400
{
401
  struct fib *fib = &p->krt_table.fib;
402
  struct fib_iterator fit;
403

    
404
  KRT_TRACE(p, D_EVENTS, "Pruning inherited routes");
405

    
406
  FIB_ITERATE_INIT(&fit, fib);
407
again:
408
  FIB_ITERATE_START(fib, &fit, net, n)
409
    {
410
      rte *e, **ee, *best, **pbest, *old_best;
411

    
412
      /*
413
       * Note that old_best may be NULL even if there was an old best route in
414
       * the previous step, because it might be replaced in krt_learn_scan().
415
       * But in that case there is a new valid best route.
416
       */
417

    
418
      old_best = NULL;
419
      best = NULL;
420
      pbest = NULL;
421
      ee = &n->routes;
422
      while (e = *ee)
423
        {
424
          if (e->u.krt.best)
425
            old_best = e;
426

    
427
          if (!e->u.krt.seen)
428
            {
429
              *ee = e->next;
430
              rte_free(e);
431
              continue;
432
            }
433

    
434
          if (!best || best->u.krt.metric > e->u.krt.metric)
435
            {
436
              best = e;
437
              pbest = ee;
438
            }
439

    
440
          e->u.krt.seen = 0;
441
          e->u.krt.best = 0;
442
          ee = &e->next;
443
        }
444
      if (!n->routes)
445
        {
446
          DBG("%I/%d: deleting\n", n->n.prefix, n->n.pxlen);
447
          if (old_best)
448
            krt_learn_announce_delete(p, n);
449

    
450
          FIB_ITERATE_PUT(&fit);
451
          fib_delete(fib, n);
452
          goto again;
453
        }
454

    
455
      best->u.krt.best = 1;
456
      *pbest = best->next;
457
      best->next = n->routes;
458
      n->routes = best;
459

    
460
      if ((best != old_best) || p->reload)
461
        {
462
          DBG("%I/%d: announcing (metric=%d)\n", n->n.prefix, n->n.pxlen, best->u.krt.metric);
463
          krt_learn_announce_update(p, best);
464
        }
465
      else
466
        DBG("%I/%d: uptodate (metric=%d)\n", n->n.prefix, n->n.pxlen, best->u.krt.metric);
467
    }
468
  FIB_ITERATE_END;
469

    
470
  p->reload = 0;
471
}
472

    
473
static void
474
krt_learn_async(struct krt_proto *p, rte *e, int new)
475
{
476
  net *n0 = e->net;
477
  net *n = net_get(&p->krt_table, n0->n.addr);
478
  rte *g, **gg, *best, **bestp, *old_best;
479

    
480
  e->attrs = rta_lookup(e->attrs);
481

    
482
  old_best = n->routes;
483
  for(gg=&n->routes; g = *gg; gg = &g->next)
484
    if (krt_same_key(g, e))
485
      break;
486
  if (new)
487
    {
488
      if (g)
489
        {
490
          if (krt_uptodate(g, e))
491
            {
492
              krt_trace_in(p, e, "[alien async] same");
493
              rte_free(e);
494
              return;
495
            }
496
          krt_trace_in(p, e, "[alien async] updated");
497
          *gg = g->next;
498
          rte_free(g);
499
        }
500
      else
501
        krt_trace_in(p, e, "[alien async] created");
502

    
503
      e->next = n->routes;
504
      n->routes = e;
505
    }
506
  else if (!g)
507
    {
508
      krt_trace_in(p, e, "[alien async] delete failed");
509
      rte_free(e);
510
      return;
511
    }
512
  else
513
    {
514
      krt_trace_in(p, e, "[alien async] removed");
515
      *gg = g->next;
516
      rte_free(e);
517
      rte_free(g);
518
    }
519
  best = n->routes;
520
  bestp = &n->routes;
521
  for(gg=&n->routes; g=*gg; gg=&g->next)
522
  {
523
    if (best->u.krt.metric > g->u.krt.metric)
524
      {
525
        best = g;
526
        bestp = gg;
527
      }
528

    
529
    g->u.krt.best = 0;
530
  }
531

    
532
  if (best)
533
    {
534
      best->u.krt.best = 1;
535
      *bestp = best->next;
536
      best->next = n->routes;
537
      n->routes = best;
538
    }
539

    
540
  if (best != old_best)
541
    {
542
      DBG("krt_learn_async: distributing change\n");
543
      if (best)
544
        krt_learn_announce_update(p, best);
545
      else
546
        krt_learn_announce_delete(p, n);
547
    }
548
}
549

    
550
static void
551
krt_learn_init(struct krt_proto *p)
552
{
553
  if (KRT_CF->learn)
554
    rt_setup(p->p.pool, &p->krt_table, "Inherited", NULL);
555
}
556

    
557
static void
558
krt_dump(struct proto *P)
559
{
560
  struct krt_proto *p = (struct krt_proto *) P;
561

    
562
  if (!KRT_CF->learn)
563
    return;
564
  debug("KRT: Table of inheritable routes\n");
565
  rt_dump(&p->krt_table);
566
}
567

    
568
static void
569
krt_dump_attrs(rte *e)
570
{
571
  debug(" [m=%d,p=%d]", e->u.krt.metric, e->u.krt.proto);
572
}
573

    
574
#endif
575

    
576
/*
577
 *        Routes
578
 */
579

    
580
static void
581
krt_flush_routes(struct krt_proto *p)
582
{
583
  struct rtable *t = p->p.main_channel->table;
584

    
585
  KRT_TRACE(p, D_EVENTS, "Flushing kernel routes");
586
  FIB_WALK(&t->fib, net, n)
587
    {
588
      rte *e = n->routes;
589
      if (rte_is_valid(e) && (n->n.flags & KRF_INSTALLED))
590
        {
591
          /* FIXME: this does not work if gw is changed in export filter */
592
          krt_replace_rte(p, e->net, NULL, e, NULL);
593
          n->n.flags &= ~KRF_INSTALLED;
594
        }
595
    }
596
  FIB_WALK_END;
597
}
598

    
599
static struct rte *
600
krt_export_net(struct krt_proto *p, net *net, rte **rt_free, ea_list **tmpa)
601
{
602
  struct channel *c = p->p.main_channel;
603
  struct filter *filter = c->out_filter;
604
  rte *rt;
605

    
606
  if (c->ra_mode == RA_MERGED)
607
    return rt_export_merged(c, net, rt_free, tmpa, krt_filter_lp, 1);
608

    
609
  rt = net->routes;
610
  *rt_free = NULL;
611

    
612
  if (!rte_is_valid(rt))
613
    return NULL;
614

    
615
  if (filter == FILTER_REJECT)
616
    return NULL;
617

    
618
  struct proto *src = rt->attrs->src->proto;
619
  *tmpa = src->make_tmp_attrs ? src->make_tmp_attrs(rt, krt_filter_lp) : NULL;
620

    
621
  /* We could run krt_import_control() here, but it is already handled by KRF_INSTALLED */
622

    
623
  if (filter == FILTER_ACCEPT)
624
    goto accept;
625

    
626
  if (f_run(filter, &rt, tmpa, krt_filter_lp, FF_FORCE_TMPATTR) > F_ACCEPT)
627
    goto reject;
628

    
629

    
630
accept:
631
  if (rt != net->routes)
632
    *rt_free = rt;
633
  return rt;
634

    
635
reject:
636
  if (rt != net->routes)
637
    rte_free(rt);
638
  return NULL;
639
}
640

    
641
static int
642
krt_same_dest(rte *k, rte *e)
643
{
644
  rta *ka = k->attrs, *ea = e->attrs;
645

    
646
  if (ka->dest != ea->dest)
647
    return 0;
648

    
649
  if (ka->dest == RTD_UNICAST)
650
    return nexthop_same(&(ka->nh), &(ea->nh));
651

    
652
  return 1;
653
}
654

    
655
/*
656
 *  This gets called back when the low-level scanning code discovers a route.
657
 *  We expect that the route is a temporary rte and its attributes are uncached.
658
 */
659

    
660
void
661
krt_got_route(struct krt_proto *p, rte *e)
662
{
663
  net *net = e->net;
664
  int verdict;
665

    
666
#ifdef KRT_ALLOW_LEARN
667
  switch (e->u.krt.src)
668
    {
669
    case KRT_SRC_KERNEL:
670
      verdict = KRF_IGNORE;
671
      goto sentenced;
672

    
673
    case KRT_SRC_REDIRECT:
674
      verdict = KRF_DELETE;
675
      goto sentenced;
676

    
677
    case  KRT_SRC_ALIEN:
678
      if (KRT_CF->learn)
679
        krt_learn_scan(p, e);
680
      else
681
        {
682
          krt_trace_in_rl(&rl_alien, p, e, "[alien] ignored");
683
          rte_free(e);
684
        }
685
      return;
686
    }
687
#endif
688
  /* The rest is for KRT_SRC_BIRD (or KRT_SRC_UNKNOWN) */
689

    
690
  if (net->n.flags & KRF_VERDICT_MASK)
691
    {
692
      /* Route to this destination was already seen. Strange, but it happens... */
693
      krt_trace_in(p, e, "already seen");
694
      rte_free(e);
695
      return;
696
    }
697

    
698
  if (!p->ready)
699
    {
700
      /* We wait for the initial feed to have correct KRF_INSTALLED flag */
701
      verdict = KRF_IGNORE;
702
      goto sentenced;
703
    }
704

    
705
  if (net->n.flags & KRF_INSTALLED)
706
    {
707
      rte *new, *rt_free;
708
      ea_list *tmpa;
709

    
710
      new = krt_export_net(p, net, &rt_free, &tmpa);
711

    
712
      /* TODO: There also may be changes in route eattrs, we ignore that for now. */
713

    
714
      if (!new)
715
        verdict = KRF_DELETE;
716
      else if ((net->n.flags & KRF_SYNC_ERROR) || !krt_same_dest(e, new))
717
        verdict = KRF_UPDATE;
718
      else
719
        verdict = KRF_SEEN;
720

    
721
      if (rt_free)
722
        rte_free(rt_free);
723

    
724
      lp_flush(krt_filter_lp);
725
    }
726
  else
727
    verdict = KRF_DELETE;
728

    
729
 sentenced:
730
  krt_trace_in(p, e, ((char *[]) { "?", "seen", "will be updated", "will be removed", "ignored" }) [verdict]);
731
  net->n.flags = (net->n.flags & ~KRF_VERDICT_MASK) | verdict;
732
  if (verdict == KRF_UPDATE || verdict == KRF_DELETE)
733
    {
734
      /* Get a cached copy of attributes and temporarily link the route */
735
      rta *a = e->attrs;
736
      a->source = RTS_DUMMY;
737
      e->attrs = rta_lookup(a);
738
      e->next = net->routes;
739
      net->routes = e;
740
    }
741
  else
742
    rte_free(e);
743
}
744

    
745
static void
746
krt_prune(struct krt_proto *p)
747
{
748
  struct rtable *t = p->p.main_channel->table;
749

    
750
  KRT_TRACE(p, D_EVENTS, "Pruning table %s", t->name);
751
  FIB_WALK(&t->fib, net, n)
752
    {
753
      int verdict = n->n.flags & KRF_VERDICT_MASK;
754
      rte *new, *old, *rt_free = NULL;
755
      ea_list *tmpa = NULL;
756

    
757
      if (verdict == KRF_UPDATE || verdict == KRF_DELETE)
758
        {
759
          /* Get a dummy route from krt_got_route() */
760
          old = n->routes;
761
          n->routes = old->next;
762
        }
763
      else
764
        old = NULL;
765

    
766
      if (verdict == KRF_CREATE || verdict == KRF_UPDATE)
767
        {
768
          /* We have to run export filter to get proper 'new' route */
769
          new = krt_export_net(p, n, &rt_free, &tmpa);
770

    
771
          if (!new)
772
            verdict = (verdict == KRF_CREATE) ? KRF_IGNORE : KRF_DELETE;
773
          else
774
            tmpa = ea_append(tmpa, new->attrs->eattrs);
775
        }
776
      else
777
        new = NULL;
778

    
779
      switch (verdict)
780
        {
781
        case KRF_CREATE:
782
          if (new && (n->n.flags & KRF_INSTALLED))
783
            {
784
              krt_trace_in(p, new, "reinstalling");
785
              krt_replace_rte(p, n, new, NULL, tmpa);
786
            }
787
          break;
788
        case KRF_SEEN:
789
        case KRF_IGNORE:
790
          /* Nothing happens */
791
          break;
792
        case KRF_UPDATE:
793
          krt_trace_in(p, new, "updating");
794
          krt_replace_rte(p, n, new, old, tmpa);
795
          break;
796
        case KRF_DELETE:
797
          krt_trace_in(p, old, "deleting");
798
          krt_replace_rte(p, n, NULL, old, NULL);
799
          break;
800
        default:
801
          bug("krt_prune: invalid route status");
802
        }
803

    
804
      if (old)
805
        rte_free(old);
806
      if (rt_free)
807
        rte_free(rt_free);
808
      lp_flush(krt_filter_lp);
809
      n->n.flags &= ~KRF_VERDICT_MASK;
810
    }
811
  FIB_WALK_END;
812

    
813
#ifdef KRT_ALLOW_LEARN
814
  if (KRT_CF->learn)
815
    krt_learn_prune(p);
816
#endif
817

    
818
  if (p->ready)
819
    p->initialized = 1;
820
}
821

    
822
void
823
krt_got_route_async(struct krt_proto *p, rte *e, int new)
824
{
825
  net *net = e->net;
826

    
827
  switch (e->u.krt.src)
828
    {
829
    case KRT_SRC_BIRD:
830
      ASSERT(0);                        /* Should be filtered by the back end */
831

    
832
    case KRT_SRC_REDIRECT:
833
      if (new)
834
        {
835
          krt_trace_in(p, e, "[redirect] deleting");
836
          krt_replace_rte(p, net, NULL, e, NULL);
837
        }
838
      /* If !new, it is probably echo of our deletion */
839
      break;
840

    
841
#ifdef KRT_ALLOW_LEARN
842
    case KRT_SRC_ALIEN:
843
      if (KRT_CF->learn)
844
        {
845
          krt_learn_async(p, e, new);
846
          return;
847
        }
848
#endif
849
    }
850
  rte_free(e);
851
}
852

    
853
/*
854
 *        Periodic scanning
855
 */
856

    
857

    
858
#ifdef CONFIG_ALL_TABLES_AT_ONCE
859

    
860
static timer *krt_scan_timer;
861
static int krt_scan_count;
862

    
863
static void
864
krt_scan(timer *t UNUSED)
865
{
866
  struct krt_proto *p;
867

    
868
  kif_force_scan();
869

    
870
  /* We need some node to decide whether to print the debug messages or not */
871
  p = SKIP_BACK(struct krt_proto, krt_node, HEAD(krt_proto_list));
872
  KRT_TRACE(p, D_EVENTS, "Scanning routing table");
873

    
874
  krt_do_scan(NULL);
875

    
876
  void *q;
877
  WALK_LIST(q, krt_proto_list)
878
  {
879
    p = SKIP_BACK(struct krt_proto, krt_node, q);
880
    krt_prune(p);
881
  }
882
}
883

    
884
static void
885
krt_scan_timer_start(struct krt_proto *p)
886
{
887
  if (!krt_scan_count)
888
    krt_scan_timer = tm_new_set(krt_pool, krt_scan, NULL, 0, KRT_CF->scan_time);
889

    
890
  krt_scan_count++;
891

    
892
  tm_start(krt_scan_timer, 1);
893
}
894

    
895
static void
896
krt_scan_timer_stop(struct krt_proto *p UNUSED)
897
{
898
  krt_scan_count--;
899

    
900
  if (!krt_scan_count)
901
  {
902
    rfree(krt_scan_timer);
903
    krt_scan_timer = NULL;
904
  }
905
}
906

    
907
static void
908
krt_scan_timer_kick(struct krt_proto *p UNUSED)
909
{
910
  tm_start(krt_scan_timer, 0);
911
}
912

    
913
#else
914

    
915
static void
916
krt_scan(timer *t)
917
{
918
  struct krt_proto *p = t->data;
919

    
920
  kif_force_scan();
921

    
922
  KRT_TRACE(p, D_EVENTS, "Scanning routing table");
923
  krt_do_scan(p);
924
  krt_prune(p);
925
}
926

    
927
static void
928
krt_scan_timer_start(struct krt_proto *p)
929
{
930
  p->scan_timer = tm_new_set(p->p.pool, krt_scan, p, 0, KRT_CF->scan_time);
931
  tm_start(p->scan_timer, 1);
932
}
933

    
934
static void
935
krt_scan_timer_stop(struct krt_proto *p)
936
{
937
  tm_stop(p->scan_timer);
938
}
939

    
940
static void
941
krt_scan_timer_kick(struct krt_proto *p)
942
{
943
  tm_start(p->scan_timer, 0);
944
}
945

    
946
#endif
947

    
948

    
949

    
950

    
951
/*
952
 *        Updates
953
 */
954

    
955
static struct ea_list *
956
krt_make_tmp_attrs(rte *rt, struct linpool *pool)
957
{
958
  struct ea_list *l = lp_alloc(pool, sizeof(struct ea_list) + 2 * sizeof(eattr));
959

    
960
  l->next = NULL;
961
  l->flags = EALF_SORTED;
962
  l->count = 2;
963

    
964
  l->attrs[0].id = EA_KRT_SOURCE;
965
  l->attrs[0].flags = 0;
966
  l->attrs[0].type = EAF_TYPE_INT | EAF_TEMP;
967
  l->attrs[0].u.data = rt->u.krt.proto;
968

    
969
  l->attrs[1].id = EA_KRT_METRIC;
970
  l->attrs[1].flags = 0;
971
  l->attrs[1].type = EAF_TYPE_INT | EAF_TEMP;
972
  l->attrs[1].u.data = rt->u.krt.metric;
973

    
974
  return l;
975
}
976

    
977
static void
978
krt_store_tmp_attrs(rte *rt, struct ea_list *attrs)
979
{
980
  /* EA_KRT_SOURCE is read-only */
981
  rt->u.krt.metric = ea_get_int(attrs, EA_KRT_METRIC, 0);
982
}
983

    
984
static int
985
krt_import_control(struct proto *P, rte **new, ea_list **attrs UNUSED, struct linpool *pool UNUSED)
986
{
987
  struct krt_proto *p = (struct krt_proto *) P;
988
  rte *e = *new;
989

    
990
  if (e->attrs->src->proto == P)
991
  {
992
#ifdef CONFIG_SINGLE_ROUTE
993
    /*
994
     * Implicit withdraw - when the imported kernel route becomes the best one,
995
     * we know that the previous one exported to the kernel was already removed,
996
     * but if we processed the update as usual, we would send withdraw to the
997
     * kernel, which would remove the new imported route instead.
998
     *
999
     * We will remove KRT_INSTALLED flag, which stops such withdraw to be
1000
     * processed in krt_rt_notify() and krt_replace_rte().
1001
     */
1002
    if (e == e->net->routes)
1003
      e->net->n.flags &= ~KRF_INSTALLED;
1004
#endif
1005
    return -1;
1006
  }
1007

    
1008
  if (!KRT_CF->devroutes && (e->attrs->source != RTS_STATIC_DEVICE))
1009
  {
1010
    struct nexthop *nh = &(e->attrs->nh);
1011
    for (; nh; nh = nh->next)
1012
      if (ipa_nonzero(nh->gw))
1013
        break;
1014

    
1015
    if (!nh) /* Gone through all the nexthops and no explicit GW found */
1016
      return -1;
1017
  }
1018

    
1019
  if (!krt_capable(e))
1020
    return -1;
1021

    
1022
  return 0;
1023
}
1024

    
1025
static void
1026
krt_rt_notify(struct proto *P, struct channel *ch UNUSED, net *net,
1027
              rte *new, rte *old, struct ea_list *eattrs)
1028
{
1029
  struct krt_proto *p = (struct krt_proto *) P;
1030

    
1031
  if (config->shutdown)
1032
    return;
1033
  if (!(net->n.flags & KRF_INSTALLED))
1034
    old = NULL;
1035
  if (new)
1036
    net->n.flags |= KRF_INSTALLED;
1037
  else
1038
    net->n.flags &= ~KRF_INSTALLED;
1039
  if (p->initialized)                /* Before first scan we don't touch the routes */
1040
    krt_replace_rte(p, net, new, old, eattrs);
1041
}
1042

    
1043
static void
1044
krt_if_notify(struct proto *P, uint flags, struct iface *iface UNUSED)
1045
{
1046
  struct krt_proto *p = (struct krt_proto *) P;
1047

    
1048
  /*
1049
   * When interface went down, we should remove routes to it. In the ideal world,
1050
   * OS kernel would send us route removal notifications in such cases, but we
1051
   * cannot rely on it as it is often not true. E.g. Linux kernel removes related
1052
   * routes when an interface went down, but it does not notify userspace about
1053
   * that. To be sure, we just schedule a scan to ensure synchronization.
1054
   */
1055

    
1056
  if ((flags & IF_CHANGE_DOWN) && KRT_CF->learn)
1057
    krt_scan_timer_kick(p);
1058
}
1059

    
1060
static void
1061
krt_reload_routes(struct channel *C)
1062
{
1063
  struct krt_proto *p = (void *) C->proto;
1064

    
1065
  /* Although we keep learned routes in krt_table, we rather schedule a scan */
1066

    
1067
  if (KRT_CF->learn)
1068
  {
1069
    p->reload = 1;
1070
    krt_scan_timer_kick(p);
1071
  }
1072
}
1073

    
1074
static void
1075
krt_feed_end(struct channel *C)
1076
{
1077
  struct krt_proto *p = (void *) C->proto;
1078

    
1079
  p->ready = 1;
1080
  krt_scan_timer_kick(p);
1081
}
1082

    
1083

    
1084
static int
1085
krt_rte_same(rte *a, rte *b)
1086
{
1087
  /* src is always KRT_SRC_ALIEN and type is irrelevant */
1088
  return (a->u.krt.proto == b->u.krt.proto) && (a->u.krt.metric == b->u.krt.metric);
1089
}
1090

    
1091

    
1092
/*
1093
 *        Protocol glue
1094
 */
1095

    
1096
struct krt_config *krt_cf;
1097

    
1098
static void
1099
krt_preconfig(struct protocol *P UNUSED, struct config *c)
1100
{
1101
  krt_cf = NULL;
1102
  krt_sys_preconfig(c);
1103
}
1104

    
1105
static void
1106
krt_postconfig(struct proto_config *CF)
1107
{
1108
  struct krt_config *cf = (void *) CF;
1109

    
1110
  if (EMPTY_LIST(CF->channels))
1111
    cf_error("Channel not specified");
1112

    
1113
#ifdef CONFIG_ALL_TABLES_AT_ONCE
1114
  if (krt_cf->scan_time != cf->scan_time)
1115
    cf_error("All kernel syncers must use the same table scan interval");
1116
#endif
1117

    
1118
  struct rtable_config *tab = proto_cf_main_channel(CF)->table;
1119
  if (tab->krt_attached)
1120
    cf_error("Kernel syncer (%s) already attached to table %s", tab->krt_attached->name, tab->name);
1121
  tab->krt_attached = CF;
1122

    
1123
  krt_sys_postconfig(cf);
1124
}
1125

    
1126
static struct proto *
1127
krt_init(struct proto_config *CF)
1128
{
1129
  struct krt_proto *p = proto_new(CF);
1130
  // struct krt_config *cf = (void *) CF;
1131

    
1132
  p->p.main_channel = proto_add_channel(&p->p, proto_cf_main_channel(CF));
1133

    
1134
  p->p.import_control = krt_import_control;
1135
  p->p.rt_notify = krt_rt_notify;
1136
  p->p.if_notify = krt_if_notify;
1137
  p->p.reload_routes = krt_reload_routes;
1138
  p->p.feed_end = krt_feed_end;
1139
  p->p.make_tmp_attrs = krt_make_tmp_attrs;
1140
  p->p.store_tmp_attrs = krt_store_tmp_attrs;
1141
  p->p.rte_same = krt_rte_same;
1142

    
1143
  krt_sys_init(p);
1144
  return &p->p;
1145
}
1146

    
1147
static int
1148
krt_start(struct proto *P)
1149
{
1150
  struct krt_proto *p = (struct krt_proto *) P;
1151

    
1152
  switch (p->p.net_type)
1153
  {
1154
  case NET_IP4:        p->af = AF_INET; break;
1155
  case NET_IP6:        p->af = AF_INET6; break;
1156
  case NET_MPLS: p->af = AF_MPLS; break;
1157
  default: log(L_ERR "KRT: Tried to start with strange net type: %d", p->p.net_type); return PS_START; break;
1158
  }
1159

    
1160
  add_tail(&krt_proto_list, &p->krt_node);
1161

    
1162
#ifdef KRT_ALLOW_LEARN
1163
  krt_learn_init(p);
1164
#endif
1165

    
1166
  if (!krt_sys_start(p))
1167
  {
1168
    rem_node(&p->krt_node);
1169
    return PS_START;
1170
  }
1171

    
1172
  krt_scan_timer_start(p);
1173

    
1174
  if (p->p.gr_recovery && KRT_CF->graceful_restart)
1175
    p->p.main_channel->gr_wait = 1;
1176

    
1177
  return PS_UP;
1178
}
1179

    
1180
static int
1181
krt_shutdown(struct proto *P)
1182
{
1183
  struct krt_proto *p = (struct krt_proto *) P;
1184

    
1185
  krt_scan_timer_stop(p);
1186

    
1187
  /* FIXME we should flush routes even when persist during reconfiguration */
1188
  if (p->initialized && !KRT_CF->persist)
1189
    krt_flush_routes(p);
1190

    
1191
  p->ready = 0;
1192
  p->initialized = 0;
1193

    
1194
  if (p->p.proto_state == PS_START)
1195
    return PS_DOWN;
1196

    
1197
  krt_sys_shutdown(p);
1198
  rem_node(&p->krt_node);
1199

    
1200
  return PS_DOWN;
1201
}
1202

    
1203
static int
1204
krt_reconfigure(struct proto *p, struct proto_config *CF)
1205
{
1206
  struct krt_config *o = (void *) p->cf;
1207
  struct krt_config *n = (void *) CF;
1208

    
1209
  if (!proto_configure_channel(p, &p->main_channel, proto_cf_main_channel(CF)))
1210
    return 0;
1211

    
1212
  if (!krt_sys_reconfigure((struct krt_proto *) p, n, o))
1213
    return 0;
1214

    
1215
  /* persist, graceful restart need not be the same */
1216
  return o->scan_time == n->scan_time && o->learn == n->learn && o->devroutes == n->devroutes;
1217
}
1218

    
1219
struct proto_config *
1220
krt_init_config(int class)
1221
{
1222
#ifndef CONFIG_MULTIPLE_TABLES
1223
  if (krt_cf)
1224
    cf_error("Kernel protocol already defined");
1225
#endif
1226

    
1227
  krt_cf = (struct krt_config *) proto_config_new(&proto_unix_kernel, class);
1228
  krt_cf->scan_time = 60;
1229

    
1230
  krt_sys_init_config(krt_cf);
1231
  return (struct proto_config *) krt_cf;
1232
}
1233

    
1234
/*
 * Copy-config hook of the kernel protocol: only the OS-dependent part of
 * the configuration needs a fix-up here.
 */
static void
krt_copy_config(struct proto_config *dest, struct proto_config *src)
{
  /* Fix sysdep parts */
  krt_sys_copy_config((struct krt_config *) dest, (struct krt_config *) src);
}
1243

    
1244
static int
1245
krt_get_attr(eattr *a, byte *buf, int buflen)
1246
{
1247
  switch (a->id)
1248
  {
1249
  case EA_KRT_SOURCE:
1250
    bsprintf(buf, "source");
1251
    return GA_NAME;
1252

    
1253
  case EA_KRT_METRIC:
1254
    bsprintf(buf, "metric");
1255
    return GA_NAME;
1256

    
1257
  default:
1258
    return krt_sys_get_attr(a, buf, buflen);
1259
  }
1260
}
1261

    
1262

    
1263
struct protocol proto_unix_kernel = {
1264
  .name =                "Kernel",
1265
  .template =                "kernel%d",
1266
  .attr_class =                EAP_KRT,
1267
  .preference =                DEF_PREF_INHERITED,
1268
  .channel_mask =        NB_IP | NB_MPLS,
1269
  .proto_size =                sizeof(struct krt_proto),
1270
  .config_size =        sizeof(struct krt_config),
1271
  .preconfig =                krt_preconfig,
1272
  .postconfig =                krt_postconfig,
1273
  .init =                krt_init,
1274
  .start =                krt_start,
1275
  .shutdown =                krt_shutdown,
1276
  .reconfigure =        krt_reconfigure,
1277
  .copy_config =        krt_copy_config,
1278
  .get_attr =                krt_get_attr,
1279
#ifdef KRT_ALLOW_LEARN
1280
  .dump =                krt_dump,
1281
  .dump_attrs =                krt_dump_attrs,
1282
#endif
1283
};