Statistics
| Branch: | Revision:

iof-bird-daemon / sysdep / bsd / krt-sock.c @ 517d05df

History | View | Annotate | Download (24.6 KB)

1
/*
2
 *        BIRD -- BSD Routing Table Syncing
3
 *
4
 *        (c) 2004 Ondrej Filip <feela@network.cz>
5
 *
6
 *        Can be freely distributed and used under the terms of the GNU GPL.
7
 */
8

    
9
#include <stdio.h>
10
#include <stdlib.h>
11
#include <ctype.h>
12
#include <fcntl.h>
13
#include <unistd.h>
14
#include <sys/param.h>
15
#include <sys/types.h>
16
#include <sys/socket.h>
17
#include <sys/sysctl.h>
18
#include <sys/ioctl.h>
19
#include <netinet/in.h>
20
#include <net/route.h>
21
#include <net/if.h>
22
#include <net/if_dl.h>
23

    
24
#undef LOCAL_DEBUG
25

    
26
#include "nest/bird.h"
27
#include "nest/iface.h"
28
#include "nest/route.h"
29
#include "nest/protocol.h"
30
#include "nest/iface.h"
31
#include "sysdep/unix/unix.h"
32
#include "sysdep/unix/krt.h"
33
#include "lib/string.h"
34
#include "lib/socket.h"
35

    
36
const int rt_default_ecmp = 0;
37

    
38
/*
39
 * There are significant differences in multiple tables support between BSD variants.
40
 *
41
 * OpenBSD has table_id field for routes in route socket protocol, therefore all
42
 * tables could be managed by one kernel socket. FreeBSD lacks such field,
43
 * therefore multiple sockets (locked to specific table using SO_SETFIB socket
44
 * option) must be used.
45
 *
46
 * Both FreeBSD and OpenBSD use separate scans for each table. In OpenBSD,
47
 * table_id is specified explicitly as sysctl scan argument, while in FreeBSD it
48
 * is handled implicitly by changing default table using setfib() syscall.
49
 *
50
 * KRT_SHARED_SOCKET        - use shared kernel socket instead of one for each krt_proto
51
 * KRT_USE_SETFIB_SCAN        - use setfib() for sysctl() route scan
52
 * KRT_USE_SETFIB_SOCK        - use SO_SETFIB socket option for kernel sockets
53
 * KRT_USE_SYSCTL_7        - use 7-th arg of sysctl() as table id for route scans
54
 * KRT_USE_SYSCTL_NET_FIBS - use net.fibs sysctl() for dynamic max number of fibs
55
 */
56

    
57
#ifdef __FreeBSD__
58
#define KRT_MAX_TABLES 256
59
#define KRT_USE_SETFIB_SCAN
60
#define KRT_USE_SETFIB_SOCK
61
#define KRT_USE_SYSCTL_NET_FIBS
62
#endif
63

    
64
#ifdef __OpenBSD__
65
#define KRT_MAX_TABLES (RT_TABLEID_MAX+1)
66
#define KRT_SHARED_SOCKET
67
#define KRT_USE_SYSCTL_7
68
#endif
69

    
70
#ifndef KRT_MAX_TABLES
71
#define KRT_MAX_TABLES 1
72
#endif
73

    
74

    
75
/* Dynamic max number of tables */
76

    
77
int krt_max_tables;
78

    
79
#ifdef KRT_USE_SYSCTL_NET_FIBS
80

    
81
static int
82
krt_get_max_tables(void)
83
{
84
  int fibs;
85
  size_t fibs_len = sizeof(fibs);
86

    
87
  if (sysctlbyname("net.fibs", &fibs, &fibs_len, NULL, 0) < 0)
88
  {
89
    log(L_WARN "KRT: unable to get max number of fib tables: %m");
90
    return 1;
91
  }
92

    
93
  return MIN(fibs, KRT_MAX_TABLES);
94
}
95

    
96
#else
97

    
98
static int
99
krt_get_max_tables(void)
100
{
101
  return KRT_MAX_TABLES;
102
}
103

    
104
#endif /* KRT_USE_SYSCTL_NET_FIBS */
105

    
106

    
107
/* setfib() syscall for FreeBSD scans */
108

    
109
#ifdef KRT_USE_SETFIB_SCAN
110

    
111
/*
112
static int krt_default_fib;
113

114
static int
115
krt_get_active_fib(void)
116
{
117
  int fib;
118
  size_t fib_len = sizeof(fib);
119

120
  if (sysctlbyname("net.my_fibnum", &fib, &fib_len, NULL, 0) < 0)
121
  {
122
    log(L_WARN "KRT: unable to get active fib number: %m");
123
    return 0;
124
  }
125

126
  return fib;
127
}
128
*/
129

    
130
extern int setfib(int fib);
131

    
132
#endif /* KRT_USE_SETFIB_SCAN */
133

    
134

    
135
/* table_id -> krt_proto map */
136

    
137
#ifdef KRT_SHARED_SOCKET
138
static struct krt_proto *krt_table_map[KRT_MAX_TABLES][2];
139
#endif
140

    
141

    
142
/* Route socket message processing */
143

    
144
int
145
krt_capable(rte *e)
146
{
147
  rta *a = e->attrs;
148

    
149
  return
150
    ((a->dest == RTD_UNICAST && !a->nh.next) /* No multipath support */
151
#ifdef RTF_REJECT
152
     || a->dest == RTD_UNREACHABLE
153
#endif
154
#ifdef RTF_BLACKHOLE
155
     || a->dest == RTD_BLACKHOLE
156
#endif
157
     );
158
}
159

    
160
#ifndef RTAX_MAX
161
#define RTAX_MAX 8
162
#endif
163

    
164
struct ks_msg
165
{
166
  struct rt_msghdr rtm;
167
  struct sockaddr_storage buf[RTAX_MAX];
168
} PACKED;
169

    
170
/*
 * Round a sockaddr length up to the next multiple of sizeof(long);
 * a non-positive length still yields sizeof(long).
 */
#define ROUNDUP(a) \
        ((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))

/*
 * Append socket address @u to the outgoing message when flag @w is set in
 * msg.rtm.rtm_addrs. Relies on the caller's locals `msg`, `body` and `l`.
 * Wrapped in do/while so that it behaves as a single statement.
 */
#define NEXTADDR(w, u) \
        do { \
          if (msg.rtm.rtm_addrs & (w)) { \
            l = ROUNDUP(((struct sockaddr *)&(u))->sa_len); \
            memmove(body, &(u), l); body += l; } \
        } while (0)

/*
 * Zero *@p; then, when flag @F is set in `addrs` and the address at `body`
 * has a non-zero length, copy it into *@p (truncated to sizeof(*p)) and
 * advance `body` past it. Relies on the caller's locals `addrs` and `body`.
 * Wrapped in do/while so that it behaves as a single statement.
 */
#define GETADDR(p, F) \
        do { \
          bzero((p), sizeof(*(p))); \
          if ((addrs & (F)) && ((struct sockaddr *)body)->sa_len) { \
            uint l = ROUNDUP(((struct sockaddr *)body)->sa_len); \
            memcpy((p), body, (l > sizeof(*(p)) ? sizeof(*(p)) : l)); \
            body += l; } \
        } while (0)
184

    
185
static int
186
krt_send_route(struct krt_proto *p, int cmd, rte *e)
187
{
188
  net *net = e->net;
189
  rta *a = e->attrs;
190
  static int msg_seq;
191
  struct iface *j, *i = a->nh.iface;
192
  int l;
193
  struct ks_msg msg;
194
  char *body = (char *)msg.buf;
195
  sockaddr gate, mask, dst;
196

    
197
  DBG("krt-sock: send %I/%d via %I\n", net->n.prefix, net->n.pxlen, a->gw);
198

    
199
  bzero(&msg,sizeof (struct rt_msghdr));
200
  msg.rtm.rtm_version = RTM_VERSION;
201
  msg.rtm.rtm_type = cmd;
202
  msg.rtm.rtm_seq = msg_seq++;
203
  msg.rtm.rtm_addrs = RTA_DST;
204
  msg.rtm.rtm_flags = RTF_UP | RTF_PROTO1;
205

    
206
  /* XXXX */
207
  if (net_pxlen(net->n.addr) == net_max_prefix_length[net->n.addr->type])
208
    msg.rtm.rtm_flags |= RTF_HOST;
209
  else
210
    msg.rtm.rtm_addrs |= RTA_NETMASK;
211

    
212
#ifdef KRT_SHARED_SOCKET
213
  msg.rtm.rtm_tableid = KRT_CF->sys.table_id;
214
#endif
215

    
216
#ifdef RTF_REJECT
217
  if(a->dest == RTD_UNREACHABLE)
218
    msg.rtm.rtm_flags |= RTF_REJECT;
219
#endif
220
#ifdef RTF_BLACKHOLE
221
  if(a->dest == RTD_BLACKHOLE)
222
    msg.rtm.rtm_flags |= RTF_BLACKHOLE;
223
#endif
224

    
225
  /*
226
   * This is really very nasty, but I'm not able to add reject/blackhole route
227
   * without gateway address.
228
   */
229
  if (!i)
230
  {
231
    WALK_LIST(j, iface_list)
232
    {
233
      if (j->flags & IF_LOOPBACK)
234
      {
235
        i = j;
236
        break;
237
      }
238
    }
239

    
240
    if (!i)
241
    {
242
      log(L_ERR "KRT: Cannot find loopback iface");
243
      return -1;
244
    }
245
  }
246

    
247
  int af = AF_UNSPEC;
248

    
249
  switch (net->n.addr->type) {
250
    case NET_IP4:
251
      af = AF_INET;
252
      break;
253
    case NET_IP6:
254
      af = AF_INET6;
255
      break;
256
    default:
257
      log(L_ERR "KRT: Not sending route %N to kernel", net->n.addr);
258
      return -1;
259
  }
260

    
261
  sockaddr_fill(&dst,  af, net_prefix(net->n.addr), NULL, 0);
262
  sockaddr_fill(&mask, af, net_pxmask(net->n.addr), NULL, 0);
263

    
264
  switch (a->dest)
265
  {
266
  case RTD_UNICAST:
267
    if (ipa_nonzero(a->nh.gw))
268
    {
269
      ip_addr gw = a->nh.gw;
270

    
271
      /* Embed interface ID to link-local address */
272
      if (ipa_is_link_local(gw))
273
        _I0(gw) = 0xfe800000 | (i->index & 0x0000ffff);
274

    
275
      sockaddr_fill(&gate, af, gw, NULL, 0);
276
      msg.rtm.rtm_flags |= RTF_GATEWAY;
277
      msg.rtm.rtm_addrs |= RTA_GATEWAY;
278
      break;
279
    }
280

    
281
#ifdef RTF_REJECT
282
  case RTD_UNREACHABLE:
283
#endif
284
#ifdef RTF_BLACKHOLE
285
  case RTD_BLACKHOLE:
286
#endif
287
  {
288
    /* Fallback for all other valid cases */
289

    
290
#ifdef RTF_CLONING
291
    if (cmd == RTM_ADD && (i->flags & IF_MULTIACCESS) != IF_MULTIACCESS)        /* PTP */
292
      msg.rtm.rtm_flags |= RTF_CLONING;
293
#endif
294

    
295
    struct ifa *addr = (net->n.addr->type == NET_IP4) ? i->addr4 : (i->addr6 ?: i->llv6);
296

    
297
    if (!addr)
298
    {
299
      log(L_ERR "KRT: interface %s has no IP addess", i->name);
300
      return -1;
301
    }
302

    
303
    sockaddr_fill(&gate, af, addr->ip, i, 0);
304
    msg.rtm.rtm_addrs |= RTA_GATEWAY;
305
    break;
306
  }
307

    
308
  default:
309
    bug("krt-sock: unknown flags, but not filtered");
310
  }
311

    
312
  msg.rtm.rtm_index = i->index;
313

    
314
  NEXTADDR(RTA_DST, dst);
315
  NEXTADDR(RTA_GATEWAY, gate);
316
  NEXTADDR(RTA_NETMASK, mask);
317

    
318
  l = body - (char *)&msg;
319
  msg.rtm.rtm_msglen = l;
320

    
321
  if ((l = write(p->sys.sk->fd, (char *)&msg, l)) < 0) {
322
    log(L_ERR "KRT: Error sending route %N to kernel: %m", net->n.addr);
323
    return -1;
324
  }
325

    
326
  return 0;
327
}
328

    
329
void
330
krt_replace_rte(struct krt_proto *p, net *n, rte *new, rte *old,
331
                struct ea_list *eattrs UNUSED)
332
{
333
  int err = 0;
334

    
335
  if (old)
336
    krt_send_route(p, RTM_DELETE, old);
337

    
338
  if (new)
339
    err = krt_send_route(p, RTM_ADD, new);
340

    
341
  if (err < 0)
342
    n->n.flags |= KRF_SYNC_ERROR;
343
  else
344
    n->n.flags &= ~KRF_SYNC_ERROR;
345
}
346

    
347
#define SKIP(ARG...) do { DBG("KRT: Ignoring route - " ARG); return; } while(0)
348

    
349
static void
350
krt_read_route(struct ks_msg *msg, struct krt_proto *p, int scan)
351
{
352
  /* p is NULL iff KRT_SHARED_SOCKET and !scan */
353

    
354
  int ipv6;
355
  rte *e;
356
  net *net;
357
  sockaddr dst, gate, mask;
358
  ip_addr idst, igate, imask;
359
  net_addr ndst;
360
  void *body = (char *)msg->buf;
361
  int new = (msg->rtm.rtm_type != RTM_DELETE);
362
  char *errmsg = "KRT: Invalid route received";
363
  int flags = msg->rtm.rtm_flags;
364
  int addrs = msg->rtm.rtm_addrs;
365
  int src;
366
  byte src2;
367

    
368
  if (!(flags & RTF_UP) && scan)
369
    SKIP("not up in scan\n");
370

    
371
  if (!(flags & RTF_DONE) && !scan)
372
    SKIP("not done in async\n");
373

    
374
  if (flags & RTF_LLINFO)
375
    SKIP("link-local\n");
376

    
377
  GETADDR(&dst, RTA_DST);
378
  GETADDR(&gate, RTA_GATEWAY);
379
  GETADDR(&mask, RTA_NETMASK);
380

    
381
  switch (dst.sa.sa_family) {
382
    case AF_INET:
383
      ipv6 = 0;
384
      break;
385
    case AF_INET6:
386
      ipv6 = 1;
387
      break;
388
    default:
389
      SKIP("invalid DST");
390
  }
391

    
392
  /* We do not test family for RTA_NETMASK, because BSD sends us
393
     some strange values, but interpreting them as IPv4/IPv6 works */
394
  mask.sa.sa_family = dst.sa.sa_family;
395

    
396
  idst  = ipa_from_sa(&dst);
397
  imask = ipa_from_sa(&mask);
398
  igate = (gate.sa.sa_family == dst.sa.sa_family) ? ipa_from_sa(&gate) : IPA_NONE;
399

    
400
#ifdef KRT_SHARED_SOCKET
401
  if (!scan)
402
  {
403
    int table_id = msg->rtm.rtm_tableid;
404
    p = (table_id < KRT_MAX_TABLES) ? krt_table_map[table_id][ipv6] : NULL;
405

    
406
    if (!p)
407
      SKIP("unknown table id %d\n", table_id);
408
  }
409
#endif
410
  if ((!ipv6) && (p->p.main_channel->table->addr_type != NET_IP4))
411
    SKIP("reading only IPv4 routes");
412
  if (  ipv6  && (p->p.main_channel->table->addr_type != NET_IP6))
413
    SKIP("reading only IPv6 routes");
414

    
415
  int c = ipa_classify_net(idst);
416
  if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
417
    SKIP("strange class/scope\n");
418

    
419
  int pxlen;
420
  if (ipv6)
421
    pxlen = (flags & RTF_HOST) ? IP6_MAX_PREFIX_LENGTH : ip6_masklen(&ipa_to_ip6(imask));
422
  else
423
    pxlen = (flags & RTF_HOST) ? IP4_MAX_PREFIX_LENGTH : ip4_masklen(ipa_to_ip4(imask));
424

    
425
  if (pxlen < 0)
426
    { log(L_ERR "%s (%I) - netmask %I", errmsg, idst, imask); return; }
427

    
428
  if (ipv6)
429
    net_fill_ip6(&ndst, ipa_to_ip6(idst), pxlen);
430
  else
431
    net_fill_ip4(&ndst, ipa_to_ip4(idst), pxlen);
432

    
433
  if ((flags & RTF_GATEWAY) && ipa_zero(igate))
434
    { log(L_ERR "%s (%N) - missing gateway", errmsg, ndst); return; }
435

    
436
  u32 self_mask = RTF_PROTO1;
437
  u32 alien_mask = RTF_STATIC | RTF_PROTO1 | RTF_GATEWAY;
438

    
439
  src2 = (flags & RTF_STATIC) ? 1 : 0;
440
  src2 |= (flags & RTF_PROTO1) ? 2 : 0;
441

    
442
#ifdef RTF_PROTO2
443
  alien_mask |= RTF_PROTO2;
444
  src2 |= (flags & RTF_PROTO2) ? 4 : 0;
445
#endif
446

    
447
#ifdef RTF_PROTO3
448
  alien_mask |= RTF_PROTO3;
449
  src2 |= (flags & RTF_PROTO3) ? 8 : 0;
450
#endif
451

    
452
#ifdef RTF_REJECT
453
  alien_mask |= RTF_REJECT;
454
#endif
455

    
456
#ifdef RTF_BLACKHOLE
457
  alien_mask |= RTF_BLACKHOLE;
458
#endif
459

    
460
  if (flags & (RTF_DYNAMIC | RTF_MODIFIED))
461
    src = KRT_SRC_REDIRECT;
462
  else if (flags & self_mask)
463
    {
464
      if (!scan)
465
        SKIP("echo\n");
466
      src = KRT_SRC_BIRD;
467
    }
468
  else if (flags & alien_mask)
469
    src = KRT_SRC_ALIEN;
470
  else
471
    src = KRT_SRC_KERNEL;
472

    
473
  net = net_get(p->p.main_channel->table, &ndst);
474

    
475
  rta a = {
476
    .src = p->p.main_source,
477
    .source = RTS_INHERIT,
478
    .scope = SCOPE_UNIVERSE,
479
  };
480

    
481
  /* reject/blackhole routes have also set RTF_GATEWAY,
482
     we wil check them first. */
483

    
484
#ifdef RTF_REJECT
485
  if(flags & RTF_REJECT) {
486
    a.dest = RTD_UNREACHABLE;
487
    goto done;
488
  }
489
#endif
490

    
491
#ifdef RTF_BLACKHOLE
492
  if(flags & RTF_BLACKHOLE) {
493
    a.dest = RTD_BLACKHOLE;
494
    goto done;
495
  }
496
#endif
497

    
498
  a.nh.iface = if_find_by_index(msg->rtm.rtm_index);
499
  if (!a.nh.iface)
500
    {
501
      log(L_ERR "KRT: Received route %N with unknown ifindex %u",
502
          net->n.addr, msg->rtm.rtm_index);
503
      return;
504
    }
505

    
506
  a.dest = RTD_UNICAST;
507
  if (flags & RTF_GATEWAY)
508
  {
509
    neighbor *ng;
510
    a.nh.gw = igate;
511

    
512
    /* Clean up embedded interface ID returned in link-local address */
513
    if (ipa_is_link_local(a.nh.gw))
514
      _I0(a.nh.gw) = 0xfe800000;
515

    
516
    ng = neigh_find2(&p->p, &a.nh.gw, a.nh.iface, 0);
517
    if (!ng || (ng->scope == SCOPE_HOST))
518
      {
519
        /* Ignore routes with next-hop 127.0.0.1, host routes with such
520
           next-hop appear on OpenBSD for address aliases. */
521
        if (ipa_classify(a.nh.gw) == (IADDR_HOST | SCOPE_HOST))
522
          return;
523

    
524
        log(L_ERR "KRT: Received route %N with strange next-hop %I",
525
            net->n.addr, a.nh.gw);
526
        return;
527
      }
528
  }
529

    
530
 done:
531
  e = rte_get_temp(&a);
532
  e->net = net;
533
  e->u.krt.src = src;
534
  e->u.krt.proto = src2;
535
  e->u.krt.seen = 0;
536
  e->u.krt.best = 0;
537
  e->u.krt.metric = 0;
538

    
539
  if (scan)
540
    krt_got_route(p, e);
541
  else
542
    krt_got_route_async(p, e, new);
543
}
544

    
545
static void
546
krt_read_ifannounce(struct ks_msg *msg)
547
{
548
  struct if_announcemsghdr *ifam = (struct if_announcemsghdr *)&msg->rtm;
549

    
550
  if (ifam->ifan_what == IFAN_ARRIVAL)
551
  {
552
    /* Not enough info to create the iface, so we just trigger iface scan */
553
    kif_request_scan();
554
  }
555
  else if (ifam->ifan_what == IFAN_DEPARTURE)
556
  {
557
    struct iface *iface = if_find_by_index(ifam->ifan_index);
558

    
559
    /* Interface is destroyed */
560
    if (!iface)
561
    {
562
      DBG("KRT: unknown interface (%s, #%d) going down. Ignoring\n", ifam->ifan_name, ifam->ifan_index);
563
      return;
564
    }
565

    
566
    if_delete(iface);
567
  }
568

    
569
  DBG("KRT: IFANNOUNCE what: %d index %d name %s\n", ifam->ifan_what, ifam->ifan_index, ifam->ifan_name);
570
}
571

    
572
static void
573
krt_read_ifinfo(struct ks_msg *msg, int scan)
574
{
575
  struct if_msghdr *ifm = (struct if_msghdr *)&msg->rtm;
576
  void *body = (void *)(ifm + 1);
577
  struct sockaddr_dl *dl = NULL;
578
  uint i;
579
  struct iface *iface = NULL, f = {};
580
  int fl = ifm->ifm_flags;
581
  int nlen = 0;
582

    
583
  for (i = 1; i<=RTA_IFP; i <<= 1)
584
  {
585
    if (i & ifm->ifm_addrs)
586
    {
587
      if (i == RTA_IFP)
588
      {
589
        dl = (struct sockaddr_dl *)body;
590
        break;
591
      }
592
      body += ROUNDUP(((struct sockaddr *)&(body))->sa_len);
593
    }
594
  }
595

    
596
  if (dl && (dl->sdl_family != AF_LINK))
597
  {
598
    log(L_WARN "Ignoring strange IFINFO");
599
    return;
600
  }
601

    
602
  if (dl)
603
    nlen = MIN(sizeof(f.name)-1, dl->sdl_nlen);
604

    
605
  /* Note that asynchronous IFINFO messages do not contain iface
606
     name, so we have to found an existing iface by iface index */
607

    
608
  iface = if_find_by_index(ifm->ifm_index);
609
  if (!iface)
610
  {
611
    /* New interface */
612
    if (!dl)
613
      return;        /* No interface name, ignoring */
614

    
615
    memcpy(f.name, dl->sdl_data, nlen);
616
    DBG("New interface '%s' found\n", f.name);
617
  }
618
  else if (dl && memcmp(iface->name, dl->sdl_data, nlen))
619
  {
620
    /* Interface renamed */
621
    if_delete(iface);
622
    memcpy(f.name, dl->sdl_data, nlen);
623
  }
624
  else
625
  {
626
    /* Old interface */
627
    memcpy(f.name, iface->name, sizeof(f.name));
628
  }
629

    
630
  f.index = ifm->ifm_index;
631
  f.mtu = ifm->ifm_data.ifi_mtu;
632

    
633
  if (fl & IFF_UP)
634
    f.flags |= IF_ADMIN_UP;
635
  if (ifm->ifm_data.ifi_link_state != LINK_STATE_DOWN)
636
    f.flags |= IF_LINK_UP;          /* up or unknown */
637
  if (fl & IFF_LOOPBACK)            /* Loopback */
638
    f.flags |= IF_MULTIACCESS | IF_LOOPBACK | IF_IGNORE;
639
  else if (fl & IFF_POINTOPOINT)    /* PtP */
640
    f.flags |= IF_MULTICAST;
641
  else if (fl & IFF_BROADCAST)      /* Broadcast */
642
    f.flags |= IF_MULTIACCESS | IF_BROADCAST | IF_MULTICAST;
643
  else
644
    f.flags |= IF_MULTIACCESS;      /* NBMA */
645

    
646
  iface = if_update(&f);
647

    
648
  if (!scan)
649
    if_end_partial_update(iface);
650
}
651

    
652
static void
653
krt_read_addr(struct ks_msg *msg, int scan)
654
{
655
  struct ifa_msghdr *ifam = (struct ifa_msghdr *)&msg->rtm;
656
  void *body = (void *)(ifam + 1);
657
  sockaddr addr, mask, brd;
658
  struct iface *iface = NULL;
659
  struct ifa ifa;
660
  struct sockaddr null;
661
  ip_addr iaddr, imask, ibrd;
662
  int addrs = ifam->ifam_addrs;
663
  int scope, masklen = -1;
664
  int new = (ifam->ifam_type == RTM_NEWADDR);
665

    
666
  /* Strange messages with zero (invalid) ifindex appear on OpenBSD */
667
  if (ifam->ifam_index == 0)
668
    return;
669

    
670
  if(!(iface = if_find_by_index(ifam->ifam_index)))
671
  {
672
    log(L_ERR "KIF: Received address message for unknown interface %d", ifam->ifam_index);
673
    return;
674
  }
675

    
676
  GETADDR (&null, RTA_DST);
677
  GETADDR (&null, RTA_GATEWAY);
678
  GETADDR (&mask, RTA_NETMASK);
679
  GETADDR (&null, RTA_GENMASK);
680
  GETADDR (&null, RTA_IFP);
681
  GETADDR (&addr, RTA_IFA);
682
  GETADDR (&null, RTA_AUTHOR);
683
  GETADDR (&brd, RTA_BRD);
684

    
685
  /* Is addr family IP4 or IP6? */
686
  int ipv6;
687
  switch (addr.sa.sa_family) {
688
    case AF_INET: ipv6 = 0; break;
689
    case AF_INET6: ipv6 = 1; break;
690
    default: return;
691
  }
692

    
693
  /* We do not test family for RTA_NETMASK, because BSD sends us
694
     some strange values, but interpreting them as IPv4/IPv6 works */
695
  mask.sa.sa_family = addr.sa.sa_family;
696

    
697
  iaddr = ipa_from_sa(&addr);
698
  imask = ipa_from_sa(&mask);
699
  ibrd  = ipa_from_sa(&brd);
700

    
701
  if ((ipv6 ? (masklen = ip6_masklen(&ipa_to_ip6(imask))) : (masklen = ip4_masklen(ipa_to_ip4(imask)))) < 0)
702
  {
703
    log(L_ERR "KIF: Invalid mask %I for %s", imask, iface->name);
704
    return;
705
  }
706

    
707
  /* Clean up embedded interface ID returned in link-local address */
708

    
709
  if (ipa_is_link_local(iaddr))
710
    _I0(iaddr) = 0xfe800000;
711

    
712
  if (ipa_is_link_local(ibrd))
713
    _I0(ibrd) = 0xfe800000;
714

    
715

    
716
  bzero(&ifa, sizeof(ifa));
717
  ifa.iface = iface;
718
  ifa.ip = iaddr;
719

    
720
  scope = ipa_classify(ifa.ip);
721
  if (scope < 0)
722
  {
723
    log(L_ERR "KIF: Invalid interface address %I for %s", ifa.ip, iface->name);
724
    return;
725
  }
726
  ifa.scope = scope & IADDR_SCOPE_MASK;
727

    
728
  if (masklen < (ipv6 ? IP6_MAX_PREFIX_LENGTH : IP4_MAX_PREFIX_LENGTH))
729
  {
730
    net_fill_ipa(&ifa.prefix, ifa.ip, masklen);
731
    net_normalize(&ifa.prefix);
732

    
733
    if (masklen == ((ipv6 ? IP6_MAX_PREFIX_LENGTH : IP4_MAX_PREFIX_LENGTH) - 1))
734
      ifa.opposite = ipa_opposite_m1(ifa.ip);
735

    
736
    if ((!ipv6) && (masklen == IP4_MAX_PREFIX_LENGTH - 2))
737
      ifa.opposite = ipa_opposite_m2(ifa.ip);
738

    
739
    if (iface->flags & IF_BROADCAST)
740
      ifa.brd = ibrd;
741

    
742
    if (!(iface->flags & IF_MULTIACCESS))
743
      ifa.opposite = ibrd;
744
  }
745
  else if (!(iface->flags & IF_MULTIACCESS) && ipa_nonzero(ibrd))
746
  {
747
    net_fill_ipa(&ifa.prefix, ibrd, (ipv6 ? IP6_MAX_PREFIX_LENGTH : IP4_MAX_PREFIX_LENGTH));
748
    ifa.opposite = ibrd;
749
    ifa.flags |= IA_PEER;
750
  }
751
  else
752
  {
753
    net_fill_ipa(&ifa.prefix, ifa.ip, (ipv6 ? IP6_MAX_PREFIX_LENGTH : IP4_MAX_PREFIX_LENGTH));
754
    ifa.flags |= IA_HOST;
755
  }
756

    
757
  if (new)
758
    ifa_update(&ifa);
759
  else
760
    ifa_delete(&ifa);
761

    
762
  if (!scan)
763
    if_end_partial_update(iface);
764
}
765

    
766
static void
767
krt_read_msg(struct proto *p, struct ks_msg *msg, int scan)
768
{
769
  /* p is NULL iff KRT_SHARED_SOCKET and !scan */
770

    
771
  switch (msg->rtm.rtm_type)
772
  {
773
    case RTM_GET:
774
      if(!scan) return;
775
    case RTM_ADD:
776
    case RTM_DELETE:
777
    case RTM_CHANGE:
778
      krt_read_route(msg, (struct krt_proto *)p, scan);
779
      break;
780
    case RTM_IFANNOUNCE:
781
      krt_read_ifannounce(msg);
782
      break;
783
    case RTM_IFINFO:
784
      krt_read_ifinfo(msg, scan);
785
      break;
786
    case RTM_NEWADDR:
787
    case RTM_DELADDR:
788
      krt_read_addr(msg, scan);
789
      break;
790
    default:
791
      break;
792
  }
793
}
794

    
795

    
796
/* Sysctl based scans */
797

    
798
static byte *krt_buffer;
799
static size_t krt_buflen, krt_bufmin;
800
static struct proto *krt_buffer_owner;
801

    
802
static byte *
803
krt_buffer_update(struct proto *p, size_t *needed)
804
{
805
  size_t req = *needed;
806

    
807
  if ((req > krt_buflen) ||
808
      ((p == krt_buffer_owner) && (req < krt_bufmin)))
809
  {
810
    /* min buflen is 32 kB, step is 8 kB, or 128 kB if > 1 MB */
811
    size_t step = (req < 0x100000) ? 0x2000 : 0x20000;
812
    krt_buflen = (req < 0x6000) ? 0x8000 : (req + step);
813
    krt_bufmin = (req < 0x8000) ? 0 : (req - 2*step);
814

    
815
    if (krt_buffer) 
816
      mb_free(krt_buffer);
817
    krt_buffer = mb_alloc(krt_pool, krt_buflen);
818
    krt_buffer_owner = p;
819
  }
820

    
821
  *needed = krt_buflen;
822
  return krt_buffer;
823
}
824

    
825
static void
826
krt_buffer_release(struct proto *p)
827
{
828
  if (p == krt_buffer_owner)
829
  {
830
    mb_free(krt_buffer);
831
    krt_buffer = NULL;
832
    krt_buflen = 0;
833
    krt_buffer_owner = 0;
834
  }
835
}
836

    
837
static void
838
krt_sysctl_scan(struct proto *p, int cmd, int table_id)
839
{
840
  byte *buf, *next;
841
  int mib[7], mcnt;
842
  size_t needed;
843
  struct ks_msg *m;
844
  int retries = 3;
845
  int rv;
846

    
847
  mib[0] = CTL_NET;
848
  mib[1] = PF_ROUTE;
849
  mib[2] = 0;
850
  mib[3] = 0; // Set AF to 0 for all available families
851
  mib[4] = cmd;
852
  mib[5] = 0;
853
  mcnt = 6;
854

    
855
#ifdef KRT_USE_SYSCTL_7
856
  if (table_id >= 0)
857
  {
858
    mib[6] = table_id;
859
    mcnt = 7;
860
  }
861
#endif
862

    
863
#ifdef KRT_USE_SETFIB_SCAN
864
  if (table_id > 0)
865
    if (setfib(table_id) < 0)
866
    {
867
      log(L_ERR "KRT: setfib(%d) failed: %m", table_id);
868
      return;
869
    }
870
#endif
871

    
872
 try:
873
  rv = sysctl(mib, mcnt, NULL, &needed, NULL, 0);
874
  if (rv < 0)
875
  {
876
    /* OpenBSD returns EINVAL for not yet used tables */
877
    if ((errno == EINVAL) && (table_id > 0))
878
      goto exit;
879

    
880
    log(L_ERR "KRT: Route scan estimate failed: %m");
881
    goto exit;
882
  }
883

    
884
  /* The table is empty */
885
  if (needed == 0)
886
    goto exit;
887

    
888
  buf = krt_buffer_update(p, &needed);
889

    
890
  rv = sysctl(mib, mcnt, buf, &needed, NULL, 0);
891
  if (rv < 0)
892
  {
893
    /* The buffer size changed since last sysctl ('needed' is not changed) */
894
    if ((errno == ENOMEM) && retries--)
895
      goto try;
896

    
897
    log(L_ERR "KRT: Route scan failed: %m");
898
    goto exit;
899
  }
900

    
901
#ifdef KRT_USE_SETFIB_SCAN
902
  if (table_id > 0)
903
    if (setfib(0) < 0)
904
      die("KRT: setfib(%d) failed: %m", 0);
905
#endif
906

    
907
  /* Process received messages */
908
  for (next = buf; next < (buf + needed); next += m->rtm.rtm_msglen)
909
  {
910
    m = (struct ks_msg *)next;
911
    krt_read_msg(p, m, 1);
912
  }
913

    
914
  return;
915

    
916
 exit:
917
  krt_buffer_release(p);
918

    
919
#ifdef KRT_USE_SETFIB_SCAN
920
  if (table_id > 0)
921
    if (setfib(0) < 0)
922
      die("KRT: setfib(%d) failed: %m", 0);
923
#endif
924
}
925

    
926
void
927
krt_do_scan(struct krt_proto *p)
928
{
929
  krt_sysctl_scan(&p->p, NET_RT_DUMP, KRT_CF->sys.table_id);
930
}
931

    
932
void
933
kif_do_scan(struct kif_proto *p)
934
{
935
  if_start_update();
936
  krt_sysctl_scan(&p->p, NET_RT_IFLIST, -1);
937
  if_end_update();
938
}
939

    
940

    
941
/* Kernel sockets */
942

    
943
static int
944
krt_sock_hook(sock *sk, uint size UNUSED)
945
{
946
  struct ks_msg msg;
947
  int l = read(sk->fd, (char *)&msg, sizeof(msg));
948

    
949
  if (l <= 0)
950
    log(L_ERR "krt-sock: read failed");
951
  else
952
    krt_read_msg((struct proto *) sk->data, &msg, 0);
953

    
954
  return 0;
955
}
956

    
957
static void
958
krt_sock_err_hook(sock *sk, int e UNUSED)
959
{
960
  krt_sock_hook(sk, 0);
961
}
962

    
963
static sock *
964
krt_sock_open(pool *pool, void *data, int table_id UNUSED)
965
{
966
  sock *sk;
967
  int fd;
968

    
969
  fd = socket(PF_ROUTE, SOCK_RAW, AF_UNSPEC);
970
  if (fd < 0)
971
    die("Cannot open kernel socket for routes");
972

    
973
#ifdef KRT_USE_SETFIB_SOCK
974
  if (table_id > 0)
975
  {
976
    if (setsockopt(fd, SOL_SOCKET, SO_SETFIB, &table_id, sizeof(table_id)) < 0)
977
      die("Cannot set FIB %d for kernel socket: %m", table_id);
978
  }
979
#endif
980

    
981
  sk = sk_new(pool);
982
  sk->type = SK_MAGIC;
983
  sk->rx_hook = krt_sock_hook;
984
  sk->err_hook = krt_sock_err_hook;
985
  sk->fd = fd;
986
  sk->data = data;
987

    
988
  if (sk_open(sk) < 0)
989
    bug("krt-sock: sk_open failed");
990

    
991
  return sk;
992
}
993

    
994
/*
 * Bitmap of kernel table ids already claimed by a kernel syncer: one bit
 * per table id, kept separately for IPv4 (index 0) and IPv6 (index 1).
 */
static u32 krt_table_cf[(KRT_MAX_TABLES+31) / 32][2];
995

    
996
#ifdef KRT_SHARED_SOCKET
997

    
998
static sock *krt_sock;
999
static int krt_sock_count;
1000

    
1001

    
1002
static void
1003
krt_sock_open_shared(void)
1004
{
1005
  if (!krt_sock_count)
1006
    krt_sock = krt_sock_open(krt_pool, NULL, -1);
1007
  
1008
  krt_sock_count++;
1009
}
1010

    
1011
static void
1012
krt_sock_close_shared(void)
1013
{
1014
  krt_sock_count--;
1015

    
1016
  if (!krt_sock_count)
1017
  {
1018
    rfree(krt_sock);
1019
    krt_sock = NULL;
1020
  }
1021
}
1022

    
1023
int
1024
krt_sys_start(struct krt_proto *p)
1025
{
1026
  int id = KRT_CF->sys.table_id;
1027

    
1028
  if (krt_table_cf[id/32][!!(p->af == AF_INET6)] & (1 << (id%32)))
1029
    {
1030
      log(L_ERR "%s: Multiple kernel syncers defined for table #%d", p->p.name, id);
1031
      return 0;
1032
    }
1033

    
1034
  krt_table_cf[id/32][!!(p->af == AF_INET6)] |= (1 << (id%32));
1035

    
1036
  krt_table_map[KRT_CF->sys.table_id][!!(p->af == AF_INET6)] = p;
1037

    
1038
  krt_sock_open_shared();
1039
  p->sys.sk = krt_sock;
1040

    
1041
  return 1;
1042
}
1043

    
1044
void
1045
krt_sys_shutdown(struct krt_proto *p)
1046
{
1047
  krt_table_cf[(KRT_CF->sys.table_id)/32][!!(p->af == AF_INET6)] &= ~(1 << ((KRT_CF->sys.table_id)%32));
1048

    
1049
  krt_sock_close_shared();
1050
  p->sys.sk = NULL;
1051

    
1052
  krt_table_map[KRT_CF->sys.table_id][!!(p->af == AF_INET6)] = NULL;
1053

    
1054
  krt_buffer_release(&p->p);
1055
}
1056

    
1057
#else
1058

    
1059
int
1060
krt_sys_start(struct krt_proto *p)
1061
{
1062
  int id = KRT_CF->sys.table_id;
1063

    
1064
  if (krt_table_cf[id/32][!!(p->af == AF_INET6)] & (1 << (id%32)))
1065
    {
1066
      log(L_ERR "%s: Multiple kernel syncers defined for table #%d", p->p.name, id);
1067
      return 0;
1068
    }
1069

    
1070
  krt_table_cf[id/32][!!(p->af == AF_INET6)] |= (1 << (id%32));
1071

    
1072
  p->sys.sk = krt_sock_open(p->p.pool, p, KRT_CF->sys.table_id);
1073
  return 1;
1074
}
1075

    
1076
void
1077
krt_sys_shutdown(struct krt_proto *p)
1078
{
1079
  krt_table_cf[(KRT_CF->sys.table_id)/32][!!(p->af == AF_INET6)] &= ~(1 << ((KRT_CF->sys.table_id)%32));
1080

    
1081
  rfree(p->sys.sk);
1082
  p->sys.sk = NULL;
1083

    
1084
  krt_buffer_release(&p->p);
1085
}
1086

    
1087
#endif /* KRT_SHARED_SOCKET */
1088

    
1089

    
1090
/* KRT configuration callbacks */
1091

    
1092
int
1093
krt_sys_reconfigure(struct krt_proto *p UNUSED, struct krt_config *n, struct krt_config *o)
1094
{
1095
  return n->sys.table_id == o->sys.table_id;
1096
}
1097

    
1098
void
1099
krt_sys_preconfig(struct config *c UNUSED)
1100
{
1101
  krt_max_tables = krt_get_max_tables();
1102
  bzero(&krt_table_cf, sizeof(krt_table_cf));
1103
}
1104

    
1105
void krt_sys_init_config(struct krt_config *c)
1106
{
1107
  c->sys.table_id = 0; /* Default table */
1108
}
1109

    
1110
void krt_sys_copy_config(struct krt_config *d, struct krt_config *s)
1111
{
1112
  d->sys.table_id = s->sys.table_id;
1113
}
1114

    
1115

    
1116
/* KIF misc code */
1117

    
1118
void
1119
kif_sys_start(struct kif_proto *p UNUSED)
1120
{
1121
}
1122

    
1123
void
1124
kif_sys_shutdown(struct kif_proto *p)
1125
{
1126
  krt_buffer_release(&p->p);
1127
}
1128

    
1129
int
1130
kif_update_sysdep_addr(struct iface *i)
1131
{
1132
  static int fd = -1;
1133

    
1134
  if (fd < 0)
1135
    fd = socket(AF_INET, SOCK_DGRAM, 0);
1136

    
1137
  struct ifreq ifr;
1138
  memset(&ifr, 0, sizeof(ifr));
1139
  strncpy(ifr.ifr_name, i->name, IFNAMSIZ);
1140

    
1141
  int rv = ioctl(fd, SIOCGIFADDR, (char *) &ifr);
1142
  if (rv < 0)
1143
    return 0;
1144

    
1145
  ip4_addr old = i->sysdep;
1146
  i->sysdep = ipa_to_ip4(ipa_from_sa4(&ifr.ifr_addr));
1147

    
1148
  return !ip4_equal(i->sysdep, old);
1149
}