Statistics
| Branch: | Revision:

iof-bird-daemon / sysdep / linux / netlink.c @ 48e5f32d

History | View | Annotate | Download (26.3 KB)

1 95616c82 Ondrej Zajicek
/*
2
 *        BIRD -- Linux Netlink Interface
3
 *
4
 *        (c) 1999--2000 Martin Mares <mj@ucw.cz>
5
 *
6
 *        Can be freely distributed and used under the terms of the GNU GPL.
7
 */
8
9
#include <stdio.h>
10 f83ce94d Ondrej Zajicek
#include <unistd.h>
11 95616c82 Ondrej Zajicek
#include <fcntl.h>
12
#include <sys/socket.h>
13
#include <sys/uio.h>
14
#include <errno.h>
15
16
#undef LOCAL_DEBUG
17
18
#include "nest/bird.h"
19
#include "nest/route.h"
20
#include "nest/protocol.h"
21
#include "nest/iface.h"
22
#include "lib/alloca.h"
23
#include "lib/timer.h"
24
#include "lib/unix.h"
25
#include "lib/krt.h"
26
#include "lib/socket.h"
27
#include "lib/string.h"
28
#include "conf/conf.h"
29
30
#include <asm/types.h>
31
#include <linux/if.h>
32
#include <linux/netlink.h>
33
#include <linux/rtnetlink.h>
34
35
#ifndef MSG_TRUNC                        /* Hack: Several versions of glibc miss this one :( */
36
#define MSG_TRUNC 0x20
37
#endif
38
39
#ifndef IFF_LOWER_UP
40
#define IFF_LOWER_UP 0x10000
41
#endif
42
43
/*
44
 *        Synchronous Netlink interface
45
 */
46
47
struct nl_sock
48
{
49
  int fd;
50
  u32 seq;
51
  byte *rx_buffer;                        /* Receive buffer */
52
  struct nlmsghdr *last_hdr;                /* Recently received packet */
53
  unsigned int last_size;
54
};
55
56
#define NL_RX_SIZE 8192
57
58
static struct nl_sock nl_scan = {.fd = -1};        /* Netlink socket for synchronous scan */
59
static struct nl_sock nl_req  = {.fd = -1};        /* Netlink socket for requests */
60
61
static void
62
nl_open_sock(struct nl_sock *nl)
63
{
64
  if (nl->fd < 0)
65
    {
66
      nl->fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
67
      if (nl->fd < 0)
68
        die("Unable to open rtnetlink socket: %m");
69
      nl->seq = now;
70
      nl->rx_buffer = xmalloc(NL_RX_SIZE);
71
      nl->last_hdr = NULL;
72
      nl->last_size = 0;
73
    }
74
}
75
76
static void
77
nl_open(void)
78
{
79
  nl_open_sock(&nl_scan);
80
  nl_open_sock(&nl_req);
81
}
82
83
static void
84
nl_send(struct nl_sock *nl, struct nlmsghdr *nh)
85
{
86
  struct sockaddr_nl sa;
87
88
  memset(&sa, 0, sizeof(sa));
89
  sa.nl_family = AF_NETLINK;
90
  nh->nlmsg_pid = 0;
91
  nh->nlmsg_seq = ++(nl->seq);
92
  if (sendto(nl->fd, nh, nh->nlmsg_len, 0, (struct sockaddr *)&sa, sizeof(sa)) < 0)
93
    die("rtnetlink sendto: %m");
94
  nl->last_hdr = NULL;
95
}
96
97
static void
98
nl_request_dump(int cmd)
99
{
100
  struct {
101
    struct nlmsghdr nh;
102
    struct rtgenmsg g;
103
  } req;
104
  req.nh.nlmsg_type = cmd;
105
  req.nh.nlmsg_len = sizeof(req);
106
  req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
107
  /* Is it important which PF_* is used for link-level interface scan?
108
     It seems that some information is available only when PF_INET is used. */
109
  req.g.rtgen_family = (cmd == RTM_GETLINK) ? PF_INET : BIRD_PF;
110
  nl_send(&nl_scan, &req.nh);
111
}
112
113
static struct nlmsghdr *
114
nl_get_reply(struct nl_sock *nl)
115
{
116
  for(;;)
117
    {
118
      if (!nl->last_hdr)
119
        {
120
          struct iovec iov = { nl->rx_buffer, NL_RX_SIZE };
121
          struct sockaddr_nl sa;
122
          struct msghdr m = { (struct sockaddr *) &sa, sizeof(sa), &iov, 1, NULL, 0, 0 };
123
          int x = recvmsg(nl->fd, &m, 0);
124
          if (x < 0)
125
            die("nl_get_reply: %m");
126
          if (sa.nl_pid)                /* It isn't from the kernel */
127
            {
128
              DBG("Non-kernel packet\n");
129
              continue;
130
            }
131
          nl->last_size = x;
132
          nl->last_hdr = (void *) nl->rx_buffer;
133
          if (m.msg_flags & MSG_TRUNC)
134
            bug("nl_get_reply: got truncated reply which should be impossible");
135
        }
136
      if (NLMSG_OK(nl->last_hdr, nl->last_size))
137
        {
138
          struct nlmsghdr *h = nl->last_hdr;
139
          nl->last_hdr = NLMSG_NEXT(h, nl->last_size);
140
          if (h->nlmsg_seq != nl->seq)
141
            {
142
              log(L_WARN "nl_get_reply: Ignoring out of sequence netlink packet (%x != %x)",
143
                  h->nlmsg_seq, nl->seq);
144
              continue;
145
            }
146
          return h;
147
        }
148
      if (nl->last_size)
149
        log(L_WARN "nl_get_reply: Found packet remnant of size %d", nl->last_size);
150
      nl->last_hdr = NULL;
151
    }
152
}
153
154
static struct rate_limit rl_netlink_err;
155
156
static int
157
nl_error(struct nlmsghdr *h)
158
{
159
  struct nlmsgerr *e;
160
  int ec;
161
162
  if (h->nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr)))
163
    {
164
      log(L_WARN "Netlink: Truncated error message received");
165
      return ENOBUFS;
166
    }
167
  e = (struct nlmsgerr *) NLMSG_DATA(h);
168
  ec = -e->error;
169
  if (ec)
170
    log_rl(&rl_netlink_err, L_WARN "Netlink: %s", strerror(ec));
171
  return ec;
172
}
173
174
static struct nlmsghdr *
175
nl_get_scan(void)
176
{
177
  struct nlmsghdr *h = nl_get_reply(&nl_scan);
178
179
  if (h->nlmsg_type == NLMSG_DONE)
180
    return NULL;
181
  if (h->nlmsg_type == NLMSG_ERROR)
182
    {
183
      nl_error(h);
184
      return NULL;
185
    }
186
  return h;
187
}
188
189
static int
190
nl_exchange(struct nlmsghdr *pkt)
191
{
192
  struct nlmsghdr *h;
193
194
  nl_send(&nl_req, pkt);
195
  for(;;)
196
    {
197
      h = nl_get_reply(&nl_req);
198
      if (h->nlmsg_type == NLMSG_ERROR)
199
        break;
200
      log(L_WARN "nl_exchange: Unexpected reply received");
201
    }
202
  return nl_error(h) ? -1 : 0;
203
}
204
205
/*
206
 *        Netlink attributes
207
 */
208
209
static int nl_attr_len;
210
211
static void *
212
nl_checkin(struct nlmsghdr *h, int lsize)
213
{
214
  nl_attr_len = h->nlmsg_len - NLMSG_LENGTH(lsize);
215
  if (nl_attr_len < 0)
216
    {
217
      log(L_ERR "nl_checkin: underrun by %d bytes", -nl_attr_len);
218
      return NULL;
219
    }
220
  return NLMSG_DATA(h);
221
}
222
223
static int
224
nl_parse_attrs(struct rtattr *a, struct rtattr **k, int ksize)
225
{
226
  int max = ksize / sizeof(struct rtattr *);
227
  bzero(k, ksize);
228
  while (RTA_OK(a, nl_attr_len))
229
    {
230
      if (a->rta_type < max)
231
        k[a->rta_type] = a;
232
      a = RTA_NEXT(a, nl_attr_len);
233
    }
234
  if (nl_attr_len)
235
    {
236
      log(L_ERR "nl_parse_attrs: remnant of size %d", nl_attr_len);
237
      return 0;
238
    }
239
  else
240
    return 1;
241
}
242
243
void
244
nl_add_attr(struct nlmsghdr *h, unsigned bufsize, unsigned code,
245
            void *data, unsigned dlen)
246
{
247
  unsigned len = RTA_LENGTH(dlen);
248
  unsigned pos = NLMSG_ALIGN(h->nlmsg_len);
249
  struct rtattr *a;
250
251
  if (pos + len > bufsize)
252
    bug("nl_add_attr: packet buffer overflow");
253
  a = (struct rtattr *)((char *)h + pos);
254
  a->rta_type = code;
255
  a->rta_len = len;
256
  h->nlmsg_len = pos + len;
257
  memcpy(RTA_DATA(a), data, dlen);
258
}
259
260
static inline void
261
nl_add_attr_u32(struct nlmsghdr *h, unsigned bufsize, int code, u32 data)
262
{
263
  nl_add_attr(h, bufsize, code, &data, 4);
264
}
265
266
static inline void
267
nl_add_attr_ipa(struct nlmsghdr *h, unsigned bufsize, int code, ip_addr ipa)
268
{
269
  ipa_hton(ipa);
270
  nl_add_attr(h, bufsize, code, &ipa, sizeof(ipa));
271
}
272
273
#define RTNH_SIZE (sizeof(struct rtnexthop) + sizeof(struct rtattr) + sizeof(ip_addr))
274
275
static inline void
276
add_mpnexthop(char *buf, ip_addr ipa, unsigned iface, unsigned char weight)
277
{
278
  struct rtnexthop *nh = (void *) buf;
279
  struct rtattr *rt = (void *) (buf + sizeof(*nh));
280
  nh->rtnh_len = RTNH_SIZE;
281
  nh->rtnh_flags = 0;
282
  nh->rtnh_hops = weight;
283
  nh->rtnh_ifindex = iface;
284
  rt->rta_len = sizeof(*rt) + sizeof(ipa);
285
  rt->rta_type = RTA_GATEWAY;
286
  ipa_hton(ipa);
287
  memcpy(buf + sizeof(*nh) + sizeof(*rt), &ipa, sizeof(ipa));
288
}
289
290
291
static void
292
nl_add_multipath(struct nlmsghdr *h, unsigned bufsize, struct mpnh *nh)
293
{
294
  unsigned len = sizeof(struct rtattr);
295
  unsigned pos = NLMSG_ALIGN(h->nlmsg_len);
296
  char *buf = (char *)h + pos;
297
  struct rtattr *rt = (void *) buf;
298
  buf += len;
299
  
300
  for (; nh; nh = nh->next)
301
    {
302
      len += RTNH_SIZE;
303
      if (pos + len > bufsize)
304
        bug("nl_add_multipath: packet buffer overflow");
305
306
      add_mpnexthop(buf, nh->gw, nh->iface->index, nh->weight);
307
      buf += RTNH_SIZE;
308
    }
309
310
  rt->rta_type = RTA_MULTIPATH;
311
  rt->rta_len = len;
312
  h->nlmsg_len = pos + len;
313
}
314
315
316
static struct mpnh *
317
nl_parse_multipath(struct krt_proto *p, struct rtattr *ra)
318
{
319
  /* Temporary buffer for multicast nexthops */
320
  static struct mpnh *nh_buffer;
321
  static int nh_buf_size;        /* in number of structures */
322
  static int nh_buf_used;
323
324
  struct rtattr *a[RTA_CACHEINFO+1];
325
  struct rtnexthop *nh = RTA_DATA(ra);
326
  struct mpnh *rv, *first, **last;
327
  int len = RTA_PAYLOAD(ra);
328
329
  first = NULL;
330
  last = &first;
331
  nh_buf_used = 0;
332
333
  while (len)
334
    {
335
      /* Use RTNH_OK(nh,len) ?? */
336
      if ((len < sizeof(*nh)) || (len < nh->rtnh_len))
337
        return NULL;
338
339
      if (nh_buf_used == nh_buf_size)
340
      {
341
        nh_buf_size = nh_buf_size ? (nh_buf_size * 2) : 4;
342
        nh_buffer = xrealloc(nh_buffer, nh_buf_size * sizeof(struct mpnh));
343
      }
344
      *last = rv = nh_buffer + nh_buf_used++;
345
      rv->next = NULL;
346
      last = &(rv->next);
347
348
      rv->weight = nh->rtnh_hops;
349
      rv->iface = if_find_by_index(nh->rtnh_ifindex);
350
      if (!rv->iface)
351
        return NULL;
352
353
      /* Nonexistent RTNH_PAYLOAD ?? */
354
      nl_attr_len = nh->rtnh_len - RTNH_LENGTH(0);
355
      nl_parse_attrs(RTNH_DATA(nh), a, sizeof(a));
356
      if (a[RTA_GATEWAY])
357
        {
358
          if (RTA_PAYLOAD(a[RTA_GATEWAY]) != sizeof(ip_addr))
359
            return NULL;
360
361
          memcpy(&rv->gw, RTA_DATA(a[RTA_GATEWAY]), sizeof(ip_addr));
362
          ipa_ntoh(rv->gw);
363
364
          neighbor *ng = neigh_find2(&p->p, &rv->gw, rv->iface,
365
                                     (nh->rtnh_flags & RTNH_F_ONLINK) ? NEF_ONLINK : 0);
366
          if (!ng || (ng->scope == SCOPE_HOST))
367
            return NULL;
368
        }
369
      else
370
        return NULL;
371
372
      len -= NLMSG_ALIGN(nh->rtnh_len);
373
      nh = RTNH_NEXT(nh);
374
    }
375
376
  return first;
377
}
378
379
380
/*
381
 *        Scanning of interfaces
382
 */
383
384
static void
385
nl_parse_link(struct nlmsghdr *h, int scan)
386
{
387
  struct ifinfomsg *i;
388
  struct rtattr *a[IFLA_WIRELESS+1];
389
  int new = h->nlmsg_type == RTM_NEWLINK;
390
  struct iface f = {};
391
  struct iface *ifi;
392
  char *name;
393
  u32 mtu;
394
  unsigned int fl;
395
396
  if (!(i = nl_checkin(h, sizeof(*i))) || !nl_parse_attrs(IFLA_RTA(i), a, sizeof(a)))
397
    return;
398
  if (!a[IFLA_IFNAME] || RTA_PAYLOAD(a[IFLA_IFNAME]) < 2 ||
399
      !a[IFLA_MTU] || RTA_PAYLOAD(a[IFLA_MTU]) != 4)
400
    {
401
      if (scan || !a[IFLA_WIRELESS])
402
        log(L_ERR "nl_parse_link: Malformed message received");
403
      return;
404
    }
405
  name = RTA_DATA(a[IFLA_IFNAME]);
406
  memcpy(&mtu, RTA_DATA(a[IFLA_MTU]), sizeof(u32));
407
408
  ifi = if_find_by_index(i->ifi_index);
409
  if (!new)
410
    {
411
      DBG("KIF: IF%d(%s) goes down\n", i->ifi_index, name);
412
      if (!ifi)
413
        return;
414
415
      if_delete(ifi);
416
    }
417
  else
418
    {
419
      DBG("KIF: IF%d(%s) goes up (mtu=%d,flg=%x)\n", i->ifi_index, name, mtu, i->ifi_flags);
420
      if (ifi && strncmp(ifi->name, name, sizeof(ifi->name)-1))
421
        if_delete(ifi);
422
423
      strncpy(f.name, name, sizeof(f.name)-1);
424
      f.index = i->ifi_index;
425
      f.mtu = mtu;
426
427
      fl = i->ifi_flags;
428
      if (fl & IFF_UP)
429
        f.flags |= IF_ADMIN_UP;
430
      if (fl & IFF_LOWER_UP)
431
        f.flags |= IF_LINK_UP;
432
      if (fl & IFF_LOOPBACK)                /* Loopback */
433
        f.flags |= IF_MULTIACCESS | IF_LOOPBACK | IF_IGNORE;
434
      else if (fl & IFF_POINTOPOINT)        /* PtP */
435
        f.flags |= IF_MULTICAST;
436
      else if (fl & IFF_BROADCAST)        /* Broadcast */
437
        f.flags |= IF_MULTIACCESS | IF_BROADCAST | IF_MULTICAST;
438
      else
439
        f.flags |= IF_MULTIACCESS;        /* NBMA */
440
      if_update(&f);
441
    }
442
}
443
444
static void
445
nl_parse_addr(struct nlmsghdr *h)
446
{
447
  struct ifaddrmsg *i;
448
  struct rtattr *a[IFA_ANYCAST+1];
449
  int new = h->nlmsg_type == RTM_NEWADDR;
450
  struct ifa ifa;
451
  struct iface *ifi;
452
  int scope;
453
454
  if (!(i = nl_checkin(h, sizeof(*i))) || !nl_parse_attrs(IFA_RTA(i), a, sizeof(a)))
455
    return;
456
  if (i->ifa_family != BIRD_AF)
457
    return;
458
  if (!a[IFA_ADDRESS] || RTA_PAYLOAD(a[IFA_ADDRESS]) != sizeof(ip_addr)
459
#ifdef IPV6
460
      || a[IFA_LOCAL] && RTA_PAYLOAD(a[IFA_LOCAL]) != sizeof(ip_addr)
461
#else
462
      || !a[IFA_LOCAL] || RTA_PAYLOAD(a[IFA_LOCAL]) != sizeof(ip_addr)
463
      || (a[IFA_BROADCAST] && RTA_PAYLOAD(a[IFA_BROADCAST]) != sizeof(ip_addr))
464
#endif
465
      )
466
    {
467
      log(L_ERR "nl_parse_addr: Malformed message received");
468
      return;
469
    }
470
471
  ifi = if_find_by_index(i->ifa_index);
472
  if (!ifi)
473
    {
474
      log(L_ERR "KIF: Received address message for unknown interface %d", i->ifa_index);
475
      return;
476
    }
477
478
  bzero(&ifa, sizeof(ifa));
479
  ifa.iface = ifi;
480
  if (i->ifa_flags & IFA_F_SECONDARY)
481
    ifa.flags |= IA_SECONDARY;
482
483
  /* IFA_LOCAL can be unset for IPv6 interfaces */
484
  memcpy(&ifa.ip, RTA_DATA(a[IFA_LOCAL] ? : a[IFA_ADDRESS]), sizeof(ifa.ip));
485
  ipa_ntoh(ifa.ip);
486
  ifa.pxlen = i->ifa_prefixlen;
487
  if (i->ifa_prefixlen > BITS_PER_IP_ADDRESS)
488
    {
489
      log(L_ERR "KIF: Invalid prefix length for interface %s: %d", ifi->name, i->ifa_prefixlen);
490
      new = 0;
491
    }
492
  if (i->ifa_prefixlen == BITS_PER_IP_ADDRESS)
493
    {
494
      ip_addr addr;
495
      memcpy(&addr, RTA_DATA(a[IFA_ADDRESS]), sizeof(addr));
496
      ipa_ntoh(addr);
497
      ifa.prefix = ifa.brd = addr;
498
499
      /* It is either a host address or a peer address */
500
      if (ipa_equal(ifa.ip, addr))
501
        ifa.flags |= IA_HOST;
502
      else
503
        {
504
          ifa.flags |= IA_PEER;
505
          ifa.opposite = addr;
506
        }
507
    }
508
  else
509
    {
510
      ip_addr netmask = ipa_mkmask(ifa.pxlen);
511
      ifa.prefix = ipa_and(ifa.ip, netmask);
512
      ifa.brd = ipa_or(ifa.ip, ipa_not(netmask));
513
      if (i->ifa_prefixlen == BITS_PER_IP_ADDRESS - 1)
514
        ifa.opposite = ipa_opposite_m1(ifa.ip);
515
516
#ifndef IPV6
517
      if (i->ifa_prefixlen == BITS_PER_IP_ADDRESS - 2)
518
        ifa.opposite = ipa_opposite_m2(ifa.ip);
519
520
      if ((ifi->flags & IF_BROADCAST) && a[IFA_BROADCAST])
521
        {
522
          ip_addr xbrd;
523
          memcpy(&xbrd, RTA_DATA(a[IFA_BROADCAST]), sizeof(xbrd));
524
          ipa_ntoh(xbrd);
525
          if (ipa_equal(xbrd, ifa.prefix) || ipa_equal(xbrd, ifa.brd))
526
            ifa.brd = xbrd;
527
          else if (ifi->flags & IF_TMP_DOWN) /* Complain only during the first scan */
528
            log(L_ERR "KIF: Invalid broadcast address %I for %s", xbrd, ifi->name);
529
        }
530
#endif
531
    }
532
533
  scope = ipa_classify(ifa.ip);
534
  if (scope < 0)
535
    {
536
      log(L_ERR "KIF: Invalid interface address %I for %s", ifa.ip, ifi->name);
537
      return;
538
    }
539
  ifa.scope = scope & IADDR_SCOPE_MASK;
540
541
  DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %I/%d, brd %I, opp %I\n",
542
      ifi->index, ifi->name,
543
      new ? "added" : "removed",
544
      ifa.ip, ifa.flags, ifa.prefix, ifa.pxlen, ifa.brd, ifa.opposite);
545
  if (new)
546
    ifa_update(&ifa);
547
  else
548
    ifa_delete(&ifa);
549
}
550
551
void
552
kif_do_scan(struct kif_proto *p UNUSED)
553
{
554
  struct nlmsghdr *h;
555
556
  if_start_update();
557
558
  nl_request_dump(RTM_GETLINK);
559
  while (h = nl_get_scan())
560
    if (h->nlmsg_type == RTM_NEWLINK || h->nlmsg_type == RTM_DELLINK)
561
      nl_parse_link(h, 1);
562
    else
563
      log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
564
565
  nl_request_dump(RTM_GETADDR);
566
  while (h = nl_get_scan())
567
    if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR)
568
      nl_parse_addr(h);
569
    else
570
      log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
571
572
  if_end_update();
573
}
574
575
/*
576
 *        Routes
577
 */
578
579
static struct krt_proto *nl_table_map[NL_NUM_TABLES];
580
581
int
582
krt_capable(rte *e)
583
{
584
  rta *a = e->attrs;
585
586
  if (a->cast != RTC_UNICAST)
587
    return 0;
588
589
  switch (a->dest)
590
    {
591
    case RTD_ROUTER:
592
    case RTD_DEVICE:
593
      if (a->iface == NULL)
594
        return 0;
595
    case RTD_BLACKHOLE:
596
    case RTD_UNREACHABLE:
597
    case RTD_PROHIBIT:
598
    case RTD_MULTIPATH:
599
      break;
600
    default:
601
      return 0;
602
    }
603
  return 1;
604
}
605
606
static inline int
607
nh_bufsize(struct mpnh *nh)
608
{
609
  int rv = 0;
610
  for (; nh != NULL; nh = nh->next)
611
    rv += RTNH_SIZE;
612
  return rv;
613
}
614
615
static int
616
nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int new)
617
{
618
  eattr *ea;
619
  net *net = e->net;
620
  rta *a = e->attrs;
621
  struct {
622
    struct nlmsghdr h;
623
    struct rtmsg r;
624
    char buf[128 + nh_bufsize(a->nexthops)];
625
  } r;
626
627
  DBG("nl_send_route(%I/%d,new=%d)\n", net->n.prefix, net->n.pxlen, new);
628
629
  bzero(&r.h, sizeof(r.h));
630
  bzero(&r.r, sizeof(r.r));
631
  r.h.nlmsg_type = new ? RTM_NEWROUTE : RTM_DELROUTE;
632
  r.h.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
633
  r.h.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | (new ? NLM_F_CREATE|NLM_F_EXCL : 0);
634
635
  r.r.rtm_family = BIRD_AF;
636
  r.r.rtm_dst_len = net->n.pxlen;
637
  r.r.rtm_tos = 0;
638
  r.r.rtm_table = KRT_CF->sys.table_id;
639
  r.r.rtm_protocol = RTPROT_BIRD;
640
  r.r.rtm_scope = RT_SCOPE_UNIVERSE;
641
  nl_add_attr_ipa(&r.h, sizeof(r), RTA_DST, net->n.prefix);
642
643
  u32 metric = 0;
644
  if (new && e->attrs->source == RTS_INHERIT)
645
    metric = e->u.krt.metric;
646
  if (ea = ea_find(eattrs, EA_KRT_METRIC))
647
    metric = ea->u.data;
648
  if (metric != 0)
649
    nl_add_attr_u32(&r.h, sizeof(r), RTA_PRIORITY, metric);
650
651
  if (ea = ea_find(eattrs, EA_KRT_PREFSRC))
652
    nl_add_attr_ipa(&r.h, sizeof(r), RTA_PREFSRC, *(ip_addr *)ea->u.ptr->data);
653
654
  if (ea = ea_find(eattrs, EA_KRT_REALM))
655
    nl_add_attr_u32(&r.h, sizeof(r), RTA_FLOW, ea->u.data);
656
657
  /* a->iface != NULL checked in krt_capable() for router and device routes */
658
659
  switch (a->dest)
660
    {
661
    case RTD_ROUTER:
662
      r.r.rtm_type = RTN_UNICAST;
663
      nl_add_attr_u32(&r.h, sizeof(r), RTA_OIF, a->iface->index);
664
      nl_add_attr_ipa(&r.h, sizeof(r), RTA_GATEWAY, a->gw);
665
      break;
666
    case RTD_DEVICE:
667
      r.r.rtm_type = RTN_UNICAST;
668
      nl_add_attr_u32(&r.h, sizeof(r), RTA_OIF, a->iface->index);
669
      break;
670
    case RTD_BLACKHOLE:
671
      r.r.rtm_type = RTN_BLACKHOLE;
672
      break;
673
    case RTD_UNREACHABLE:
674
      r.r.rtm_type = RTN_UNREACHABLE;
675
      break;
676
    case RTD_PROHIBIT:
677
      r.r.rtm_type = RTN_PROHIBIT;
678
      break;
679
    case RTD_MULTIPATH:
680
      r.r.rtm_type = RTN_UNICAST;
681
      nl_add_multipath(&r.h, sizeof(r), a->nexthops);
682
      break;
683
    default:
684
      bug("krt_capable inconsistent with nl_send_route");
685
    }
686
687
  return nl_exchange(&r.h);
688
}
689
690
void
691
krt_replace_rte(struct krt_proto *p, net *n, rte *new, rte *old, struct ea_list *eattrs)
692
{
693
  int err = 0;
694
695
  /*
696
   * NULL for eattr of the old route is a little hack, but we don't
697
   * get proper eattrs for old in rt_notify() anyway. NULL means no
698
   * extended route attributes and therefore matches if the kernel
699
   * route has any of them.
700
   */
701
702
  if (old)
703
    nl_send_route(p, old, NULL, 0);
704
705
  if (new)
706
    err = nl_send_route(p, new, eattrs, 1);
707
708
  if (err < 0)
709
    n->n.flags |= KRF_SYNC_ERROR;
710
  else
711
    n->n.flags &= ~KRF_SYNC_ERROR;
712
}
713
714
715
#define SKIP(ARG...) do { DBG("KRT: Ignoring route - " ARG); return; } while(0)
716
717
static void
718
nl_parse_route(struct nlmsghdr *h, int scan)
719
{
720
  struct krt_proto *p;
721
  struct rtmsg *i;
722
  struct rtattr *a[RTA_CACHEINFO+1];
723
  int new = h->nlmsg_type == RTM_NEWROUTE;
724
725
  ip_addr dst = IPA_NONE;
726
  u32 oif = ~0;
727
  int src;
728
729
  if (!(i = nl_checkin(h, sizeof(*i))) || !nl_parse_attrs(RTM_RTA(i), a, sizeof(a)))
730
    return;
731
  if (i->rtm_family != BIRD_AF)
732
    return;
733
  if ((a[RTA_DST] && RTA_PAYLOAD(a[RTA_DST]) != sizeof(ip_addr)) ||
734
#ifdef IPV6
735
      (a[RTA_IIF] && RTA_PAYLOAD(a[RTA_IIF]) != 4) ||
736
#endif
737
      (a[RTA_OIF] && RTA_PAYLOAD(a[RTA_OIF]) != 4) ||
738
      (a[RTA_GATEWAY] && RTA_PAYLOAD(a[RTA_GATEWAY]) != sizeof(ip_addr)) ||
739
      (a[RTA_PRIORITY] && RTA_PAYLOAD(a[RTA_PRIORITY]) != 4) ||
740
      (a[RTA_PREFSRC] && RTA_PAYLOAD(a[RTA_PREFSRC]) != sizeof(ip_addr)) ||
741 c06de722 Ondrej Zajicek
      (a[RTA_FLOW] && RTA_PAYLOAD(a[RTA_FLOW]) != 4))
742 95616c82 Ondrej Zajicek
    {
743
      log(L_ERR "KRT: Malformed message received");
744
      return;
745
    }
746
747
  if (a[RTA_DST])
748
    {
749
      memcpy(&dst, RTA_DATA(a[RTA_DST]), sizeof(dst));
750
      ipa_ntoh(dst);
751
    }
752
753
  if (a[RTA_OIF])
754
    memcpy(&oif, RTA_DATA(a[RTA_OIF]), sizeof(oif));
755
756
  p = nl_table_map[i->rtm_table];        /* Do we know this table? */
757
  DBG("KRT: Got %I/%d, type=%d, oif=%d, table=%d, prid=%d, proto=%s\n", dst, i->rtm_dst_len, i->rtm_type, oif, i->rtm_table, i->rtm_protocol, p ? p->p.name : "(none)");
758
  if (!p)
759
    SKIP("unknown table %d\n", i->rtm_table);
760
761
762
#ifdef IPV6
763
  if (a[RTA_IIF])
764
    SKIP("IIF set\n");
765
#else
766
  if (i->rtm_tos != 0)                        /* We don't support TOS */
767
    SKIP("TOS %02x\n", i->rtm_tos);
768
#endif
769
770
  if (scan && !new)
771
    SKIP("RTM_DELROUTE in scan\n");
772
773
  int c = ipa_classify_net(dst);
774
  if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
775
    SKIP("strange class/scope\n");
776
777
  // ignore rtm_scope, it is not a real scope
778
  // if (i->rtm_scope != RT_SCOPE_UNIVERSE)
779
  //   SKIP("scope %u\n", i->rtm_scope);
780
781
  switch (i->rtm_protocol)
782
    {
783
    case RTPROT_UNSPEC:
784
      SKIP("proto unspec\n");
785
786
    case RTPROT_REDIRECT:
787
      src = KRT_SRC_REDIRECT;
788
      break;
789
790
    case RTPROT_KERNEL:
791
      src = KRT_SRC_KERNEL;
792
      return;
793
794
    case RTPROT_BIRD:
795
      if (!scan)
796
        SKIP("echo\n");
797
      src = KRT_SRC_BIRD;
798
      break;
799
800
    case RTPROT_BOOT:
801
    default:
802
      src = KRT_SRC_ALIEN;
803
    }
804
805
  net *net = net_get(p->p.table, dst, i->rtm_dst_len);
806
807
  rta ra = {
808 094d2bdb Ondrej Zajicek
    .src= p->p.main_source,
809 95616c82 Ondrej Zajicek
    .source = RTS_INHERIT,
810
    .scope = SCOPE_UNIVERSE,
811
    .cast = RTC_UNICAST
812
  };
813
814
  switch (i->rtm_type)
815
    {
816
    case RTN_UNICAST:
817
818
      if (a[RTA_MULTIPATH])
819
        {
820
          ra.dest = RTD_MULTIPATH;
821
          ra.nexthops = nl_parse_multipath(p, a[RTA_MULTIPATH]);
822
          if (!ra.nexthops)
823
            {
824
              log(L_ERR "KRT: Received strange multipath route %I/%d",
825
                  net->n.prefix, net->n.pxlen);
826
              return;
827
            }
828
            
829
          break;
830
        }
831
832
      ra.iface = if_find_by_index(oif);
833
      if (!ra.iface)
834
        {
835
          log(L_ERR "KRT: Received route %I/%d with unknown ifindex %u",
836
              net->n.prefix, net->n.pxlen, oif);
837
          return;
838
        }
839
840
      if (a[RTA_GATEWAY])
841
        {
842
          neighbor *ng;
843
          ra.dest = RTD_ROUTER;
844
          memcpy(&ra.gw, RTA_DATA(a[RTA_GATEWAY]), sizeof(ra.gw));
845
          ipa_ntoh(ra.gw);
846
847 9810d055 Ondrej Zajicek
#ifdef IPV6
848 95616c82 Ondrej Zajicek
          /* Silently skip strange 6to4 routes */
849
          if (ipa_in_net(ra.gw, IPA_NONE, 96))
850
            return;
851 9810d055 Ondrej Zajicek
#endif
852 95616c82 Ondrej Zajicek
853
          ng = neigh_find2(&p->p, &ra.gw, ra.iface,
854
                           (i->rtm_flags & RTNH_F_ONLINK) ? NEF_ONLINK : 0);
855
          if (!ng || (ng->scope == SCOPE_HOST))
856
            {
857
              log(L_ERR "KRT: Received route %I/%d with strange next-hop %I",
858
                  net->n.prefix, net->n.pxlen, ra.gw);
859
              return;
860
            }
861
        }
862
      else
863
        {
864
          ra.dest = RTD_DEVICE;
865
        }
866
867
      break;
868
    case RTN_BLACKHOLE:
869
      ra.dest = RTD_BLACKHOLE;
870
      break;
871
    case RTN_UNREACHABLE:
872
      ra.dest = RTD_UNREACHABLE;
873
      break;
874
    case RTN_PROHIBIT:
875
      ra.dest = RTD_PROHIBIT;
876
      break;
877
    /* FIXME: What about RTN_THROW? */
878
    default:
879
      SKIP("type %d\n", i->rtm_type);
880
      return;
881
    }
882
883
  rte *e = rte_get_temp(&ra);
884
  e->net = net;
885
  e->u.krt.src = src;
886
  e->u.krt.proto = i->rtm_protocol;
887
  e->u.krt.type = i->rtm_type;
888
889
  if (a[RTA_PRIORITY])
890
    memcpy(&e->u.krt.metric, RTA_DATA(a[RTA_PRIORITY]), sizeof(e->u.krt.metric)); 
891
  else
892
    e->u.krt.metric = 0;
893
894
  if (a[RTA_PREFSRC])
895
    {
896
      ip_addr ps;
897
      memcpy(&ps, RTA_DATA(a[RTA_PREFSRC]), sizeof(ps));
898
      ipa_ntoh(ps);
899
900
      ea_list *ea = alloca(sizeof(ea_list) + sizeof(eattr));
901
      ea->next = ra.eattrs;
902
      ra.eattrs = ea;
903
      ea->flags = EALF_SORTED;
904
      ea->count = 1;
905
      ea->attrs[0].id = EA_KRT_PREFSRC;
906
      ea->attrs[0].flags = 0;
907
      ea->attrs[0].type = EAF_TYPE_IP_ADDRESS;
908
      ea->attrs[0].u.ptr = alloca(sizeof(struct adata) + sizeof(ps));
909
      ea->attrs[0].u.ptr->length = sizeof(ps);
910
      memcpy(ea->attrs[0].u.ptr->data, &ps, sizeof(ps));
911
    }
912
913
  if (a[RTA_FLOW])
914
    {
915
      ea_list *ea = alloca(sizeof(ea_list) + sizeof(eattr));
916
      ea->next = ra.eattrs;
917
      ra.eattrs = ea;
918
      ea->flags = EALF_SORTED;
919
      ea->count = 1;
920
      ea->attrs[0].id = EA_KRT_REALM;
921
      ea->attrs[0].flags = 0;
922
      ea->attrs[0].type = EAF_TYPE_INT;
923
      memcpy(&ea->attrs[0].u.data, RTA_DATA(a[RTA_FLOW]), 4);
924
    }
925
926
  if (scan)
927
    krt_got_route(p, e);
928
  else
929
    krt_got_route_async(p, e, new);
930
}
931
932
void
933
krt_do_scan(struct krt_proto *p UNUSED)        /* CONFIG_ALL_TABLES_AT_ONCE => p is NULL */
934
{
935
  struct nlmsghdr *h;
936
937
  nl_request_dump(RTM_GETROUTE);
938
  while (h = nl_get_scan())
939
    if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
940
      nl_parse_route(h, 1);
941
    else
942
      log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type);
943
}
944
945
/*
946
 *        Asynchronous Netlink interface
947
 */
948
949
static sock *nl_async_sk;                /* BIRD socket for asynchronous notifications */
950
static byte *nl_async_rx_buffer;        /* Receive buffer */
951
952
static void
953
nl_async_msg(struct nlmsghdr *h)
954
{
955
  switch (h->nlmsg_type)
956
    {
957
    case RTM_NEWROUTE:
958
    case RTM_DELROUTE:
959
      DBG("KRT: Received async route notification (%d)\n", h->nlmsg_type);
960
      nl_parse_route(h, 0);
961
      break;
962
    case RTM_NEWLINK:
963
    case RTM_DELLINK:
964
      DBG("KRT: Received async link notification (%d)\n", h->nlmsg_type);
965
      nl_parse_link(h, 0);
966
      break;
967
    case RTM_NEWADDR:
968
    case RTM_DELADDR:
969
      DBG("KRT: Received async address notification (%d)\n", h->nlmsg_type);
970
      nl_parse_addr(h);
971
      break;
972
    default:
973
      DBG("KRT: Received unknown async notification (%d)\n", h->nlmsg_type);
974
    }
975
}
976
977
static int
978
nl_async_hook(sock *sk, int size UNUSED)
979
{
980
  struct iovec iov = { nl_async_rx_buffer, NL_RX_SIZE };
981
  struct sockaddr_nl sa;
982
  struct msghdr m = { (struct sockaddr *) &sa, sizeof(sa), &iov, 1, NULL, 0, 0 };
983
  struct nlmsghdr *h;
984
  int x;
985
  unsigned int len;
986
987
  x = recvmsg(sk->fd, &m, 0);
988
  if (x < 0)
989
    {
990
      if (errno == ENOBUFS)
991
        {
992
          /*
993
           *  Netlink reports some packets have been thrown away.
994
           *  One day we might react to it by asking for route table
995
           *  scan in near future.
996
           */
997
          return 1;        /* More data are likely to be ready */
998
        }
999
      else if (errno != EWOULDBLOCK)
1000
        log(L_ERR "Netlink recvmsg: %m");
1001
      return 0;
1002
    }
1003
  if (sa.nl_pid)                /* It isn't from the kernel */
1004
    {
1005
      DBG("Non-kernel packet\n");
1006
      return 1;
1007
    }
1008
  h = (void *) nl_async_rx_buffer;
1009
  len = x;
1010
  if (m.msg_flags & MSG_TRUNC)
1011
    {
1012
      log(L_WARN "Netlink got truncated asynchronous message");
1013
      return 1;
1014
    }
1015
  while (NLMSG_OK(h, len))
1016
    {
1017
      nl_async_msg(h);
1018
      h = NLMSG_NEXT(h, len);
1019
    }
1020
  if (len)
1021
    log(L_WARN "nl_async_hook: Found packet remnant of size %d", len);
1022
  return 1;
1023
}
1024
1025
static void
1026
nl_open_async(void)
1027
{
1028
  sock *sk;
1029
  struct sockaddr_nl sa;
1030
  int fd;
1031
1032 f83ce94d Ondrej Zajicek
  if (nl_async_sk)
1033 95616c82 Ondrej Zajicek
    return;
1034
1035
  DBG("KRT: Opening async netlink socket\n");
1036
1037
  fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
1038
  if (fd < 0)
1039
    {
1040
      log(L_ERR "Unable to open asynchronous rtnetlink socket: %m");
1041
      return;
1042
    }
1043
1044
  bzero(&sa, sizeof(sa));
1045
  sa.nl_family = AF_NETLINK;
1046
#ifdef IPV6
1047
  sa.nl_groups = RTMGRP_LINK | RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_ROUTE;
1048
#else
1049
  sa.nl_groups = RTMGRP_LINK | RTMGRP_IPV4_IFADDR | RTMGRP_IPV4_ROUTE;
1050
#endif
1051
  if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0)
1052
    {
1053
      log(L_ERR "Unable to bind asynchronous rtnetlink socket: %m");
1054 f83ce94d Ondrej Zajicek
      close(fd);
1055 95616c82 Ondrej Zajicek
      return;
1056
    }
1057
1058 f83ce94d Ondrej Zajicek
  nl_async_rx_buffer = xmalloc(NL_RX_SIZE);
1059
1060 95616c82 Ondrej Zajicek
  sk = nl_async_sk = sk_new(krt_pool);
1061
  sk->type = SK_MAGIC;
1062
  sk->rx_hook = nl_async_hook;
1063
  sk->fd = fd;
1064
  if (sk_open(sk))
1065
    bug("Netlink: sk_open failed");
1066
}
1067
1068
/*
1069
 *        Interface to the UNIX krt module
1070
 */
1071
1072
static u8 nl_cf_table[(NL_NUM_TABLES+7) / 8];
1073
1074
void
1075 c6964c30 Ondrej Zajicek
krt_sys_start(struct krt_proto *p)
1076 95616c82 Ondrej Zajicek
{
1077
  nl_table_map[KRT_CF->sys.table_id] = p;
1078 c6964c30 Ondrej Zajicek
1079
  nl_open();
1080
  nl_open_async();
1081 95616c82 Ondrej Zajicek
}
1082
1083
void
1084 c6964c30 Ondrej Zajicek
krt_sys_shutdown(struct krt_proto *p UNUSED)
1085 95616c82 Ondrej Zajicek
{
1086 f83ce94d Ondrej Zajicek
  nl_table_map[KRT_CF->sys.table_id] = NULL;
1087 95616c82 Ondrej Zajicek
}
1088
1089
int
1090
krt_sys_reconfigure(struct krt_proto *p UNUSED, struct krt_config *n, struct krt_config *o)
1091
{
1092
  return n->sys.table_id == o->sys.table_id;
1093
}
1094
1095
1096
void
1097
krt_sys_preconfig(struct config *c UNUSED)
1098
{
1099
  bzero(&nl_cf_table, sizeof(nl_cf_table));
1100
}
1101
1102
void
1103
krt_sys_postconfig(struct krt_config *x)
1104
{
1105
  int id = x->sys.table_id;
1106
1107
  if (nl_cf_table[id/8] & (1 << (id%8)))
1108
    cf_error("Multiple kernel syncers defined for table #%d", id);
1109
  nl_cf_table[id/8] |= (1 << (id%8));
1110
}
1111
1112
void
1113
krt_sys_init_config(struct krt_config *cf)
1114
{
1115
  cf->sys.table_id = RT_TABLE_MAIN;
1116
}
1117
1118
void
1119
krt_sys_copy_config(struct krt_config *d, struct krt_config *s)
1120
{
1121
  d->sys.table_id = s->sys.table_id;
1122
}
1123
1124
1125
1126
void
1127
kif_sys_start(struct kif_proto *p UNUSED)
1128
{
1129
  nl_open();
1130
  nl_open_async();
1131
}
1132
1133
void
1134
kif_sys_shutdown(struct kif_proto *p UNUSED)
1135
{
1136
}