Statistics
| Branch: | Revision:

iof-bird-daemon / sysdep / linux / netlink / netlink.c @ 83696b39

History | View | Annotate | Download (24.8 KB)

1
/*
2
 *        BIRD -- Linux Netlink Interface
3
 *
4
 *        (c) 1999--2000 Martin Mares <mj@ucw.cz>
5
 *
6
 *        Can be freely distributed and used under the terms of the GNU GPL.
7
 */
8

    
9
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <errno.h>
14

    
15
#undef LOCAL_DEBUG
16

    
17
#include "nest/bird.h"
18
#include "nest/route.h"
19
#include "nest/protocol.h"
20
#include "nest/iface.h"
21
#include "lib/timer.h"
22
#include "lib/unix.h"
23
#include "lib/krt.h"
24
#include "lib/socket.h"
25
#include "lib/string.h"
26
#include "conf/conf.h"
27

    
28
#include <asm/types.h>
29
#include <linux/if.h>
30
#include <linux/netlink.h>
31
#include <linux/rtnetlink.h>
32

    
33
#ifndef MSG_TRUNC                        /* Hack: Several versions of glibc miss this one :( */
34
#define MSG_TRUNC 0x20
35
#endif
36

    
37
#ifndef IFF_LOWER_UP
38
#define IFF_LOWER_UP 0x10000
39
#endif
40

    
41
/*
42
 *        Synchronous Netlink interface
43
 */
44

    
45
struct nl_sock
46
{
47
  int fd;
48
  u32 seq;
49
  byte *rx_buffer;                        /* Receive buffer */
50
  struct nlmsghdr *last_hdr;                /* Recently received packet */
51
  unsigned int last_size;
52
};
53

    
54
#define NL_RX_SIZE 8192
55

    
56
static struct nl_sock nl_scan = {.fd = -1};        /* Netlink socket for synchronous scan */
57
static struct nl_sock nl_req  = {.fd = -1};        /* Netlink socket for requests */
58

    
59
static void
60
nl_open_sock(struct nl_sock *nl)
61
{
62
  if (nl->fd < 0)
63
    {
64
      nl->fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
65
      if (nl->fd < 0)
66
        die("Unable to open rtnetlink socket: %m");
67
      nl->seq = now;
68
      nl->rx_buffer = xmalloc(NL_RX_SIZE);
69
      nl->last_hdr = NULL;
70
      nl->last_size = 0;
71
    }
72
}
73

    
74
static void
75
nl_open(void)
76
{
77
  nl_open_sock(&nl_scan);
78
  nl_open_sock(&nl_req);
79
}
80

    
81
static void
82
nl_send(struct nl_sock *nl, struct nlmsghdr *nh)
83
{
84
  struct sockaddr_nl sa;
85

    
86
  memset(&sa, 0, sizeof(sa));
87
  sa.nl_family = AF_NETLINK;
88
  nh->nlmsg_pid = 0;
89
  nh->nlmsg_seq = ++(nl->seq);
90
  if (sendto(nl->fd, nh, nh->nlmsg_len, 0, (struct sockaddr *)&sa, sizeof(sa)) < 0)
91
    die("rtnetlink sendto: %m");
92
  nl->last_hdr = NULL;
93
}
94

    
95
static void
96
nl_request_dump(int cmd)
97
{
98
  struct {
99
    struct nlmsghdr nh;
100
    struct rtgenmsg g;
101
  } req;
102
  req.nh.nlmsg_type = cmd;
103
  req.nh.nlmsg_len = sizeof(req);
104
  req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
105
  /* Is it important which PF_* is used for link-level interface scan?
106
     It seems that some information is available only when PF_INET is used. */
107
  req.g.rtgen_family = (cmd == RTM_GETLINK) ? PF_INET : BIRD_PF;
108
  nl_send(&nl_scan, &req.nh);
109
}
110

    
111
static struct nlmsghdr *
112
nl_get_reply(struct nl_sock *nl)
113
{
114
  for(;;)
115
    {
116
      if (!nl->last_hdr)
117
        {
118
          struct iovec iov = { nl->rx_buffer, NL_RX_SIZE };
119
          struct sockaddr_nl sa;
120
          struct msghdr m = { (struct sockaddr *) &sa, sizeof(sa), &iov, 1, NULL, 0, 0 };
121
          int x = recvmsg(nl->fd, &m, 0);
122
          if (x < 0)
123
            die("nl_get_reply: %m");
124
          if (sa.nl_pid)                /* It isn't from the kernel */
125
            {
126
              DBG("Non-kernel packet\n");
127
              continue;
128
            }
129
          nl->last_size = x;
130
          nl->last_hdr = (void *) nl->rx_buffer;
131
          if (m.msg_flags & MSG_TRUNC)
132
            bug("nl_get_reply: got truncated reply which should be impossible");
133
        }
134
      if (NLMSG_OK(nl->last_hdr, nl->last_size))
135
        {
136
          struct nlmsghdr *h = nl->last_hdr;
137
          nl->last_hdr = NLMSG_NEXT(h, nl->last_size);
138
          if (h->nlmsg_seq != nl->seq)
139
            {
140
              log(L_WARN "nl_get_reply: Ignoring out of sequence netlink packet (%x != %x)",
141
                  h->nlmsg_seq, nl->seq);
142
              continue;
143
            }
144
          return h;
145
        }
146
      if (nl->last_size)
147
        log(L_WARN "nl_get_reply: Found packet remnant of size %d", nl->last_size);
148
      nl->last_hdr = NULL;
149
    }
150
}
151

    
152
static struct rate_limit rl_netlink_err;
153

    
154
static int
155
nl_error(struct nlmsghdr *h)
156
{
157
  struct nlmsgerr *e;
158
  int ec;
159

    
160
  if (h->nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr)))
161
    {
162
      log(L_WARN "Netlink: Truncated error message received");
163
      return ENOBUFS;
164
    }
165
  e = (struct nlmsgerr *) NLMSG_DATA(h);
166
  ec = -e->error;
167
  if (ec)
168
    log_rl(&rl_netlink_err, L_WARN "Netlink: %s", strerror(ec));
169
  return ec;
170
}
171

    
172
static struct nlmsghdr *
173
nl_get_scan(void)
174
{
175
  struct nlmsghdr *h = nl_get_reply(&nl_scan);
176

    
177
  if (h->nlmsg_type == NLMSG_DONE)
178
    return NULL;
179
  if (h->nlmsg_type == NLMSG_ERROR)
180
    {
181
      nl_error(h);
182
      return NULL;
183
    }
184
  return h;
185
}
186

    
187
static int
188
nl_exchange(struct nlmsghdr *pkt)
189
{
190
  struct nlmsghdr *h;
191

    
192
  nl_send(&nl_req, pkt);
193
  for(;;)
194
    {
195
      h = nl_get_reply(&nl_req);
196
      if (h->nlmsg_type == NLMSG_ERROR)
197
        break;
198
      log(L_WARN "nl_exchange: Unexpected reply received");
199
    }
200
  return nl_error(h);
201
}
202

    
203
/*
204
 *        Netlink attributes
205
 */
206

    
207
static int nl_attr_len;
208

    
209
static void *
210
nl_checkin(struct nlmsghdr *h, int lsize)
211
{
212
  nl_attr_len = h->nlmsg_len - NLMSG_LENGTH(lsize);
213
  if (nl_attr_len < 0)
214
    {
215
      log(L_ERR "nl_checkin: underrun by %d bytes", -nl_attr_len);
216
      return NULL;
217
    }
218
  return NLMSG_DATA(h);
219
}
220

    
221
static int
222
nl_parse_attrs(struct rtattr *a, struct rtattr **k, int ksize)
223
{
224
  int max = ksize / sizeof(struct rtattr *);
225
  bzero(k, ksize);
226
  while (RTA_OK(a, nl_attr_len))
227
    {
228
      if (a->rta_type < max)
229
        k[a->rta_type] = a;
230
      a = RTA_NEXT(a, nl_attr_len);
231
    }
232
  if (nl_attr_len)
233
    {
234
      log(L_ERR "nl_parse_attrs: remnant of size %d", nl_attr_len);
235
      return 0;
236
    }
237
  else
238
    return 1;
239
}
240

    
241
/*
 * Append one attribute to the Netlink message @h.
 *
 * @bufsize: total size of the buffer holding @h
 * @code:    attribute type
 * @data:    attribute payload, @dlen bytes
 *
 * The attribute is placed at the aligned end of the message and
 * h->nlmsg_len is extended to cover it. Calls bug() when the attribute
 * would not fit into the buffer.
 */
void
nl_add_attr(struct nlmsghdr *h, unsigned bufsize, unsigned code,
            void *data, unsigned dlen)
{
  unsigned start = NLMSG_ALIGN(h->nlmsg_len);
  unsigned alen = RTA_LENGTH(dlen);
  unsigned end = start + alen;
  struct rtattr *attr;

  if (end > bufsize)
    bug("nl_add_attr: packet buffer overflow");

  attr = (struct rtattr *) ((char *) h + start);
  attr->rta_len = alen;
  attr->rta_type = code;
  h->nlmsg_len = end;
  memcpy(RTA_DATA(attr), data, dlen);
}
257

    
258
static inline void
259
nl_add_attr_u32(struct nlmsghdr *h, unsigned bufsize, int code, u32 data)
260
{
261
  nl_add_attr(h, bufsize, code, &data, 4);
262
}
263

    
264
static inline void
265
nl_add_attr_ipa(struct nlmsghdr *h, unsigned bufsize, int code, ip_addr ipa)
266
{
267
  ipa_hton(ipa);
268
  nl_add_attr(h, bufsize, code, &ipa, sizeof(ipa));
269
}
270

    
271
#define RTNH_SIZE (sizeof(struct rtnexthop) + sizeof(struct rtattr) + sizeof(ip_addr))
272

    
273
static inline void
274
add_mpnexthop(char *buf, ip_addr ipa, unsigned iface, unsigned char weight)
275
{
276
  struct rtnexthop *nh = (void *) buf;
277
  struct rtattr *rt = (void *) (buf + sizeof(*nh));
278
  nh->rtnh_len = RTNH_SIZE;
279
  nh->rtnh_flags = 0;
280
  nh->rtnh_hops = weight;
281
  nh->rtnh_ifindex = iface;
282
  rt->rta_len = sizeof(*rt) + sizeof(ipa);
283
  rt->rta_type = RTA_GATEWAY;
284
  ipa_hton(ipa);
285
  memcpy(buf + sizeof(*nh) + sizeof(*rt), &ipa, sizeof(ipa));
286
}
287

    
288

    
289
static void
290
nl_add_multipath(struct nlmsghdr *h, unsigned bufsize, struct mpnh *nh)
291
{
292
  unsigned len = sizeof(struct rtattr);
293
  unsigned pos = NLMSG_ALIGN(h->nlmsg_len);
294
  char *buf = (char *)h + pos;
295
  struct rtattr *rt = (void *) buf;
296
  buf += len;
297
  
298
  for (; nh; nh = nh->next)
299
    {
300
      len += RTNH_SIZE;
301
      if (pos + len > bufsize)
302
        bug("nl_add_multipath: packet buffer overflow");
303

    
304
      add_mpnexthop(buf, nh->gw, nh->iface->index, nh->weight);
305
      buf += RTNH_SIZE;
306
    }
307

    
308
  rt->rta_type = RTA_MULTIPATH;
309
  rt->rta_len = len;
310
  h->nlmsg_len = pos + len;
311
}
312

    
313

    
314
static struct mpnh *
315
nl_parse_multipath(struct krt_proto *p, struct rtattr *ra)
316
{
317
  /* Temporary buffer for multicast nexthops */
318
  static struct mpnh *nh_buffer;
319
  static int nh_buf_size;        /* in number of structures */
320
  static int nh_buf_used;
321

    
322
  struct rtattr *a[RTA_CACHEINFO+1];
323
  struct rtnexthop *nh = RTA_DATA(ra);
324
  struct mpnh *rv, *first, **last;
325
  int len = RTA_PAYLOAD(ra);
326

    
327
  first = NULL;
328
  last = &first;
329
  nh_buf_used = 0;
330

    
331
  while (len)
332
    {
333
      /* Use RTNH_OK(nh,len) ?? */
334
      if ((len < sizeof(*nh)) || (len < nh->rtnh_len))
335
        return NULL;
336

    
337
      if (nh_buf_used == nh_buf_size)
338
      {
339
        nh_buf_size = nh_buf_size ? (nh_buf_size * 2) : 4;
340
        nh_buffer = xrealloc(nh_buffer, nh_buf_size * sizeof(struct mpnh));
341
      }
342
      *last = rv = nh_buffer + nh_buf_used++;
343
      rv->next = NULL;
344
      last = &(rv->next);
345

    
346
      rv->weight = nh->rtnh_hops;
347
      rv->iface = if_find_by_index(nh->rtnh_ifindex);
348
      if (!rv->iface)
349
        return NULL;
350

    
351
      /* Nonexistent RTNH_PAYLOAD ?? */
352
      nl_attr_len = nh->rtnh_len - RTNH_LENGTH(0);
353
      nl_parse_attrs(RTNH_DATA(nh), a, sizeof(a));
354
      if (a[RTA_GATEWAY])
355
        {
356
          if (RTA_PAYLOAD(a[RTA_GATEWAY]) != sizeof(ip_addr))
357
            return NULL;
358

    
359
          memcpy(&rv->gw, RTA_DATA(a[RTA_GATEWAY]), sizeof(ip_addr));
360
          ipa_ntoh(rv->gw);
361

    
362
          neighbor *ng = neigh_find2(&p->p, &rv->gw, rv->iface,
363
                                     (nh->rtnh_flags & RTNH_F_ONLINK) ? NEF_ONLINK : 0);
364
          if (!ng || (ng->scope == SCOPE_HOST))
365
            return NULL;
366
        }
367
      else
368
        return NULL;
369

    
370
      len -= NLMSG_ALIGN(nh->rtnh_len);
371
      nh = RTNH_NEXT(nh);
372
    }
373

    
374
  return first;
375
}
376

    
377

    
378
/*
379
 *        Scanning of interfaces
380
 */
381

    
382
static void
383
nl_parse_link(struct nlmsghdr *h, int scan)
384
{
385
  struct ifinfomsg *i;
386
  struct rtattr *a[IFLA_WIRELESS+1];
387
  int new = h->nlmsg_type == RTM_NEWLINK;
388
  struct iface f;
389
  struct iface *ifi;
390
  char *name;
391
  u32 mtu;
392
  unsigned int fl;
393

    
394
  if (!(i = nl_checkin(h, sizeof(*i))) || !nl_parse_attrs(IFLA_RTA(i), a, sizeof(a)))
395
    return;
396
  if (!a[IFLA_IFNAME] || RTA_PAYLOAD(a[IFLA_IFNAME]) < 2 ||
397
      !a[IFLA_MTU] || RTA_PAYLOAD(a[IFLA_MTU]) != 4)
398
    {
399
      if (scan || !a[IFLA_WIRELESS])
400
        log(L_ERR "nl_parse_link: Malformed message received");
401
      return;
402
    }
403
  name = RTA_DATA(a[IFLA_IFNAME]);
404
  memcpy(&mtu, RTA_DATA(a[IFLA_MTU]), sizeof(u32));
405

    
406
  ifi = if_find_by_index(i->ifi_index);
407
  if (!new)
408
    {
409
      DBG("KIF: IF%d(%s) goes down\n", i->ifi_index, name);
410
      if (ifi && !scan)
411
        {
412
          memcpy(&f, ifi, sizeof(struct iface));
413
          f.flags |= IF_SHUTDOWN;
414
          if_update(&f);
415
        }
416
    }
417
  else
418
    {
419
      DBG("KIF: IF%d(%s) goes up (mtu=%d,flg=%x)\n", i->ifi_index, name, mtu, i->ifi_flags);
420
      if (ifi)
421
        memcpy(&f, ifi, sizeof(f));
422
      else
423
        {
424
          bzero(&f, sizeof(f));
425
          f.index = i->ifi_index;
426
        }
427
      strncpy(f.name, RTA_DATA(a[IFLA_IFNAME]), sizeof(f.name)-1);
428
      f.mtu = mtu;
429
      f.flags = 0;
430
      fl = i->ifi_flags;
431
      if (fl & IFF_UP)
432
        f.flags |= IF_ADMIN_UP;
433
      if (fl & IFF_LOWER_UP)
434
        f.flags |= IF_LINK_UP;
435
      if (fl & IFF_LOOPBACK)                /* Loopback */
436
        f.flags |= IF_MULTIACCESS | IF_LOOPBACK | IF_IGNORE;
437
      else if (fl & IFF_POINTOPOINT)        /* PtP */
438
        f.flags |= IF_MULTICAST;
439
      else if (fl & IFF_BROADCAST)        /* Broadcast */
440
        f.flags |= IF_MULTIACCESS | IF_BROADCAST | IF_MULTICAST;
441
      else
442
        f.flags |= IF_MULTIACCESS;        /* NBMA */
443
      if_update(&f);
444
    }
445
}
446

    
447
static void
448
nl_parse_addr(struct nlmsghdr *h)
449
{
450
  struct ifaddrmsg *i;
451
  struct rtattr *a[IFA_ANYCAST+1];
452
  int new = h->nlmsg_type == RTM_NEWADDR;
453
  struct ifa ifa;
454
  struct iface *ifi;
455
  int scope;
456

    
457
  if (!(i = nl_checkin(h, sizeof(*i))) || !nl_parse_attrs(IFA_RTA(i), a, sizeof(a)))
458
    return;
459
  if (i->ifa_family != BIRD_AF)
460
    return;
461
  if (!a[IFA_ADDRESS] || RTA_PAYLOAD(a[IFA_ADDRESS]) != sizeof(ip_addr)
462
#ifdef IPV6
463
      || a[IFA_LOCAL] && RTA_PAYLOAD(a[IFA_LOCAL]) != sizeof(ip_addr)
464
#else
465
      || !a[IFA_LOCAL] || RTA_PAYLOAD(a[IFA_LOCAL]) != sizeof(ip_addr)
466
      || (a[IFA_BROADCAST] && RTA_PAYLOAD(a[IFA_BROADCAST]) != sizeof(ip_addr))
467
#endif
468
      )
469
    {
470
      log(L_ERR "nl_parse_addr: Malformed message received");
471
      return;
472
    }
473

    
474
  ifi = if_find_by_index(i->ifa_index);
475
  if (!ifi)
476
    {
477
      log(L_ERR "KIF: Received address message for unknown interface %d", i->ifa_index);
478
      return;
479
    }
480

    
481
  bzero(&ifa, sizeof(ifa));
482
  ifa.iface = ifi;
483
  if (i->ifa_flags & IFA_F_SECONDARY)
484
    ifa.flags |= IA_SECONDARY;
485

    
486
  /* IFA_LOCAL can be unset for IPv6 interfaces */
487
  memcpy(&ifa.ip, RTA_DATA(a[IFA_LOCAL] ? : a[IFA_ADDRESS]), sizeof(ifa.ip));
488
  ipa_ntoh(ifa.ip);
489
  ifa.pxlen = i->ifa_prefixlen;
490
  if (i->ifa_prefixlen > BITS_PER_IP_ADDRESS)
491
    {
492
      log(L_ERR "KIF: Invalid prefix length for interface %s: %d", ifi->name, i->ifa_prefixlen);
493
      new = 0;
494
    }
495
  if (i->ifa_prefixlen == BITS_PER_IP_ADDRESS)
496
    {
497
      ip_addr addr;
498
      memcpy(&addr, RTA_DATA(a[IFA_ADDRESS]), sizeof(addr));
499
      ipa_ntoh(addr);
500
      ifa.prefix = ifa.brd = addr;
501

    
502
      /* It is either a host address or a peer address */
503
      if (ipa_equal(ifa.ip, addr))
504
        ifa.flags |= IA_HOST;
505
      else
506
        {
507
          ifa.flags |= IA_PEER;
508
          ifa.opposite = addr;
509
        }
510
    }
511
  else
512
    {
513
      ip_addr netmask = ipa_mkmask(ifa.pxlen);
514
      ifa.prefix = ipa_and(ifa.ip, netmask);
515
      ifa.brd = ipa_or(ifa.ip, ipa_not(netmask));
516
      if (i->ifa_prefixlen == BITS_PER_IP_ADDRESS - 1)
517
        ifa.opposite = ipa_opposite_m1(ifa.ip);
518

    
519
#ifndef IPV6
520
      if (i->ifa_prefixlen == BITS_PER_IP_ADDRESS - 2)
521
        ifa.opposite = ipa_opposite_m2(ifa.ip);
522

    
523
      if ((ifi->flags & IF_BROADCAST) && a[IFA_BROADCAST])
524
        {
525
          ip_addr xbrd;
526
          memcpy(&xbrd, RTA_DATA(a[IFA_BROADCAST]), sizeof(xbrd));
527
          ipa_ntoh(xbrd);
528
          if (ipa_equal(xbrd, ifa.prefix) || ipa_equal(xbrd, ifa.brd))
529
            ifa.brd = xbrd;
530
          else if (ifi->flags & IF_TMP_DOWN) /* Complain only during the first scan */
531
            log(L_ERR "KIF: Invalid broadcast address %I for %s", xbrd, ifi->name);
532
        }
533
#endif
534
    }
535

    
536
  scope = ipa_classify(ifa.ip);
537
  if (scope < 0)
538
    {
539
      log(L_ERR "KIF: Invalid interface address %I for %s", ifa.ip, ifi->name);
540
      return;
541
    }
542
  ifa.scope = scope & IADDR_SCOPE_MASK;
543

    
544
  DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %I/%d, brd %I, opp %I\n",
545
      ifi->index, ifi->name,
546
      new ? "added" : "removed",
547
      ifa.ip, ifa.flags, ifa.prefix, ifa.pxlen, ifa.brd, ifa.opposite);
548
  if (new)
549
    ifa_update(&ifa);
550
  else
551
    ifa_delete(&ifa);
552
}
553

    
554
void
555
krt_if_scan(struct kif_proto *p UNUSED)
556
{
557
  struct nlmsghdr *h;
558

    
559
  if_start_update();
560

    
561
  nl_request_dump(RTM_GETLINK);
562
  while (h = nl_get_scan())
563
    if (h->nlmsg_type == RTM_NEWLINK || h->nlmsg_type == RTM_DELLINK)
564
      nl_parse_link(h, 1);
565
    else
566
      log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
567

    
568
  nl_request_dump(RTM_GETADDR);
569
  while (h = nl_get_scan())
570
    if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR)
571
      nl_parse_addr(h);
572
    else
573
      log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
574

    
575
  if_end_update();
576
}
577

    
578
/*
579
 *        Routes
580
 */
581

    
582
static struct krt_proto *nl_table_map[NL_NUM_TABLES];
583

    
584
int
585
krt_capable(rte *e)
586
{
587
  rta *a = e->attrs;
588

    
589
  if (a->cast != RTC_UNICAST)
590
    return 0;
591

    
592
  switch (a->dest)
593
    {
594
    case RTD_ROUTER:
595
      if (ipa_has_link_scope(a->gw) && (a->iface == NULL))
596
        return 0;
597
    case RTD_DEVICE:
598
    case RTD_BLACKHOLE:
599
    case RTD_UNREACHABLE:
600
    case RTD_PROHIBIT:
601
    case RTD_MULTIPATH:
602
      break;
603
    default:
604
      return 0;
605
    }
606
  return 1;
607
}
608

    
609
static inline int
610
nh_bufsize(struct mpnh *nh)
611
{
612
  int rv = 0;
613
  for (; nh != NULL; nh = nh->next)
614
    rv += RTNH_SIZE;
615
  return rv;
616
}
617

    
618
static void
619
nl_send_route(struct krt_proto *p, rte *e, int new)
620
{
621
  net *net = e->net;
622
  rta *a = e->attrs;
623
  struct {
624
    struct nlmsghdr h;
625
    struct rtmsg r;
626
    char buf[64 + nh_bufsize(a->nexthops)];
627
  } r;
628

    
629
  DBG("nl_send_route(%I/%d,new=%d)\n", net->n.prefix, net->n.pxlen, new);
630

    
631
  bzero(&r.h, sizeof(r.h));
632
  bzero(&r.r, sizeof(r.r));
633
  r.h.nlmsg_type = new ? RTM_NEWROUTE : RTM_DELROUTE;
634
  r.h.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
635
  r.h.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | (new ? NLM_F_CREATE|NLM_F_EXCL : 0);
636

    
637
  r.r.rtm_family = BIRD_AF;
638
  r.r.rtm_dst_len = net->n.pxlen;
639
  r.r.rtm_tos = 0;
640
  r.r.rtm_table = KRT_CF->scan.table_id;
641
  r.r.rtm_protocol = RTPROT_BIRD;
642
  r.r.rtm_scope = RT_SCOPE_UNIVERSE;
643
  nl_add_attr_ipa(&r.h, sizeof(r), RTA_DST, net->n.prefix);
644
  switch (a->dest)
645
    {
646
    case RTD_ROUTER:
647
      r.r.rtm_type = RTN_UNICAST;
648
      nl_add_attr_ipa(&r.h, sizeof(r), RTA_GATEWAY, a->gw);
649

    
650
      /* a->iface != NULL checked in krt_capable() */
651
      if (ipa_has_link_scope(a->gw))
652
              nl_add_attr_u32(&r.h, sizeof(r), RTA_OIF, a->iface->index);
653

    
654
      break;
655
    case RTD_DEVICE:
656
      if (!a->iface)
657
        return;
658
      r.r.rtm_type = RTN_UNICAST;
659
      nl_add_attr_u32(&r.h, sizeof(r), RTA_OIF, a->iface->index);
660
      break;
661
    case RTD_BLACKHOLE:
662
      r.r.rtm_type = RTN_BLACKHOLE;
663
      break;
664
    case RTD_UNREACHABLE:
665
      r.r.rtm_type = RTN_UNREACHABLE;
666
      break;
667
    case RTD_PROHIBIT:
668
      r.r.rtm_type = RTN_PROHIBIT;
669
      break;
670
    case RTD_MULTIPATH:
671
      r.r.rtm_type = RTN_UNICAST;
672
      nl_add_multipath(&r.h, sizeof(r), a->nexthops);
673
      break;
674
    default:
675
      bug("krt_capable inconsistent with nl_send_route");
676
    }
677

    
678
  nl_exchange(&r.h);
679
}
680

    
681
void
682
krt_set_notify(struct krt_proto *p, net *n UNUSED, rte *new, rte *old)
683
{
684
  if (old)
685
    nl_send_route(p, old, 0);
686

    
687
  if (new)
688
    nl_send_route(p, new, 1);
689
}
690

    
691

    
692
#define SKIP(ARG...) do { DBG("KRT: Ignoring route - " ARG); return; } while(0)
693

    
694
static void
695
nl_parse_route(struct nlmsghdr *h, int scan)
696
{
697
  struct krt_proto *p;
698
  struct rtmsg *i;
699
  struct rtattr *a[RTA_CACHEINFO+1];
700
  int new = h->nlmsg_type == RTM_NEWROUTE;
701
  ip_addr dst;
702
  rte *e;
703
  net *net;
704
  u32 oif;
705
  int src;
706

    
707
  if (!(i = nl_checkin(h, sizeof(*i))) || !nl_parse_attrs(RTM_RTA(i), a, sizeof(a)))
708
    return;
709
  if (i->rtm_family != BIRD_AF)
710
    return;
711
  if ((a[RTA_DST] && RTA_PAYLOAD(a[RTA_DST]) != sizeof(ip_addr)) ||
712
      (a[RTA_OIF] && RTA_PAYLOAD(a[RTA_OIF]) != 4) ||
713
      (a[RTA_PRIORITY] && RTA_PAYLOAD(a[RTA_PRIORITY]) != 4) ||
714
#ifdef IPV6
715
      (a[RTA_IIF] && RTA_PAYLOAD(a[RTA_IIF]) != 4) ||
716
#endif
717
      (a[RTA_GATEWAY] && RTA_PAYLOAD(a[RTA_GATEWAY]) != sizeof(ip_addr)))
718
    {
719
      log(L_ERR "KRT: Malformed message received");
720
      return;
721
    }
722

    
723
  if (a[RTA_DST])
724
    {
725
      memcpy(&dst, RTA_DATA(a[RTA_DST]), sizeof(dst));
726
      ipa_ntoh(dst);
727
    }
728
  else
729
    dst = IPA_NONE;
730

    
731
  if (a[RTA_OIF])
732
    memcpy(&oif, RTA_DATA(a[RTA_OIF]), sizeof(oif));
733
  else
734
    oif = ~0;
735

    
736
  DBG("KRT: Got %I/%d, type=%d, oif=%d, table=%d, prid=%d, proto=%s\n", dst, i->rtm_dst_len, i->rtm_type, oif, i->rtm_table, i->rtm_protocol, p->p.name);
737

    
738
  p = nl_table_map[i->rtm_table];        /* Do we know this table? */
739
  if (!p)
740
    SKIP("unknown table %d", i->rtm_table);
741

    
742
#ifdef IPV6
743
  if (a[RTA_IIF])
744
    SKIP("IIF set\n");
745
#else
746
  if (i->rtm_tos != 0)                        /* We don't support TOS */
747
    SKIP("TOS %02x\n", i->rtm_tos);
748
#endif
749

    
750
  if (scan && !new)
751
    SKIP("RTM_DELROUTE in scan\n");
752

    
753
  int c = ipa_classify_net(dst);
754
  if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
755
    SKIP("strange class/scope\n");
756

    
757
  // ignore rtm_scope, it is not a real scope
758
  // if (i->rtm_scope != RT_SCOPE_UNIVERSE)
759
  //   SKIP("scope %u\n", i->rtm_scope);
760

    
761
  switch (i->rtm_protocol)
762
    {
763
    case RTPROT_UNSPEC:
764
      SKIP("proto unspec\n");
765

    
766
    case RTPROT_REDIRECT:
767
      src = KRT_SRC_REDIRECT;
768
      break;
769

    
770
    case RTPROT_KERNEL:
771
      src = KRT_SRC_KERNEL;
772
      return;
773

    
774
    case RTPROT_BIRD:
775
      if (!scan)
776
        SKIP("echo\n");
777
      src = KRT_SRC_BIRD;
778
      break;
779

    
780
    case RTPROT_BOOT:
781
    default:
782
      src = KRT_SRC_ALIEN;
783
    }
784

    
785
  net = net_get(p->p.table, dst, i->rtm_dst_len);
786

    
787
  rta ra = {
788
    .proto = &p->p,
789
    .source = RTS_INHERIT,
790
    .scope = SCOPE_UNIVERSE,
791
    .cast = RTC_UNICAST
792
  };
793

    
794
  switch (i->rtm_type)
795
    {
796
    case RTN_UNICAST:
797

    
798
      if (a[RTA_MULTIPATH])
799
        {
800
          ra.dest = RTD_MULTIPATH;
801
          ra.nexthops = nl_parse_multipath(p, a[RTA_MULTIPATH]);
802
          if (!ra.nexthops)
803
            {
804
              log(L_ERR "KRT: Received strange multipath route %I/%d",
805
                  net->n.prefix, net->n.pxlen);
806
              return;
807
            }
808
            
809
          break;
810
        }
811

    
812
      ra.iface = if_find_by_index(oif);
813
      if (!ra.iface)
814
        {
815
          log(L_ERR "KRT: Received route %I/%d with unknown ifindex %u",
816
              net->n.prefix, net->n.pxlen, oif);
817
          return;
818
        }
819

    
820
      if (a[RTA_GATEWAY])
821
        {
822
          neighbor *ng;
823
          ra.dest = RTD_ROUTER;
824
          memcpy(&ra.gw, RTA_DATA(a[RTA_GATEWAY]), sizeof(ra.gw));
825
          ipa_ntoh(ra.gw);
826

    
827
          /* Silently skip strange 6to4 routes */
828
          if (ipa_in_net(ra.gw, IPA_NONE, 96))
829
            return;
830

    
831
          ng = neigh_find2(&p->p, &ra.gw, ra.iface,
832
                           (i->rtm_flags & RTNH_F_ONLINK) ? NEF_ONLINK : 0);
833
          if (!ng || (ng->scope == SCOPE_HOST))
834
            {
835
              log(L_ERR "KRT: Received route %I/%d with strange next-hop %I",
836
                  net->n.prefix, net->n.pxlen, ra.gw);
837
              return;
838
            }
839
        }
840
      else
841
        {
842
          ra.dest = RTD_DEVICE;
843

    
844
          /*
845
           * In Linux IPv6, 'native' device routes have proto
846
           * RTPROT_BOOT and not RTPROT_KERNEL (which they have in
847
           * IPv4 and which is expected). We cannot distinguish
848
           * 'native' and user defined device routes, so we ignore all
849
           * such device routes and for consistency, we have the same
850
           * behavior in IPv4. Anyway, users should use RTPROT_STATIC
851
           * for their 'alien' routes.
852
           */
853

    
854
          if (i->rtm_protocol == RTPROT_BOOT)
855
            src = KRT_SRC_KERNEL;
856
        }
857

    
858
      break;
859
    case RTN_BLACKHOLE:
860
      ra.dest = RTD_BLACKHOLE;
861
      break;
862
    case RTN_UNREACHABLE:
863
      ra.dest = RTD_UNREACHABLE;
864
      break;
865
    case RTN_PROHIBIT:
866
      ra.dest = RTD_PROHIBIT;
867
      break;
868
    /* FIXME: What about RTN_THROW? */
869
    default:
870
      SKIP("type %d\n", i->rtm_type);
871
      return;
872
    }
873

    
874
  e = rte_get_temp(&ra);
875
  e->net = net;
876
  e->u.krt.src = src;
877
  e->u.krt.proto = i->rtm_protocol;
878
  e->u.krt.type = i->rtm_type;
879
  if (a[RTA_PRIORITY])
880
    memcpy(&e->u.krt.metric, RTA_DATA(a[RTA_PRIORITY]), sizeof(e->u.krt.metric));
881
  else
882
    e->u.krt.metric = 0;
883
  if (scan)
884
    krt_got_route(p, e);
885
  else
886
    krt_got_route_async(p, e, new);
887
}
888

    
889
void
890
krt_scan_fire(struct krt_proto *p UNUSED)        /* CONFIG_ALL_TABLES_AT_ONCE => p is NULL */
891
{
892
  struct nlmsghdr *h;
893

    
894
  nl_request_dump(RTM_GETROUTE);
895
  while (h = nl_get_scan())
896
    if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
897
      nl_parse_route(h, 1);
898
    else
899
      log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type);
900
}
901

    
902
/*
903
 *        Asynchronous Netlink interface
904
 */
905

    
906
static sock *nl_async_sk;                /* BIRD socket for asynchronous notifications */
907
static byte *nl_async_rx_buffer;        /* Receive buffer */
908

    
909
static void
910
nl_async_msg(struct nlmsghdr *h)
911
{
912
  switch (h->nlmsg_type)
913
    {
914
    case RTM_NEWROUTE:
915
    case RTM_DELROUTE:
916
      DBG("KRT: Received async route notification (%d)\n", h->nlmsg_type);
917
      nl_parse_route(h, 0);
918
      break;
919
    case RTM_NEWLINK:
920
    case RTM_DELLINK:
921
      DBG("KRT: Received async link notification (%d)\n", h->nlmsg_type);
922
      nl_parse_link(h, 0);
923
      break;
924
    case RTM_NEWADDR:
925
    case RTM_DELADDR:
926
      DBG("KRT: Received async address notification (%d)\n", h->nlmsg_type);
927
      nl_parse_addr(h);
928
      break;
929
    default:
930
      DBG("KRT: Received unknown async notification (%d)\n", h->nlmsg_type);
931
    }
932
}
933

    
934
static int
935
nl_async_hook(sock *sk, int size UNUSED)
936
{
937
  struct iovec iov = { nl_async_rx_buffer, NL_RX_SIZE };
938
  struct sockaddr_nl sa;
939
  struct msghdr m = { (struct sockaddr *) &sa, sizeof(sa), &iov, 1, NULL, 0, 0 };
940
  struct nlmsghdr *h;
941
  int x;
942
  unsigned int len;
943

    
944
  x = recvmsg(sk->fd, &m, 0);
945
  if (x < 0)
946
    {
947
      if (errno == ENOBUFS)
948
        {
949
          /*
950
           *  Netlink reports some packets have been thrown away.
951
           *  One day we might react to it by asking for route table
952
           *  scan in near future.
953
           */
954
          return 1;        /* More data are likely to be ready */
955
        }
956
      else if (errno != EWOULDBLOCK)
957
        log(L_ERR "Netlink recvmsg: %m");
958
      return 0;
959
    }
960
  if (sa.nl_pid)                /* It isn't from the kernel */
961
    {
962
      DBG("Non-kernel packet\n");
963
      return 1;
964
    }
965
  h = (void *) nl_async_rx_buffer;
966
  len = x;
967
  if (m.msg_flags & MSG_TRUNC)
968
    {
969
      log(L_WARN "Netlink got truncated asynchronous message");
970
      return 1;
971
    }
972
  while (NLMSG_OK(h, len))
973
    {
974
      nl_async_msg(h);
975
      h = NLMSG_NEXT(h, len);
976
    }
977
  if (len)
978
    log(L_WARN "nl_async_hook: Found packet remnant of size %d", len);
979
  return 1;
980
}
981

    
982
static void
983
nl_open_async(void)
984
{
985
  sock *sk;
986
  struct sockaddr_nl sa;
987
  int fd;
988
  static int nl_open_tried = 0;
989

    
990
  if (nl_open_tried)
991
    return;
992
  nl_open_tried = 1;
993

    
994
  DBG("KRT: Opening async netlink socket\n");
995

    
996
  fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
997
  if (fd < 0)
998
    {
999
      log(L_ERR "Unable to open asynchronous rtnetlink socket: %m");
1000
      return;
1001
    }
1002

    
1003
  bzero(&sa, sizeof(sa));
1004
  sa.nl_family = AF_NETLINK;
1005
#ifdef IPV6
1006
  sa.nl_groups = RTMGRP_LINK | RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_ROUTE;
1007
#else
1008
  sa.nl_groups = RTMGRP_LINK | RTMGRP_IPV4_IFADDR | RTMGRP_IPV4_ROUTE;
1009
#endif
1010
  if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0)
1011
    {
1012
      log(L_ERR "Unable to bind asynchronous rtnetlink socket: %m");
1013
      return;
1014
    }
1015

    
1016
  sk = nl_async_sk = sk_new(krt_pool);
1017
  sk->type = SK_MAGIC;
1018
  sk->rx_hook = nl_async_hook;
1019
  sk->fd = fd;
1020
  if (sk_open(sk))
1021
    bug("Netlink: sk_open failed");
1022

    
1023
  if (!nl_async_rx_buffer)
1024
    nl_async_rx_buffer = xmalloc(NL_RX_SIZE);
1025
}
1026

    
1027
/*
1028
 *        Interface to the UNIX krt module
1029
 */
1030

    
1031
static u8 nl_cf_table[(NL_NUM_TABLES+7) / 8];
1032

    
1033
void
1034
krt_scan_preconfig(struct config *c UNUSED)
1035
{
1036
  bzero(&nl_cf_table, sizeof(nl_cf_table));
1037
}
1038

    
1039
void
1040
krt_scan_postconfig(struct krt_config *x)
1041
{
1042
  int id = x->scan.table_id;
1043

    
1044
  if (nl_cf_table[id/8] & (1 << (id%8)))
1045
    cf_error("Multiple kernel syncers defined for table #%d", id);
1046
  nl_cf_table[id/8] |= (1 << (id%8));
1047
}
1048

    
1049
void
1050
krt_scan_construct(struct krt_config *x)
1051
{
1052
#ifndef IPV6
1053
  x->scan.table_id = RT_TABLE_MAIN;
1054
#else
1055
  x->scan.table_id = 254;
1056
#endif
1057
}
1058

    
1059
void
1060
krt_scan_start(struct krt_proto *p, int first)
1061
{
1062
  init_list(&p->scan.temp_ifs);
1063
  nl_table_map[KRT_CF->scan.table_id] = p;
1064
  if (first)
1065
    {
1066
      nl_open();
1067
      nl_open_async();
1068
    }
1069
}
1070

    
1071
void
1072
krt_scan_shutdown(struct krt_proto *p UNUSED, int last UNUSED)
1073
{
1074
}
1075

    
1076
void
1077
krt_if_start(struct kif_proto *p UNUSED)
1078
{
1079
  nl_open();
1080
  nl_open_async();
1081
}