Statistics
| Branch: | Revision:

iof-bird-daemon / sysdep / unix / io.c @ 05476c4d

History | View | Annotate | Download (40.2 KB)

1
/*
2
 *        BIRD Internet Routing Daemon -- Unix I/O
3
 *
4
 *        (c) 1998--2004 Martin Mares <mj@ucw.cz>
5
 *      (c) 2004       Ondrej Filip <feela@network.cz>
6
 *
7
 *        Can be freely distributed and used under the terms of the GNU GPL.
8
 */
9

    
10
/* Unfortunately, some glibc versions hide parts of RFC 3542 API
11
   if _GNU_SOURCE is not defined. */
12
#define _GNU_SOURCE 1
13

    
14
#include <stdio.h>
15
#include <stdlib.h>
16
#include <time.h>
17
#include <sys/time.h>
18
#include <sys/types.h>
19
#include <sys/socket.h>
20
#include <sys/uio.h>
21
#include <sys/un.h>
22
#include <unistd.h>
23
#include <fcntl.h>
24
#include <errno.h>
25
#include <net/if.h>
26
#include <netinet/in.h>
27
#include <netinet/tcp.h>
28
#include <netinet/udp.h>
29
#include <netinet/icmp6.h>
30

    
31
#include "nest/bird.h"
32
#include "lib/lists.h"
33
#include "lib/resource.h"
34
#include "lib/timer.h"
35
#include "lib/socket.h"
36
#include "lib/event.h"
37
#include "lib/string.h"
38
#include "nest/iface.h"
39

    
40
#include "lib/unix.h"
41
#include "lib/sysio.h"
42

    
43
/* Maximum number of calls of tx handler for one socket in one
44
 * select iteration. Should be small enough to not monopolize CPU by
45
 * one protocol instance.
46
 */
47
#define MAX_STEPS 4
48

    
49
/* Maximum number of calls of rx handler for all sockets in one select
50
   iteration. RX callbacks are often much more costly so we limit
51
   this to get small latencies */
52
#define MAX_RX_STEPS 4
53

    
54
/*
55
 *        Tracked Files
56
 */
57

    
58
struct rfile {
59
  resource r;
60
  FILE *f;
61
};
62

    
63
static void
64
rf_free(resource *r)
65
{
66
  struct rfile *a = (struct rfile *) r;
67

    
68
  fclose(a->f);
69
}
70

    
71
static void
72
rf_dump(resource *r)
73
{
74
  struct rfile *a = (struct rfile *) r;
75

    
76
  debug("(FILE *%p)\n", a->f);
77
}
78

    
79
static struct resclass rf_class = {
80
  "FILE",
81
  sizeof(struct rfile),
82
  rf_free,
83
  rf_dump,
84
  NULL,
85
  NULL
86
};
87

    
88
void *
89
tracked_fopen(pool *p, char *name, char *mode)
90
{
91
  FILE *f = fopen(name, mode);
92

    
93
  if (f)
94
    {
95
      struct rfile *r = ralloc(p, &rf_class);
96
      r->f = f;
97
    }
98
  return f;
99
}
100

    
101
/**
102
 * DOC: Timers
103
 *
104
 * Timers are resources which represent a wish of a module to call
105
 * a function at the specified time. The platform dependent code
106
 * doesn't guarantee exact timing, only that a timer function
107
 * won't be called before the requested time.
108
 *
109
 * In BIRD, time is represented by values of the &bird_clock_t type
110
 * which are integral numbers interpreted as a relative number of seconds since
111
 * some fixed time point in past. The current time can be read
112
 * from variable @now with reasonable accuracy and is monotonic. There is also
113
 * a current 'absolute' time in variable @now_real reported by OS.
114
 *
115
 * Each timer is described by a &timer structure containing a pointer
116
 * to the handler function (@hook), data private to this function (@data),
117
 * time the function should be called at (@expires, 0 for inactive timers),
118
 * for the other fields see |timer.h|.
119
 */
120

    
121
#define NEAR_TIMER_LIMIT 4
122

    
123
static list near_timers, far_timers;
124
static bird_clock_t first_far_timer = TIME_INFINITY;
125

    
126
/* now must be different from 0, because 0 is a special value in timer->expires */
127
bird_clock_t now = 1, now_real, boot_time;
128

    
129
static void
130
update_times_plain(void)
131
{
132
  bird_clock_t new_time = time(NULL);
133
  int delta = new_time - now_real;
134

    
135
  if ((delta >= 0) && (delta < 60))
136
    now += delta;
137
  else if (now_real != 0)
138
   log(L_WARN "Time jump, delta %d s", delta);
139

    
140
  now_real = new_time;
141
}
142

    
143
static void
144
update_times_gettime(void)
145
{
146
  struct timespec ts;
147
  int rv;
148

    
149
  rv = clock_gettime(CLOCK_MONOTONIC, &ts);
150
  if (rv != 0)
151
    die("clock_gettime: %m");
152

    
153
  if (ts.tv_sec != now) {
154
    if (ts.tv_sec < now)
155
      log(L_ERR "Monotonic timer is broken");
156

    
157
    now = ts.tv_sec;
158
    now_real = time(NULL);
159
  }
160
}
161

    
162
/* Nonzero when CLOCK_MONOTONIC works; set by init_times() */
static int clock_monotonic_available;

/*
 * update_times - refresh @now and @now_real
 *
 * Dispatches to the monotonic-clock variant when it is available and to
 * the wall-clock fallback otherwise.
 */
static inline void
update_times(void)
{
  if (!clock_monotonic_available)
  {
    update_times_plain();
    return;
  }

  update_times_gettime();
}
172

    
173
static inline void
174
init_times(void)
175
{
176
 struct timespec ts;
177
 clock_monotonic_available = (clock_gettime(CLOCK_MONOTONIC, &ts) == 0);
178
 if (!clock_monotonic_available)
179
   log(L_WARN "Monotonic timer is missing");
180
}
181

    
182

    
183
static void
184
tm_free(resource *r)
185
{
186
  timer *t = (timer *) r;
187

    
188
  tm_stop(t);
189
}
190

    
191
static void
192
tm_dump(resource *r)
193
{
194
  timer *t = (timer *) r;
195

    
196
  debug("(code %p, data %p, ", t->hook, t->data);
197
  if (t->randomize)
198
    debug("rand %d, ", t->randomize);
199
  if (t->recurrent)
200
    debug("recur %d, ", t->recurrent);
201
  if (t->expires)
202
    debug("expires in %d sec)\n", t->expires - now);
203
  else
204
    debug("inactive)\n");
205
}
206

    
207
static struct resclass tm_class = {
208
  "Timer",
209
  sizeof(timer),
210
  tm_free,
211
  tm_dump,
212
  NULL,
213
  NULL
214
};
215

    
216
/**
217
 * tm_new - create a timer
218
 * @p: pool
219
 *
220
 * This function creates a new timer resource and returns
221
 * a pointer to it. To use the timer, you need to fill in
222
 * the structure fields and call tm_start() to start timing.
223
 */
224
timer *
225
tm_new(pool *p)
226
{
227
  timer *t = ralloc(p, &tm_class);
228
  return t;
229
}
230

    
231
static inline void
232
tm_insert_near(timer *t)
233
{
234
  node *n = HEAD(near_timers);
235

    
236
  while (n->next && (SKIP_BACK(timer, n, n)->expires < t->expires))
237
    n = n->next;
238
  insert_node(&t->n, n->prev);
239
}
240

    
241
/**
242
 * tm_start - start a timer
243
 * @t: timer
244
 * @after: number of seconds the timer should be run after
245
 *
246
 * This function schedules the hook function of the timer to
247
 * be called after @after seconds. If the timer has been already
248
 * started, it's @expire time is replaced by the new value.
249
 *
250
 * You can have set the @randomize field of @t, the timeout
251
 * will be increased by a random number of seconds chosen
252
 * uniformly from range 0 .. @randomize.
253
 *
254
 * You can call tm_start() from the handler function of the timer
255
 * to request another run of the timer. Also, you can set the @recurrent
256
 * field to have the timer re-added automatically with the same timeout.
257
 */
258
void
259
tm_start(timer *t, unsigned after)
260
{
261
  bird_clock_t when;
262

    
263
  if (t->randomize)
264
    after += random() % (t->randomize + 1);
265
  when = now + after;
266
  if (t->expires == when)
267
    return;
268
  if (t->expires)
269
    rem_node(&t->n);
270
  t->expires = when;
271
  if (after <= NEAR_TIMER_LIMIT)
272
    tm_insert_near(t);
273
  else
274
    {
275
      if (!first_far_timer || first_far_timer > when)
276
        first_far_timer = when;
277
      add_tail(&far_timers, &t->n);
278
    }
279
}
280

    
281
/**
282
 * tm_stop - stop a timer
283
 * @t: timer
284
 *
285
 * This function stops a timer. If the timer is already stopped,
286
 * nothing happens.
287
 */
288
void
289
tm_stop(timer *t)
290
{
291
  if (t->expires)
292
    {
293
      rem_node(&t->n);
294
      t->expires = 0;
295
    }
296
}
297

    
298
static void
299
tm_dump_them(char *name, list *l)
300
{
301
  node *n;
302
  timer *t;
303

    
304
  debug("%s timers:\n", name);
305
  WALK_LIST(n, *l)
306
    {
307
      t = SKIP_BACK(timer, n, n);
308
      debug("%p ", t);
309
      tm_dump(&t->r);
310
    }
311
  debug("\n");
312
}
313

    
314
void
315
tm_dump_all(void)
316
{
317
  tm_dump_them("Near", &near_timers);
318
  tm_dump_them("Far", &far_timers);
319
}
320

    
321
static inline time_t
322
tm_first_shot(void)
323
{
324
  time_t x = first_far_timer;
325

    
326
  if (!EMPTY_LIST(near_timers))
327
    {
328
      timer *t = SKIP_BACK(timer, n, HEAD(near_timers));
329
      if (t->expires < x)
330
        x = t->expires;
331
    }
332
  return x;
333
}
334

    
335
static void
336
tm_shot(void)
337
{
338
  timer *t;
339
  node *n, *m;
340

    
341
  if (first_far_timer <= now)
342
    {
343
      bird_clock_t limit = now + NEAR_TIMER_LIMIT;
344
      first_far_timer = TIME_INFINITY;
345
      n = HEAD(far_timers);
346
      while (m = n->next)
347
        {
348
          t = SKIP_BACK(timer, n, n);
349
          if (t->expires <= limit)
350
            {
351
              rem_node(n);
352
              tm_insert_near(t);
353
            }
354
          else if (t->expires < first_far_timer)
355
            first_far_timer = t->expires;
356
          n = m;
357
        }
358
    }
359
  while ((n = HEAD(near_timers)) -> next)
360
    {
361
      int delay;
362
      t = SKIP_BACK(timer, n, n);
363
      if (t->expires > now)
364
        break;
365
      rem_node(n);
366
      delay = t->expires - now;
367
      t->expires = 0;
368
      if (t->recurrent)
369
        {
370
          int i = t->recurrent - delay;
371
          if (i < 0)
372
            i = 0;
373
          tm_start(t, i);
374
        }
375
      t->hook(t);
376
    }
377
}
378

    
379
/**
380
 * tm_parse_datetime - parse a date and time
381
 * @x: datetime string
382
 *
383
 * tm_parse_datetime() takes a textual representation of
384
 * a date and time (dd-mm-yyyy hh:mm:ss)
385
 * and converts it to the corresponding value of type &bird_clock_t.
386
 */
387
bird_clock_t
388
tm_parse_datetime(char *x)
389
{
390
  struct tm tm;
391
  int n;
392
  time_t t;
393

    
394
  if (sscanf(x, "%d-%d-%d %d:%d:%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &tm.tm_hour, &tm.tm_min, &tm.tm_sec, &n) != 6 || x[n])
395
    return tm_parse_date(x);
396
  tm.tm_mon--;
397
  tm.tm_year -= 1900;
398
  t = mktime(&tm);
399
  if (t == (time_t) -1)
400
    return 0;
401
  return t;
402
}
403
/**
404
 * tm_parse_date - parse a date
405
 * @x: date string
406
 *
407
 * tm_parse_date() takes a textual representation of a date (dd-mm-yyyy)
408
 * and converts it to the corresponding value of type &bird_clock_t.
409
 */
410
bird_clock_t
411
tm_parse_date(char *x)
412
{
413
  struct tm tm;
414
  int n;
415
  time_t t;
416

    
417
  if (sscanf(x, "%d-%d-%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &n) != 3 || x[n])
418
    return 0;
419
  tm.tm_mon--;
420
  tm.tm_year -= 1900;
421
  tm.tm_hour = tm.tm_min = tm.tm_sec = 0;
422
  t = mktime(&tm);
423
  if (t == (time_t) -1)
424
    return 0;
425
  return t;
426
}
427

    
428
static void
429
tm_format_reltime(char *x, struct tm *tm, bird_clock_t delta)
430
{
431
  static char *month_names[12] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun",
432
                                   "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
433

    
434
  if (delta < 20*3600)
435
    bsprintf(x, "%02d:%02d", tm->tm_hour, tm->tm_min);
436
  else if (delta < 360*86400)
437
    bsprintf(x, "%s%02d", month_names[tm->tm_mon], tm->tm_mday);
438
  else
439
    bsprintf(x, "%d", tm->tm_year+1900);
440
}
441

    
442
#include "conf/conf.h"
443

    
444
/**
445
 * tm_format_datetime - convert date and time to textual representation
446
 * @x: destination buffer of size %TM_DATETIME_BUFFER_SIZE
447
 * @t: time
448
 *
449
 * This function formats the given relative time value @t to a textual
450
 * date/time representation (dd-mm-yyyy hh:mm:ss) in real time.
451
 */
452
void
453
tm_format_datetime(char *x, struct timeformat *fmt_spec, bird_clock_t t)
454
{
455
  const char *fmt_used;
456
  struct tm *tm;
457
  bird_clock_t delta = now - t;
458
  t = now_real - delta;
459
  tm = localtime(&t);
460

    
461
  if (fmt_spec->fmt1 == NULL)
462
    return tm_format_reltime(x, tm, delta);
463

    
464
  if ((fmt_spec->limit == 0) || (delta < fmt_spec->limit))
465
    fmt_used = fmt_spec->fmt1;
466
  else
467
    fmt_used = fmt_spec->fmt2;
468

    
469
  int rv = strftime(x, TM_DATETIME_BUFFER_SIZE, fmt_used, tm);
470
  if (((rv == 0) && fmt_used[0]) || (rv == TM_DATETIME_BUFFER_SIZE))
471
    strcpy(x, "<too-long>");
472
}
473

    
474

    
475
/**
476
 * DOC: Sockets
477
 *
478
 * Socket resources represent network connections. Their data structure (&socket)
479
 * contains a lot of fields defining the exact type of the socket, the local and
480
 * remote addresses and ports, pointers to socket buffers and finally pointers to
481
 * hook functions to be called when new data have arrived to the receive buffer
482
 * (@rx_hook), when the contents of the transmit buffer have been transmitted
483
 * (@tx_hook) and when an error or connection close occurs (@err_hook).
484
 *
485
 * Freeing of sockets from inside socket hooks is perfectly safe.
486
 */
487

    
488
#ifndef SOL_IP
489
#define SOL_IP IPPROTO_IP
490
#endif
491

    
492
#ifndef SOL_IPV6
493
#define SOL_IPV6 IPPROTO_IPV6
494
#endif
495

    
496
#ifndef SOL_ICMPV6
497
#define SOL_ICMPV6 IPPROTO_ICMPV6
498
#endif
499

    
500

    
501
/*
502
 *        Sockaddr helper functions
503
 */
504

    
505
/*
 * sockaddr_length - size of the socket address structure for a family
 * @af: address family
 *
 * Returns sizeof(struct sockaddr_in) for AF_INET and
 * sizeof(struct sockaddr_in6) for every other value.
 */
static inline int
sockaddr_length(int af)
{
  if (af == AF_INET)
    return sizeof(struct sockaddr_in);

  return sizeof(struct sockaddr_in6);
}
507

    
508
static inline void
509
sockaddr_fill4(struct sockaddr_in *sa, ip_addr a, struct iface *ifa, uint port)
510
{
511
  memset(sa, 0, sizeof(struct sockaddr_in));
512
#ifdef HAVE_SIN_LEN
513
  sa->sin_len = sizeof(struct sockaddr_in);
514
#endif
515
  sa->sin_family = AF_INET;
516
  sa->sin_port = htons(port);
517
  sa->sin_addr = ipa_to_in4(a);
518
}
519

    
520
static inline void
521
sockaddr_fill6(struct sockaddr_in6 *sa, ip_addr a, struct iface *ifa, uint port)
522
{
523
  memset(sa, 0, sizeof(struct sockaddr_in6));
524
#ifdef SIN6_LEN
525
  sa->sin6_len = sizeof(struct sockaddr_in6);
526
#endif
527
  sa->sin6_family = AF_INET6;
528
  sa->sin6_port = htons(port);
529
  sa->sin6_flowinfo = 0;
530
  sa->sin6_addr = ipa_to_in6(a);
531

    
532
  if (ifa && ipa_is_link_local(a))
533
    sa->sin6_scope_id = ifa->index;
534
}
535

    
536
void
537
sockaddr_fill(sockaddr *sa, int af, ip_addr a, struct iface *ifa, uint port)
538
{
539
  if (af == AF_INET)
540
    sockaddr_fill4((struct sockaddr_in *) sa, a, ifa, port);
541
  else if (af == AF_INET6)
542
    sockaddr_fill6((struct sockaddr_in6 *) sa, a, ifa, port);
543
  else
544
    bug("Unknown AF");
545
}
546

    
547
static inline void
548
sockaddr_read4(struct sockaddr_in *sa, ip_addr *a, struct iface **ifa, uint *port)
549
{
550
  *port = ntohs(sa->sin_port);
551
  *a = ipa_from_in4(sa->sin_addr);
552
}
553

    
554
static inline void
555
sockaddr_read6(struct sockaddr_in6 *sa, ip_addr *a, struct iface **ifa, uint *port)
556
{
557
  *port = ntohs(sa->sin6_port);
558
  *a = ipa_from_in6(sa->sin6_addr);
559

    
560
  if (ifa && ipa_is_link_local(*a))
561
    *ifa = if_find_by_index(sa->sin6_scope_id);
562
}
563

    
564
int
565
sockaddr_read(sockaddr *sa, int af, ip_addr *a, struct iface **ifa, uint *port)
566
{
567
  if (sa->sa.sa_family != af)
568
    goto fail;
569

    
570
  if (af == AF_INET)
571
    sockaddr_read4((struct sockaddr_in *) sa, a, ifa, port);
572
  else if (af == AF_INET6)
573
    sockaddr_read6((struct sockaddr_in6 *) sa, a, ifa, port);
574
  else
575
    goto fail;
576

    
577
  return 0;
578

    
579
 fail:
580
  *a = IPA_NONE;
581
  *port = 0;
582
  return -1;
583
}
584

    
585

    
586
/*
587
 *        IPv6 multicast syscalls
588
 */
589

    
590
/* Fortunately standardized in RFC 3493 */
591

    
592
/*
 * Initializer for struct ipv6_mreq: multicast group maddr on interface ifa.
 * The ifa argument is parenthesized so that any pointer expression can be
 * passed safely.
 */
#define INIT_MREQ6(maddr,ifa) \
  { .ipv6mr_multiaddr = ipa_to_in6(maddr), .ipv6mr_interface = (ifa)->index }
594

    
595
static inline int
596
sk_setup_multicast6(sock *s)
597
{
598
  int index = s->iface->index;
599
  int ttl = s->ttl;
600
  int n = 0;
601

    
602
  if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_IF, &index, sizeof(index)) < 0)
603
    ERR("IPV6_MULTICAST_IF");
604

    
605
  if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_HOPS, &ttl, sizeof(ttl)) < 0)
606
    ERR("IPV6_MULTICAST_HOPS");
607

    
608
  if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_LOOP, &n, sizeof(n)) < 0)
609
    ERR("IPV6_MULTICAST_LOOP");
610

    
611
  return 0;
612
}
613

    
614
static inline int
615
sk_join_group6(sock *s, ip_addr maddr)
616
{
617
  struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface);
618

    
619
  if (setsockopt(s->fd, SOL_IPV6, IPV6_JOIN_GROUP, &mr, sizeof(mr)) < 0)
620
    ERR("IPV6_JOIN_GROUP");
621

    
622
  return 0;
623
}
624

    
625
static inline int
626
sk_leave_group6(sock *s, ip_addr maddr)
627
{
628
  struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface);
629

    
630
  if (setsockopt(s->fd, SOL_IPV6, IPV6_LEAVE_GROUP, &mr, sizeof(mr)) < 0)
631
    ERR("IPV6_LEAVE_GROUP");
632

    
633
  return 0;
634
}
635

    
636

    
637
/*
638
 *        IPv6 packet control messages
639
 */
640

    
641
/* Also standardized, in RFC 3542 */
642

    
643
/*
644
 * RFC 2292 uses IPV6_PKTINFO for both the socket option and the cmsg
645
 * type, RFC 3542 changed the socket option to IPV6_RECVPKTINFO. If we
646
 * don't have IPV6_RECVPKTINFO we suppose the OS implements the older
647
 * RFC and we use IPV6_PKTINFO.
648
 */
649
#ifndef IPV6_RECVPKTINFO
650
#define IPV6_RECVPKTINFO IPV6_PKTINFO
651
#endif
652
/*
653
 * Same goes for IPV6_HOPLIMIT -> IPV6_RECVHOPLIMIT.
654
 */
655
#ifndef IPV6_RECVHOPLIMIT
656
#define IPV6_RECVHOPLIMIT IPV6_HOPLIMIT
657
#endif
658

    
659

    
660
#define CMSG6_SPACE_PKTINFO CMSG_SPACE(sizeof(struct in6_pktinfo))
661
#define CMSG6_SPACE_TTL CMSG_SPACE(sizeof(int))
662

    
663
static inline int
664
sk_request_cmsg6_pktinfo(sock *s)
665
{
666
  int y = 1;
667

    
668
  if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVPKTINFO, &y, sizeof(y)) < 0)
669
    ERR("IPV6_RECVPKTINFO");
670

    
671
  return 0;
672
}
673

    
674
static inline int
675
sk_request_cmsg6_ttl(sock *s)
676
{
677
  int y = 1;
678

    
679
  if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVHOPLIMIT, &y, sizeof(y)) < 0)
680
    ERR("IPV6_RECVHOPLIMIT");
681

    
682
  return 0;
683
}
684

    
685
static inline void
686
sk_process_cmsg6_pktinfo(sock *s, struct cmsghdr *cm)
687
{
688
  if (cm->cmsg_type == IPV6_PKTINFO)
689
  {
690
    struct in6_pktinfo *pi = (struct in6_pktinfo *) CMSG_DATA(cm);
691
    s->laddr = ipa_from_in6(pi->ipi6_addr);
692
    s->lifindex = pi->ipi6_ifindex;
693
  }
694
}
695

    
696
static inline void
697
sk_process_cmsg6_ttl(sock *s, struct cmsghdr *cm)
698
{
699
  if (cm->cmsg_type == IPV6_HOPLIMIT)
700
    s->rcv_ttl = * (int *) CMSG_DATA(cm);
701
}
702

    
703
static inline void
704
sk_prepare_cmsgs6(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen)
705
{
706
  struct cmsghdr *cm;
707
  struct in6_pktinfo *pi;
708

    
709
  msg->msg_control = cbuf;
710
  msg->msg_controllen = cbuflen;
711

    
712
  cm = CMSG_FIRSTHDR(msg);
713
  cm->cmsg_level = SOL_IPV6;
714
  cm->cmsg_type = IPV6_PKTINFO;
715
  cm->cmsg_len = CMSG_LEN(sizeof(*pi));
716

    
717
  pi = (struct in6_pktinfo *) CMSG_DATA(cm);
718
  pi->ipi6_ifindex = s->iface ? s->iface->index : 0;
719
  pi->ipi6_addr = ipa_to_in6(s->saddr);
720

    
721
  msg->msg_controllen = cm->cmsg_len;
722
}
723

    
724

    
725
/*
726
 *        Miscellaneous socket syscalls
727
 */
728

    
729
static inline int
730
sk_set_ttl4(sock *s, int ttl)
731
{
732
  if (setsockopt(s->fd, SOL_IP, IP_TTL, &ttl, sizeof(ttl)) < 0)
733
    ERR("IP_TTL");
734

    
735
  return 0;
736
}
737

    
738
static inline int
739
sk_set_ttl6(sock *s, int ttl)
740
{
741
  if (setsockopt(s->fd, SOL_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) < 0)
742
    ERR("IPV6_UNICAST_HOPS");
743

    
744
  return 0;
745
}
746

    
747
static inline int
748
sk_set_tos4(sock *s, int tos)
749
{
750
  if (setsockopt(s->fd, SOL_IP, IP_TOS, &tos, sizeof(tos)) < 0)
751
    ERR("IP_TOS");
752

    
753
  return 0;
754
}
755

    
756
static inline int
757
sk_set_tos6(sock *s, int tos)
758
{
759
  if (setsockopt(s->fd, SOL_IPV6, IPV6_TCLASS, &tos, sizeof(tos)) < 0)
760
    ERR("IPV6_TCLASS");
761

    
762
  return 0;
763
}
764

    
765

    
766
/*
767
 *        Public socket functions
768
 */
769

    
770
/**
771
 * sk_setup_multicast - enable multicast for given socket
772
 * @s: socket
773
 *
774
 * Prepare transmission of multicast packets for given datagram socket.
775
 * The socket must have defined @iface.
776
 *
777
 * Result: 0 for success, -1 for an error.
778
 */
779

    
780
int
781
sk_setup_multicast(sock *s)
782
{
783
  ASSERT(s->iface);
784

    
785
  if (sk_is_ipv4(s))
786
    return sk_setup_multicast4(s);
787
  else
788
    return sk_setup_multicast6(s);
789
}
790

    
791
/**
792
 * sk_join_group - join multicast group for given socket
793
 * @s: socket
794
 * @maddr: multicast address
795
 *
796
 * Join multicast group for given datagram socket and associated interface.
797
 * The socket must have defined @iface.
798
 *
799
 * Result: 0 for success, -1 for an error.
800
 */
801

    
802
int
803
sk_join_group(sock *s, ip_addr maddr)
804
{
805
  if (sk_is_ipv4(s))
806
    return sk_join_group4(s, maddr);
807
  else
808
    return sk_join_group6(s, maddr);
809
}
810

    
811
/**
812
 * sk_leave_group - leave multicast group for given socket
813
 * @s: socket
814
 * @maddr: multicast address
815
 *
816
 * Leave multicast group for given datagram socket and associated interface.
817
 * The socket must have defined @iface.
818
 *
819
 * Result: 0 for success, -1 for an error.
820
 */
821

    
822
int
823
sk_leave_group(sock *s, ip_addr maddr)
824
{
825
  if (sk_is_ipv4(s))
826
    return sk_leave_group4(s, maddr);
827
  else
828
    return sk_leave_group6(s, maddr);
829
}
830

    
831
/**
832
 * sk_setup_broadcast - enable broadcast for given socket
833
 * @s: socket
834
 *
835
 * Allow reception and transmission of broadcast packets for given datagram
836
 * socket. The socket must have defined @iface. For transmission, packets should
837
 * be send to @brd address of @iface.
838
 *
839
 * Result: 0 for success, -1 for an error.
840
 */
841

    
842
int
843
sk_setup_broadcast(sock *s)
844
{
845
  int y = 1;
846

    
847
  if (setsockopt(s->fd, SOL_SOCKET, SO_BROADCAST, &y, sizeof(y)) < 0)
848
    ERR("SO_BROADCAST");
849

    
850
  return 0;
851
}
852

    
853
/**
854
 * sk_set_ttl - set transmit TTL for given socket
855
 * @s: socket
856
 * @ttl: TTL value
857
 *
858
 * Set TTL for already opened connections when TTL was not set before. Useful
859
 * for accepted connections when different ones should have different TTL.
860
 *
861
 * Result: 0 for success, -1 for an error.
862
 */
863

    
864
int
865
sk_set_ttl(sock *s, int ttl)
866
{
867
  s->ttl = ttl;
868

    
869
  if (sk_is_ipv4(s))
870
    return sk_set_ttl4(s, ttl);
871
  else
872
    return sk_set_ttl6(s, ttl);
873
}
874

    
875
/**
876
 * sk_set_min_ttl - set minimal accepted TTL for given socket
877
 * @s: socket
878
 * @ttl: TTL value
879
 *
880
 * Set minimal accepted TTL for given socket. Can be used for TTL security.
881
 * implementations.
882
 *
883
 * Result: 0 for success, -1 for an error.
884
 */
885

    
886
int
887
sk_set_min_ttl(sock *s, int ttl)
888
{
889
  if (sk_is_ipv4(s))
890
    return sk_set_min_ttl4(s, ttl);
891
  else
892
    return sk_set_min_ttl6(s, ttl);
893
}
894

    
895
#if 0
/*
 * Compiled out: this section only carries the documentation of
 * sk_set_md5_auth() together with a placeholder body.
 */
/**
 * sk_set_md5_auth - add / remove MD5 security association for given socket
 * @s: socket
 * @a: IP address of the other side
 * @ifa: Interface for link-local IP address
 * @passwd: password used for MD5 authentication
 *
 * In TCP MD5 handling code in kernel, there is a set of pairs (address,
 * password) used to choose password according to address of the other side.
 * This function is useful for listening socket, for active sockets it is enough
 * to set s->password field.
 *
 * When called with passwd != NULL, the new pair is added,
 * When called with passwd == NULL, the existing pair is removed.
 *
 * Result: 0 for success, -1 for an error.
 */

int
sk_set_md5_auth(sock *s, ip_addr a, struct iface *ifa, char *passwd)
{ DUMMY; }
#endif
918

    
919
/**
920
 * sk_set_ipv6_checksum - specify IPv6 checksum offset for given socket
921
 * @s: socket
922
 * @offset: offset
923
 *
924
 * Specify IPv6 checksum field offset for given raw IPv6 socket. After that, the
925
 * kernel will automatically fill it for outgoing packets and check it for
926
 * incoming packets. Should not be used on ICMPv6 sockets, where the position is
927
 * known to the kernel.
928
 *
929
 * Result: 0 for success, -1 for an error.
930
 */
931

    
932
int
933
sk_set_ipv6_checksum(sock *s, int offset)
934
{
935
  if (setsockopt(s->fd, SOL_IPV6, IPV6_CHECKSUM, &offset, sizeof(offset)) < 0)
936
    ERR("IPV6_CHECKSUM");
937

    
938
  return 0;
939
}
940

    
941
int
942
sk_set_icmp6_filter(sock *s, int p1, int p2)
943
{
944
  /* a bit of lame interface, but it is here only for Radv */
945
  struct icmp6_filter f;
946

    
947
  ICMP6_FILTER_SETBLOCKALL(&f);
948
  ICMP6_FILTER_SETPASS(p1, &f);
949
  ICMP6_FILTER_SETPASS(p2, &f);
950

    
951
  if (setsockopt(s->fd, SOL_ICMPV6, ICMP6_FILTER, &f, sizeof(f)) < 0)
952
    ERR("ICMP6_FILTER");
953

    
954
  return 0;
955
}
956

    
957
void
958
sk_log_error(sock *s, const char *p)
959
{
960
  log(L_ERR "%s: Socket error: %s%#m", p, s->err);
961
}
962

    
963

    
964
/*
965
 *        Actual struct birdsock code
966
 */
967

    
968
/* All sockets linked by sk_insert() */
static list sock_list;

/* Iteration cursors; sk_free() moves them to the next socket when the one they point to is freed */
static struct birdsock *current_sock;
static struct birdsock *stored_sock;

/* Set to 1 by sk_insert() and sk_free() whenever sock_list changes */
static int sock_recalc_fdsets_p;
972

    
973
/*
 * sk_next - successor of a socket in its list
 * @s: socket
 *
 * Returns the following socket, or NULL when @s is the last one (its
 * successor node has no further successor, i.e. it is the list tail).
 */
static inline sock *
sk_next(sock *s)
{
  node *succ = s->n.next;

  return succ->next ? SKIP_BACK(sock, n, succ) : NULL;
}
981

    
982
static void
983
sk_alloc_bufs(sock *s)
984
{
985
  if (!s->rbuf && s->rbsize)
986
    s->rbuf = s->rbuf_alloc = xmalloc(s->rbsize);
987
  s->rpos = s->rbuf;
988
  if (!s->tbuf && s->tbsize)
989
    s->tbuf = s->tbuf_alloc = xmalloc(s->tbsize);
990
  s->tpos = s->ttx = s->tbuf;
991
}
992

    
993
static void
994
sk_free_bufs(sock *s)
995
{
996
  if (s->rbuf_alloc)
997
  {
998
    xfree(s->rbuf_alloc);
999
    s->rbuf = s->rbuf_alloc = NULL;
1000
  }
1001
  if (s->tbuf_alloc)
1002
  {
1003
    xfree(s->tbuf_alloc);
1004
    s->tbuf = s->tbuf_alloc = NULL;
1005
  }
1006
}
1007

    
1008
static void
1009
sk_free(resource *r)
1010
{
1011
  sock *s = (sock *) r;
1012

    
1013
  sk_free_bufs(s);
1014
  if (s->fd >= 0)
1015
  {
1016
    close(s->fd);
1017

    
1018
    /* FIXME: we should call sk_stop() for SKF_THREAD sockets */
1019
    if (s->flags & SKF_THREAD)
1020
      return;
1021

    
1022
    if (s == current_sock)
1023
      current_sock = sk_next(s);
1024
    if (s == stored_sock)
1025
      stored_sock = sk_next(s);
1026
    rem_node(&s->n);
1027
    sock_recalc_fdsets_p = 1;
1028
  }
1029
}
1030

    
1031
void
1032
sk_set_rbsize(sock *s, uint val)
1033
{
1034
  ASSERT(s->rbuf_alloc == s->rbuf);
1035

    
1036
  if (s->rbsize == val)
1037
    return;
1038

    
1039
  s->rbsize = val;
1040
  xfree(s->rbuf_alloc);
1041
  s->rbuf_alloc = xmalloc(val);
1042
  s->rpos = s->rbuf = s->rbuf_alloc;
1043
}
1044

    
1045
void
1046
sk_set_tbsize(sock *s, uint val)
1047
{
1048
  ASSERT(s->tbuf_alloc == s->tbuf);
1049

    
1050
  if (s->tbsize == val)
1051
    return;
1052

    
1053
  byte *old_tbuf = s->tbuf;
1054

    
1055
  s->tbsize = val;
1056
  s->tbuf = s->tbuf_alloc = xrealloc(s->tbuf_alloc, val);
1057
  s->tpos = s->tbuf + (s->tpos - old_tbuf);
1058
  s->ttx  = s->tbuf + (s->ttx  - old_tbuf);
1059
}
1060

    
1061
void
1062
sk_set_tbuf(sock *s, void *tbuf)
1063
{
1064
  s->tbuf = tbuf ?: s->tbuf_alloc;
1065
  s->ttx = s->tpos = s->tbuf;
1066
}
1067

    
1068
void
1069
sk_reallocate(sock *s)
1070
{
1071
  sk_free_bufs(s);
1072
  sk_alloc_bufs(s);
1073
}
1074

    
1075
static void
1076
sk_dump(resource *r)
1077
{
1078
  sock *s = (sock *) r;
1079
  static char *sk_type_names[] = { "TCP<", "TCP>", "TCP", "UDP", NULL, "IP", NULL, "MAGIC", "UNIX<", "UNIX", "DEL!" };
1080

    
1081
  debug("(%s, ud=%p, sa=%08x, sp=%d, da=%08x, dp=%d, tos=%d, ttl=%d, if=%s)\n",
1082
        sk_type_names[s->type],
1083
        s->data,
1084
        s->saddr,
1085
        s->sport,
1086
        s->daddr,
1087
        s->dport,
1088
        s->tos,
1089
        s->ttl,
1090
        s->iface ? s->iface->name : "none");
1091
}
1092

    
1093
static struct resclass sk_class = {
1094
  "Socket",
1095
  sizeof(sock),
1096
  sk_free,
1097
  sk_dump,
1098
  NULL,
1099
  NULL
1100
};
1101

    
1102
/**
1103
 * sk_new - create a socket
1104
 * @p: pool
1105
 *
1106
 * This function creates a new socket resource. If you want to use it,
1107
 * you need to fill in all the required fields of the structure and
1108
 * call sk_open() to do the actual opening of the socket.
1109
 *
1110
 * The real function name is sock_new(), sk_new() is a macro wrapper
1111
 * to avoid collision with OpenSSL.
1112
 */
1113
sock *
1114
sock_new(pool *p)
1115
{
1116
  sock *s = ralloc(p, &sk_class);
1117
  s->pool = p;
1118
  // s->saddr = s->daddr = IPA_NONE;
1119
  s->tos = s->priority = s->ttl = -1;
1120
  s->fd = -1;
1121
  return s;
1122
}
1123

    
1124
static int
1125
sk_setup(sock *s)
1126
{
1127
  int y = 1;
1128
  int fd = s->fd;
1129

    
1130
  if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
1131
    ERR("O_NONBLOCK");
1132

    
1133
  if (!s->af)
1134
    return 0;
1135

    
1136
  if (ipa_nonzero(s->saddr) && !(s->flags & SKF_BIND))
1137
    s->flags |= SKF_PKTINFO;
1138

    
1139
#ifdef CONFIG_USE_HDRINCL
1140
  if (sk_is_ipv4(s) && (s->type == SK_IP) && (s->flags & SKF_PKTINFO))
1141
  {
1142
    s->flags &= ~SKF_PKTINFO;
1143
    s->flags |= SKF_HDRINCL;
1144
    if (setsockopt(fd, SOL_IP, IP_HDRINCL, &y, sizeof(y)) < 0)
1145
      ERR("IP_HDRINCL");
1146
  }
1147
#endif
1148

    
1149
  if (s->iface)
1150
  {
1151
#ifdef SO_BINDTODEVICE
1152
    struct ifreq ifr;
1153
    strcpy(ifr.ifr_name, s->iface->name);
1154
    if (setsockopt(s->fd, SOL_SOCKET, SO_BINDTODEVICE, &ifr, sizeof(ifr)) < 0)
1155
      ERR("SO_BINDTODEVICE");
1156
#endif
1157

    
1158
#ifdef CONFIG_UNIX_DONTROUTE
1159
    if (setsockopt(s->fd, SOL_SOCKET, SO_DONTROUTE, &y, sizeof(y)) < 0)
1160
      ERR("SO_DONTROUTE");
1161
#endif
1162
  }
1163

    
1164
  if (s->priority >= 0)
1165
    if (sk_set_priority(s, s->priority) < 0)
1166
      return -1;
1167

    
1168
  if (sk_is_ipv4(s))
1169
  {
1170
    if (s->flags & SKF_LADDR_RX)
1171
      if (sk_request_cmsg4_pktinfo(s) < 0)
1172
        return -1;
1173

    
1174
    if (s->flags & SKF_TTL_RX)
1175
      if (sk_request_cmsg4_ttl(s) < 0)
1176
        return -1;
1177

    
1178
    if ((s->type == SK_UDP) || (s->type == SK_IP))
1179
      if (sk_disable_mtu_disc4(s) < 0)
1180
        return -1;
1181

    
1182
    if (s->ttl >= 0)
1183
      if (sk_set_ttl4(s, s->ttl) < 0)
1184
        return -1;
1185

    
1186
    if (s->tos >= 0)
1187
      if (sk_set_tos4(s, s->tos) < 0)
1188
        return -1;
1189
  }
1190

    
1191
  if (sk_is_ipv6(s))
1192
  {
1193
    if (s->flags & SKF_V6ONLY)
1194
      if (setsockopt(fd, SOL_IPV6, IPV6_V6ONLY, &y, sizeof(y)) < 0)
1195
        ERR("IPV6_V6ONLY");
1196

    
1197
    if (s->flags & SKF_LADDR_RX)
1198
      if (sk_request_cmsg6_pktinfo(s) < 0)
1199
        return -1;
1200

    
1201
    if (s->flags & SKF_TTL_RX)
1202
      if (sk_request_cmsg6_ttl(s) < 0)
1203
        return -1;
1204

    
1205
    if ((s->type == SK_UDP) || (s->type == SK_IP))
1206
      if (sk_disable_mtu_disc6(s) < 0)
1207
        return -1;
1208

    
1209
    if (s->ttl >= 0)
1210
      if (sk_set_ttl6(s, s->ttl) < 0)
1211
        return -1;
1212

    
1213
    if (s->tos >= 0)
1214
      if (sk_set_tos6(s, s->tos) < 0)
1215
        return -1;
1216
  }
1217

    
1218
  return 0;
1219
}
1220

    
1221
static void
1222
sk_insert(sock *s)
1223
{
1224
  add_tail(&sock_list, &s->n);
1225
  sock_recalc_fdsets_p = 1;
1226
}
1227

    
1228
static void
1229
sk_tcp_connected(sock *s)
1230
{
1231
  sockaddr sa;
1232
  int sa_len = sizeof(sa);
1233

    
1234
  if ((getsockname(s->fd, &sa.sa, &sa_len) < 0) ||
1235
      (sockaddr_read(&sa, s->af, &s->saddr, &s->iface, &s->sport) < 0))
1236
    log(L_WARN "SOCK: Cannot get local IP address for TCP>");
1237

    
1238
  s->type = SK_TCP;
1239
  sk_alloc_bufs(s);
1240
  s->tx_hook(s);
1241
}
1242

    
1243
static int
1244
sk_passive_connected(sock *s, int type)
1245
{
1246
  sockaddr loc_sa, rem_sa;
1247
  int loc_sa_len = sizeof(loc_sa);
1248
  int rem_sa_len = sizeof(rem_sa);
1249

    
1250
  int fd = accept(s->fd, ((type == SK_TCP) ? &rem_sa.sa : NULL), &rem_sa_len);
1251
  if (fd < 0)
1252
  {
1253
    if ((errno != EINTR) && (errno != EAGAIN))
1254
      s->err_hook(s, errno);
1255
    return 0;
1256
  }
1257

    
1258
  sock *t = sk_new(s->pool);
1259
  t->type = type;
1260
  t->fd = fd;
1261
  t->af = s->af;
1262
  t->ttl = s->ttl;
1263
  t->tos = s->tos;
1264
  t->rbsize = s->rbsize;
1265
  t->tbsize = s->tbsize;
1266

    
1267
  if (type == SK_TCP)
1268
  {
1269
    if ((getsockname(fd, &loc_sa.sa, &loc_sa_len) < 0) ||
1270
        (sockaddr_read(&loc_sa, s->af, &t->saddr, &t->iface, &t->sport) < 0))
1271
      log(L_WARN "SOCK: Cannot get local IP address for TCP<");
1272

    
1273
    if (sockaddr_read(&rem_sa, s->af, &t->daddr, &t->iface, &t->dport) < 0)
1274
      log(L_WARN "SOCK: Cannot get remote IP address for TCP<");
1275
  }
1276

    
1277
  if (sk_setup(t) < 0)
1278
  {
1279
    /* FIXME: Call err_hook instead ? */
1280
    log(L_ERR "SOCK: Incoming connection: %s%#m", t->err);
1281

    
1282
    /* FIXME: handle it better in rfree() */
1283
    close(t->fd);        
1284
    t->fd = -1;
1285
    rfree(t);
1286
    return 1;
1287
  }
1288

    
1289
  sk_insert(t);
1290
  sk_alloc_bufs(t);
1291
  s->rx_hook(t, 0);
1292
  return 1;
1293
}
1294

    
1295
/**
1296
 * sk_open - open a socket
1297
 * @s: socket
1298
 *
1299
 * This function takes a socket resource created by sk_new() and
1300
 * initialized by the user and binds a corresponding network connection
1301
 * to it.
1302
 *
1303
 * Result: 0 for success, -1 for an error.
1304
 */
1305
int
1306
sk_open(sock *s)
1307
{
1308
  int af = BIRD_AF;
1309
  int fd = -1;
1310
  int do_bind = 0;
1311
  int bind_port = 0;
1312
  ip_addr bind_addr = IPA_NONE;
1313
  sockaddr sa;
1314

    
1315
  switch (s->type)
1316
  {
1317
  case SK_TCP_ACTIVE:
1318
    s->ttx = "";                        /* Force s->ttx != s->tpos */
1319
    /* Fall thru */
1320
  case SK_TCP_PASSIVE:
1321
    fd = socket(af, SOCK_STREAM, IPPROTO_TCP);
1322
    bind_port = s->sport;
1323
    bind_addr = s->saddr;
1324
    do_bind = bind_port || ipa_nonzero(bind_addr);
1325
    break;
1326
  
1327
  case SK_UDP:
1328
    fd = socket(af, SOCK_DGRAM, IPPROTO_UDP);
1329
    bind_port = s->sport;
1330
    bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE;
1331
    do_bind = 1;
1332
    break;
1333

    
1334
  case SK_IP:
1335
    fd = socket(af, SOCK_RAW, s->dport);
1336
    bind_port = 0;
1337
    bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE;
1338
    do_bind = ipa_nonzero(bind_addr);
1339
    break;
1340

    
1341
  case SK_MAGIC:
1342
    af = 0;
1343
    fd = s->fd;
1344
    break;
1345

    
1346
  default:
1347
    bug("sk_open() called for invalid sock type %d", s->type);
1348
  }
1349

    
1350
  if (fd < 0)
1351
    ERR("socket");
1352

    
1353
  s->af = af;
1354
  s->fd = fd;
1355

    
1356
  if (sk_setup(s) < 0)
1357
    goto err;
1358

    
1359
  if (do_bind)
1360
  {
1361
    if (bind_port)
1362
    {
1363
      int y = 1;
1364

    
1365
      if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &y, sizeof(y)) < 0)
1366
        ERR2("SO_REUSEADDR");
1367

    
1368
#ifdef CONFIG_NO_IFACE_BIND
1369
      /* Workaround missing ability to bind to an iface */
1370
      if ((s->type == SK_UDP) && s->iface && ipa_zero(bind_addr))
1371
      {
1372
        if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &y, sizeof(y)) < 0)
1373
          ERR2("SO_REUSEPORT");
1374
      }
1375
#endif
1376
    }
1377

    
1378
    sockaddr_fill(&sa, af, bind_addr, s->iface, bind_port);
1379
    if (bind(fd, &sa.sa, SA_LEN(sa)) < 0)
1380
      ERR2("bind");
1381
  }
1382

    
1383
  if (s->password)
1384
    if (sk_set_md5_auth(s, s->daddr, s->iface, s->password) < 0)
1385
      goto err;
1386

    
1387
  switch (s->type)
1388
  {
1389
  case SK_TCP_ACTIVE:
1390
    sockaddr_fill(&sa, af, s->daddr, s->iface, s->dport);
1391
    if (connect(fd, &sa.sa, SA_LEN(sa)) >= 0)
1392
      sk_tcp_connected(s);
1393
    else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS &&
1394
             errno != ECONNREFUSED && errno != EHOSTUNREACH && errno != ENETUNREACH)
1395
      ERR2("connect");
1396
    break;
1397

    
1398
  case SK_TCP_PASSIVE:
1399
    if (listen(fd, 8) < 0)
1400
      ERR2("listen");
1401
    break;
1402

    
1403
  case SK_MAGIC:
1404
    break;
1405

    
1406
  default:
1407
    sk_alloc_bufs(s);
1408
  }
1409

    
1410
  if (!(s->flags & SKF_THREAD))
1411
    sk_insert(s);
1412
  return 0;
1413

    
1414
err:
1415
  close(fd);
1416
  s->fd = -1;
1417
  return -1;
1418
}
1419

    
1420
int
1421
sk_open_unix(sock *s, char *name)
1422
{
1423
  struct sockaddr_un sa;
1424
  int fd;
1425

    
1426
  /* We are sloppy during error (leak fd and not set s->err), but we die anyway */
1427

    
1428
  fd = socket(AF_UNIX, SOCK_STREAM, 0);
1429
  if (fd < 0)
1430
    return -1;
1431

    
1432
  if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
1433
    return -1;
1434

    
1435
  /* Path length checked in test_old_bird() */
1436
  sa.sun_family = AF_UNIX;
1437
  strcpy(sa.sun_path, name);
1438

    
1439
  if (bind(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) < 0)
1440
    return -1;
1441

    
1442
  if (listen(fd, 8) < 0)
1443
    return -1;
1444

    
1445
  s->fd = fd;
1446
  sk_insert(s);
1447
  return 0;
1448
}
1449

    
1450

    
1451
/* Control-message buffer sizes, large enough for either address family */

/* Receive side: packet info plus TTL */
#define CMSG_RX_SPACE \
  MAX(CMSG4_SPACE_PKTINFO+CMSG4_SPACE_TTL, CMSG6_SPACE_PKTINFO+CMSG6_SPACE_TTL)

/* Transmit side: packet info only */
#define CMSG_TX_SPACE \
  MAX(CMSG4_SPACE_PKTINFO,CMSG6_SPACE_PKTINFO)
1454

    
1455
static void
1456
sk_prepare_cmsgs(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen)
1457
{
1458
  if (sk_is_ipv4(s))
1459
    sk_prepare_cmsgs4(s, msg, cbuf, cbuflen);
1460
  else
1461
    sk_prepare_cmsgs6(s, msg, cbuf, cbuflen);
1462
}
1463

    
1464
static void
1465
sk_process_cmsgs(sock *s, struct msghdr *msg)
1466
{
1467
  struct cmsghdr *cm;
1468

    
1469
  s->laddr = IPA_NONE;
1470
  s->lifindex = 0;
1471
  s->rcv_ttl = -1;
1472

    
1473
  for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm))
1474
  {
1475
    if ((cm->cmsg_level == SOL_IP) && sk_is_ipv4(s))
1476
    {
1477
      sk_process_cmsg4_pktinfo(s, cm);
1478
      sk_process_cmsg4_ttl(s, cm);
1479
    }
1480

    
1481
    if ((cm->cmsg_level == SOL_IPV6) && sk_is_ipv6(s))
1482
    {
1483
      sk_process_cmsg6_pktinfo(s, cm);
1484
      sk_process_cmsg6_ttl(s, cm);
1485
    }
1486
  }
1487
}
1488

    
1489

    
1490
static inline int
1491
sk_sendmsg(sock *s)
1492
{
1493
  struct iovec iov = {s->tbuf, s->tpos - s->tbuf};
1494
  byte cmsg_buf[CMSG_TX_SPACE];
1495
  sockaddr dst;
1496

    
1497
  sockaddr_fill(&dst, s->af, s->daddr, s->iface, s->dport);
1498

    
1499
  struct msghdr msg = {
1500
    .msg_name = &dst.sa,
1501
    .msg_namelen = SA_LEN(dst),
1502
    .msg_iov = &iov,
1503
    .msg_iovlen = 1
1504
  };
1505

    
1506
#ifdef CONFIG_USE_HDRINCL
1507
  byte hdr[20];
1508
  struct iovec iov2[2] = { {hdr, 20}, iov };
1509

    
1510
  if (s->flags & SKF_HDRINCL)
1511
  {
1512
    sk_prepare_ip_header(s, hdr, iov.iov_len);
1513
    msg.msg_iov = iov2;
1514
    msg.msg_iovlen = 2;
1515
  }
1516
#endif
1517

    
1518
  if (s->flags & SKF_PKTINFO)
1519
    sk_prepare_cmsgs(s, &msg, cmsg_buf, sizeof(cmsg_buf));
1520

    
1521
  return sendmsg(s->fd, &msg, 0);
1522
}
1523

    
1524
static inline int
1525
sk_recvmsg(sock *s)
1526
{
1527
  struct iovec iov = {s->rbuf, s->rbsize};
1528
  byte cmsg_buf[CMSG_RX_SPACE];
1529
  sockaddr src;
1530

    
1531
  struct msghdr msg = {
1532
    .msg_name = &src.sa,
1533
    .msg_namelen = sizeof(src), // XXXX ??
1534
    .msg_iov = &iov,
1535
    .msg_iovlen = 1,
1536
    .msg_control = cmsg_buf,
1537
    .msg_controllen = sizeof(cmsg_buf),
1538
    .msg_flags = 0
1539
  };
1540

    
1541
  int rv = recvmsg(s->fd, &msg, 0);
1542
  if (rv < 0)
1543
    return rv;
1544

    
1545
  //ifdef IPV4
1546
  //  if (cf_type == SK_IP)
1547
  //    rv = ipv4_skip_header(pbuf, rv);
1548
  //endif
1549

    
1550
  sockaddr_read(&src, s->af, &s->faddr, NULL, &s->fport);
1551
  sk_process_cmsgs(s, &msg);
1552

    
1553
  if (msg.msg_flags & MSG_TRUNC)
1554
    s->flags |= SKF_TRUNCATED;
1555
  else
1556
    s->flags &= ~SKF_TRUNCATED;
1557

    
1558
  return rv;
1559
}
1560

    
1561

    
1562
/* Mark the transmit buffer of @s as empty: both cursors return to its start. */
static inline void
reset_tx_buffer(sock *s)
{
  s->tpos = s->tbuf;
  s->ttx = s->tbuf;
}
1563

    
1564
static int
1565
sk_maybe_write(sock *s)
1566
{
1567
  int e;
1568

    
1569
  switch (s->type)
1570
  {
1571
  case SK_TCP:
1572
  case SK_MAGIC:
1573
  case SK_UNIX:
1574
    while (s->ttx != s->tpos)
1575
    {
1576
      e = write(s->fd, s->ttx, s->tpos - s->ttx);
1577

    
1578
      if (e < 0)
1579
      {
1580
        if (errno != EINTR && errno != EAGAIN)
1581
        {
1582
          reset_tx_buffer(s);
1583
          /* EPIPE is just a connection close notification during TX */
1584
          s->err_hook(s, (errno != EPIPE) ? errno : 0);
1585
          return -1;
1586
        }
1587
        return 0;
1588
      }
1589
      s->ttx += e;
1590
    }
1591
    reset_tx_buffer(s);
1592
    return 1;
1593

    
1594
  case SK_UDP:
1595
  case SK_IP:
1596
    {
1597
      if (s->tbuf == s->tpos)
1598
        return 1;
1599

    
1600
      e = sk_sendmsg(s);
1601

    
1602
      if (e < 0)
1603
      {
1604
        if (errno != EINTR && errno != EAGAIN)
1605
        {
1606
          reset_tx_buffer(s);
1607
          s->err_hook(s, errno);
1608
          return -1;
1609
        }
1610

    
1611
        if (!s->tx_hook)
1612
          reset_tx_buffer(s);
1613
        return 0;
1614
      }
1615
      reset_tx_buffer(s);
1616
      return 1;
1617
    }
1618
  default:
1619
    bug("sk_maybe_write: unknown socket type %d", s->type);
1620
  }
1621
}
1622

    
1623
int
1624
sk_rx_ready(sock *s)
1625
{
1626
  fd_set rd, wr;
1627
  struct timeval timo;
1628
  int rv;
1629

    
1630
  FD_ZERO(&rd);
1631
  FD_ZERO(&wr);
1632
  FD_SET(s->fd, &rd);
1633

    
1634
  timo.tv_sec = 0;
1635
  timo.tv_usec = 0;
1636

    
1637
 redo:
1638
  rv = select(s->fd+1, &rd, &wr, NULL, &timo);
1639
  
1640
  if ((rv < 0) && (errno == EINTR || errno == EAGAIN))
1641
    goto redo;
1642

    
1643
  return rv;
1644
}
1645

    
1646
/**
1647
 * sk_send - send data to a socket
1648
 * @s: socket
1649
 * @len: number of bytes to send
1650
 *
1651
 * This function sends @len bytes of data prepared in the
1652
 * transmit buffer of the socket @s to the network connection.
1653
 * If the packet can be sent immediately, it does so and returns
1654
 * 1, else it queues the packet for later processing, returns 0
1655
 * and calls the @tx_hook of the socket when the tranmission
1656
 * takes place.
1657
 */
1658
int
1659
sk_send(sock *s, unsigned len)
1660
{
1661
  s->ttx = s->tbuf;
1662
  s->tpos = s->tbuf + len;
1663
  return sk_maybe_write(s);
1664
}
1665

    
1666
/**
1667
 * sk_send_to - send data to a specific destination
1668
 * @s: socket
1669
 * @len: number of bytes to send
1670
 * @addr: IP address to send the packet to
1671
 * @port: port to send the packet to
1672
 *
1673
 * This is a sk_send() replacement for connection-less packet sockets
1674
 * which allows destination of the packet to be chosen dynamically.
1675
 * Raw IP sockets should use 0 for @port.
1676
 */
1677
int
1678
sk_send_to(sock *s, unsigned len, ip_addr addr, unsigned port)
1679
{
1680
  s->daddr = addr;
1681
  if (port)
1682
    s->dport = port;
1683

    
1684
  s->ttx = s->tbuf;
1685
  s->tpos = s->tbuf + len;
1686
  return sk_maybe_write(s);
1687
}
1688

    
1689
/*
1690
int
1691
sk_send_full(sock *s, unsigned len, struct iface *ifa,
1692
             ip_addr saddr, ip_addr daddr, unsigned dport)
1693
{
1694
  s->iface = ifa;
1695
  s->saddr = saddr;
1696
  s->daddr = daddr;
1697
  s->dport = dport;
1698
  s->ttx = s->tbuf;
1699
  s->tpos = s->tbuf + len;
1700
  return sk_maybe_write(s);
1701
}
1702
*/
1703

    
1704
 /* sk_read() and sk_write() are called from BFD's event loop */
1705

    
1706
int
1707
sk_read(sock *s)
1708
{
1709
  switch (s->type)
1710
  {
1711
  case SK_TCP_PASSIVE:
1712
    return sk_passive_connected(s, SK_TCP);
1713

    
1714
  case SK_UNIX_PASSIVE:
1715
    return sk_passive_connected(s, SK_UNIX);
1716

    
1717
  case SK_TCP:
1718
  case SK_UNIX:
1719
    {
1720
      int c = read(s->fd, s->rpos, s->rbuf + s->rbsize - s->rpos);
1721

    
1722
      if (c < 0)
1723
      {
1724
        if (errno != EINTR && errno != EAGAIN)
1725
          s->err_hook(s, errno);
1726
      }
1727
      else if (!c)
1728
        s->err_hook(s, 0);
1729
      else
1730
      {
1731
        s->rpos += c;
1732
        if (s->rx_hook(s, s->rpos - s->rbuf))
1733
        {
1734
          /* We need to be careful since the socket could have been deleted by the hook */
1735
          if (current_sock == s)
1736
            s->rpos = s->rbuf;
1737
        }
1738
        return 1;
1739
      }
1740
      return 0;
1741
    }
1742

    
1743
  case SK_MAGIC:
1744
    return s->rx_hook(s, 0);
1745

    
1746
  default:
1747
    {
1748
      int e = sk_recvmsg(s);
1749

    
1750
      if (e < 0)
1751
      {
1752
        if (errno != EINTR && errno != EAGAIN)
1753
          s->err_hook(s, errno);
1754
        return 0;
1755
      }
1756

    
1757
      s->rpos = s->rbuf + e;
1758
      s->rx_hook(s, e);
1759
      return 1;
1760
    }
1761
  }
1762
}
1763

    
1764
int
1765
sk_write(sock *s)
1766
{
1767
  switch (s->type)
1768
  {
1769
  case SK_TCP_ACTIVE:
1770
    {
1771
      sockaddr sa;
1772
      sockaddr_fill(&sa, s->af, s->daddr, s->iface, s->dport);
1773

    
1774
      if (connect(s->fd, &sa.sa, SA_LEN(sa)) >= 0 || errno == EISCONN)
1775
        sk_tcp_connected(s);
1776
      else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS)
1777
        s->err_hook(s, errno);
1778
      return 0;
1779
    }
1780

    
1781
  default:
1782
    if (s->ttx != s->tpos && sk_maybe_write(s) > 0)
1783
    {
1784
      if (s->tx_hook)
1785
        s->tx_hook(s);
1786
      return 1;
1787
    }
1788
    return 0;
1789
  }
1790
}
1791

    
1792
void
1793
sk_dump_all(void)
1794
{
1795
  node *n;
1796
  sock *s;
1797

    
1798
  debug("Open sockets:\n");
1799
  WALK_LIST(n, sock_list)
1800
  {
1801
    s = SKIP_BACK(sock, n, n);
1802
    debug("%p ", s);
1803
    sk_dump(&s->r);
1804
  }
1805
  debug("\n");
1806
}
1807

    
1808

    
1809
/*
1810
 *        Main I/O Loop
1811
 */
1812

    
1813
/* Asynchronous request flags, checked by io_loop() before it enters select() */
volatile int async_config_flag;         /* Reconfiguration scheduled */
volatile int async_dump_flag;           /* Dump scheduled */
1815

    
1816
void
1817
io_init(void)
1818
{
1819
  init_list(&near_timers);
1820
  init_list(&far_timers);
1821
  init_list(&sock_list);
1822
  init_list(&global_event_list);
1823
  krt_io_init();
1824
  init_times();
1825
  update_times();
1826
  boot_time = now;
1827
  srandom((int) now_real);
1828
}
1829

    
1830
/* Upper bound on consecutive io_loop() iterations that skip the second RX pass */
#define SHORT_LOOP_MAX 10

/* Number of such iterations done since the last full one */
static int short_loops = 0;
1832

    
1833
void
1834
io_loop(void)
1835
{
1836
  fd_set rd, wr;
1837
  struct timeval timo;
1838
  time_t tout;
1839
  int hi, events;
1840
  sock *s;
1841
  node *n;
1842

    
1843
  sock_recalc_fdsets_p = 1;
1844
  for(;;)
1845
    {
1846
      events = ev_run_list(&global_event_list);
1847
      update_times();
1848
      tout = tm_first_shot();
1849
      if (tout <= now)
1850
        {
1851
          tm_shot();
1852
          continue;
1853
        }
1854
      timo.tv_sec = events ? 0 : MIN(tout - now, 3);
1855
      timo.tv_usec = 0;
1856

    
1857
      if (sock_recalc_fdsets_p)
1858
        {
1859
          sock_recalc_fdsets_p = 0;
1860
          FD_ZERO(&rd);
1861
          FD_ZERO(&wr);
1862
        }
1863

    
1864
      hi = 0;
1865
      WALK_LIST(n, sock_list)
1866
        {
1867
          s = SKIP_BACK(sock, n, n);
1868
          if (s->rx_hook)
1869
            {
1870
              FD_SET(s->fd, &rd);
1871
              if (s->fd > hi)
1872
                hi = s->fd;
1873
            }
1874
          else
1875
            FD_CLR(s->fd, &rd);
1876
          if (s->tx_hook && s->ttx != s->tpos)
1877
            {
1878
              FD_SET(s->fd, &wr);
1879
              if (s->fd > hi)
1880
                hi = s->fd;
1881
            }
1882
          else
1883
            FD_CLR(s->fd, &wr);
1884
        }
1885

    
1886
      /*
1887
       * Yes, this is racy. But even if the signal comes before this test
1888
       * and entering select(), it gets caught on the next timer tick.
1889
       */
1890

    
1891
      if (async_config_flag)
1892
        {
1893
          async_config();
1894
          async_config_flag = 0;
1895
          continue;
1896
        }
1897
      if (async_dump_flag)
1898
        {
1899
          async_dump();
1900
          async_dump_flag = 0;
1901
          continue;
1902
        }
1903
      if (async_shutdown_flag)
1904
        {
1905
          async_shutdown();
1906
          async_shutdown_flag = 0;
1907
          continue;
1908
        }
1909

    
1910
      /* And finally enter select() to find active sockets */
1911
      hi = select(hi+1, &rd, &wr, NULL, &timo);
1912

    
1913
      if (hi < 0)
1914
        {
1915
          if (errno == EINTR || errno == EAGAIN)
1916
            continue;
1917
          die("select: %m");
1918
        }
1919
      if (hi)
1920
        {
1921
          /* guaranteed to be non-empty */
1922
          current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
1923

    
1924
          while (current_sock)
1925
            {
1926
              sock *s = current_sock;
1927
              int e;
1928
              int steps;
1929

    
1930
              steps = MAX_STEPS;
1931
              if ((s->type >= SK_MAGIC) && FD_ISSET(s->fd, &rd) && s->rx_hook)
1932
                do
1933
                  {
1934
                    steps--;
1935
                    e = sk_read(s);
1936
                    if (s != current_sock)
1937
                      goto next;
1938
                  }
1939
                while (e && s->rx_hook && steps);
1940

    
1941
              steps = MAX_STEPS;
1942
              if (FD_ISSET(s->fd, &wr))
1943
                do
1944
                  {
1945
                    steps--;
1946
                    e = sk_write(s);
1947
                    if (s != current_sock)
1948
                      goto next;
1949
                  }
1950
                while (e && steps);
1951
              current_sock = sk_next(s);
1952
            next: ;
1953
            }
1954

    
1955
          short_loops++;
1956
          if (events && (short_loops < SHORT_LOOP_MAX))
1957
            continue;
1958
          short_loops = 0;
1959

    
1960
          int count = 0;
1961
          current_sock = stored_sock;
1962
          if (current_sock == NULL)
1963
            current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
1964

    
1965
          while (current_sock && count < MAX_RX_STEPS)
1966
            {
1967
              sock *s = current_sock;
1968
              int e;
1969

    
1970
              if ((s->type < SK_MAGIC) && FD_ISSET(s->fd, &rd) && s->rx_hook)
1971
                {
1972
                  count++;
1973
                  e = sk_read(s);
1974
                  if (s != current_sock)
1975
                      goto next2;
1976
                }
1977
              current_sock = sk_next(s);
1978
            next2: ;
1979
            }
1980

    
1981
          stored_sock = current_sock;
1982
        }
1983
    }
1984
}
1985

    
1986
/*
 * Check whether the UNIX-domain socket at @path already accepts
 * connections. Terminates through die() when the socket cannot be created,
 * when @path does not fit into sun_path, or when the connection attempt
 * succeeds (another BIRD is running); otherwise closes the probe
 * descriptor and returns.
 */
void
test_old_bird(char *path)
{
  struct sockaddr_un addr;
  int probe = socket(AF_UNIX, SOCK_STREAM, 0);

  if (probe < 0)
    die("Cannot create socket: %m");

  /* The path plus its terminating NUL must fit into sun_path */
  if (sizeof(addr.sun_path) <= strlen(path))
    die("Socket path too long");

  bzero(&addr, sizeof(addr));
  addr.sun_family = AF_UNIX;
  strcpy(addr.sun_path, path);

  if (connect(probe, (struct sockaddr *) &addr, SUN_LEN(&addr)) == 0)
    die("I found another BIRD running.");

  close(probe);
}
2004

    
2005