Statistics
| Branch: | Revision:

iof-bird-daemon / sysdep / unix / io.c @ b1a1faba

History | View | Annotate | Download (23.6 KB)

1
/*
2
 *        BIRD Internet Routing Daemon -- Unix I/O
3
 *
4
 *        (c) 1998--2000 Martin Mares <mj@ucw.cz>
5
 *      (c) 2004       Ondrej Filip <feela@network.cz>
6
 *
7
 *        Can be freely distributed and used under the terms of the GNU GPL.
8
 */
9

    
10
#include <stdio.h>
11
#include <stdlib.h>
12
#include <time.h>
13
#include <sys/time.h>
14
#include <sys/types.h>
15
#include <sys/socket.h>
16
#include <sys/fcntl.h>
17
#include <sys/un.h>
18
#include <unistd.h>
19
#include <errno.h>
20

    
21
#include "nest/bird.h"
22
#include "lib/lists.h"
23
#include "lib/resource.h"
24
#include "lib/timer.h"
25
#include "lib/socket.h"
26
#include "lib/event.h"
27
#include "lib/string.h"
28
#include "nest/iface.h"
29

    
30
#include "lib/unix.h"
31
#include "lib/sysio.h"
32

    
33
#define LOCAL_DEBUG
34
/*
35
 *        Tracked Files
36
 */
37

    
38
struct rfile {
39
  resource r;
40
  FILE *f;
41
};
42

    
43
static void
44
rf_free(resource *r)
45
{
46
  struct rfile *a = (struct rfile *) r;
47

    
48
  fclose(a->f);
49
}
50

    
51
static void
52
rf_dump(resource *r)
53
{
54
  struct rfile *a = (struct rfile *) r;
55

    
56
  debug("(FILE *%p)\n", a->f);
57
}
58

    
59
static struct resclass rf_class = {
60
  "FILE",
61
  sizeof(struct rfile),
62
  rf_free,
63
  rf_dump
64
};
65

    
66
void *
67
tracked_fopen(pool *p, char *name, char *mode)
68
{
69
  FILE *f = fopen(name, mode);
70

    
71
  if (f)
72
    {
73
      struct rfile *r = ralloc(p, &rf_class);
74
      r->f = f;
75
    }
76
  return f;
77
}
78

    
79
/**
80
 * DOC: Timers
81
 *
82
 * Timers are resources which represent a wish of a module to call
83
 * a function at the specified time. The platform dependent code
84
 * doesn't guarantee exact timing, only that a timer function
85
 * won't be called before the requested time.
86
 *
87
 * In BIRD, real time is represented by values of the &bird_clock_t type
88
 * which are integral numbers interpreted as a number of seconds since
89
 * a fixed (but platform dependent) epoch. The current time can be read
90
 * from a variable @now with reasonable accuracy.
91
 *
92
 * Each timer is described by a &timer structure containing a pointer
93
 * to the handler function (@hook), data private to this function (@data),
94
 * time the function should be called at (@expires, 0 for inactive timers),
95
 * for the other fields see |timer.h|.
96
 */
97

    
98
#define NEAR_TIMER_LIMIT 4
99

    
100
static list near_timers, far_timers;
101
static bird_clock_t first_far_timer = TIME_INFINITY;
102

    
103
bird_clock_t now;
104

    
105
static void
106
tm_free(resource *r)
107
{
108
  timer *t = (timer *) r;
109

    
110
  tm_stop(t);
111
}
112

    
113
static void
114
tm_dump(resource *r)
115
{
116
  timer *t = (timer *) r;
117

    
118
  debug("(code %p, data %p, ", t->hook, t->data);
119
  if (t->randomize)
120
    debug("rand %d, ", t->randomize);
121
  if (t->recurrent)
122
    debug("recur %d, ", t->recurrent);
123
  if (t->expires)
124
    debug("expires in %d sec)\n", t->expires - now);
125
  else
126
    debug("inactive)\n");
127
}
128

    
129
static struct resclass tm_class = {
130
  "Timer",
131
  sizeof(timer),
132
  tm_free,
133
  tm_dump
134
};
135

    
136
/**
137
 * tm_new - create a timer
138
 * @p: pool
139
 *
140
 * This function creates a new timer resource and returns
141
 * a pointer to it. To use the timer, you need to fill in
142
 * the structure fields and call tm_start() to start timing.
143
 */
144
timer *
145
tm_new(pool *p)
146
{
147
  timer *t = ralloc(p, &tm_class);
148
  t->hook = NULL;
149
  t->data = NULL;
150
  t->randomize = 0;
151
  t->expires = 0;
152
  return t;
153
}
154

    
155
static inline void
156
tm_insert_near(timer *t)
157
{
158
  node *n = HEAD(near_timers);
159

    
160
  while (n->next && (SKIP_BACK(timer, n, n)->expires < t->expires))
161
    n = n->next;
162
  insert_node(&t->n, n->prev);
163
}
164

    
165
/**
166
 * tm_start - start a timer
167
 * @t: timer
168
 * @after: number of seconds the timer should be run after
169
 *
170
 * This function schedules the hook function of the timer to
171
 * be called after @after seconds. If the timer has been already
172
 * started, it's @expire time is replaced by the new value.
173
 *
174
 * You can have set the @randomize field of @t, the timeout
175
 * will be increased by a random number of seconds chosen
176
 * uniformly from range 0 .. @randomize.
177
 *
178
 * You can call tm_start() from the handler function of the timer
179
 * to request another run of the timer. Also, you can set the @recurrent
180
 * field to have the timer re-added automatically with the same timeout.
181
 */
182
void
183
tm_start(timer *t, unsigned after)
184
{
185
  bird_clock_t when;
186

    
187
  if (t->randomize)
188
    after += random() % (t->randomize + 1);
189
  when = now + after;
190
  if (t->expires == when)
191
    return;
192
  if (t->expires)
193
    rem_node(&t->n);
194
  t->expires = when;
195
  if (after <= NEAR_TIMER_LIMIT)
196
    tm_insert_near(t);
197
  else
198
    {
199
      if (!first_far_timer || first_far_timer > when)
200
        first_far_timer = when;
201
      add_tail(&far_timers, &t->n);
202
    }
203
}
204

    
205
/**
206
 * tm_stop - stop a timer
207
 * @t: timer
208
 *
209
 * This function stops a timer. If the timer is already stopped,
210
 * nothing happens.
211
 */
212
void
213
tm_stop(timer *t)
214
{
215
  if (t->expires)
216
    {
217
      rem_node(&t->n);
218
      t->expires = 0;
219
    }
220
}
221

    
222
static void
223
tm_dump_them(char *name, list *l)
224
{
225
  node *n;
226
  timer *t;
227

    
228
  debug("%s timers:\n", name);
229
  WALK_LIST(n, *l)
230
    {
231
      t = SKIP_BACK(timer, n, n);
232
      debug("%p ", t);
233
      tm_dump(&t->r);
234
    }
235
  debug("\n");
236
}
237

    
238
void
239
tm_dump_all(void)
240
{
241
  tm_dump_them("Near", &near_timers);
242
  tm_dump_them("Far", &far_timers);
243
}
244

    
245
static inline time_t
246
tm_first_shot(void)
247
{
248
  time_t x = first_far_timer;
249

    
250
  if (!EMPTY_LIST(near_timers))
251
    {
252
      timer *t = SKIP_BACK(timer, n, HEAD(near_timers));
253
      if (t->expires < x)
254
        x = t->expires;
255
    }
256
  return x;
257
}
258

    
259
static void
260
tm_shot(void)
261
{
262
  timer *t;
263
  node *n, *m;
264

    
265
  if (first_far_timer <= now)
266
    {
267
      bird_clock_t limit = now + NEAR_TIMER_LIMIT;
268
      first_far_timer = TIME_INFINITY;
269
      n = HEAD(far_timers);
270
      while (m = n->next)
271
        {
272
          t = SKIP_BACK(timer, n, n);
273
          if (t->expires <= limit)
274
            {
275
              rem_node(n);
276
              tm_insert_near(t);
277
            }
278
          else if (t->expires < first_far_timer)
279
            first_far_timer = t->expires;
280
          n = m;
281
        }
282
    }
283
  while ((n = HEAD(near_timers)) -> next)
284
    {
285
      int delay;
286
      t = SKIP_BACK(timer, n, n);
287
      if (t->expires > now)
288
        break;
289
      rem_node(n);
290
      delay = t->expires - now;
291
      t->expires = 0;
292
      if (t->recurrent)
293
        {
294
          int i = t->recurrent - delay;
295
          if (i < 0)
296
            i = 0;
297
          tm_start(t, i);
298
        }
299
      t->hook(t);
300
    }
301
}
302

    
303
/**
304
 * tm_parse_date - parse a date
305
 * @x: date string
306
 *
307
 * tm_parse_date() takes a textual representation of a date (dd-mm-yyyy)
308
 * and converts it to the corresponding value of type &bird_clock_t.
309
 */
310
bird_clock_t
311
tm_parse_date(char *x)
312
{
313
  struct tm tm;
314
  int n;
315
  time_t t;
316

    
317
  if (sscanf(x, "%d-%d-%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &n) != 3 || x[n])
318
    return 0;
319
  tm.tm_mon--;
320
  tm.tm_year -= 1900;
321
  tm.tm_hour = tm.tm_min = tm.tm_sec = 0;
322
  t = mktime(&tm);
323
  if (t == (time_t) -1)
324
    return 0;
325
  return t;
326
}
327

    
328
/**
329
 * tm_format_date - convert date to textual representation
330
 * @x: destination buffer of size %TM_DATE_BUFFER_SIZE
331
 * @t: time
332
 *
333
 * This function formats the given time value @t to a textual
334
 * date representation (dd-mm-yyyy).
335
 */
336
void
337
tm_format_date(char *x, bird_clock_t t)
338
{
339
  struct tm *tm;
340

    
341
  tm = localtime(&t);
342
  bsprintf(x, "%02d-%02d-%04d", tm->tm_mday, tm->tm_mon+1, tm->tm_year+1900);
343
}
344

    
345
/**
346
 * tm_format_datetime - convert date and time to textual representation
347
 * @x: destination buffer of size %TM_DATETIME_BUFFER_SIZE
348
 * @t: time
349
 *
350
 * This function formats the given time value @t to a textual
351
 * date/time representation (dd-mm-yyyy hh:mm:ss).
352
 */
353
void
354
tm_format_datetime(char *x, bird_clock_t t)
355
{
356
  struct tm *tm;
357

    
358
  tm = localtime(&t);
359
  if (strftime(x, TM_DATETIME_BUFFER_SIZE, "%d-%m-%Y %H:%M:%S", tm) == TM_DATETIME_BUFFER_SIZE)
360
    strcpy(x, "<too-long>");
361
}
362

    
363
/**
364
 * tm_format_reltime - convert date and time to relative textual representation
365
 * @x: destination buffer of size %TM_RELTIME_BUFFER_SIZE
366
 * @t: time
367
 *
368
 * This function formats the given time value @t to a short
369
 * textual representation relative to the current time.
370
 */
371
void
372
tm_format_reltime(char *x, bird_clock_t t)
373
{
374
  struct tm *tm;
375
  bird_clock_t delta = (t < now) ? (now - t) : (t - now);
376
  static char *month_names[12] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
377

    
378
  tm = localtime(&t);
379
  if (delta < 20*3600)
380
    bsprintf(x, "%02d:%02d", tm->tm_hour, tm->tm_min);
381
  else if (delta < 360*86400)
382
    bsprintf(x, "%s%02d", month_names[tm->tm_mon], tm->tm_mday);
383
  else
384
    bsprintf(x, "%d", tm->tm_year+1900);
385
}
386

    
387
/**
388
 * DOC: Sockets
389
 *
390
 * Socket resources represent network connections. Their data structure (&socket)
391
 * contains a lot of fields defining the exact type of the socket, the local and
392
 * remote addresses and ports, pointers to socket buffers and finally pointers to
393
 * hook functions to be called when new data have arrived to the receive buffer
394
 * (@rx_hook), when the contents of the transmit buffer have been transmitted
395
 * (@tx_hook) and when an error or connection close occurs (@err_hook).
396
 *
397
 * You should not use rfree() from inside a socket hook, please use sk_close() instead.
398
 */
399

    
400
#ifndef SOL_IP
401
#define SOL_IP IPPROTO_IP
402
#endif
403

    
404
#ifndef SOL_IPV6
405
#define SOL_IPV6 IPPROTO_IPV6
406
#endif
407

    
408
#ifndef IPV6_ADD_MEMBERSHIP
409
#define IPV6_ADD_MEMBERSHIP IP_ADD_MEMBERSHIP
410
#endif
411

    
412
static list sock_list;
413

    
414
static void
415
sk_free(resource *r)
416
{
417
  sock *s = (sock *) r;
418

    
419
  if (s->fd >= 0)
420
    {
421
      close(s->fd);
422
      rem_node(&s->n);
423
    }
424
}
425

    
426
static void
427
sk_dump(resource *r)
428
{
429
  sock *s = (sock *) r;
430
  static char *sk_type_names[] = { "TCP<", "TCP>", "TCP", "UDP", "UDP/MC", "IP", "IP/MC", "MAGIC", "UNIX<", "UNIX", "DEL!" };
431

    
432
  debug("(%s, ud=%p, sa=%08x, sp=%d, da=%08x, dp=%d, tos=%d, ttl=%d, if=%s)\n",
433
        sk_type_names[s->type],
434
        s->data,
435
        s->saddr,
436
        s->sport,
437
        s->daddr,
438
        s->dport,
439
        s->tos,
440
        s->ttl,
441
        s->iface ? s->iface->name : "none");
442
}
443

    
444
static struct resclass sk_class = {
445
  "Socket",
446
  sizeof(sock),
447
  sk_free,
448
  sk_dump
449
};
450

    
451
/**
452
 * sk_new - create a socket
453
 * @p: pool
454
 *
455
 * This function creates a new socket resource. If you want to use it,
456
 * you need to fill in all the required fields of the structure and
457
 * call sk_open() to do the actual opening of the socket.
458
 */
459
sock *
460
sk_new(pool *p)
461
{
462
  sock *s = ralloc(p, &sk_class);
463
  s->pool = p;
464
  s->data = NULL;
465
  s->saddr = s->daddr = IPA_NONE;
466
  s->sport = s->dport = 0;
467
  s->tos = s->ttl = -1;
468
  s->iface = NULL;
469
  s->rbuf = NULL;
470
  s->rx_hook = NULL;
471
  s->rbsize = 0;
472
  s->tbuf = NULL;
473
  s->tx_hook = NULL;
474
  s->tbsize = 0;
475
  s->err_hook = NULL;
476
  s->fd = -1;
477
  s->entered = 0;
478
  return s;
479
}
480

    
481
/* Record the name of the failed operation in the local `err' and jump to the local `bad' label. */
#define ERR(msg) do { err = (msg); goto bad; } while (0)
/* Log a non-fatal socket setup problem together with the errno text. */
#define WARN(msg) log(L_WARN "sk_setup: %s: %m", (msg))
483

    
484
#ifdef IPV6
485

    
486
void
487
fill_in_sockaddr(sockaddr *sa, ip_addr a, unsigned port)
488
{
489
  memset (sa, 0, sizeof (struct sockaddr_in6));
490
  sa->sin6_family = AF_INET6;
491
  sa->sin6_port = htons(port);
492
  sa->sin6_flowinfo = 0;
493
#ifdef HAVE_SIN_LEN
494
  sa->sin6_len = sizeof(struct sockaddr_in6);
495
#endif
496
  set_inaddr(&sa->sin6_addr, a);
497
}
498

    
499
void
500
get_sockaddr(struct sockaddr_in6 *sa, ip_addr *a, unsigned *port, int check)
501
{
502
  if (check && sa->sin6_family != AF_INET6)
503
    bug("get_sockaddr called for wrong address family (%d)", sa->sin6_family);
504
  if (port)
505
    *port = ntohs(sa->sin6_port);
506
  memcpy(a, &sa->sin6_addr, sizeof(*a));
507
  ipa_ntoh(*a);
508
}
509

    
510
#else
511

    
512
void
513
fill_in_sockaddr(sockaddr *sa, ip_addr a, unsigned port)
514
{
515
  memset (sa, 0, sizeof (struct sockaddr_in));
516
  sa->sin_family = AF_INET;
517
  sa->sin_port = htons(port);
518
#ifdef HAVE_SIN_LEN
519
  sa->sin_len = sizeof(struct sockaddr_in);
520
#endif
521
  set_inaddr(&sa->sin_addr, a);
522
}
523

    
524
void
525
get_sockaddr(struct sockaddr_in *sa, ip_addr *a, unsigned *port, int check)
526
{
527
  if (check && sa->sin_family != AF_INET)
528
    bug("get_sockaddr called for wrong address family (%d)", sa->sin_family);
529
  if (port)
530
    *port = ntohs(sa->sin_port);
531
  memcpy(a, &sa->sin_addr.s_addr, sizeof(*a));
532
  ipa_ntoh(*a);
533
}
534

    
535
#endif
536

    
537
static char *
538
sk_setup(sock *s)
539
{
540
  int fd = s->fd;
541
  int one = 1;
542
  char *err;
543

    
544
  if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
545
    ERR("fcntl(O_NONBLOCK)");
546
  if (s->type == SK_UNIX)
547
    return NULL;
548
#ifdef IPV6
549
  if (s->ttl >= 0 && s->type != SK_UDP_MC && s->type != SK_IP_MC &&
550
      setsockopt(fd, SOL_IPV6, IPV6_UNICAST_HOPS, &s->ttl, sizeof(s->ttl)) < 0)
551
    ERR("IPV6_UNICAST_HOPS");
552
#else
553
  if ((s->tos >= 0) && setsockopt(fd, SOL_IP, IP_TOS, &s->tos, sizeof(s->tos)) < 0)
554
    WARN("IP_TOS");
555
  if (s->ttl >= 0 && setsockopt(fd, SOL_IP, IP_TTL, &s->ttl, sizeof(s->ttl)) < 0)
556
    ERR("IP_TTL");
557
  //if (s->ttl == 1 && setsockopt(fd, SOL_SOCKET, SO_DONTROUTE, &one, sizeof(one)) < 0)
558
  //  ERR("SO_DONTROUTE");
559
#endif
560
  err = NULL;
561
bad:
562
  return err;
563
}
564

    
565
static void
566
sk_alloc_bufs(sock *s)
567
{
568
  if (!s->rbuf && s->rbsize)
569
    s->rbuf = mb_alloc(s->pool, s->rbsize);
570
  s->rpos = s->rbuf;
571
  if (!s->tbuf && s->tbsize)
572
    s->tbuf = mb_alloc(s->pool, s->tbsize);
573
  s->tpos = s->ttx = s->tbuf;
574
}
575

    
576
static void
577
sk_tcp_connected(sock *s)
578
{
579
  s->type = SK_TCP;
580
  sk_alloc_bufs(s);
581
  s->tx_hook(s);
582
}
583

    
584
static int
585
sk_passive_connected(sock *s, struct sockaddr *sa, int al, int type)
586
{
587
  int fd = accept(s->fd, sa, &al);
588
  if (fd >= 0)
589
    {
590
      sock *t = sk_new(s->pool);
591
      char *err;
592
      t->type = type;
593
      t->fd = fd;
594
      t->ttl = s->ttl;
595
      t->tos = s->tos;
596
      t->rbsize = s->rbsize;
597
      t->tbsize = s->tbsize;
598
      if (type == SK_TCP)
599
        get_sockaddr((sockaddr *) sa, &t->daddr, &t->dport, 1);
600
      add_tail(&sock_list, &t->n);
601
      if (err = sk_setup(t))
602
        {
603
          log(L_ERR "Incoming connection: %s: %m", err);
604
          rfree(t);
605
          return 1;
606
        }
607
      sk_alloc_bufs(t);
608
      s->rx_hook(t, 0);
609
      return 1;
610
    }
611
  else if (errno != EINTR && errno != EAGAIN)
612
    {
613
      log(L_ERR "accept: %m");
614
      s->err_hook(s, errno);
615
    }
616
  return 0;
617
}
618

    
619
/**
620
 * sk_open - open a socket
621
 * @s: socket
622
 *
623
 * This function takes a socket resource created by sk_new() and
624
 * initialized by the user and binds a corresponding network connection
625
 * to it.
626
 *
627
 * Result: 0 for success, -1 for an error.
628
 */
629
int
630
sk_open(sock *s)
631
{
632
  int fd;
633
  sockaddr sa;
634
  int one = 1;
635
  int type = s->type;
636
  int has_src = ipa_nonzero(s->saddr) || s->sport;
637
  char *err;
638

    
639
  switch (type)
640
    {
641
    case SK_TCP_ACTIVE:
642
      s->ttx = "";                        /* Force s->ttx != s->tpos */
643
      /* Fall thru */
644
    case SK_TCP_PASSIVE:
645
      fd = socket(BIRD_PF, SOCK_STREAM, IPPROTO_TCP);
646
      break;
647
    case SK_UDP:
648
    case SK_UDP_MC:
649
      fd = socket(BIRD_PF, SOCK_DGRAM, IPPROTO_UDP);
650
      break;
651
    case SK_IP:
652
    case SK_IP_MC:
653
      fd = socket(BIRD_PF, SOCK_RAW, s->dport);
654
      break;
655
    case SK_MAGIC:
656
      fd = s->fd;
657
      break;
658
    default:
659
      bug("sk_open() called for invalid sock type %d", type);
660
    }
661
  if (fd < 0)
662
    die("sk_open: socket: %m");
663
  s->fd = fd;
664

    
665
  if (err = sk_setup(s))
666
  {
667
    goto bad;
668
  }
669
  switch (type)
670
    {
671
    case SK_UDP:
672
    case SK_IP:
673
      if (s->iface)                        /* It's a broadcast socket */
674
#ifdef IPV6
675
        bug("IPv6 has no broadcasts");
676
#else
677
        if (setsockopt(fd, SOL_SOCKET, SO_BROADCAST, &one, sizeof(one)) < 0)
678
          ERR("SO_BROADCAST");
679
#endif
680
      break;
681
    case SK_UDP_MC:
682
    case SK_IP_MC:
683
      {
684
#ifdef IPV6
685
        /* Fortunately, IPv6 socket interface is recent enough and therefore standardized */
686
        ASSERT(s->iface && s->iface->addr);
687
        if (ipa_nonzero(s->daddr))
688
          {
689
            int t = s->iface->index;
690
            int zero = 0;
691
            if (setsockopt(fd, SOL_IPV6, IPV6_MULTICAST_HOPS, &s->ttl, sizeof(s->ttl)) < 0)
692
              ERR("IPV6_MULTICAST_HOPS");
693
            if (setsockopt(fd, SOL_IPV6, IPV6_MULTICAST_LOOP, &zero, sizeof(zero)) < 0)
694
              ERR("IPV6_MULTICAST_LOOP");
695
            if (setsockopt(fd, SOL_IPV6, IPV6_MULTICAST_IF, &t, sizeof(t)) < 0)
696
              ERR("IPV6_MULTICAST_IF");
697
          }
698
        if (has_src)
699
          {
700
            struct ipv6_mreq mreq;
701
            set_inaddr(&mreq.ipv6mr_multiaddr, s->daddr);
702
#ifdef CONFIG_IPV6_GLIBC_20
703
            mreq.ipv6mr_ifindex = s->iface->index;
704
#else
705
            mreq.ipv6mr_interface = s->iface->index;
706
#endif /* CONFIG_IPV6_GLIBC_20 */
707
            if (setsockopt(fd, SOL_IPV6, IPV6_ADD_MEMBERSHIP, &mreq, sizeof(mreq)) < 0)
708
              ERR("IPV6_ADD_MEMBERSHIP");
709
          }
710
#else
711
        /* With IPv4 there are zillions of different socket interface variants. Ugh. */
712
        ASSERT(s->iface && s->iface->addr);
713
        if (err = sysio_mcast_join(s))
714
          goto bad;
715
#endif /* IPV6 */
716
      break;
717
      }
718
    }
719
  if (has_src)
720
    {
721
      int port;
722

    
723
      if (type == SK_IP || type == SK_IP_MC)
724
        port = 0;
725
      else
726
        {
727
          port = s->sport;
728
          if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) < 0)
729
            ERR("SO_REUSEADDR");
730
        }
731
      fill_in_sockaddr(&sa, s->saddr, port);
732
      if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0)
733
        ERR("bind");
734
    }
735
  fill_in_sockaddr(&sa, s->daddr, s->dport);
736
  switch (type)
737
    {
738
    case SK_TCP_ACTIVE:
739
      if (connect(fd, (struct sockaddr *) &sa, sizeof(sa)) >= 0)
740
        sk_tcp_connected(s);
741
      else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS &&
742
               errno != ECONNREFUSED && errno != EHOSTUNREACH)
743
        ERR("connect");
744
      break;
745
    case SK_TCP_PASSIVE:
746
      if (listen(fd, 8))
747
        ERR("listen");
748
      break;
749
    case SK_MAGIC:
750
      break;
751
    default:
752
      sk_alloc_bufs(s);
753
#ifdef IPV6
754
#ifdef IPV6_MTU_DISCOVER
755
      {
756
        int dont = IPV6_PMTUDISC_DONT;
757
        if (setsockopt(fd, SOL_IPV6, IPV6_MTU_DISCOVER, &dont, sizeof(dont)) < 0)
758
          ERR("IPV6_MTU_DISCOVER");
759
      }
760
#endif
761
#else
762
#ifdef IP_PMTUDISC
763
      {
764
        int dont = IP_PMTUDISC_DONT;
765
        if (setsockopt(fd, SOL_IP, IP_PMTUDISC, &dont, sizeof(dont)) < 0)
766
          ERR("IP_PMTUDISC");
767
      }
768
#endif
769
#endif
770
    }
771

    
772
  add_tail(&sock_list, &s->n);
773
  return 0;
774

    
775
bad:
776
  log(L_ERR "sk_open: %s: %m", err);
777
  close(fd);
778
  s->fd = -1;
779
  return -1;
780
}
781

    
782
int
783
sk_open_unix(sock *s, char *name)
784
{
785
  int fd;
786
  struct sockaddr_un sa;
787
  char *err;
788

    
789
  fd = socket(AF_UNIX, SOCK_STREAM, 0);
790
  if (fd < 0)
791
    die("sk_open_unix: socket: %m");
792
  s->fd = fd;
793
  if (err = sk_setup(s))
794
    goto bad;
795
  unlink(name);
796
  sa.sun_family = AF_UNIX;
797
  strcpy(sa.sun_path, name);
798
  if (bind(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) < 0)
799
    ERR("bind");
800
  if (listen(fd, 8))
801
    ERR("listen");
802
  add_tail(&sock_list, &s->n);
803
  return 0;
804

    
805
bad:
806
  log(L_ERR "sk_open_unix: %s: %m", err);
807
  close(fd);
808
  s->fd = -1;
809
  return -1;
810
}
811

    
812
/**
813
 * sk_close - close a socket
814
 * @s: a socket
815
 *
816
 * If sk_close() has been called from outside of any socket hook,
817
 * it translates to a rfree(), else it just marks the socket for
818
 * deletion as soon as the socket hook returns.
819
 */
820
void
821
sk_close(sock *s)
822
{
823
  if (s && s->entered)
824
    s->type = SK_DELETED;
825
  else
826
    rfree(s);
827
}
828

    
829
static int
830
sk_maybe_write(sock *s)
831
{
832
  int e;
833

    
834
  switch (s->type)
835
    {
836
    case SK_TCP:
837
    case SK_MAGIC:
838
    case SK_UNIX:
839
      while (s->ttx != s->tpos)
840
        {
841
          e = write(s->fd, s->ttx, s->tpos - s->ttx);
842
          if (e < 0)
843
            {
844
              if (errno != EINTR && errno != EAGAIN)
845
                {
846
                  s->err_hook(s, errno);
847
                  return -1;
848
                }
849
              return 0;
850
            }
851
          s->ttx += e;
852
        }
853
      s->ttx = s->tpos = s->tbuf;
854
      return 1;
855
    case SK_UDP:
856
    case SK_UDP_MC:
857
    case SK_IP:
858
    case SK_IP_MC:
859
      {
860
        sockaddr sa;
861

    
862
        if (s->tbuf == s->tpos)
863
          return 1;
864
        fill_in_sockaddr(&sa, s->faddr, s->fport);
865

    
866
        e = sendto(s->fd, s->tbuf, s->tpos - s->tbuf, 0, (struct sockaddr *) &sa, sizeof(sa));
867
        if (e < 0)
868
          {
869
            if (errno != EINTR && errno != EAGAIN)
870
              {
871
                s->err_hook(s, errno);
872
                return -1;
873
              }
874
            return 0;
875
          }
876
        s->tpos = s->tbuf;
877
        return 1;
878
      }
879
    default:
880
      bug("sk_maybe_write: unknown socket type %d", s->type);
881
    }
882
}
883

    
884
/**
885
 * sk_send - send data to a socket
886
 * @s: socket
887
 * @len: number of bytes to send
888
 *
889
 * This function sends @len bytes of data prepared in the
890
 * transmit buffer of the socket @s to the network connection.
891
 * If the packet can be sent immediately, it does so and returns
892
 * 1, else it queues the packet for later processing, returns 0
893
 * and calls the @tx_hook of the socket when the tranmission
894
 * takes place.
895
 */
896
int
897
sk_send(sock *s, unsigned len)
898
{
899
  s->faddr = s->daddr;
900
  s->fport = s->dport;
901
  s->ttx = s->tbuf;
902
  s->tpos = s->tbuf + len;
903
  return sk_maybe_write(s);
904
}
905

    
906
/**
907
 * sk_send_to - send data to a specific destination
908
 * @s: socket
909
 * @len: number of bytes to send
910
 * @addr: IP address to send the packet to
911
 * @port: port to send the packet to
912
 *
913
 * This is a sk_send() replacement for connection-less packet sockets
914
 * which allows destination of the packet to be chosen dynamically.
915
 */
916
int
917
sk_send_to(sock *s, unsigned len, ip_addr addr, unsigned port)
918
{
919
  s->faddr = addr;
920
  s->fport = port;
921
  s->ttx = s->tbuf;
922
  s->tpos = s->tbuf + len;
923
  return sk_maybe_write(s);
924
}
925

    
926
static int
927
sk_read(sock *s)
928
{
929
  switch (s->type)
930
    {
931
    case SK_TCP_PASSIVE:
932
      {
933
        sockaddr sa;
934
        return sk_passive_connected(s, (struct sockaddr *) &sa, sizeof(sa), SK_TCP);
935
      }
936
    case SK_UNIX_PASSIVE:
937
      {
938
        struct sockaddr_un sa;
939
        return sk_passive_connected(s, (struct sockaddr *) &sa, sizeof(sa), SK_UNIX);
940
      }
941
    case SK_TCP:
942
    case SK_UNIX:
943
      {
944
        int c = read(s->fd, s->rpos, s->rbuf + s->rbsize - s->rpos);
945

    
946
        if (c < 0)
947
          {
948
            if (errno != EINTR && errno != EAGAIN)
949
              s->err_hook(s, errno);
950
          }
951
        else if (!c)
952
          s->err_hook(s, 0);
953
        else
954
          {
955
            s->rpos += c;
956
            if (s->rx_hook(s, s->rpos - s->rbuf))
957
              s->rpos = s->rbuf;
958
            return 1;
959
          }
960
        return 0;
961
      }
962
    case SK_MAGIC:
963
      return s->rx_hook(s, 0);
964
    case SK_DELETED:
965
      return 0;
966
    default:
967
      {
968
        sockaddr sa;
969
        int al = sizeof(sa);
970
        int e = recvfrom(s->fd, s->rbuf, s->rbsize, 0, (struct sockaddr *) &sa, &al);
971

    
972
        if (e < 0)
973
          {
974
            if (errno != EINTR && errno != EAGAIN)
975
              s->err_hook(s, errno);
976
            return 0;
977
          }
978
        s->rpos = s->rbuf + e;
979
        get_sockaddr(&sa, &s->faddr, &s->fport, 1);
980
        s->rx_hook(s, e);
981
        return 1;
982
      }
983
    }
984
}
985

    
986
static void
987
sk_write(sock *s)
988
{
989
  switch (s->type)
990
    {
991
    case SK_TCP_ACTIVE:
992
      {
993
        sockaddr sa;
994
        fill_in_sockaddr(&sa, s->daddr, s->dport);
995
        if (connect(s->fd, (struct sockaddr *) &sa, sizeof(sa)) >= 0)
996
          sk_tcp_connected(s);
997
        else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS)
998
          s->err_hook(s, errno);
999
        break;
1000
      }
1001
    case SK_DELETED:
1002
      return;
1003
    default:
1004
      while (s->ttx != s->tpos && sk_maybe_write(s) > 0)
1005
        s->tx_hook(s);
1006
    }
1007
}
1008

    
1009
void
1010
sk_dump_all(void)
1011
{
1012
  node *n;
1013
  sock *s;
1014

    
1015
  debug("Open sockets:\n");
1016
  WALK_LIST(n, sock_list)
1017
    {
1018
      s = SKIP_BACK(sock, n, n);
1019
      debug("%p ", s);
1020
      sk_dump(&s->r);
1021
    }
1022
  debug("\n");
1023
}
1024

    
1025
#undef ERR
1026
#undef WARN
1027

    
1028
/*
1029
 *        Main I/O Loop
1030
 */
1031

    
1032
/*
 * Asynchronous reconfiguration/dump scheduled. io_loop() tests these
 * flags before entering select(), runs async_config() or async_dump()
 * respectively and clears the flag afterwards.
 */
volatile int async_config_flag;
volatile int async_dump_flag;
1034

    
1035
void
1036
io_init(void)
1037
{
1038
  init_list(&near_timers);
1039
  init_list(&far_timers);
1040
  init_list(&sock_list);
1041
  init_list(&global_event_list);
1042
  krt_io_init();
1043
  now = time(NULL);
1044
  srandom((int) now);
1045
}
1046

    
1047
void
1048
io_loop(void)
1049
{
1050
  fd_set rd, wr;
1051
  struct timeval timo;
1052
  time_t tout;
1053
  int hi, events;
1054
  sock *s;
1055
  node *n, *p;
1056

    
1057
  FD_ZERO(&rd);
1058
  FD_ZERO(&wr);
1059
  for(;;)
1060
    {
1061
      events = ev_run_list(&global_event_list);
1062
      now = time(NULL);
1063
      tout = tm_first_shot();
1064
      if (tout <= now)
1065
        {
1066
          tm_shot();
1067
          continue;
1068
        }
1069
      timo.tv_sec = events ? 0 : tout - now;
1070
      timo.tv_usec = 0;
1071

    
1072
      hi = 0;
1073
      WALK_LIST(n, sock_list)
1074
        {
1075
          s = SKIP_BACK(sock, n, n);
1076
          if (s->rx_hook)
1077
            {
1078
              FD_SET(s->fd, &rd);
1079
              if (s->fd > hi)
1080
                hi = s->fd;
1081
            }
1082
          if (s->tx_hook && s->ttx != s->tpos)
1083
            {
1084
              FD_SET(s->fd, &wr);
1085
              if (s->fd > hi)
1086
                hi = s->fd;
1087
            }
1088
        }
1089

    
1090
      /*
1091
       * Yes, this is racy. But even if the signal comes before this test
1092
       * and entering select(), it gets caught on the next timer tick.
1093
       */
1094

    
1095
      if (async_config_flag)
1096
        {
1097
          async_config();
1098
          async_config_flag = 0;
1099
          continue;
1100
        }
1101
      if (async_dump_flag)
1102
        {
1103
          async_dump();
1104
          async_dump_flag = 0;
1105
          continue;
1106
        }
1107
      if (async_shutdown_flag)
1108
        {
1109
          async_shutdown();
1110
          async_shutdown_flag = 0;
1111
          continue;
1112
        }
1113

    
1114
      /* And finally enter select() to find active sockets */
1115

    
1116
      hi = select(hi+1, &rd, &wr, NULL, &timo);
1117
      if (hi < 0)
1118
        {
1119
          if (errno == EINTR || errno == EAGAIN)
1120
            continue;
1121
          die("select: %m");
1122
        }
1123
      if (hi)
1124
        {
1125
          WALK_LIST_DELSAFE(n, p, sock_list)
1126
            {
1127
              s = SKIP_BACK(sock, n, n);
1128
              s->entered = 1;
1129
              if (FD_ISSET(s->fd, &rd))
1130
                {
1131
                  FD_CLR(s->fd, &rd);
1132
                  while (sk_read(s))
1133
                    ;
1134
                }
1135
              if (s->type != SK_DELETED && FD_ISSET(s->fd, &wr))
1136
                {
1137
                  FD_CLR(s->fd, &wr);
1138
                  sk_write(s);
1139
                }
1140
              s->entered = 0;
1141
              if (s->type == SK_DELETED)
1142
                rfree(s);
1143
            }
1144
        }
1145
    }
1146
}