Statistics
| Branch: | Revision:

iof-bird-daemon / sysdep / unix / io.c @ 0b3bf4b1

History | View | Annotate | Download (23 KB)

1
/*
2
 *        BIRD Internet Routing Daemon -- Unix I/O
3
 *
4
 *        (c) 1998--2000 Martin Mares <mj@ucw.cz>
5
 *
6
 *        Can be freely distributed and used under the terms of the GNU GPL.
7
 */
8

    
9
#include <stdio.h>
10
#include <stdlib.h>
11
#include <sys/time.h>
12
#include <sys/types.h>
13
#include <sys/socket.h>
14
#include <sys/fcntl.h>
15
#include <sys/un.h>
16
#include <unistd.h>
17
#include <errno.h>
18

    
19
#include "nest/bird.h"
20
#include "lib/lists.h"
21
#include "lib/resource.h"
22
#include "lib/timer.h"
23
#include "lib/socket.h"
24
#include "lib/event.h"
25
#include "lib/string.h"
26
#include "nest/iface.h"
27

    
28
#include "lib/unix.h"
29
#include "lib/sysio.h"
30

    
31
/*
32
 *        Tracked Files
33
 */
34

    
35
struct rfile {
36
  resource r;
37
  FILE *f;
38
};
39

    
40
static void
41
rf_free(resource *r)
42
{
43
  struct rfile *a = (struct rfile *) r;
44

    
45
  fclose(a->f);
46
}
47

    
48
static void
49
rf_dump(resource *r)
50
{
51
  struct rfile *a = (struct rfile *) r;
52

    
53
  debug("(FILE *%p)\n", a->f);
54
}
55

    
56
static struct resclass rf_class = {
57
  "FILE",
58
  sizeof(struct rfile),
59
  rf_free,
60
  rf_dump
61
};
62

    
63
void *
64
tracked_fopen(pool *p, char *name, char *mode)
65
{
66
  FILE *f = fopen(name, mode);
67

    
68
  if (f)
69
    {
70
      struct rfile *r = ralloc(p, &rf_class);
71
      r->f = f;
72
    }
73
  return f;
74
}
75

    
76
/**
77
 * DOC: Timers
78
 *
79
 * Timers are resources which represent a wish of a module to call
80
 * a function at the specified time. The platform dependent code
81
 * doesn't guarantee exact timing, only that a timer function
82
 * won't be called before the requested time.
83
 *
84
 * In BIRD, real time is represented by values of the &bird_clock_t type
85
 * which are integral numbers interpreted as a number of seconds since
86
 * a fixed (but platform dependent) epoch. The current time can be read
87
 * from a variable @now with reasonable accuracy.
88
 *
89
 * Each timer is described by a &timer structure containing a pointer
90
 * to the handler function (@hook), data private to this function (@data),
91
 * time the function should be called at (@expires, 0 for inactive timers),
92
 * for the other fields see |timer.h|.
93
 */
94

    
95
#define NEAR_TIMER_LIMIT 4
96

    
97
static list near_timers, far_timers;
98
static bird_clock_t first_far_timer = TIME_INFINITY;
99

    
100
bird_clock_t now;
101

    
102
static void
103
tm_free(resource *r)
104
{
105
  timer *t = (timer *) r;
106

    
107
  tm_stop(t);
108
}
109

    
110
static void
111
tm_dump(resource *r)
112
{
113
  timer *t = (timer *) r;
114

    
115
  debug("(code %p, data %p, ", t->hook, t->data);
116
  if (t->randomize)
117
    debug("rand %d, ", t->randomize);
118
  if (t->recurrent)
119
    debug("recur %d, ", t->recurrent);
120
  if (t->expires)
121
    debug("expires in %d sec)\n", t->expires - now);
122
  else
123
    debug("inactive)\n");
124
}
125

    
126
static struct resclass tm_class = {
127
  "Timer",
128
  sizeof(timer),
129
  tm_free,
130
  tm_dump
131
};
132

    
133
/**
134
 * tm_new - create a timer
135
 * @p: pool
136
 *
137
 * This function creates a new timer resource and returns
138
 * a pointer to it. To use the timer, you need to fill in
139
 * the structure fields and call tm_start() to start timing.
140
 */
141
timer *
142
tm_new(pool *p)
143
{
144
  timer *t = ralloc(p, &tm_class);
145
  t->hook = NULL;
146
  t->data = NULL;
147
  t->randomize = 0;
148
  t->expires = 0;
149
  return t;
150
}
151

    
152
static inline void
153
tm_insert_near(timer *t)
154
{
155
  node *n = HEAD(near_timers);
156

    
157
  while (n->next && (SKIP_BACK(timer, n, n)->expires < t->expires))
158
    n = n->next;
159
  insert_node(&t->n, n->prev);
160
}
161

    
162
/**
163
 * tm_start - start a timer
164
 * @t: timer
165
 * @after: number of seconds the timer should be run after
166
 *
167
 * This function schedules the hook function of the timer to
168
 * be called after @after seconds. If the timer has been already
169
 * started, its @expires time is replaced by the new value.
170
 *
171
 * If you have set the @randomize field of @t, the timeout
172
 * will be increased by a random number of seconds chosen
173
 * uniformly from range 0 .. @randomize.
174
 *
175
 * You can call tm_start() from the handler function of the timer
176
 * to request another run of the timer. Also, you can set the @recurrent
177
 * field to have the timer re-added automatically with the same timeout.
178
 */
179
void
180
tm_start(timer *t, unsigned after)
181
{
182
  bird_clock_t when;
183

    
184
  if (t->randomize)
185
    after += random() % (t->randomize + 1);
186
  when = now + after;
187
  if (t->expires == when)
188
    return;
189
  if (t->expires)
190
    rem_node(&t->n);
191
  t->expires = when;
192
  if (after <= NEAR_TIMER_LIMIT)
193
    tm_insert_near(t);
194
  else
195
    {
196
      if (!first_far_timer || first_far_timer > when)
197
        first_far_timer = when;
198
      add_tail(&far_timers, &t->n);
199
    }
200
}
201

    
202
/**
203
 * tm_stop - stop a timer
204
 * @t: timer
205
 *
206
 * This function stops a timer. If the timer is already stopped,
207
 * nothing happens.
208
 */
209
void
210
tm_stop(timer *t)
211
{
212
  if (t->expires)
213
    {
214
      rem_node(&t->n);
215
      t->expires = 0;
216
    }
217
}
218

    
219
static void
220
tm_dump_them(char *name, list *l)
221
{
222
  node *n;
223
  timer *t;
224

    
225
  debug("%s timers:\n", name);
226
  WALK_LIST(n, *l)
227
    {
228
      t = SKIP_BACK(timer, n, n);
229
      debug("%p ", t);
230
      tm_dump(&t->r);
231
    }
232
  debug("\n");
233
}
234

    
235
void
236
tm_dump_all(void)
237
{
238
  tm_dump_them("Near", &near_timers);
239
  tm_dump_them("Far", &far_timers);
240
}
241

    
242
static inline time_t
243
tm_first_shot(void)
244
{
245
  time_t x = first_far_timer;
246

    
247
  if (!EMPTY_LIST(near_timers))
248
    {
249
      timer *t = SKIP_BACK(timer, n, HEAD(near_timers));
250
      if (t->expires < x)
251
        x = t->expires;
252
    }
253
  return x;
254
}
255

    
256
static void
257
tm_shot(void)
258
{
259
  timer *t;
260
  node *n, *m;
261

    
262
  if (first_far_timer <= now)
263
    {
264
      bird_clock_t limit = now + NEAR_TIMER_LIMIT;
265
      first_far_timer = TIME_INFINITY;
266
      n = HEAD(far_timers);
267
      while (m = n->next)
268
        {
269
          t = SKIP_BACK(timer, n, n);
270
          if (t->expires <= limit)
271
            {
272
              rem_node(n);
273
              tm_insert_near(t);
274
            }
275
          else if (t->expires < first_far_timer)
276
            first_far_timer = t->expires;
277
          n = m;
278
        }
279
    }
280
  while ((n = HEAD(near_timers)) -> next)
281
    {
282
      int delay;
283
      t = SKIP_BACK(timer, n, n);
284
      if (t->expires > now)
285
        break;
286
      rem_node(n);
287
      delay = t->expires - now;
288
      t->expires = 0;
289
      if (t->recurrent)
290
        {
291
          int i = t->recurrent - delay;
292
          if (i < 0)
293
            i = 0;
294
          tm_start(t, i);
295
        }
296
      t->hook(t);
297
    }
298
}
299

    
300
/**
301
 * tm_parse_date - parse a date
302
 * @x: date string
303
 *
304
 * tm_parse_date() takes a textual representation of a date (dd-mm-yyyy)
305
 * and converts it to the corresponding value of type &bird_clock_t.
306
 */
307
bird_clock_t
308
tm_parse_date(char *x)
309
{
310
  struct tm tm;
311
  int n;
312
  time_t t;
313

    
314
  if (sscanf(x, "%d-%d-%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &n) != 3 || x[n])
315
    return 0;
316
  tm.tm_mon--;
317
  tm.tm_year -= 1900;
318
  tm.tm_hour = tm.tm_min = tm.tm_sec = 0;
319
  t = mktime(&tm);
320
  if (t == (time_t) -1)
321
    return 0;
322
  return t;
323
}
324

    
325
/**
326
 * tm_format_date - convert date to textual representation
327
 * @x: destination buffer of size %TM_DATE_BUFFER_SIZE
328
 * @t: time
329
 *
330
 * This function formats the given time value @t to a textual
331
 * date representation (dd-mm-yyyy).
332
 */
333
void
334
tm_format_date(char *x, bird_clock_t t)
335
{
336
  struct tm *tm;
337

    
338
  tm = localtime(&t);
339
  bsprintf(x, "%02d-%02d-%04d", tm->tm_mday, tm->tm_mon+1, tm->tm_year+1900);
340
}
341

    
342
/**
343
 * tm_format_datetime - convert date and time to textual representation
344
 * @x: destination buffer of size %TM_DATETIME_BUFFER_SIZE
345
 * @t: time
346
 *
347
 * This function formats the given time value @t to a textual
348
 * date/time representation (dd-mm-yyyy hh:mm:ss).
349
 */
350
void
351
tm_format_datetime(char *x, bird_clock_t t)
352
{
353
  struct tm *tm;
354

    
355
  tm = localtime(&t);
356
  if (strftime(x, TM_DATETIME_BUFFER_SIZE, "%d-%m-%Y %H:%M:%S", tm) == TM_DATETIME_BUFFER_SIZE)
357
    strcpy(x, "<too-long>");
358
}
359

    
360
/**
361
 * tm_format_reltime - convert date and time to relative textual representation
362
 * @x: destination buffer of size %TM_RELTIME_BUFFER_SIZE
363
 * @t: time
364
 *
365
 * This function formats the given time value @t to a short
366
 * textual representation relative to the current time.
367
 */
368
void
369
tm_format_reltime(char *x, bird_clock_t t)
370
{
371
  struct tm *tm;
372
  bird_clock_t delta = (t < now) ? (now - t) : (t - now);
373
  static char *month_names[12] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
374

    
375
  tm = localtime(&t);
376
  if (delta < 20*3600)
377
    bsprintf(x, "%02d:%02d", tm->tm_hour, tm->tm_min);
378
  else if (delta < 360*86400)
379
    bsprintf(x, "%s%02d", month_names[tm->tm_mon], tm->tm_mday);
380
  else
381
    bsprintf(x, "%d", tm->tm_year+1900);
382
}
383

    
384
/**
385
 * DOC: Sockets
386
 *
387
 * Socket resources represent network connections. Their data structure (&socket)
388
 * contains a lot of fields defining the exact type of the socket, the local and
389
 * remote addresses and ports, pointers to socket buffers and finally pointers to
390
 * hook functions to be called when new data have arrived to the receive buffer
391
 * (@rx_hook), when the contents of the transmit buffer have been transmitted
392
 * (@tx_hook) and when an error or connection close occurs (@err_hook).
393
 *
394
 * You should not use rfree() from inside a socket hook, please use sk_close() instead.
395
 */
396

    
397
#ifndef SOL_IP
398
#define SOL_IP IPPROTO_IP
399
#endif
400

    
401
static list sock_list;
402

    
403
static void
404
sk_free(resource *r)
405
{
406
  sock *s = (sock *) r;
407

    
408
  if (s->fd >= 0)
409
    {
410
      close(s->fd);
411
      rem_node(&s->n);
412
    }
413
}
414

    
415
static void
416
sk_dump(resource *r)
417
{
418
  sock *s = (sock *) r;
419
  static char *sk_type_names[] = { "TCP<", "TCP>", "TCP", "UDP", "UDP/MC", "IP", "IP/MC", "MAGIC", "UNIX<", "UNIX", "DEL!" };
420

    
421
  debug("(%s, ud=%p, sa=%08x, sp=%d, da=%08x, dp=%d, tos=%d, ttl=%d, if=%s)\n",
422
        sk_type_names[s->type],
423
        s->data,
424
        s->saddr,
425
        s->sport,
426
        s->daddr,
427
        s->dport,
428
        s->tos,
429
        s->ttl,
430
        s->iface ? s->iface->name : "none");
431
}
432

    
433
static struct resclass sk_class = {
434
  "Socket",
435
  sizeof(sock),
436
  sk_free,
437
  sk_dump
438
};
439

    
440
/**
441
 * sk_new - create a socket
442
 * @p: pool
443
 *
444
 * This function creates a new socket resource. If you want to use it,
445
 * you need to fill in all the required fields of the structure and
446
 * call sk_open() to do the actual opening of the socket.
447
 */
448
sock *
449
sk_new(pool *p)
450
{
451
  sock *s = ralloc(p, &sk_class);
452
  s->pool = p;
453
  s->data = NULL;
454
  s->saddr = s->daddr = IPA_NONE;
455
  s->sport = s->dport = 0;
456
  s->tos = s->ttl = -1;
457
  s->iface = NULL;
458
  s->rbuf = NULL;
459
  s->rx_hook = NULL;
460
  s->rbsize = 0;
461
  s->tbuf = NULL;
462
  s->tx_hook = NULL;
463
  s->tbsize = 0;
464
  s->err_hook = NULL;
465
  s->fd = -1;
466
  s->entered = 0;
467
  return s;
468
}
469

    
470
/* ERR: store @x in the caller's local `err' and jump to the caller's `bad' label. */
#define ERR(x) do { err = (x); goto bad; } while (0)
/* WARN: log a warning naming the operation @x; execution continues. */
#define WARN(x) log(L_WARN "sk_setup: %s: %m", (x))
472

    
473
#ifdef IPV6
474

    
475
void
476
fill_in_sockaddr(sockaddr *sa, ip_addr a, unsigned port)
477
{
478
  sa->sin6_family = AF_INET6;
479
  sa->sin6_port = htons(port);
480
  sa->sin6_flowinfo = 0;
481
  set_inaddr(&sa->sin6_addr, a);
482
}
483

    
484
void
485
get_sockaddr(sockaddr *sa, ip_addr *a, unsigned *port)
486
{
487
  if (sa->sin6_family != AF_INET6)
488
    bug("get_sockaddr called for wrong address family");
489
  if (port)
490
    *port = ntohs(sa->sin6_port);
491
  memcpy(a, &sa->sin6_addr, sizeof(*a));
492
  ipa_ntoh(*a);
493
}
494

    
495
#else
496

    
497
void
498
fill_in_sockaddr(sockaddr *sa, ip_addr a, unsigned port)
499
{
500
  sa->sin_family = AF_INET;
501
  sa->sin_port = htons(port);
502
  set_inaddr(&sa->sin_addr, a);
503
}
504

    
505
void
506
get_sockaddr(sockaddr *sa, ip_addr *a, unsigned *port)
507
{
508
  if (sa->sin_family != AF_INET)
509
    bug("get_sockaddr called for wrong address family");
510
  if (port)
511
    *port = ntohs(sa->sin_port);
512
  memcpy(a, &sa->sin_addr.s_addr, sizeof(*a));
513
  ipa_ntoh(*a);
514
}
515

    
516
#endif
517

    
518
static char *
519
sk_setup(sock *s)
520
{
521
  int fd = s->fd;
522
  int one = 1;
523
  char *err;
524

    
525
  if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
526
    ERR("fcntl(O_NONBLOCK)");
527
  if (s->type == SK_UNIX)
528
    return NULL;
529
#ifdef IPV6
530
  if (s->ttl >= 0 && s->type != SK_UDP_MC && s->type != SK_IP_MC &&
531
      setsockopt(fd, SOL_IPV6, IPV6_UNICAST_HOPS, &s->ttl, sizeof(s->ttl)) < 0)
532
    ERR("IPV6_UNICAST_HOPS");
533
#else
534
  if ((s->tos >= 0) && setsockopt(fd, SOL_IP, IP_TOS, &s->tos, sizeof(s->tos)) < 0)
535
    WARN("IP_TOS");
536
  if (s->ttl >= 0 && setsockopt(fd, SOL_IP, IP_TTL, &s->ttl, sizeof(s->ttl)) < 0)
537
    ERR("IP_TTL");
538
  if (s->ttl == 1 && setsockopt(fd, SOL_SOCKET, SO_DONTROUTE, &one, sizeof(one)) < 0)
539
    ERR("SO_DONTROUTE");
540
#endif
541
  err = NULL;
542
bad:
543
  return err;
544
}
545

    
546
static void
547
sk_alloc_bufs(sock *s)
548
{
549
  if (!s->rbuf && s->rbsize)
550
    s->rbuf = mb_alloc(s->pool, s->rbsize);
551
  s->rpos = s->rbuf;
552
  if (!s->tbuf && s->tbsize)
553
    s->tbuf = mb_alloc(s->pool, s->tbsize);
554
  s->tpos = s->ttx = s->tbuf;
555
}
556

    
557
static void
558
sk_tcp_connected(sock *s)
559
{
560
  s->type = SK_TCP;
561
  sk_alloc_bufs(s);
562
  s->tx_hook(s);
563
}
564

    
565
static int
566
sk_passive_connected(sock *s, struct sockaddr *sa, int al, int type)
567
{
568
  int fd = accept(s->fd, sa, &al);
569
  if (fd >= 0)
570
    {
571
      sock *t = sk_new(s->pool);
572
      char *err;
573
      t->type = type;
574
      t->fd = fd;
575
      t->ttl = s->ttl;
576
      t->tos = s->tos;
577
      t->rbsize = s->rbsize;
578
      t->tbsize = s->tbsize;
579
      if (type == SK_TCP)
580
        get_sockaddr((sockaddr *) sa, &t->daddr, &t->dport);
581
      add_tail(&sock_list, &t->n);
582
      if (err = sk_setup(t))
583
        {
584
          log(L_ERR "Incoming connection: %s: %m", err);
585
          rfree(t);
586
          return 1;
587
        }
588
      sk_alloc_bufs(t);
589
      s->rx_hook(t, 0);
590
      return 1;
591
    }
592
  else if (errno != EINTR && errno != EAGAIN)
593
    {
594
      log(L_ERR "accept: %m");
595
      s->err_hook(s, errno);
596
    }
597
  return 0;
598
}
599

    
600
/**
601
 * sk_open - open a socket
602
 * @s: socket
603
 *
604
 * This function takes a socket resource created by sk_new() and
605
 * initialized by the user and binds a corresponding network connection
606
 * to it.
607
 *
608
 * Result: 0 for success, -1 for an error.
609
 */
610
int
611
sk_open(sock *s)
612
{
613
  int fd;
614
  sockaddr sa;
615
  int one = 1;
616
  int type = s->type;
617
  int has_src = ipa_nonzero(s->saddr) || s->sport;
618
  char *err;
619

    
620
  switch (type)
621
    {
622
    case SK_TCP_ACTIVE:
623
      s->ttx = "";                        /* Force s->ttx != s->tpos */
624
      /* Fall thru */
625
    case SK_TCP_PASSIVE:
626
      fd = socket(BIRD_PF, SOCK_STREAM, IPPROTO_TCP);
627
      break;
628
    case SK_UDP:
629
    case SK_UDP_MC:
630
      fd = socket(BIRD_PF, SOCK_DGRAM, IPPROTO_UDP);
631
      break;
632
    case SK_IP:
633
    case SK_IP_MC:
634
      fd = socket(BIRD_PF, SOCK_RAW, s->dport);
635
      break;
636
    case SK_MAGIC:
637
      fd = s->fd;
638
      break;
639
    default:
640
      bug("sk_open() called for invalid sock type %d", type);
641
    }
642
  if (fd < 0)
643
    die("sk_open: socket: %m");
644
  s->fd = fd;
645

    
646
  if (err = sk_setup(s))
647
    goto bad;
648
  switch (type)
649
    {
650
    case SK_UDP:
651
    case SK_IP:
652
      if (s->iface)                        /* It's a broadcast socket */
653
#ifdef IPV6
654
        bug("IPv6 has no broadcasts");
655
#else
656
        if (setsockopt(fd, SOL_SOCKET, SO_BROADCAST, &one, sizeof(one)) < 0)
657
          ERR("SO_BROADCAST");
658
#endif
659
      break;
660
    case SK_UDP_MC:
661
    case SK_IP_MC:
662
      {
663
#ifdef IPV6
664
        /* Fortunately, IPv6 socket interface is recent enough and therefore standardized */
665
        ASSERT(s->iface && s->iface->addr);
666
        if (ipa_nonzero(s->daddr))
667
          {
668
            int t = s->iface->index;
669
            int zero = 0;
670
            if (setsockopt(fd, SOL_IPV6, IPV6_MULTICAST_HOPS, &s->ttl, sizeof(s->ttl)) < 0)
671
              ERR("IPV6_MULTICAST_HOPS");
672
            if (setsockopt(fd, SOL_IPV6, IPV6_MULTICAST_LOOP, &zero, sizeof(zero)) < 0)
673
              ERR("IPV6_MULTICAST_LOOP");
674
            if (setsockopt(fd, SOL_IPV6, IPV6_MULTICAST_IF, &t, sizeof(t)) < 0)
675
              ERR("IPV6_MULTICAST_IF");
676
          }
677
        if (has_src)
678
          {
679
            struct ipv6_mreq mreq;
680
            set_inaddr(&mreq.ipv6mr_multiaddr, s->daddr);
681
#ifdef CONFIG_IPV6_GLIBC_20
682
            mreq.ipv6mr_ifindex = s->iface->index;
683
#else
684
            mreq.ipv6mr_interface = s->iface->index;
685
#endif
686
            if (setsockopt(fd, SOL_IPV6, IPV6_ADD_MEMBERSHIP, &mreq, sizeof(mreq)) < 0)
687
              ERR("IPV6_ADD_MEMBERSHIP");
688
          }
689
#else
690
        /* With IPv4 there are zillions of different socket interface variants. Ugh. */
691
        ASSERT(s->iface && s->iface->addr);
692
        if (err = sysio_mcast_join(s))
693
          goto bad;
694
#endif
695
      break;
696
      }
697
    }
698
  if (has_src)
699
    {
700
      int port;
701

    
702
      if (type == SK_IP || type == SK_IP_MC)
703
        port = 0;
704
      else
705
        {
706
          port = s->sport;
707
          if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) < 0)
708
            ERR("SO_REUSEADDR");
709
        }
710
      fill_in_sockaddr(&sa, s->saddr, port);
711
      if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0)
712
        ERR("bind");
713
    }
714
  fill_in_sockaddr(&sa, s->daddr, s->dport);
715
  switch (type)
716
    {
717
    case SK_TCP_ACTIVE:
718
      if (connect(fd, (struct sockaddr *) &sa, sizeof(sa)) >= 0)
719
        sk_tcp_connected(s);
720
      else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS &&
721
               errno != ECONNREFUSED && errno != EHOSTUNREACH)
722
        ERR("connect");
723
      break;
724
    case SK_TCP_PASSIVE:
725
      if (listen(fd, 8))
726
        ERR("listen");
727
      break;
728
    case SK_MAGIC:
729
      break;
730
    default:
731
      sk_alloc_bufs(s);
732
#ifdef IPV6
733
#ifdef IPV6_MTU_DISCOVER
734
      {
735
        int dont = IPV6_PMTUDISC_DONT;
736
        if (setsockopt(fd, SOL_IPV6, IPV6_MTU_DISCOVER, &dont, sizeof(dont)) < 0)
737
          ERR("IPV6_MTU_DISCOVER");
738
      }
739
#endif
740
#else
741
#ifdef IP_PMTUDISC
742
      {
743
        int dont = IP_PMTUDISC_DONT;
744
        if (setsockopt(fd, SOL_IP, IP_PMTUDISC, &dont, sizeof(dont)) < 0)
745
          ERR("IP_PMTUDISC");
746
      }
747
#endif
748
#endif
749
    }
750

    
751
  add_tail(&sock_list, &s->n);
752
  return 0;
753

    
754
bad:
755
  log(L_ERR "sk_open: %s: %m", err);
756
  close(fd);
757
  s->fd = -1;
758
  return -1;
759
}
760

    
761
int
762
sk_open_unix(sock *s, char *name)
763
{
764
  int fd;
765
  struct sockaddr_un sa;
766
  char *err;
767

    
768
  fd = socket(AF_UNIX, SOCK_STREAM, 0);
769
  if (fd < 0)
770
    die("sk_open_unix: socket: %m");
771
  s->fd = fd;
772
  if (err = sk_setup(s))
773
    goto bad;
774
  unlink(name);
775
  sa.sun_family = AF_UNIX;
776
  strcpy(sa.sun_path, name);
777
  if (bind(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) < 0)
778
    ERR("bind");
779
  if (listen(fd, 8))
780
    ERR("listen");
781
  add_tail(&sock_list, &s->n);
782
  return 0;
783

    
784
bad:
785
  log(L_ERR "sk_open_unix: %s: %m", err);
786
  close(fd);
787
  s->fd = -1;
788
  return -1;
789
}
790

    
791
/**
792
 * sk_close - close a socket
793
 * @s: a socket
794
 *
795
 * If sk_close() has been called from outside of any socket hook,
796
 * it translates to a rfree(), else it just marks the socket for
797
 * deletion as soon as the socket hook returns.
798
 */
799
void
800
sk_close(sock *s)
801
{
802
  if (s && s->entered)
803
    s->type = SK_DELETED;
804
  else
805
    rfree(s);
806
}
807

    
808
static int
809
sk_maybe_write(sock *s)
810
{
811
  int e;
812

    
813
  switch (s->type)
814
    {
815
    case SK_TCP:
816
    case SK_MAGIC:
817
    case SK_UNIX:
818
      while (s->ttx != s->tpos)
819
        {
820
          e = write(s->fd, s->ttx, s->tpos - s->ttx);
821
          if (e < 0)
822
            {
823
              if (errno != EINTR && errno != EAGAIN)
824
                {
825
                  s->err_hook(s, errno);
826
                  return -1;
827
                }
828
              return 0;
829
            }
830
          s->ttx += e;
831
        }
832
      s->ttx = s->tpos = s->tbuf;
833
      return 1;
834
    case SK_UDP:
835
    case SK_UDP_MC:
836
    case SK_IP:
837
    case SK_IP_MC:
838
      {
839
        sockaddr sa;
840

    
841
        if (s->tbuf == s->tpos)
842
          return 1;
843
        fill_in_sockaddr(&sa, s->faddr, s->fport);
844
        e = sendto(s->fd, s->tbuf, s->tpos - s->tbuf, 0, (struct sockaddr *) &sa, sizeof(sa));
845
        if (e < 0)
846
          {
847
            if (errno != EINTR && errno != EAGAIN)
848
              {
849
                s->err_hook(s, errno);
850
                return -1;
851
              }
852
            return 0;
853
          }
854
        s->tpos = s->tbuf;
855
        return 1;
856
      }
857
    default:
858
      bug("sk_maybe_write: unknown socket type %d", s->type);
859
    }
860
}
861

    
862
/**
863
 * sk_send - send data to a socket
864
 * @s: socket
865
 * @len: number of bytes to send
866
 *
867
 * This function sends @len bytes of data prepared in the
868
 * transmit buffer of the socket @s to the network connection.
869
 * If the packet can be sent immediately, it does so and returns
870
 * 1, else it queues the packet for later processing, returns 0
871
 * and calls the @tx_hook of the socket when the transmission
872
 * takes place.
873
 */
874
int
875
sk_send(sock *s, unsigned len)
876
{
877
  s->faddr = s->daddr;
878
  s->fport = s->dport;
879
  s->ttx = s->tbuf;
880
  s->tpos = s->tbuf + len;
881
  return sk_maybe_write(s);
882
}
883

    
884
/**
885
 * sk_send_to - send data to a specific destination
886
 * @s: socket
887
 * @len: number of bytes to send
888
 * @addr: IP address to send the packet to
889
 * @port: port to send the packet to
890
 *
891
 * This is a sk_send() replacement for connection-less packet sockets
892
 * which allows destination of the packet to be chosen dynamically.
893
 */
894
int
895
sk_send_to(sock *s, unsigned len, ip_addr addr, unsigned port)
896
{
897
  s->faddr = addr;
898
  s->fport = port;
899
  s->ttx = s->tbuf;
900
  s->tpos = s->tbuf + len;
901
  return sk_maybe_write(s);
902
}
903

    
904
static int
905
sk_read(sock *s)
906
{
907
  switch (s->type)
908
    {
909
    case SK_TCP_PASSIVE:
910
      {
911
        sockaddr sa;
912
        return sk_passive_connected(s, (struct sockaddr *) &sa, sizeof(sa), SK_TCP);
913
      }
914
    case SK_UNIX_PASSIVE:
915
      {
916
        struct sockaddr_un sa;
917
        return sk_passive_connected(s, (struct sockaddr *) &sa, sizeof(sa), SK_UNIX);
918
      }
919
    case SK_TCP:
920
    case SK_UNIX:
921
      {
922
        int c = read(s->fd, s->rpos, s->rbuf + s->rbsize - s->rpos);
923

    
924
        if (c < 0)
925
          {
926
            if (errno != EINTR && errno != EAGAIN)
927
              s->err_hook(s, errno);
928
          }
929
        else if (!c)
930
          s->err_hook(s, 0);
931
        else
932
          {
933
            s->rpos += c;
934
            if (s->rx_hook(s, s->rpos - s->rbuf))
935
              s->rpos = s->rbuf;
936
            return 1;
937
          }
938
        return 0;
939
      }
940
    case SK_MAGIC:
941
      return s->rx_hook(s, 0);
942
    case SK_DELETED:
943
      return 0;
944
    default:
945
      {
946
        sockaddr sa;
947
        int al = sizeof(sa);
948
        int e = recvfrom(s->fd, s->rbuf, s->rbsize, 0, (struct sockaddr *) &sa, &al);
949

    
950
        if (e < 0)
951
          {
952
            if (errno != EINTR && errno != EAGAIN)
953
              s->err_hook(s, errno);
954
            return 0;
955
          }
956
        s->rpos = s->rbuf + e;
957
        get_sockaddr(&sa, &s->faddr, &s->fport);
958
        s->rx_hook(s, e);
959
        return 1;
960
      }
961
    }
962
}
963

    
964
static void
965
sk_write(sock *s)
966
{
967
  switch (s->type)
968
    {
969
    case SK_TCP_ACTIVE:
970
      {
971
        sockaddr sa;
972
        fill_in_sockaddr(&sa, s->daddr, s->dport);
973
        if (connect(s->fd, (struct sockaddr *) &sa, sizeof(sa)) >= 0)
974
          sk_tcp_connected(s);
975
        else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS)
976
          s->err_hook(s, errno);
977
        break;
978
      }
979
    case SK_DELETED:
980
      return;
981
    default:
982
      while (s->ttx != s->tpos && sk_maybe_write(s) > 0)
983
        s->tx_hook(s);
984
    }
985
}
986

    
987
void
988
sk_dump_all(void)
989
{
990
  node *n;
991
  sock *s;
992

    
993
  debug("Open sockets:\n");
994
  WALK_LIST(n, sock_list)
995
    {
996
      s = SKIP_BACK(sock, n, n);
997
      debug("%p ", s);
998
      sk_dump(&s->r);
999
    }
1000
  debug("\n");
1001
}
1002

    
1003
#undef ERR
1004
#undef WARN
1005

    
1006
/*
1007
 *        Main I/O Loop
1008
 */
1009

    
1010
/* Requests checked and cleared by io_loop() just before it enters select(). */
volatile int async_config_flag;         /* Asynchronous reconfiguration/dump scheduled */
volatile int async_dump_flag;           /* When set, io_loop() calls async_dump() */
1012

    
1013
void
1014
io_init(void)
1015
{
1016
  init_list(&near_timers);
1017
  init_list(&far_timers);
1018
  init_list(&sock_list);
1019
  init_list(&global_event_list);
1020
  krt_io_init();
1021
  now = time(NULL);
1022
  srandom((int) now);
1023
}
1024

    
1025
void
1026
io_loop(void)
1027
{
1028
  fd_set rd, wr;
1029
  struct timeval timo;
1030
  time_t tout;
1031
  int hi, events;
1032
  sock *s;
1033
  node *n, *p;
1034

    
1035
  FD_ZERO(&rd);
1036
  FD_ZERO(&wr);
1037
  for(;;)
1038
    {
1039
      events = ev_run_list(&global_event_list);
1040
      now = time(NULL);
1041
      tout = tm_first_shot();
1042
      if (tout <= now)
1043
        {
1044
          tm_shot();
1045
          continue;
1046
        }
1047
      timo.tv_sec = events ? 0 : tout - now;
1048
      timo.tv_usec = 0;
1049

    
1050
      hi = 0;
1051
      WALK_LIST(n, sock_list)
1052
        {
1053
          s = SKIP_BACK(sock, n, n);
1054
          if (s->rx_hook)
1055
            {
1056
              FD_SET(s->fd, &rd);
1057
              if (s->fd > hi)
1058
                hi = s->fd;
1059
            }
1060
          if (s->tx_hook && s->ttx != s->tpos)
1061
            {
1062
              FD_SET(s->fd, &wr);
1063
              if (s->fd > hi)
1064
                hi = s->fd;
1065
            }
1066
        }
1067

    
1068
      /*
1069
       * Yes, this is racy. But even if the signal comes before this test
1070
       * and entering select(), it gets caught on the next timer tick.
1071
       */
1072

    
1073
      if (async_config_flag)
1074
        {
1075
          async_config();
1076
          async_config_flag = 0;
1077
          continue;
1078
        }
1079
      if (async_dump_flag)
1080
        {
1081
          async_dump();
1082
          async_dump_flag = 0;
1083
          continue;
1084
        }
1085
      if (async_shutdown_flag)
1086
        {
1087
          async_shutdown();
1088
          async_shutdown_flag = 0;
1089
          continue;
1090
        }
1091

    
1092
      /* And finally enter select() to find active sockets */
1093

    
1094
      hi = select(hi+1, &rd, &wr, NULL, &timo);
1095
      if (hi < 0)
1096
        {
1097
          if (errno == EINTR || errno == EAGAIN)
1098
            continue;
1099
          die("select: %m");
1100
        }
1101
      if (hi)
1102
        {
1103
          WALK_LIST_DELSAFE(n, p, sock_list)
1104
            {
1105
              s = SKIP_BACK(sock, n, n);
1106
              s->entered = 1;
1107
              if (FD_ISSET(s->fd, &rd))
1108
                {
1109
                  FD_CLR(s->fd, &rd);
1110
                  while (sk_read(s))
1111
                    ;
1112
                }
1113
              if (s->type != SK_DELETED && FD_ISSET(s->fd, &wr))
1114
                {
1115
                  FD_CLR(s->fd, &wr);
1116
                  sk_write(s);
1117
                }
1118
              s->entered = 0;
1119
              if (s->type == SK_DELETED)
1120
                rfree(s);
1121
            }
1122
        }
1123
    }
1124
}