Statistics
| Branch: | Revision:

iof-bird-daemon / sysdep / unix / io.c @ 61fb537c

History | View | Annotate | Download (14.6 KB)

1
/*
2
 *        BIRD Internet Routing Daemon -- Unix I/O
3
 *
4
 *        (c) 1998--1999 Martin Mares <mj@ucw.cz>
5
 *
6
 *        Can be freely distributed and used under the terms of the GNU GPL.
7
 */
8

    
9
#include <stdio.h>
10
#include <stdlib.h>
11
#include <string.h>
12
#include <sys/time.h>
13
#include <sys/types.h>
14
#include <sys/socket.h>
15
#include <sys/fcntl.h>
16
#include <unistd.h>
17
#include <errno.h>
18

    
19
#include "nest/bird.h"
20
#include "lib/lists.h"
21
#include "lib/resource.h"
22
#include "lib/timer.h"
23
#include "lib/socket.h"
24
#include "lib/event.h"
25
#include "nest/iface.h"
26

    
27
#include "lib/unix.h"
28

    
29
/*
30
 *        Timers
31
 */
32

    
33
#define NEAR_TIMER_LIMIT 4
34

    
35
#ifdef TIME_T_IS_64BIT
36
#define TIME_INFINITY 0x7fffffffffffffff
37
#else
38
#ifdef TIME_T_IS_SIGNED
39
#define TIME_INFINITY 0x7fffffff
40
#else
41
#define TIME_INFINITY 0xffffffff
42
#endif
43
#endif
44

    
45
static list near_timers, far_timers;
46
static bird_clock_t first_far_timer = TIME_INFINITY;
47

    
48
bird_clock_t now;
49

    
50
static void
51
tm_free(resource *r)
52
{
53
  timer *t = (timer *) r;
54

    
55
  tm_stop(t);
56
}
57

    
58
static void
59
tm_dump(resource *r)
60
{
61
  timer *t = (timer *) r;
62

    
63
  debug("(code %p, data %p, ", t->hook, t->data);
64
  if (t->randomize)
65
    debug("rand %d, ", t->randomize);
66
  if (t->recurrent)
67
    debug("recur %d, ", t->recurrent);
68
  if (t->expires)
69
    debug("expires in %d sec)\n", t->expires - now);
70
  else
71
    debug("inactive)\n");
72
}
73

    
74
static struct resclass tm_class = {
75
  "Timer",
76
  sizeof(timer),
77
  tm_free,
78
  tm_dump
79
};
80

    
81
timer *
82
tm_new(pool *p)
83
{
84
  timer *t = ralloc(p, &tm_class);
85
  t->hook = NULL;
86
  t->data = NULL;
87
  t->randomize = 0;
88
  t->expires = 0;
89
  return t;
90
}
91

    
92
static inline void
93
tm_insert_near(timer *t)
94
{
95
  node *n = HEAD(near_timers);
96

    
97
  while (n->next && (SKIP_BACK(timer, n, n)->expires < t->expires))
98
    n = n->next;
99
  insert_node(&t->n, n->prev);
100
}
101

    
102
void
103
tm_start(timer *t, unsigned after)
104
{
105
  bird_clock_t when;
106

    
107
  if (t->randomize)
108
    after += random() % (t->randomize + 1);
109
  when = now + after;
110
  if (t->expires == when)
111
    return;
112
  if (t->expires)
113
    rem_node(&t->n);
114
  t->expires = when;
115
  if (after <= NEAR_TIMER_LIMIT)
116
    tm_insert_near(t);
117
  else
118
    {
119
      if (!first_far_timer || first_far_timer > when)
120
        first_far_timer = when;
121
      add_tail(&far_timers, &t->n);
122
    }
123
}
124

    
125
void
126
tm_stop(timer *t)
127
{
128
  if (t->expires)
129
    {
130
      rem_node(&t->n);
131
      t->expires = 0;
132
    }
133
}
134

    
135
static void
136
tm_dump_them(char *name, list *l)
137
{
138
  node *n;
139
  timer *t;
140

    
141
  debug("%s timers:\n", name);
142
  WALK_LIST(n, *l)
143
    {
144
      t = SKIP_BACK(timer, n, n);
145
      debug("%p ", t);
146
      tm_dump(&t->r);
147
    }
148
  debug("\n");
149
}
150

    
151
void
152
tm_dump_all(void)
153
{
154
  tm_dump_them("Near", &near_timers);
155
  tm_dump_them("Far", &far_timers);
156
}
157

    
158
static inline time_t
159
tm_first_shot(void)
160
{
161
  time_t x = first_far_timer;
162

    
163
  if (!EMPTY_LIST(near_timers))
164
    {
165
      timer *t = SKIP_BACK(timer, n, HEAD(near_timers));
166
      if (t->expires < x)
167
        x = t->expires;
168
    }
169
  return x;
170
}
171

    
172
static void
173
tm_shot(void)
174
{
175
  timer *t;
176
  node *n, *m;
177

    
178
  if (first_far_timer <= now)
179
    {
180
      bird_clock_t limit = now + NEAR_TIMER_LIMIT;
181
      first_far_timer = TIME_INFINITY;
182
      n = HEAD(far_timers);
183
      while (m = n->next)
184
        {
185
          t = SKIP_BACK(timer, n, n);
186
          if (t->expires <= limit)
187
            {
188
              rem_node(n);
189
              tm_insert_near(t);
190
            }
191
          else if (t->expires < first_far_timer)
192
            first_far_timer = t->expires;
193
          n = m;
194
        }
195
    }
196
  while ((n = HEAD(near_timers)) -> next)
197
    {
198
      int delay;
199
      t = SKIP_BACK(timer, n, n);
200
      if (t->expires > now)
201
        break;
202
      rem_node(n);
203
      delay = t->expires - now;
204
      t->expires = 0;
205
      if (t->recurrent)
206
        {
207
          int i = t->recurrent - delay;
208
          if (i < 0)
209
            i = 0;
210
          tm_start(t, i);
211
        }
212
      t->hook(t);
213
    }
214
}
215

    
216
/*
217
 *        Sockets
218
 */
219

    
220
#ifndef SOL_IP
221
#define SOL_IP IPPROTO_IP
222
#endif
223

    
224
static list sock_list;
225

    
226
static void
227
sk_free(resource *r)
228
{
229
  sock *s = (sock *) r;
230

    
231
  if (s->fd >= 0)
232
    rem_node(&s->n);
233
}
234

    
235
static void
236
sk_dump(resource *r)
237
{
238
  sock *s = (sock *) r;
239
  static char *sk_type_names[] = { "TCP<", "TCP>", "TCP", "UDP", "UDP/MC", "IP", "IP/MC", "MAGIC" };
240

    
241
  debug("(%s, ud=%p, sa=%08x, sp=%d, da=%08x, dp=%d, tos=%d, ttl=%d, if=%s)\n",
242
        sk_type_names[s->type],
243
        s->data,
244
        s->saddr,
245
        s->sport,
246
        s->daddr,
247
        s->dport,
248
        s->tos,
249
        s->ttl,
250
        s->iface ? s->iface->name : "none");
251
}
252

    
253
static struct resclass sk_class = {
254
  "Socket",
255
  sizeof(sock),
256
  sk_free,
257
  sk_dump
258
};
259

    
260
sock *
261
sk_new(pool *p)
262
{
263
  sock *s = ralloc(p, &sk_class);
264
  s->pool = p;
265
  s->data = NULL;
266
  s->saddr = s->daddr = IPA_NONE;
267
  s->sport = s->dport = 0;
268
  s->tos = s->ttl = -1;
269
  s->iface = NULL;
270
  s->rbuf = NULL;
271
  s->rx_hook = NULL;
272
  s->rbsize = 0;
273
  s->tbuf = NULL;
274
  s->tx_hook = NULL;
275
  s->tbsize = 0;
276
  s->err_hook = NULL;
277
  s->fd = -1;
278
  return s;
279
}
280

    
281
/*
 * Record the name of the failed operation in the local `err' and jump to
 * the local `bad' label.  Usable only in functions that provide both.
 */
#define ERR(x) do { err = (x); goto bad; } while (0)
282

    
283
static inline void
284
set_inaddr(struct in_addr *ia, ip_addr a)
285
{
286
  a = ipa_hton(a);
287
  memcpy(&ia->s_addr, &a, sizeof(a));
288
}
289

    
290
void
291
fill_in_sockaddr(struct sockaddr_in *sa, ip_addr a, unsigned port)
292
{
293
  sa->sin_family = AF_INET;
294
  sa->sin_port = htons(port);
295
  set_inaddr(&sa->sin_addr, a);
296
}
297

    
298
void
299
get_sockaddr(struct sockaddr_in *sa, ip_addr *a, unsigned *port)
300
{
301
  if (sa->sin_family != AF_INET)
302
    bug("get_sockaddr called for wrong address family");
303
  if (port)
304
    *port = ntohs(sa->sin_port);
305
  memcpy(a, &sa->sin_addr.s_addr, sizeof(*a));
306
  *a = ipa_ntoh(*a);
307
}
308

    
309
static char *
310
sk_setup(sock *s)
311
{
312
  int fd = s->fd;
313
  int one = 1;
314
  char *err;
315

    
316
  if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
317
    ERR("fcntl(O_NONBLOCK)");
318
  if ((s->tos >= 0) && setsockopt(fd, SOL_IP, IP_TOS, &s->tos, sizeof(s->tos)) < 0)
319
    ERR("IP_TOS");
320
  if (s->ttl >= 0)
321
    {
322
      if (setsockopt(fd, SOL_IP, IP_TTL, &s->ttl, sizeof(s->ttl)) < 0)
323
        ERR("IP_TTL");
324
      if (setsockopt(fd, SOL_SOCKET, SO_DONTROUTE, &one, sizeof(one)) < 0)
325
        ERR("SO_DONTROUTE");
326
    }
327
#ifdef IP_PMTUDISC
328
  if (s->type != SK_TCP_PASSIVE && s->type != SK_TCP_ACTIVE && s->type != SK_MAGIC)
329
    {
330
      int dont = IP_PMTUDISC_DONT;
331
      if (setsockopt(fd, SOL_IP, IP_PMTUDISC, &dont, sizeof(dont)) < 0)
332
        ERR("IP_PMTUDISC");
333
    }
334
#endif
335
  /* FIXME: Set send/receive buffers? */
336
  /* FIXME: Set keepalive for TCP connections? */
337
  err = NULL;
338
bad:
339
  return err;
340
}
341

    
342
static void
343
sk_alloc_bufs(sock *s)
344
{
345
  if (!s->rbuf && s->rbsize)
346
    s->rbuf = mb_alloc(s->pool, s->rbsize);
347
  s->rpos = s->rbuf;
348
  if (!s->tbuf && s->tbsize)
349
    s->tbuf = mb_alloc(s->pool, s->tbsize);
350
  s->tpos = s->ttx = s->tbuf;
351
}
352

    
353
void
354
sk_tcp_connected(sock *s)
355
{
356
  s->rx_hook(s, 0);
357
  s->type = SK_TCP;
358
  sk_alloc_bufs(s);
359
}
360

    
361
int
362
sk_open(sock *s)
363
{
364
  int fd, e;
365
  struct sockaddr_in sa;
366
  int zero = 0;
367
  int one = 1;
368
  int type = s->type;
369
  int has_src = ipa_nonzero(s->saddr) || s->sport;
370
  int has_dest = ipa_nonzero(s->daddr);
371
  char *err;
372

    
373
  switch (type)
374
    {
375
    case SK_TCP_ACTIVE:
376
    case SK_TCP_PASSIVE:
377
      fd = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
378
      break;
379
    case SK_UDP:
380
    case SK_UDP_MC:
381
      fd = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP);
382
      break;
383
    case SK_IP:
384
    case SK_IP_MC:
385
      fd = socket(PF_INET, SOCK_RAW, s->dport);
386
      break;
387
    case SK_MAGIC:
388
      fd = s->fd;
389
      break;
390
    default:
391
      bug("sk_open() called for invalid sock type %d", type);
392
    }
393
  if (fd < 0)
394
    die("sk_open: socket: %m");
395
  s->fd = fd;
396

    
397
  if (err = sk_setup(s))
398
    goto bad;
399
  switch (type)
400
    {
401
    case SK_UDP:
402
    case SK_IP:
403
      if (s->iface)                        /* It's a broadcast socket */
404
        if (setsockopt(fd, SOL_SOCKET, SO_BROADCAST, &one, sizeof(one)) < 0)
405
          ERR("SO_BROADCAST");
406
      break;
407
    case SK_UDP_MC:
408
    case SK_IP_MC:
409
      {
410
#ifdef HAVE_STRUCT_IP_MREQN
411
        struct ip_mreqn mreq;
412
#define mreq_add mreq
413
        ASSERT(s->iface);
414
        mreq.imr_ifindex = s->iface->index;
415
        set_inaddr(&mreq.imr_address, s->iface->ip);
416
#else
417
        struct in_addr mreq;
418
        struct ip_mreq mreq_add;
419
        ASSERT(s->iface);
420
        set_inaddr(&mreq, s->iface->ip);
421
        mreq_add.imr_interface = mreq;
422
#endif
423
        set_inaddr(&mreq_add.imr_multiaddr, s->daddr);
424
        if (has_dest)
425
          {
426
            if (
427
#ifdef IP_DEFAULT_MULTICAST_TTL
428
                s->ttl != IP_DEFAULT_MULTICAST_TTL &&
429
#endif
430
                setsockopt(fd, SOL_IP, IP_MULTICAST_TTL, &s->ttl, sizeof(s->ttl)) < 0)
431
              ERR("IP_MULTICAST_TTL");
432
            if (
433
#ifdef IP_DEFAULT_MULTICAST_LOOP
434
                IP_DEFAULT_MULTICAST_LOOP &&
435
#endif
436
                setsockopt(fd, SOL_IP, IP_MULTICAST_LOOP, &zero, sizeof(zero)) < 0)
437
              ERR("IP_MULTICAST_LOOP");
438
            /* This defines where should we send _outgoing_ multicasts */
439
            if (setsockopt(fd, SOL_IP, IP_MULTICAST_IF, &mreq, sizeof(mreq)) < 0)
440
              ERR("IP_MULTICAST_IF");
441
        }
442
      /* And this one sets interface for _receiving_ multicasts from */
443
      if (has_src && setsockopt(fd, SOL_IP, IP_ADD_MEMBERSHIP, &mreq_add, sizeof(mreq_add)) < 0)
444
        ERR("IP_ADD_MEMBERSHIP");
445
      break;
446
      }
447
    }
448
  if (has_src)
449
    {
450
      int port;
451

    
452
      if (type == SK_IP || type == SK_IP_MC)
453
        port = 0;
454
      else
455
        {
456
          port = s->sport;
457
          if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) < 0)
458
            ERR("SO_REUSEADDR");
459
        }
460
      fill_in_sockaddr(&sa, s->saddr, port);
461
      if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0)
462
        ERR("bind");
463
    }
464
  fill_in_sockaddr(&sa, s->daddr, s->dport);
465
  switch (type)
466
    {
467
    case SK_TCP_ACTIVE:
468
      if (connect(fd, (struct sockaddr *) &sa, sizeof(sa)) >= 0)
469
        sk_tcp_connected(s);
470
      else if (errno != EINTR && errno != EAGAIN)
471
        ERR("connect");
472
      break;
473
    case SK_TCP_PASSIVE:
474
      if (listen(fd, 8))
475
        ERR("listen");
476
      break;
477
    }
478

    
479
  sk_alloc_bufs(s);
480
  add_tail(&sock_list, &s->n);
481
  return 0;
482

    
483
bad:
484
  log(L_ERR "sk_open: %s: %m", err);
485
  close(fd);
486
  s->fd = -1;
487
  return -1;
488
}
489

    
490
static int
491
sk_maybe_write(sock *s)
492
{
493
  int e;
494

    
495
  switch (s->type)
496
    {
497
    case SK_TCP:
498
    case SK_MAGIC:
499
      while (s->ttx != s->tpos)
500
        {
501
          e = write(s->fd, s->ttx, s->tpos - s->ttx);
502
          if (e < 0)
503
            {
504
              if (errno != EINTR && errno != EAGAIN)
505
                {
506
                  log(L_ERR "write: %m");
507
                  s->err_hook(s, errno);
508
                  return -1;
509
                }
510
              return 0;
511
            }
512
          s->ttx += e;
513
        }
514
      s->ttx = s->tpos = s->tbuf;
515
      return 1;
516
    case SK_UDP:
517
    case SK_UDP_MC:
518
    case SK_IP:
519
    case SK_IP_MC:
520
      {
521
        struct sockaddr_in sa;
522

    
523
        if (s->tbuf == s->tpos)
524
          return 1;
525
        fill_in_sockaddr(&sa, s->faddr, s->fport);
526
        e = sendto(s->fd, s->tbuf, s->tpos - s->tbuf, 0, (struct sockaddr *) &sa, sizeof(sa));
527
        if (e < 0)
528
          {
529
            if (errno != EINTR && errno != EAGAIN)
530
              {
531
                log(L_ERR "sendto: %m");
532
                s->err_hook(s, errno);
533
                return -1;
534
              }
535
            return 0;
536
          }
537
        s->tpos = s->tbuf;
538
        return 1;
539
      }
540
    default:
541
      bug("sk_maybe_write: unknown socket type %d", s->type);
542
    }
543
}
544

    
545
int
546
sk_send(sock *s, unsigned len)
547
{
548
  s->faddr = s->daddr;
549
  s->fport = s->dport;
550
  s->ttx = s->tbuf;
551
  s->tpos = s->tbuf + len;
552
  return sk_maybe_write(s);
553
}
554

    
555
int
556
sk_send_to(sock *s, unsigned len, ip_addr addr, unsigned port)
557
{
558
  s->faddr = addr;
559
  s->fport = port;
560
  s->ttx = s->tbuf;
561
  s->tpos = s->tbuf + len;
562
  return sk_maybe_write(s);
563
}
564

    
565
static int
566
sk_read(sock *s)
567
{
568
  switch (s->type)
569
    {
570
    case SK_TCP_ACTIVE:
571
      {
572
        struct sockaddr_in sa;
573
        fill_in_sockaddr(&sa, s->daddr, s->dport);
574
        if (connect(s->fd, (struct sockaddr *) &sa, sizeof(sa)) >= 0)
575
          sk_tcp_connected(s);
576
        else if (errno != EINTR && errno != EAGAIN)
577
          {
578
            log(L_ERR "connect: %m");
579
            s->err_hook(s, errno);
580
          }
581
        return 0;
582
      }
583
    case SK_TCP_PASSIVE:
584
      {
585
        struct sockaddr_in sa;
586
        int al = sizeof(sa);
587
        int fd = accept(s->fd, (struct sockaddr *) &sa, &al);
588
        if (fd >= 0)
589
          {
590
            sock *t = sk_new(s->pool);
591
            char *err;
592
            t->type = SK_TCP;
593
            t->fd = fd;
594
            add_tail(&sock_list, &t->n);
595
            s->rx_hook(t, 0);
596
            if (err = sk_setup(t))
597
              {
598
                log(L_ERR "Incoming connection: %s: %m", err);
599
                s->err_hook(s, errno);
600
                return 0;
601
              }
602
            sk_alloc_bufs(t);
603
            return 1;
604
          }
605
        else if (errno != EINTR && errno != EAGAIN)
606
          {
607
            log(L_ERR "accept: %m");
608
            s->err_hook(s, errno);
609
          }
610
        return 0;
611
      }
612
    case SK_TCP:
613
      {
614
        int c = read(s->fd, s->rpos, s->rbuf + s->rbsize - s->rpos);
615

    
616
        if (c < 0)
617
          {
618
            if (errno != EINTR && errno != EAGAIN)
619
              {
620
                log(L_ERR "read: %m");
621
                s->err_hook(s, errno);
622
              }
623
          }
624
        else if (!c)
625
          s->err_hook(s, 0);
626
        else
627
          {
628
            s->rpos += c;
629
            if (s->rx_hook(s, s->rpos - s->rbuf))
630
              s->rpos = s->rbuf;
631
            return 1;
632
          }
633
        return 0;
634
      }
635
    case SK_MAGIC:
636
      return s->rx_hook(s, 0);
637
    default:
638
      {
639
        struct sockaddr_in sa;
640
        int al = sizeof(sa);
641
        int e = recvfrom(s->fd, s->rbuf, s->rbsize, 0, (struct sockaddr *) &sa, &al);
642

    
643
        if (e < 0)
644
          {
645
            if (errno != EINTR && errno != EAGAIN)
646
              {
647
                log(L_ERR "recvfrom: %m");
648
                s->err_hook(s, errno);
649
              }
650
            return 0;
651
          }
652
        s->rpos = s->rbuf + e;
653
        get_sockaddr(&sa, &s->faddr, &s->fport);
654
        s->rx_hook(s, e);
655
        return 1;
656
      }
657
    }
658
}
659

    
660
static void
661
sk_write(sock *s)
662
{
663
  while (s->ttx != s->tbuf && sk_maybe_write(s) > 0)
664
    s->tx_hook(s);
665
}
666

    
667
void
668
sk_dump_all(void)
669
{
670
  node *n;
671
  sock *s;
672

    
673
  debug("Open sockets:\n");
674
  WALK_LIST(n, sock_list)
675
    {
676
      s = SKIP_BACK(sock, n, n);
677
      debug("%p ", s);
678
      sk_dump(&s->r);
679
    }
680
  debug("\n");
681
}
682

    
683
#undef ERR
684

    
685
/*
686
 *        Main I/O Loop
687
 */
688

    
689
/*
 * Requests scheduled asynchronously and serviced by io_loop(), which
 * calls the matching handler and then clears the flag.
 */
volatile int async_config_flag;         /* reconfiguration requested */
volatile int async_dump_flag;           /* dump requested */
691

    
692
void
693
io_init(void)
694
{
695
  init_list(&near_timers);
696
  init_list(&far_timers);
697
  init_list(&sock_list);
698
  init_list(&global_event_list);
699
  krt_io_init();
700
  now = time(NULL);
701
}
702

    
703
void
704
io_loop(void)
705
{
706
  fd_set rd, wr;
707
  struct timeval timo;
708
  time_t tout;
709
  int hi;
710
  sock *s;
711
  node *n;
712

    
713
  /* FIXME: Use poll() if available */
714

    
715
  FD_ZERO(&rd);
716
  FD_ZERO(&wr);
717
  for(;;)
718
    {
719
      ev_run_list(&global_event_list);
720
      now = time(NULL);
721
      tout = tm_first_shot();
722
      if (tout <= now)
723
        {
724
          tm_shot();
725
          continue;
726
        }
727
      else
728
        {
729
          timo.tv_sec = tout - now;
730
          timo.tv_usec = 0;
731
        }
732

    
733
      hi = 0;
734
      WALK_LIST(n, sock_list)
735
        {
736
          s = SKIP_BACK(sock, n, n);
737
          if (s->rx_hook)
738
            {
739
              FD_SET(s->fd, &rd);
740
              if (s->fd > hi)
741
                hi = s->fd;
742
            }
743
          if (s->tx_hook && s->ttx != s->tpos)
744
            {
745
              FD_SET(s->fd, &wr);
746
              if (s->fd > hi)
747
                hi = s->fd;
748
            }
749
        }
750

    
751
      /*
752
       * Yes, this is racy. But even if the signal comes before this test
753
       * and entering select(), it gets caught on the next timer tick.
754
       */
755

    
756
      if (async_config_flag)
757
        {
758
          async_config();
759
          async_config_flag = 0;
760
          continue;
761
        }
762
      if (async_dump_flag)
763
        {
764
          async_dump();
765
          async_dump_flag = 0;
766
          continue;
767
        }
768
      if (async_shutdown_flag)
769
        {
770
          async_shutdown();
771
          async_shutdown_flag = 0;
772
          continue;
773
        }
774

    
775
      /* And finally enter select() to find active sockets */
776

    
777
      hi = select(hi+1, &rd, &wr, NULL, &timo);
778
      if (hi < 0)
779
        {
780
          if (errno == EINTR || errno == EAGAIN)
781
            continue;
782
          die("select: %m");
783
        }
784
      if (hi)
785
        {
786
          WALK_LIST(n, sock_list)
787
            {
788
              s = SKIP_BACK(sock, n, n);
789
              if (FD_ISSET(s->fd, &rd))
790
                {
791
                  FD_CLR(s->fd, &rd);
792
                  while (sk_read(s))
793
                    ;
794
                }
795
              if (FD_ISSET(s->fd, &wr))
796
                {
797
                  FD_CLR(s->fd, &wr);
798
                  sk_write(s);
799
                }
800
            }
801
        }
802
    }
803
}