Statistics
| Branch: | Revision:

iof-bird-daemon / sysdep / unix / io.c @ e81b440f

History | View | Annotate | Download (30.3 KB)

1
/*
2
 *        BIRD Internet Routing Daemon -- Unix I/O
3
 *
4
 *        (c) 1998--2004 Martin Mares <mj@ucw.cz>
5
 *      (c) 2004       Ondrej Filip <feela@network.cz>
6
 *
7
 *        Can be freely distributed and used under the terms of the GNU GPL.
8
 */
9

    
10
#include <stdio.h>
11
#include <stdlib.h>
12
#include <time.h>
13
#include <sys/time.h>
14
#include <sys/types.h>
15
#include <sys/socket.h>
16
#include <sys/fcntl.h>
17
#include <sys/un.h>
18
#include <unistd.h>
19
#include <errno.h>
20

    
21
#include "nest/bird.h"
22
#include "lib/lists.h"
23
#include "lib/resource.h"
24
#include "lib/timer.h"
25
#include "lib/socket.h"
26
#include "lib/event.h"
27
#include "lib/string.h"
28
#include "nest/iface.h"
29

    
30
#include "lib/unix.h"
31
#include "lib/sysio.h"
32

    
33
/* Maximum number of calls of tx handler for one socket in one
34
 * select iteration. Should be small enough to not monopolize CPU by
35
 * one protocol instance.
36
 */
37
#define MAX_STEPS 4
38

    
39
/* Maximum number of calls of rx handler for all sockets in one select
40
   iteration. RX callbacks are often much more costly so we limit
41
   this to get small latencies */
42
#define MAX_RX_STEPS 4
43

    
44
/*
45
 *        Tracked Files
46
 */
47

    
48
struct rfile {
49
  resource r;
50
  FILE *f;
51
};
52

    
53
static void
54
rf_free(resource *r)
55
{
56
  struct rfile *a = (struct rfile *) r;
57

    
58
  fclose(a->f);
59
}
60

    
61
static void
62
rf_dump(resource *r)
63
{
64
  struct rfile *a = (struct rfile *) r;
65

    
66
  debug("(FILE *%p)\n", a->f);
67
}
68

    
69
static struct resclass rf_class = {
70
  "FILE",
71
  sizeof(struct rfile),
72
  rf_free,
73
  rf_dump,
74
  NULL
75
};
76

    
77
void *
78
tracked_fopen(pool *p, char *name, char *mode)
79
{
80
  FILE *f = fopen(name, mode);
81

    
82
  if (f)
83
    {
84
      struct rfile *r = ralloc(p, &rf_class);
85
      r->f = f;
86
    }
87
  return f;
88
}
89

    
90
/**
91
 * DOC: Timers
92
 *
93
 * Timers are resources which represent a wish of a module to call
94
 * a function at the specified time. The platform dependent code
95
 * doesn't guarantee exact timing, only that a timer function
96
 * won't be called before the requested time.
97
 *
98
 * In BIRD, time is represented by values of the &bird_clock_t type
99
 * which are integral numbers interpreted as a relative number of seconds since
100
 * some fixed time point in the past. The current time can be read
101
 * from variable @now with reasonable accuracy and is monotonic. There is also
102
 * a current 'absolute' time in variable @now_real reported by OS.
103
 *
104
 * Each timer is described by a &timer structure containing a pointer
105
 * to the handler function (@hook), data private to this function (@data),
106
 * time the function should be called at (@expires, 0 for inactive timers),
107
 * for the other fields see |timer.h|.
108
 */
109

    
110
#define NEAR_TIMER_LIMIT 4
111

    
112
static list near_timers, far_timers;
113
static bird_clock_t first_far_timer = TIME_INFINITY;
114

    
115
bird_clock_t now, now_real;
116

    
117
static void
118
update_times_plain(void)
119
{
120
  bird_clock_t new_time = time(NULL);
121
  int delta = new_time - now_real;
122

    
123
  if ((delta >= 0) && (delta < 60))
124
    now += delta;
125
  else if (now_real != 0)
126
   log(L_WARN "Time jump, delta %d s", delta);
127

    
128
  now_real = new_time;
129
}
130

    
131
static void
132
update_times_gettime(void)
133
{
134
  struct timespec ts;
135
  int rv;
136

    
137
  rv = clock_gettime(CLOCK_MONOTONIC, &ts);
138
  if (rv != 0)
139
    die("clock_gettime: %m");
140

    
141
  if (ts.tv_sec != now) {
142
    if (ts.tv_sec < now)
143
      log(L_ERR "Monotonic timer is broken");
144

    
145
    now = ts.tv_sec;
146
    now_real = time(NULL);
147
  }
148
}
149

    
150
static int clock_monotonic_available;
151

    
152
static inline void
153
update_times(void)
154
{
155
  if (clock_monotonic_available)
156
    update_times_gettime();
157
  else
158
    update_times_plain();
159
}
160

    
161
static inline void
162
init_times(void)
163
{
164
 struct timespec ts;
165
 clock_monotonic_available = (clock_gettime(CLOCK_MONOTONIC, &ts) == 0);
166
 if (!clock_monotonic_available)
167
   log(L_WARN "Monotonic timer is missing");
168
}
169

    
170

    
171
static void
172
tm_free(resource *r)
173
{
174
  timer *t = (timer *) r;
175

    
176
  tm_stop(t);
177
}
178

    
179
static void
180
tm_dump(resource *r)
181
{
182
  timer *t = (timer *) r;
183

    
184
  debug("(code %p, data %p, ", t->hook, t->data);
185
  if (t->randomize)
186
    debug("rand %d, ", t->randomize);
187
  if (t->recurrent)
188
    debug("recur %d, ", t->recurrent);
189
  if (t->expires)
190
    debug("expires in %d sec)\n", t->expires - now);
191
  else
192
    debug("inactive)\n");
193
}
194

    
195
static struct resclass tm_class = {
196
  "Timer",
197
  sizeof(timer),
198
  tm_free,
199
  tm_dump,
200
  NULL
201
};
202

    
203
/**
204
 * tm_new - create a timer
205
 * @p: pool
206
 *
207
 * This function creates a new timer resource and returns
208
 * a pointer to it. To use the timer, you need to fill in
209
 * the structure fields and call tm_start() to start timing.
210
 */
211
timer *
212
tm_new(pool *p)
213
{
214
  timer *t = ralloc(p, &tm_class);
215
  return t;
216
}
217

    
218
static inline void
219
tm_insert_near(timer *t)
220
{
221
  node *n = HEAD(near_timers);
222

    
223
  while (n->next && (SKIP_BACK(timer, n, n)->expires < t->expires))
224
    n = n->next;
225
  insert_node(&t->n, n->prev);
226
}
227

    
228
/**
229
 * tm_start - start a timer
230
 * @t: timer
231
 * @after: number of seconds the timer should be run after
232
 *
233
 * This function schedules the hook function of the timer to
234
 * be called after @after seconds. If the timer has been already
235
 * started, it's @expire time is replaced by the new value.
236
 *
237
 * You can have set the @randomize field of @t, the timeout
238
 * will be increased by a random number of seconds chosen
239
 * uniformly from range 0 .. @randomize.
240
 *
241
 * You can call tm_start() from the handler function of the timer
242
 * to request another run of the timer. Also, you can set the @recurrent
243
 * field to have the timer re-added automatically with the same timeout.
244
 */
245
void
246
tm_start(timer *t, unsigned after)
247
{
248
  bird_clock_t when;
249

    
250
  if (t->randomize)
251
    after += random() % (t->randomize + 1);
252
  when = now + after;
253
  if (t->expires == when)
254
    return;
255
  if (t->expires)
256
    rem_node(&t->n);
257
  t->expires = when;
258
  if (after <= NEAR_TIMER_LIMIT)
259
    tm_insert_near(t);
260
  else
261
    {
262
      if (!first_far_timer || first_far_timer > when)
263
        first_far_timer = when;
264
      add_tail(&far_timers, &t->n);
265
    }
266
}
267

    
268
/**
269
 * tm_stop - stop a timer
270
 * @t: timer
271
 *
272
 * This function stops a timer. If the timer is already stopped,
273
 * nothing happens.
274
 */
275
void
276
tm_stop(timer *t)
277
{
278
  if (t->expires)
279
    {
280
      rem_node(&t->n);
281
      t->expires = 0;
282
    }
283
}
284

    
285
static void
286
tm_dump_them(char *name, list *l)
287
{
288
  node *n;
289
  timer *t;
290

    
291
  debug("%s timers:\n", name);
292
  WALK_LIST(n, *l)
293
    {
294
      t = SKIP_BACK(timer, n, n);
295
      debug("%p ", t);
296
      tm_dump(&t->r);
297
    }
298
  debug("\n");
299
}
300

    
301
void
302
tm_dump_all(void)
303
{
304
  tm_dump_them("Near", &near_timers);
305
  tm_dump_them("Far", &far_timers);
306
}
307

    
308
static inline time_t
309
tm_first_shot(void)
310
{
311
  time_t x = first_far_timer;
312

    
313
  if (!EMPTY_LIST(near_timers))
314
    {
315
      timer *t = SKIP_BACK(timer, n, HEAD(near_timers));
316
      if (t->expires < x)
317
        x = t->expires;
318
    }
319
  return x;
320
}
321

    
322
static void
323
tm_shot(void)
324
{
325
  timer *t;
326
  node *n, *m;
327

    
328
  if (first_far_timer <= now)
329
    {
330
      bird_clock_t limit = now + NEAR_TIMER_LIMIT;
331
      first_far_timer = TIME_INFINITY;
332
      n = HEAD(far_timers);
333
      while (m = n->next)
334
        {
335
          t = SKIP_BACK(timer, n, n);
336
          if (t->expires <= limit)
337
            {
338
              rem_node(n);
339
              tm_insert_near(t);
340
            }
341
          else if (t->expires < first_far_timer)
342
            first_far_timer = t->expires;
343
          n = m;
344
        }
345
    }
346
  while ((n = HEAD(near_timers)) -> next)
347
    {
348
      int delay;
349
      t = SKIP_BACK(timer, n, n);
350
      if (t->expires > now)
351
        break;
352
      rem_node(n);
353
      delay = t->expires - now;
354
      t->expires = 0;
355
      if (t->recurrent)
356
        {
357
          int i = t->recurrent - delay;
358
          if (i < 0)
359
            i = 0;
360
          tm_start(t, i);
361
        }
362
      t->hook(t);
363
    }
364
}
365

    
366
/**
367
 * tm_parse_datetime - parse a date and time
368
 * @x: datetime string
369
 *
370
 * tm_parse_datetime() takes a textual representation of
371
 * a date and time (dd-mm-yyyy hh:mm:ss)
372
 * and converts it to the corresponding value of type &bird_clock_t.
373
 */
374
bird_clock_t
375
tm_parse_datetime(char *x)
376
{
377
  struct tm tm;
378
  int n;
379
  time_t t;
380

    
381
  if (sscanf(x, "%d-%d-%d %d:%d:%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &tm.tm_hour, &tm.tm_min, &tm.tm_sec, &n) != 6 || x[n])
382
    return tm_parse_date(x);
383
  tm.tm_mon--;
384
  tm.tm_year -= 1900;
385
  t = mktime(&tm);
386
  if (t == (time_t) -1)
387
    return 0;
388
  return t;
389
}
390
/**
391
 * tm_parse_date - parse a date
392
 * @x: date string
393
 *
394
 * tm_parse_date() takes a textual representation of a date (dd-mm-yyyy)
395
 * and converts it to the corresponding value of type &bird_clock_t.
396
 */
397
bird_clock_t
398
tm_parse_date(char *x)
399
{
400
  struct tm tm;
401
  int n;
402
  time_t t;
403

    
404
  if (sscanf(x, "%d-%d-%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &n) != 3 || x[n])
405
    return 0;
406
  tm.tm_mon--;
407
  tm.tm_year -= 1900;
408
  tm.tm_hour = tm.tm_min = tm.tm_sec = 0;
409
  t = mktime(&tm);
410
  if (t == (time_t) -1)
411
    return 0;
412
  return t;
413
}
414

    
415
static void
416
tm_format_reltime(char *x, struct tm *tm, bird_clock_t delta)
417
{
418
  static char *month_names[12] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun",
419
                                   "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
420

    
421
  if (delta < 20*3600)
422
    bsprintf(x, "%02d:%02d", tm->tm_hour, tm->tm_min);
423
  else if (delta < 360*86400)
424
    bsprintf(x, "%s%02d", month_names[tm->tm_mon], tm->tm_mday);
425
  else
426
    bsprintf(x, "%d", tm->tm_year+1900);
427
}
428

    
429
#include "conf/conf.h"
430

    
431
/**
432
 * tm_format_datetime - convert date and time to textual representation
433
 * @x: destination buffer of size %TM_DATETIME_BUFFER_SIZE
434
 * @t: time
435
 *
436
 * This function formats the given relative time value @t to a textual
437
 * date/time representation (dd-mm-yyyy hh:mm:ss) in real time.
438
 */
439
void
440
tm_format_datetime(char *x, struct timeformat *fmt_spec, bird_clock_t t)
441
{
442
  const char *fmt_used;
443
  struct tm *tm;
444
  bird_clock_t delta = now - t;
445
  t = now_real - delta;
446
  tm = localtime(&t);
447

    
448
  if (fmt_spec->fmt1 == NULL)
449
    return tm_format_reltime(x, tm, delta);
450

    
451
  if ((fmt_spec->limit == 0) || (delta < fmt_spec->limit))
452
    fmt_used = fmt_spec->fmt1;
453
  else
454
    fmt_used = fmt_spec->fmt2;
455

    
456
  int rv = strftime(x, TM_DATETIME_BUFFER_SIZE, fmt_used, tm);
457
  if (((rv == 0) && fmt_used[0]) || (rv == TM_DATETIME_BUFFER_SIZE))
458
    strcpy(x, "<too-long>");
459
}
460

    
461
/**
462
 * DOC: Sockets
463
 *
464
 * Socket resources represent network connections. Their data structure (&socket)
465
 * contains a lot of fields defining the exact type of the socket, the local and
466
 * remote addresses and ports, pointers to socket buffers and finally pointers to
467
 * hook functions to be called when new data have arrived to the receive buffer
468
 * (@rx_hook), when the contents of the transmit buffer have been transmitted
469
 * (@tx_hook) and when an error or connection close occurs (@err_hook).
470
 *
471
 * Freeing of sockets from inside socket hooks is perfectly safe.
472
 */
473

    
474
#ifndef SOL_IP
475
#define SOL_IP IPPROTO_IP
476
#endif
477

    
478
#ifndef SOL_IPV6
479
#define SOL_IPV6 IPPROTO_IPV6
480
#endif
481

    
482
static list sock_list;
483
static struct birdsock *current_sock;
484
static struct birdsock *stored_sock;
485
static int sock_recalc_fdsets_p;
486

    
487
/* Return the socket following @s in its list, or NULL if @s is the last one. */
static inline sock *
sk_next(sock *s)
{
  node *succ = s->n.next;

  /* A node without a successor is the list's tail sentinel, not a socket */
  return succ->next ? SKIP_BACK(sock, n, succ) : NULL;
}
495

    
496
static void
497
sk_alloc_bufs(sock *s)
498
{
499
  if (!s->rbuf && s->rbsize)
500
    s->rbuf = s->rbuf_alloc = xmalloc(s->rbsize);
501
  s->rpos = s->rbuf;
502
  if (!s->tbuf && s->tbsize)
503
    s->tbuf = s->tbuf_alloc = xmalloc(s->tbsize);
504
  s->tpos = s->ttx = s->tbuf;
505
}
506

    
507
static void
508
sk_free_bufs(sock *s)
509
{
510
  if (s->rbuf_alloc)
511
    {
512
      xfree(s->rbuf_alloc);
513
      s->rbuf = s->rbuf_alloc = NULL;
514
    }
515
  if (s->tbuf_alloc)
516
    {
517
      xfree(s->tbuf_alloc);
518
      s->tbuf = s->tbuf_alloc = NULL;
519
    }
520
}
521

    
522
static void
523
sk_free(resource *r)
524
{
525
  sock *s = (sock *) r;
526

    
527
  sk_free_bufs(s);
528
  if (s->fd >= 0)
529
    {
530
      close(s->fd);
531
      if (s == current_sock)
532
        current_sock = sk_next(s);
533
      if (s == stored_sock)
534
        stored_sock = sk_next(s);
535
      rem_node(&s->n);
536
      sock_recalc_fdsets_p = 1;
537
    }
538
}
539

    
540
void
541
sk_reallocate(sock *s)
542
{
543
  sk_free_bufs(s);
544
  sk_alloc_bufs(s);
545
}
546

    
547
static void
548
sk_dump(resource *r)
549
{
550
  sock *s = (sock *) r;
551
  static char *sk_type_names[] = { "TCP<", "TCP>", "TCP", "UDP", "UDP/MC", "IP", "IP/MC", "MAGIC", "UNIX<", "UNIX", "DEL!" };
552

    
553
  debug("(%s, ud=%p, sa=%08x, sp=%d, da=%08x, dp=%d, tos=%d, ttl=%d, if=%s)\n",
554
        sk_type_names[s->type],
555
        s->data,
556
        s->saddr,
557
        s->sport,
558
        s->daddr,
559
        s->dport,
560
        s->tos,
561
        s->ttl,
562
        s->iface ? s->iface->name : "none");
563
}
564

    
565
static struct resclass sk_class = {
566
  "Socket",
567
  sizeof(sock),
568
  sk_free,
569
  sk_dump,
570
  NULL
571
};
572

    
573
/**
574
 * sk_new - create a socket
575
 * @p: pool
576
 *
577
 * This function creates a new socket resource. If you want to use it,
578
 * you need to fill in all the required fields of the structure and
579
 * call sk_open() to do the actual opening of the socket.
580
 */
581
sock *
582
sk_new(pool *p)
583
{
584
  sock *s = ralloc(p, &sk_class);
585
  s->pool = p;
586
  // s->saddr = s->daddr = IPA_NONE;
587
  s->tos = s->ttl = -1;
588
  s->fd = -1;
589
  return s;
590
}
591

    
592
static void
593
sk_insert(sock *s)
594
{
595
  add_tail(&sock_list, &s->n);
596
  sock_recalc_fdsets_p = 1;
597
}
598

    
599
#ifdef IPV6
600

    
601
void
602
fill_in_sockaddr(sockaddr *sa, ip_addr a, unsigned port)
603
{
604
  memset (sa, 0, sizeof (struct sockaddr_in6));
605
  sa->sin6_family = AF_INET6;
606
  sa->sin6_port = htons(port);
607
  sa->sin6_flowinfo = 0;
608
#ifdef HAVE_SIN_LEN
609
  sa->sin6_len = sizeof(struct sockaddr_in6);
610
#endif
611
  set_inaddr(&sa->sin6_addr, a);
612
}
613

    
614
static inline void
615
fill_in_sockifa(sockaddr *sa, struct iface *ifa)
616
{
617
  sa->sin6_scope_id = ifa ? ifa->index : 0;
618
}
619

    
620
void
621
get_sockaddr(struct sockaddr_in6 *sa, ip_addr *a, unsigned *port, int check)
622
{
623
  if (check && sa->sin6_family != AF_INET6)
624
    bug("get_sockaddr called for wrong address family (%d)", sa->sin6_family);
625
  if (port)
626
    *port = ntohs(sa->sin6_port);
627
  memcpy(a, &sa->sin6_addr, sizeof(*a));
628
  ipa_ntoh(*a);
629
}
630

    
631
#else
632

    
633
void
634
fill_in_sockaddr(sockaddr *sa, ip_addr a, unsigned port)
635
{
636
  memset (sa, 0, sizeof (struct sockaddr_in));
637
  sa->sin_family = AF_INET;
638
  sa->sin_port = htons(port);
639
#ifdef HAVE_SIN_LEN
640
  sa->sin_len = sizeof(struct sockaddr_in);
641
#endif
642
  set_inaddr(&sa->sin_addr, a);
643
}
644

    
645
static inline void
646
fill_in_sockifa(sockaddr *sa UNUSED, struct iface *ifa UNUSED)
647
{
648
}
649

    
650
void
651
get_sockaddr(struct sockaddr_in *sa, ip_addr *a, unsigned *port, int check)
652
{
653
  if (check && sa->sin_family != AF_INET)
654
    bug("get_sockaddr called for wrong address family (%d)", sa->sin_family);
655
  if (port)
656
    *port = ntohs(sa->sin_port);
657
  memcpy(a, &sa->sin_addr.s_addr, sizeof(*a));
658
  ipa_ntoh(*a);
659
}
660

    
661
#endif
662

    
663
static char *
664
sk_set_ttl_int(sock *s)
665
{
666
  int one = 1;
667
#ifdef IPV6
668
  if (setsockopt(s->fd, SOL_IPV6, IPV6_UNICAST_HOPS, &s->ttl, sizeof(s->ttl)) < 0)
669
    return "IPV6_UNICAST_HOPS";
670
#else
671
  if (setsockopt(s->fd, SOL_IP, IP_TTL, &s->ttl, sizeof(s->ttl)) < 0)
672
    return "IP_TTL";
673
#ifdef CONFIG_UNIX_DONTROUTE
674
  if (s->ttl == 1 && setsockopt(s->fd, SOL_SOCKET, SO_DONTROUTE, &one, sizeof(one)) < 0)
675
    return "SO_DONTROUTE";
676
#endif 
677
#endif
678
  return NULL;
679
}
680

    
681
#define ERR(x) do { err = x; goto bad; } while(0)
682
#define WARN(x) log(L_WARN "sk_setup: %s: %m", x)
683

    
684
static char *
685
sk_setup(sock *s)
686
{
687
  int fd = s->fd;
688
  char *err;
689

    
690
  if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
691
    ERR("fcntl(O_NONBLOCK)");
692
  if (s->type == SK_UNIX)
693
    return NULL;
694
#ifndef IPV6
695
  if ((s->tos >= 0) && setsockopt(fd, SOL_IP, IP_TOS, &s->tos, sizeof(s->tos)) < 0)
696
    WARN("IP_TOS");
697
#endif
698

    
699
#ifdef IPV6
700
  int v = 1;
701
  if ((s->flags & SKF_V6ONLY) && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &v, sizeof(v)) < 0)
702
    WARN("IPV6_V6ONLY");
703
#endif
704

    
705
  if (s->ttl >= 0)
706
    err = sk_set_ttl_int(s);
707
  else
708
    err = NULL;
709

    
710
bad:
711
  return err;
712
}
713

    
714
/**
715
 * sk_set_ttl - set TTL for given socket.
716
 * @s: socket
717
 * @ttl: TTL value
718
 *
719
 * Set TTL for already opened connections when TTL was not set before.
720
 * Useful for accepted connections when different ones should have 
721
 * different TTL.
722
 *
723
 * Result: 0 for success, -1 for an error.
724
 */
725

    
726
int
727
sk_set_ttl(sock *s, int ttl)
728
{
729
  char *err;
730

    
731
  s->ttl = ttl;
732
  if (err = sk_set_ttl_int(s))
733
    log(L_ERR "sk_set_ttl: %s: %m", err);
734

    
735
  return (err ? -1 : 0);
736
}
737

    
738

    
739
/**
740
 * sk_set_md5_auth - add / remove MD5 security association for given socket.
741
 * @s: socket
742
 * @a: IP address of the other side
743
 * @passwd: password used for MD5 authentication
744
 *
745
 * In TCP MD5 handling code in kernel, there is a set of pairs
746
 * (address, password) used to choose password according to
747
 * address of the other side. This function is useful for
748
 * listening socket, for active sockets it is enough to set
749
 * s->password field.
750
 *
751
 * When called with passwd != NULL, the new pair is added,
752
 * When called with passwd == NULL, the existing pair is removed.
753
 *
754
 * Result: 0 for success, -1 for an error.
755
 */
756

    
757
int
758
sk_set_md5_auth(sock *s, ip_addr a, char *passwd)
759
{
760
  sockaddr sa;
761
  fill_in_sockaddr(&sa, a, 0);
762
  return sk_set_md5_auth_int(s, &sa, passwd);
763
}
764

    
765
int
766
sk_set_broadcast(sock *s, int enable)
767
{
768
  if (setsockopt(s->fd, SOL_SOCKET, SO_BROADCAST, &enable, sizeof(enable)) < 0)
769
    {
770
      log(L_ERR "sk_set_broadcast: SO_BROADCAST: %m");
771
      return -1;
772
    }
773

    
774
  return 0;
775
}
776

    
777

    
778
#ifdef IPV6
779

    
780
int
781
sk_set_ipv6_checksum(sock *s, int offset)
782
{
783
  if (setsockopt(s->fd, IPPROTO_IPV6, IPV6_CHECKSUM, &offset, sizeof(offset)) < 0)
784
    {
785
      log(L_ERR "sk_set_ipv6_checksum: IPV6_CHECKSUM: %m");
786
      return -1;
787
    }
788

    
789
  return 0;
790
}
791

    
792
int
793
sk_setup_multicast(sock *s)
794
{
795
  char *err;
796
  int zero = 0;
797
  int index;
798

    
799
  ASSERT(s->iface && s->iface->addr);
800

    
801
  index = s->iface->index;
802
  if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_HOPS, &s->ttl, sizeof(s->ttl)) < 0)
803
    ERR("IPV6_MULTICAST_HOPS");
804
  if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_LOOP, &zero, sizeof(zero)) < 0)
805
    ERR("IPV6_MULTICAST_LOOP");
806
  if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_IF, &index, sizeof(index)) < 0)
807
    ERR("IPV6_MULTICAST_IF");
808

    
809
  return 0;
810

    
811
bad:
812
  log(L_ERR "sk_setup_multicast: %s: %m", err);
813
  return -1;
814
}
815

    
816
int
817
sk_join_group(sock *s, ip_addr maddr)
818
{
819
  struct ipv6_mreq mreq;
820
        
821
  set_inaddr(&mreq.ipv6mr_multiaddr, maddr);
822

    
823
#ifdef CONFIG_IPV6_GLIBC_20
824
  mreq.ipv6mr_ifindex = s->iface->index;
825
#else
826
  mreq.ipv6mr_interface = s->iface->index;
827
#endif
828

    
829
  if (setsockopt(s->fd, SOL_IPV6, IPV6_JOIN_GROUP, &mreq, sizeof(mreq)) < 0)
830
    {
831
      log(L_ERR "sk_join_group: IPV6_JOIN_GROUP: %m");
832
      return -1;
833
    }
834

    
835
  return 0;
836
}
837

    
838
int
839
sk_leave_group(sock *s, ip_addr maddr)
840
{
841
  struct ipv6_mreq mreq;
842
        
843
  set_inaddr(&mreq.ipv6mr_multiaddr, maddr);
844

    
845
#ifdef CONFIG_IPV6_GLIBC_20
846
  mreq.ipv6mr_ifindex = s->iface->index;
847
#else
848
  mreq.ipv6mr_interface = s->iface->index;
849
#endif
850

    
851
  if (setsockopt(s->fd, SOL_IPV6, IPV6_LEAVE_GROUP, &mreq, sizeof(mreq)) < 0)
852
    {
853
      log(L_ERR "sk_leave_group: IPV6_LEAVE_GROUP: %m");
854
      return -1;
855
    }
856

    
857
  return 0;
858
}
859

    
860
#else /* IPV4 */
861

    
862
int
863
sk_setup_multicast(sock *s)
864
{
865
  char *err;
866

    
867
  ASSERT(s->iface && s->iface->addr);
868

    
869
  if (err = sysio_setup_multicast(s))
870
    {
871
      log(L_ERR "sk_setup_multicast: %s: %m", err);
872
      return -1;
873
    }
874

    
875
  return 0;
876
}
877

    
878
int
879
sk_join_group(sock *s, ip_addr maddr)
880
{
881
 char *err;
882

    
883
 if (err = sysio_join_group(s, maddr))
884
    {
885
      log(L_ERR "sk_join_group: %s: %m", err);
886
      return -1;
887
    }
888

    
889
  return 0;
890
}
891

    
892
int
893
sk_leave_group(sock *s, ip_addr maddr)
894
{
895
 char *err;
896

    
897
 if (err = sysio_leave_group(s, maddr))
898
    {
899
      log(L_ERR "sk_leave_group: %s: %m", err);
900
      return -1;
901
    }
902

    
903
  return 0;
904
}
905

    
906
#endif 
907

    
908

    
909
static void
910
sk_tcp_connected(sock *s)
911
{
912
  s->type = SK_TCP;
913
  sk_alloc_bufs(s);
914
  s->tx_hook(s);
915
}
916

    
917
static int
918
sk_passive_connected(sock *s, struct sockaddr *sa, int al, int type)
919
{
920
  int fd = accept(s->fd, sa, &al);
921
  if (fd >= 0)
922
    {
923
      sock *t = sk_new(s->pool);
924
      char *err;
925
      t->type = type;
926
      t->fd = fd;
927
      t->ttl = s->ttl;
928
      t->tos = s->tos;
929
      t->rbsize = s->rbsize;
930
      t->tbsize = s->tbsize;
931
      if (type == SK_TCP)
932
        {
933
          sockaddr lsa;
934
          int lsa_len = sizeof(lsa);
935
          if (getsockname(fd, (struct sockaddr *) &lsa, &lsa_len) == 0)
936
            get_sockaddr(&lsa, &t->saddr, &t->sport, 1);
937

    
938
          get_sockaddr((sockaddr *) sa, &t->daddr, &t->dport, 1);
939
        }
940
      sk_insert(t);
941
      if (err = sk_setup(t))
942
        {
943
          log(L_ERR "Incoming connection: %s: %m", err);
944
          rfree(t);
945
          return 1;
946
        }
947
      sk_alloc_bufs(t);
948
      s->rx_hook(t, 0);
949
      return 1;
950
    }
951
  else if (errno != EINTR && errno != EAGAIN)
952
    {
953
      s->err_hook(s, errno);
954
    }
955
  return 0;
956
}
957

    
958
/**
959
 * sk_open - open a socket
960
 * @s: socket
961
 *
962
 * This function takes a socket resource created by sk_new() and
963
 * initialized by the user and binds a corresponding network connection
964
 * to it.
965
 *
966
 * Result: 0 for success, -1 for an error.
967
 */
968
int
969
sk_open(sock *s)
970
{
971
  int fd;
972
  sockaddr sa;
973
  int one = 1;
974
  int type = s->type;
975
  int has_src = ipa_nonzero(s->saddr) || s->sport;
976
  char *err;
977

    
978
  switch (type)
979
    {
980
    case SK_TCP_ACTIVE:
981
      s->ttx = "";                        /* Force s->ttx != s->tpos */
982
      /* Fall thru */
983
    case SK_TCP_PASSIVE:
984
      fd = socket(BIRD_PF, SOCK_STREAM, IPPROTO_TCP);
985
      break;
986
    case SK_UDP:
987
      fd = socket(BIRD_PF, SOCK_DGRAM, IPPROTO_UDP);
988
      break;
989
    case SK_IP:
990
      fd = socket(BIRD_PF, SOCK_RAW, s->dport);
991
      break;
992
    case SK_MAGIC:
993
      fd = s->fd;
994
      break;
995
    default:
996
      bug("sk_open() called for invalid sock type %d", type);
997
    }
998
  if (fd < 0)
999
    die("sk_open: socket: %m");
1000
  s->fd = fd;
1001

    
1002
  if (err = sk_setup(s))
1003
    goto bad;
1004

    
1005
  if (has_src)
1006
    {
1007
      int port;
1008

    
1009
      if (type == SK_IP)
1010
        port = 0;
1011
      else
1012
        {
1013
          port = s->sport;
1014
          if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) < 0)
1015
            ERR("SO_REUSEADDR");
1016
        }
1017
      fill_in_sockaddr(&sa, s->saddr, port);
1018
      fill_in_sockifa(&sa, s->iface);
1019
      if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0)
1020
        ERR("bind");
1021
    }
1022
  fill_in_sockaddr(&sa, s->daddr, s->dport);
1023

    
1024
  if (s->password)
1025
    {
1026
      int rv = sk_set_md5_auth_int(s, &sa, s->password);
1027
      if (rv < 0)
1028
        goto bad_no_log;
1029
    }
1030

    
1031
  switch (type)
1032
    {
1033
    case SK_TCP_ACTIVE:
1034
      if (connect(fd, (struct sockaddr *) &sa, sizeof(sa)) >= 0)
1035
        sk_tcp_connected(s);
1036
      else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS &&
1037
               errno != ECONNREFUSED && errno != EHOSTUNREACH)
1038
        ERR("connect");
1039
      break;
1040
    case SK_TCP_PASSIVE:
1041
      if (listen(fd, 8))
1042
        ERR("listen");
1043
      break;
1044
    case SK_MAGIC:
1045
      break;
1046
    default:
1047
      sk_alloc_bufs(s);
1048
#ifdef IPV6
1049
#ifdef IPV6_MTU_DISCOVER
1050
      {
1051
        int dont = IPV6_PMTUDISC_DONT;
1052
        if (setsockopt(fd, SOL_IPV6, IPV6_MTU_DISCOVER, &dont, sizeof(dont)) < 0)
1053
          ERR("IPV6_MTU_DISCOVER");
1054
      }
1055
#endif
1056
#else
1057
#ifdef IP_PMTUDISC
1058
      {
1059
        int dont = IP_PMTUDISC_DONT;
1060
        if (setsockopt(fd, SOL_IP, IP_PMTUDISC, &dont, sizeof(dont)) < 0)
1061
          ERR("IP_PMTUDISC");
1062
      }
1063
#endif
1064
#endif
1065
    }
1066

    
1067
  sk_insert(s);
1068
  return 0;
1069

    
1070
bad:
1071
  log(L_ERR "sk_open: %s: %m", err);
1072
bad_no_log:
1073
  close(fd);
1074
  s->fd = -1;
1075
  return -1;
1076
}
1077

    
1078
int
1079
sk_open_unix(sock *s, char *name)
1080
{
1081
  int fd;
1082
  struct sockaddr_un sa;
1083
  char *err;
1084

    
1085
  fd = socket(AF_UNIX, SOCK_STREAM, 0);
1086
  if (fd < 0)
1087
    die("sk_open_unix: socket: %m");
1088
  s->fd = fd;
1089
  if (err = sk_setup(s))
1090
    goto bad;
1091
  unlink(name);
1092
 
1093
  if (strlen(name) >= sizeof(sa.sun_path))
1094
    die("sk_open_unix: path too long");
1095

    
1096
  sa.sun_family = AF_UNIX;
1097
  strcpy(sa.sun_path, name);
1098
  if (bind(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) < 0)
1099
    ERR("bind");
1100
  if (listen(fd, 8))
1101
    ERR("listen");
1102
  sk_insert(s);
1103
  return 0;
1104

    
1105
bad:
1106
  log(L_ERR "sk_open_unix: %s: %m", err);
1107
  close(fd);
1108
  s->fd = -1;
1109
  return -1;
1110
}
1111

    
1112
static int
1113
sk_maybe_write(sock *s)
1114
{
1115
  int e;
1116

    
1117
  switch (s->type)
1118
    {
1119
    case SK_TCP:
1120
    case SK_MAGIC:
1121
    case SK_UNIX:
1122
      while (s->ttx != s->tpos)
1123
        {
1124
          e = write(s->fd, s->ttx, s->tpos - s->ttx);
1125
          if (e < 0)
1126
            {
1127
              if (errno != EINTR && errno != EAGAIN)
1128
                {
1129
                  s->ttx = s->tpos;        /* empty tx buffer */
1130
                  s->err_hook(s, errno);
1131
                  return -1;
1132
                }
1133
              return 0;
1134
            }
1135
          s->ttx += e;
1136
        }
1137
      s->ttx = s->tpos = s->tbuf;
1138
      return 1;
1139
    case SK_UDP:
1140
    case SK_IP:
1141
      {
1142
        sockaddr sa;
1143

    
1144
        if (s->tbuf == s->tpos)
1145
          return 1;
1146

    
1147
        fill_in_sockaddr(&sa, s->faddr, s->fport);
1148
        fill_in_sockifa(&sa, s->iface);
1149
        e = sendto(s->fd, s->tbuf, s->tpos - s->tbuf, 0, (struct sockaddr *) &sa, sizeof(sa));
1150
        if (e < 0)
1151
          {
1152
            if (errno != EINTR && errno != EAGAIN)
1153
              {
1154
                s->ttx = s->tpos;        /* empty tx buffer */
1155
                s->err_hook(s, errno);
1156
                return -1;
1157
              }
1158
            return 0;
1159
          }
1160
        s->tpos = s->tbuf;
1161
        return 1;
1162
      }
1163
    default:
1164
      bug("sk_maybe_write: unknown socket type %d", s->type);
1165
    }
1166
}
1167

    
1168
int
1169
sk_rx_ready(sock *s)
1170
{
1171
  fd_set rd, wr;
1172
  struct timeval timo;
1173
  int rv;
1174

    
1175
  FD_ZERO(&rd);
1176
  FD_ZERO(&wr);
1177
  FD_SET(s->fd, &rd);
1178

    
1179
  timo.tv_sec = 0;
1180
  timo.tv_usec = 0;
1181

    
1182
 redo:
1183
  rv = select(s->fd+1, &rd, &wr, NULL, &timo);
1184
  
1185
  if ((rv < 0) && (errno == EINTR || errno == EAGAIN))
1186
    goto redo;
1187

    
1188
  return rv;
1189
}
1190

    
1191
/**
1192
 * sk_send - send data to a socket
1193
 * @s: socket
1194
 * @len: number of bytes to send
1195
 *
1196
 * This function sends @len bytes of data prepared in the
1197
 * transmit buffer of the socket @s to the network connection.
1198
 * If the packet can be sent immediately, it does so and returns
1199
 * 1, else it queues the packet for later processing, returns 0
1200
 * and calls the @tx_hook of the socket when the tranmission
1201
 * takes place.
1202
 */
1203
int
1204
sk_send(sock *s, unsigned len)
1205
{
1206
  s->faddr = s->daddr;
1207
  s->fport = s->dport;
1208
  s->ttx = s->tbuf;
1209
  s->tpos = s->tbuf + len;
1210
  return sk_maybe_write(s);
1211
}
1212

    
1213
/**
1214
 * sk_send_to - send data to a specific destination
1215
 * @s: socket
1216
 * @len: number of bytes to send
1217
 * @addr: IP address to send the packet to
1218
 * @port: port to send the packet to
1219
 *
1220
 * This is a sk_send() replacement for connection-less packet sockets
1221
 * which allows destination of the packet to be chosen dynamically.
1222
 */
1223
int
1224
sk_send_to(sock *s, unsigned len, ip_addr addr, unsigned port)
1225
{
1226
  s->faddr = addr;
1227
  s->fport = port;
1228
  s->ttx = s->tbuf;
1229
  s->tpos = s->tbuf + len;
1230
  return sk_maybe_write(s);
1231
}
1232

    
1233
static int
1234
sk_read(sock *s)
1235
{
1236
  switch (s->type)
1237
    {
1238
    case SK_TCP_PASSIVE:
1239
      {
1240
        sockaddr sa;
1241
        return sk_passive_connected(s, (struct sockaddr *) &sa, sizeof(sa), SK_TCP);
1242
      }
1243
    case SK_UNIX_PASSIVE:
1244
      {
1245
        struct sockaddr_un sa;
1246
        return sk_passive_connected(s, (struct sockaddr *) &sa, sizeof(sa), SK_UNIX);
1247
      }
1248
    case SK_TCP:
1249
    case SK_UNIX:
1250
      {
1251
        int c = read(s->fd, s->rpos, s->rbuf + s->rbsize - s->rpos);
1252

    
1253
        if (c < 0)
1254
          {
1255
            if (errno != EINTR && errno != EAGAIN)
1256
              s->err_hook(s, errno);
1257
          }
1258
        else if (!c)
1259
          s->err_hook(s, 0);
1260
        else
1261
          {
1262
            s->rpos += c;
1263
            if (s->rx_hook(s, s->rpos - s->rbuf))
1264
              {
1265
                /* We need to be careful since the socket could have been deleted by the hook */
1266
                if (current_sock == s)
1267
                  s->rpos = s->rbuf;
1268
              }
1269
            return 1;
1270
          }
1271
        return 0;
1272
      }
1273
    case SK_MAGIC:
1274
      return s->rx_hook(s, 0);
1275
    default:
1276
      {
1277
        sockaddr sa;
1278
        int al = sizeof(sa);
1279
        int e = recvfrom(s->fd, s->rbuf, s->rbsize, 0, (struct sockaddr *) &sa, &al);
1280

    
1281
        if (e < 0)
1282
          {
1283
            if (errno != EINTR && errno != EAGAIN)
1284
              s->err_hook(s, errno);
1285
            return 0;
1286
          }
1287
        s->rpos = s->rbuf + e;
1288
        get_sockaddr(&sa, &s->faddr, &s->fport, 1);
1289
        s->rx_hook(s, e);
1290
        return 1;
1291
      }
1292
    }
1293
}
1294

    
1295
static int
1296
sk_write(sock *s)
1297
{
1298
  switch (s->type)
1299
    {
1300
    case SK_TCP_ACTIVE:
1301
      {
1302
        sockaddr sa;
1303
        fill_in_sockaddr(&sa, s->daddr, s->dport);
1304
        if (connect(s->fd, (struct sockaddr *) &sa, sizeof(sa)) >= 0 || errno == EISCONN)
1305
          sk_tcp_connected(s);
1306
        else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS)
1307
          s->err_hook(s, errno);
1308
        return 0;
1309
      }
1310
    default:
1311
      if (s->ttx != s->tpos && sk_maybe_write(s) > 0)
1312
        {
1313
          s->tx_hook(s);
1314
          return 1;
1315
        }
1316
      return 0;
1317
    }
1318
}
1319

    
1320
void
1321
sk_dump_all(void)
1322
{
1323
  node *n;
1324
  sock *s;
1325

    
1326
  debug("Open sockets:\n");
1327
  WALK_LIST(n, sock_list)
1328
    {
1329
      s = SKIP_BACK(sock, n, n);
1330
      debug("%p ", s);
1331
      sk_dump(&s->r);
1332
    }
1333
  debug("\n");
1334
}
1335

    
1336
/* Drop the ERR and WARN macros here; their definitions lie outside this excerpt. */
#undef ERR
1337
#undef WARN
1338

    
1339
/*
1340
 *        Main I/O Loop
1341
 */
1342

    
1343
/*
 * Request flags checked by io_loop() just before it enters select().
 * When a flag is nonzero, io_loop() calls the matching handler, clears
 * the flag and restarts its iteration. Who sets them is not visible in
 * this excerpt -- presumably signal handlers; confirm.
 */
volatile int async_config_flag;                /* Nonzero: io_loop() calls async_config() */
1344
volatile int async_dump_flag;                /* Nonzero: io_loop() calls async_dump() */
1345

    
1346
void
1347
io_init(void)
1348
{
1349
  init_list(&near_timers);
1350
  init_list(&far_timers);
1351
  init_list(&sock_list);
1352
  init_list(&global_event_list);
1353
  krt_io_init();
1354
  init_times();
1355
  update_times();
1356
  srandom((int) now_real);
1357
}
1358

    
1359
static int short_loops = 0;
1360
#define SHORT_LOOP_MAX 10
1361

    
1362
void
1363
io_loop(void)
1364
{
1365
  fd_set rd, wr;
1366
  struct timeval timo;
1367
  time_t tout;
1368
  int hi, events;
1369
  sock *s;
1370
  node *n;
1371

    
1372
  sock_recalc_fdsets_p = 1;
1373
  for(;;)
1374
    {
1375
      events = ev_run_list(&global_event_list);
1376
      update_times();
1377
      tout = tm_first_shot();
1378
      if (tout <= now)
1379
        {
1380
          tm_shot();
1381
          continue;
1382
        }
1383
      timo.tv_sec = events ? 0 : tout - now;
1384
      timo.tv_usec = 0;
1385

    
1386
      if (sock_recalc_fdsets_p)
1387
        {
1388
          sock_recalc_fdsets_p = 0;
1389
          FD_ZERO(&rd);
1390
          FD_ZERO(&wr);
1391
        }
1392

    
1393
      hi = 0;
1394
      WALK_LIST(n, sock_list)
1395
        {
1396
          s = SKIP_BACK(sock, n, n);
1397
          if (s->rx_hook)
1398
            {
1399
              FD_SET(s->fd, &rd);
1400
              if (s->fd > hi)
1401
                hi = s->fd;
1402
            }
1403
          else
1404
            FD_CLR(s->fd, &rd);
1405
          if (s->tx_hook && s->ttx != s->tpos)
1406
            {
1407
              FD_SET(s->fd, &wr);
1408
              if (s->fd > hi)
1409
                hi = s->fd;
1410
            }
1411
          else
1412
            FD_CLR(s->fd, &wr);
1413
        }
1414

    
1415
      /*
1416
       * Yes, this is racy. But even if the signal comes before this test
1417
       * and entering select(), it gets caught on the next timer tick.
1418
       */
1419

    
1420
      if (async_config_flag)
1421
        {
1422
          async_config();
1423
          async_config_flag = 0;
1424
          continue;
1425
        }
1426
      if (async_dump_flag)
1427
        {
1428
          async_dump();
1429
          async_dump_flag = 0;
1430
          continue;
1431
        }
1432
      if (async_shutdown_flag)
1433
        {
1434
          async_shutdown();
1435
          async_shutdown_flag = 0;
1436
          continue;
1437
        }
1438

    
1439
      /* And finally enter select() to find active sockets */
1440
      hi = select(hi+1, &rd, &wr, NULL, &timo);
1441

    
1442
      if (hi < 0)
1443
        {
1444
          if (errno == EINTR || errno == EAGAIN)
1445
            continue;
1446
          die("select: %m");
1447
        }
1448
      if (hi)
1449
        {
1450
          /* guaranteed to be non-empty */
1451
          current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
1452

    
1453
          while (current_sock)
1454
            {
1455
              sock *s = current_sock;
1456
              int e;
1457
              int steps;
1458

    
1459
              steps = MAX_STEPS;
1460
              if ((s->type >= SK_MAGIC) && FD_ISSET(s->fd, &rd) && s->rx_hook)
1461
                do
1462
                  {
1463
                    steps--;
1464
                    e = sk_read(s);
1465
                    if (s != current_sock)
1466
                      goto next;
1467
                  }
1468
                while (e && s->rx_hook && steps);
1469

    
1470
              steps = MAX_STEPS;
1471
              if (FD_ISSET(s->fd, &wr))
1472
                do
1473
                  {
1474
                    steps--;
1475
                    e = sk_write(s);
1476
                    if (s != current_sock)
1477
                      goto next;
1478
                  }
1479
                while (e && steps);
1480
              current_sock = sk_next(s);
1481
            next: ;
1482
            }
1483

    
1484
          short_loops++;
1485
          if (events && (short_loops < SHORT_LOOP_MAX))
1486
            continue;
1487
          short_loops = 0;
1488

    
1489
          int count = 0;
1490
          current_sock = stored_sock;
1491
          if (current_sock == NULL)
1492
            current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
1493

    
1494
          while (current_sock && count < MAX_RX_STEPS)
1495
            {
1496
              sock *s = current_sock;
1497
              int e;
1498

    
1499
              if ((s->type < SK_MAGIC) && FD_ISSET(s->fd, &rd) && s->rx_hook)
1500
                {
1501
                  count++;
1502
                  e = sk_read(s);
1503
                  if (s != current_sock)
1504
                      goto next2;
1505
                }
1506
              current_sock = sk_next(s);
1507
            next2: ;
1508
            }
1509

    
1510
          stored_sock = current_sock;
1511
        }
1512
    }
1513
}
1514

    
1515
/*
 * test_old_bird - check whether something already listens on a UNIX socket
 * @path: filesystem path of the UNIX-domain stream socket to probe
 *
 * Creates a UNIX stream socket and tries to connect it to @path. If the
 * connection succeeds, die() is called with a message saying that
 * another BIRD is running. If it fails for any reason, the probe socket
 * is closed and the function returns. die() is also called when @path
 * does not fit into sockaddr_un.sun_path or when the probe socket
 * cannot be created.
 */
void
test_old_bird(char *path)
{
  int fd;
  struct sockaddr_un sa;

  /* sun_path is a fixed-size array: refuse paths that would overflow it */
  if (strlen(path) >= sizeof(sa.sun_path))
    die("Socket path too long");

  fd = socket(AF_UNIX, SOCK_STREAM, 0);
  if (fd < 0)
    die("Cannot create socket: %m");

  bzero(&sa, sizeof(sa));
  sa.sun_family = AF_UNIX;
  strcpy(sa.sun_path, path);            /* Fits, checked above */

  if (connect(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) == 0)
    die("I found another BIRD running.");
  close(fd);
}
1532

    
1533