Statistics
| Branch: | Revision:

iof-bird-daemon / sysdep / unix / io.c @ 061ab802

History | View | Annotate | Download (29 KB)

1
/*
2
 *        BIRD Internet Routing Daemon -- Unix I/O
3
 *
4
 *        (c) 1998--2004 Martin Mares <mj@ucw.cz>
5
 *      (c) 2004       Ondrej Filip <feela@network.cz>
6
 *
7
 *        Can be freely distributed and used under the terms of the GNU GPL.
8
 */
9

    
10
#include <stdio.h>
11
#include <stdlib.h>
12
#include <time.h>
13
#include <sys/time.h>
14
#include <sys/types.h>
15
#include <sys/socket.h>
16
#include <sys/fcntl.h>
17
#include <sys/un.h>
18
#include <unistd.h>
19
#include <errno.h>
20

    
21
#include "nest/bird.h"
22
#include "lib/lists.h"
23
#include "lib/resource.h"
24
#include "lib/timer.h"
25
#include "lib/socket.h"
26
#include "lib/event.h"
27
#include "lib/string.h"
28
#include "nest/iface.h"
29

    
30
#include "lib/unix.h"
31
#include "lib/sysio.h"
32

    
33
/* Maximum number of calls of rx/tx handler for one socket in one
34
 * select iteration. Should be small enough to not monopolize CPU by
35
 * one protocol instance.
36
 */
37
#define MAX_STEPS 4
38

    
39
/*
40
 *        Tracked Files
41
 */
42

    
43
struct rfile {
44
  resource r;
45
  FILE *f;
46
};
47

    
48
static void
49
rf_free(resource *r)
50
{
51
  struct rfile *a = (struct rfile *) r;
52

    
53
  fclose(a->f);
54
}
55

    
56
static void
57
rf_dump(resource *r)
58
{
59
  struct rfile *a = (struct rfile *) r;
60

    
61
  debug("(FILE *%p)\n", a->f);
62
}
63

    
64
static struct resclass rf_class = {
65
  "FILE",
66
  sizeof(struct rfile),
67
  rf_free,
68
  rf_dump
69
};
70

    
71
void *
72
tracked_fopen(pool *p, char *name, char *mode)
73
{
74
  FILE *f = fopen(name, mode);
75

    
76
  if (f)
77
    {
78
      struct rfile *r = ralloc(p, &rf_class);
79
      r->f = f;
80
    }
81
  return f;
82
}
83

    
84
/**
85
 * DOC: Timers
86
 *
87
 * Timers are resources which represent a wish of a module to call
88
 * a function at the specified time. The platform dependent code
89
 * doesn't guarantee exact timing, only that a timer function
90
 * won't be called before the requested time.
91
 *
92
 * In BIRD, time is represented by values of the &bird_clock_t type
93
 * which are integral numbers interpreted as a relative number of seconds since
94
 * some fixed time point in the past. The current time can be read
95
 * from variable @now with reasonable accuracy and is monotonic. There is also
96
 * a current 'absolute' time in variable @now_real reported by OS.
97
 *
98
 * Each timer is described by a &timer structure containing a pointer
99
 * to the handler function (@hook), data private to this function (@data),
100
 * time the function should be called at (@expires, 0 for inactive timers),
101
 * for the other fields see |timer.h|.
102
 */
103

    
104
#define NEAR_TIMER_LIMIT 4
105

    
106
static list near_timers, far_timers;
107
static bird_clock_t first_far_timer = TIME_INFINITY;
108

    
109
bird_clock_t now, now_real;
110

    
111
static void
112
update_times_plain(void)
113
{
114
  bird_clock_t new_time = time(NULL);
115
  int delta = new_time - now_real;
116

    
117
  if ((delta >= 0) && (delta < 60))
118
    now += delta;
119
  else if (now_real != 0)
120
   log(L_WARN "Time jump, delta %d s", delta);
121

    
122
  now_real = new_time;
123
}
124

    
125
static void
126
update_times_gettime(void)
127
{
128
  struct timespec ts;
129
  int rv;
130

    
131
  rv = clock_gettime(CLOCK_MONOTONIC, &ts);
132
  if (rv != 0)
133
    die("clock_gettime: %m");
134

    
135
  if (ts.tv_sec != now) {
136
    if (ts.tv_sec < now)
137
      log(L_ERR "Monotonic timer is broken");
138

    
139
    now = ts.tv_sec;
140
    now_real = time(NULL);
141
  }
142
}
143

    
144
/* Set by init_times(): non-zero when clock_gettime(CLOCK_MONOTONIC) succeeded */
static int clock_monotonic_available;

/* Refresh @now and @now_real from whichever clock source init_times() selected. */
static inline void
update_times(void)
{
  if (!clock_monotonic_available)
    {
      update_times_plain();
      return;
    }

  update_times_gettime();
}
154

    
155
static inline void
156
init_times(void)
157
{
158
 struct timespec ts;
159
 clock_monotonic_available = (clock_gettime(CLOCK_MONOTONIC, &ts) == 0);
160
 if (!clock_monotonic_available)
161
   log(L_WARN "Monotonic timer is missing");
162
}
163

    
164

    
165
static void
166
tm_free(resource *r)
167
{
168
  timer *t = (timer *) r;
169

    
170
  tm_stop(t);
171
}
172

    
173
static void
174
tm_dump(resource *r)
175
{
176
  timer *t = (timer *) r;
177

    
178
  debug("(code %p, data %p, ", t->hook, t->data);
179
  if (t->randomize)
180
    debug("rand %d, ", t->randomize);
181
  if (t->recurrent)
182
    debug("recur %d, ", t->recurrent);
183
  if (t->expires)
184
    debug("expires in %d sec)\n", t->expires - now);
185
  else
186
    debug("inactive)\n");
187
}
188

    
189
static struct resclass tm_class = {
190
  "Timer",
191
  sizeof(timer),
192
  tm_free,
193
  tm_dump
194
};
195

    
196
/**
197
 * tm_new - create a timer
198
 * @p: pool
199
 *
200
 * This function creates a new timer resource and returns
201
 * a pointer to it. To use the timer, you need to fill in
202
 * the structure fields and call tm_start() to start timing.
203
 */
204
timer *
205
tm_new(pool *p)
206
{
207
  timer *t = ralloc(p, &tm_class);
208
  t->hook = NULL;
209
  t->data = NULL;
210
  t->randomize = 0;
211
  t->expires = 0;
212
  return t;
213
}
214

    
215
static inline void
216
tm_insert_near(timer *t)
217
{
218
  node *n = HEAD(near_timers);
219

    
220
  while (n->next && (SKIP_BACK(timer, n, n)->expires < t->expires))
221
    n = n->next;
222
  insert_node(&t->n, n->prev);
223
}
224

    
225
/**
226
 * tm_start - start a timer
227
 * @t: timer
228
 * @after: number of seconds the timer should be run after
229
 *
230
 * This function schedules the hook function of the timer to
231
 * be called after @after seconds. If the timer has been already
232
 * started, its @expires time is replaced by the new value.
233
 *
234
 * If you have set the @randomize field of @t, the timeout
235
 * will be increased by a random number of seconds chosen
236
 * uniformly from range 0 .. @randomize.
237
 *
238
 * You can call tm_start() from the handler function of the timer
239
 * to request another run of the timer. Also, you can set the @recurrent
240
 * field to have the timer re-added automatically with the same timeout.
241
 */
242
void
243
tm_start(timer *t, unsigned after)
244
{
245
  bird_clock_t when;
246

    
247
  if (t->randomize)
248
    after += random() % (t->randomize + 1);
249
  when = now + after;
250
  if (t->expires == when)
251
    return;
252
  if (t->expires)
253
    rem_node(&t->n);
254
  t->expires = when;
255
  if (after <= NEAR_TIMER_LIMIT)
256
    tm_insert_near(t);
257
  else
258
    {
259
      if (!first_far_timer || first_far_timer > when)
260
        first_far_timer = when;
261
      add_tail(&far_timers, &t->n);
262
    }
263
}
264

    
265
/**
266
 * tm_stop - stop a timer
267
 * @t: timer
268
 *
269
 * This function stops a timer. If the timer is already stopped,
270
 * nothing happens.
271
 */
272
void
273
tm_stop(timer *t)
274
{
275
  if (t->expires)
276
    {
277
      rem_node(&t->n);
278
      t->expires = 0;
279
    }
280
}
281

    
282
static void
283
tm_dump_them(char *name, list *l)
284
{
285
  node *n;
286
  timer *t;
287

    
288
  debug("%s timers:\n", name);
289
  WALK_LIST(n, *l)
290
    {
291
      t = SKIP_BACK(timer, n, n);
292
      debug("%p ", t);
293
      tm_dump(&t->r);
294
    }
295
  debug("\n");
296
}
297

    
298
void
299
tm_dump_all(void)
300
{
301
  tm_dump_them("Near", &near_timers);
302
  tm_dump_them("Far", &far_timers);
303
}
304

    
305
static inline time_t
306
tm_first_shot(void)
307
{
308
  time_t x = first_far_timer;
309

    
310
  if (!EMPTY_LIST(near_timers))
311
    {
312
      timer *t = SKIP_BACK(timer, n, HEAD(near_timers));
313
      if (t->expires < x)
314
        x = t->expires;
315
    }
316
  return x;
317
}
318

    
319
static void
320
tm_shot(void)
321
{
322
  timer *t;
323
  node *n, *m;
324

    
325
  if (first_far_timer <= now)
326
    {
327
      bird_clock_t limit = now + NEAR_TIMER_LIMIT;
328
      first_far_timer = TIME_INFINITY;
329
      n = HEAD(far_timers);
330
      while (m = n->next)
331
        {
332
          t = SKIP_BACK(timer, n, n);
333
          if (t->expires <= limit)
334
            {
335
              rem_node(n);
336
              tm_insert_near(t);
337
            }
338
          else if (t->expires < first_far_timer)
339
            first_far_timer = t->expires;
340
          n = m;
341
        }
342
    }
343
  while ((n = HEAD(near_timers)) -> next)
344
    {
345
      int delay;
346
      t = SKIP_BACK(timer, n, n);
347
      if (t->expires > now)
348
        break;
349
      rem_node(n);
350
      delay = t->expires - now;
351
      t->expires = 0;
352
      if (t->recurrent)
353
        {
354
          int i = t->recurrent - delay;
355
          if (i < 0)
356
            i = 0;
357
          tm_start(t, i);
358
        }
359
      t->hook(t);
360
    }
361
}
362

    
363
/**
364
 * tm_parse_datetime - parse a date and time
365
 * @x: datetime string
366
 *
367
 * tm_parse_datetime() takes a textual representation of
368
 * a date and time (dd-mm-yyyy hh:mm:ss)
369
 * and converts it to the corresponding value of type &bird_clock_t.
370
 */
371
bird_clock_t
372
tm_parse_datetime(char *x)
373
{
374
  struct tm tm;
375
  int n;
376
  time_t t;
377

    
378
  if (sscanf(x, "%d-%d-%d %d:%d:%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &tm.tm_hour, &tm.tm_min, &tm.tm_sec, &n) != 6 || x[n])
379
    return tm_parse_date(x);
380
  tm.tm_mon--;
381
  tm.tm_year -= 1900;
382
  t = mktime(&tm);
383
  if (t == (time_t) -1)
384
    return 0;
385
  return t;
386
}
387
/**
388
 * tm_parse_date - parse a date
389
 * @x: date string
390
 *
391
 * tm_parse_date() takes a textual representation of a date (dd-mm-yyyy)
392
 * and converts it to the corresponding value of type &bird_clock_t.
393
 */
394
bird_clock_t
395
tm_parse_date(char *x)
396
{
397
  struct tm tm;
398
  int n;
399
  time_t t;
400

    
401
  if (sscanf(x, "%d-%d-%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &n) != 3 || x[n])
402
    return 0;
403
  tm.tm_mon--;
404
  tm.tm_year -= 1900;
405
  tm.tm_hour = tm.tm_min = tm.tm_sec = 0;
406
  t = mktime(&tm);
407
  if (t == (time_t) -1)
408
    return 0;
409
  return t;
410
}
411

    
412
/**
413
 * tm_format_date - convert date to textual representation
414
 * @x: destination buffer of size %TM_DATE_BUFFER_SIZE
415
 * @t: time
416
 *
417
 * This function formats the given relative time value @t to a textual
418
 * date representation (dd-mm-yyyy) in real time.
419
 */
420
void
421
tm_format_date(char *x, bird_clock_t t)
422
{
423
  struct tm *tm;
424

    
425
  tm = localtime(&t);
426
  bsprintf(x, "%02d-%02d-%04d", tm->tm_mday, tm->tm_mon+1, tm->tm_year+1900);
427
}
428

    
429
/**
430
 * tm_format_datetime - convert date and time to textual representation
431
 * @x: destination buffer of size %TM_DATETIME_BUFFER_SIZE
432
 * @t: time
433
 *
434
 * This function formats the given relative time value @t to a textual
435
 * date/time representation (dd-mm-yyyy hh:mm:ss) in real time.
436
 */
437
void
438
tm_format_datetime(char *x, bird_clock_t t)
439
{
440
  struct tm *tm;
441
  bird_clock_t delta = now - t;
442
  t = now_real - delta;
443
  tm = localtime(&t);
444
  if (strftime(x, TM_DATETIME_BUFFER_SIZE, "%d-%m-%Y %H:%M:%S", tm) == TM_DATETIME_BUFFER_SIZE)
445
    strcpy(x, "<too-long>");
446
}
447

    
448
/**
449
 * tm_format_reltime - convert date and time to relative textual representation
450
 * @x: destination buffer of size %TM_RELTIME_BUFFER_SIZE
451
 * @t: time
452
 *
453
 * This function formats the given relative time value @t to a short
454
 * textual representation in real time, relative to the current time.
455
 */
456
void
457
tm_format_reltime(char *x, bird_clock_t t)
458
{
459
  struct tm *tm;
460
  static char *month_names[12] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
461

    
462
  bird_clock_t delta = now - t;
463
  t = now_real - delta;
464
  tm = localtime(&t);
465
  if (delta < 20*3600)
466
    bsprintf(x, "%02d:%02d", tm->tm_hour, tm->tm_min);
467
  else if (delta < 360*86400)
468
    bsprintf(x, "%s%02d", month_names[tm->tm_mon], tm->tm_mday);
469
  else
470
    bsprintf(x, "%d", tm->tm_year+1900);
471
}
472

    
473
/**
474
 * DOC: Sockets
475
 *
476
 * Socket resources represent network connections. Their data structure (&socket)
477
 * contains a lot of fields defining the exact type of the socket, the local and
478
 * remote addresses and ports, pointers to socket buffers and finally pointers to
479
 * hook functions to be called when new data have arrived to the receive buffer
480
 * (@rx_hook), when the contents of the transmit buffer have been transmitted
481
 * (@tx_hook) and when an error or connection close occurs (@err_hook).
482
 *
483
 * Freeing of sockets from inside socket hooks is perfectly safe.
484
 */
485

    
486
/* Fallbacks for systems that do not provide the SOL_* socket level names */
#ifndef SOL_IP
#define SOL_IP IPPROTO_IP
#endif

#ifndef SOL_IPV6
#define SOL_IPV6 IPPROTO_IPV6
#endif

/*
 * IPV6_ADD_MEMBERSHIP is not a standard name; RFC 3493 calls the IPv6
 * group-join option IPV6_JOIN_GROUP. The IPv4 option IP_ADD_MEMBERSHIP
 * belongs to a different option level and need not share its value.
 */
#ifndef IPV6_ADD_MEMBERSHIP
#define IPV6_ADD_MEMBERSHIP IPV6_JOIN_GROUP
#endif
497

    
498
/* List of sockets registered by sk_insert() */
static list sock_list;
/* sk_free() advances this past a freed socket; sk_read() compares against it to notice that a hook deleted the socket */
static struct birdsock *current_sock;
/* Set to 1 whenever a socket is added to or removed from sock_list */
static int sock_recalc_fdsets_p;
501

    
502
/* Return the socket following @s on its list, or NULL when @s is the last one. */
static inline sock *
sk_next(sock *s)
{
  node *succ = s->n.next;

  /* A successor whose own ->next is NULL is not a real socket node */
  return succ->next ? SKIP_BACK(sock, n, succ) : NULL;
}
510

    
511
static void
512
sk_alloc_bufs(sock *s)
513
{
514
  if (!s->rbuf && s->rbsize)
515
    s->rbuf = s->rbuf_alloc = xmalloc(s->rbsize);
516
  s->rpos = s->rbuf;
517
  if (!s->tbuf && s->tbsize)
518
    s->tbuf = s->tbuf_alloc = xmalloc(s->tbsize);
519
  s->tpos = s->ttx = s->tbuf;
520
}
521

    
522
static void
523
sk_free_bufs(sock *s)
524
{
525
  if (s->rbuf_alloc)
526
    {
527
      xfree(s->rbuf_alloc);
528
      s->rbuf = s->rbuf_alloc = NULL;
529
    }
530
  if (s->tbuf_alloc)
531
    {
532
      xfree(s->tbuf_alloc);
533
      s->tbuf = s->tbuf_alloc = NULL;
534
    }
535
}
536

    
537
static void
538
sk_free(resource *r)
539
{
540
  sock *s = (sock *) r;
541

    
542
  sk_free_bufs(s);
543
  if (s->fd >= 0)
544
    {
545
      close(s->fd);
546
      if (s == current_sock)
547
        current_sock = sk_next(s);
548
      rem_node(&s->n);
549
      sock_recalc_fdsets_p = 1;
550
    }
551
}
552

    
553
void
554
sk_reallocate(sock *s)
555
{
556
  sk_free_bufs(s);
557
  sk_alloc_bufs(s);
558
}
559

    
560
static void
561
sk_dump(resource *r)
562
{
563
  sock *s = (sock *) r;
564
  static char *sk_type_names[] = { "TCP<", "TCP>", "TCP", "UDP", "UDP/MC", "IP", "IP/MC", "MAGIC", "UNIX<", "UNIX", "DEL!" };
565

    
566
  debug("(%s, ud=%p, sa=%08x, sp=%d, da=%08x, dp=%d, tos=%d, ttl=%d, if=%s)\n",
567
        sk_type_names[s->type],
568
        s->data,
569
        s->saddr,
570
        s->sport,
571
        s->daddr,
572
        s->dport,
573
        s->tos,
574
        s->ttl,
575
        s->iface ? s->iface->name : "none");
576
}
577

    
578
static struct resclass sk_class = {
579
  "Socket",
580
  sizeof(sock),
581
  sk_free,
582
  sk_dump
583
};
584

    
585
/**
586
 * sk_new - create a socket
587
 * @p: pool
588
 *
589
 * This function creates a new socket resource. If you want to use it,
590
 * you need to fill in all the required fields of the structure and
591
 * call sk_open() to do the actual opening of the socket.
592
 */
593
sock *
594
sk_new(pool *p)
595
{
596
  sock *s = ralloc(p, &sk_class);
597
  s->pool = p;
598
  s->data = NULL;
599
  s->saddr = s->daddr = IPA_NONE;
600
  s->sport = s->dport = 0;
601
  s->tos = s->ttl = -1;
602
  s->flags = 0;
603
  s->iface = NULL;
604
  s->rbuf = NULL;
605
  s->rx_hook = NULL;
606
  s->rbsize = 0;
607
  s->tbuf = NULL;
608
  s->tx_hook = NULL;
609
  s->tbsize = 0;
610
  s->err_hook = NULL;
611
  s->fd = -1;
612
  s->rbuf_alloc = s->tbuf_alloc = NULL;
613
  s->password = NULL;
614
  return s;
615
}
616

    
617
static void
618
sk_insert(sock *s)
619
{
620
  add_tail(&sock_list, &s->n);
621
  sock_recalc_fdsets_p = 1;
622
}
623

    
624
#ifdef IPV6
625

    
626
void
627
fill_in_sockaddr(sockaddr *sa, ip_addr a, unsigned port)
628
{
629
  memset (sa, 0, sizeof (struct sockaddr_in6));
630
  sa->sin6_family = AF_INET6;
631
  sa->sin6_port = htons(port);
632
  sa->sin6_flowinfo = 0;
633
#ifdef HAVE_SIN_LEN
634
  sa->sin6_len = sizeof(struct sockaddr_in6);
635
#endif
636
  set_inaddr(&sa->sin6_addr, a);
637
}
638

    
639
static inline void
640
fill_in_sockifa(sockaddr *sa, struct iface *ifa)
641
{
642
  sa->sin6_scope_id = ifa ? ifa->index : 0;
643
}
644

    
645
void
646
get_sockaddr(struct sockaddr_in6 *sa, ip_addr *a, unsigned *port, int check)
647
{
648
  if (check && sa->sin6_family != AF_INET6)
649
    bug("get_sockaddr called for wrong address family (%d)", sa->sin6_family);
650
  if (port)
651
    *port = ntohs(sa->sin6_port);
652
  memcpy(a, &sa->sin6_addr, sizeof(*a));
653
  ipa_ntoh(*a);
654
}
655

    
656
#else
657

    
658
void
659
fill_in_sockaddr(sockaddr *sa, ip_addr a, unsigned port)
660
{
661
  memset (sa, 0, sizeof (struct sockaddr_in));
662
  sa->sin_family = AF_INET;
663
  sa->sin_port = htons(port);
664
#ifdef HAVE_SIN_LEN
665
  sa->sin_len = sizeof(struct sockaddr_in);
666
#endif
667
  set_inaddr(&sa->sin_addr, a);
668
}
669

    
670
static inline void
671
fill_in_sockifa(sockaddr *sa, struct iface *ifa)
672
{
673
}
674

    
675
void
676
get_sockaddr(struct sockaddr_in *sa, ip_addr *a, unsigned *port, int check)
677
{
678
  if (check && sa->sin_family != AF_INET)
679
    bug("get_sockaddr called for wrong address family (%d)", sa->sin_family);
680
  if (port)
681
    *port = ntohs(sa->sin_port);
682
  memcpy(a, &sa->sin_addr.s_addr, sizeof(*a));
683
  ipa_ntoh(*a);
684
}
685

    
686
#endif
687

    
688
static char *
689
sk_set_ttl_int(sock *s)
690
{
691
  int one = 1;
692
#ifdef IPV6
693
  if (s->type != SK_UDP_MC && s->type != SK_IP_MC &&
694
      setsockopt(s->fd, SOL_IPV6, IPV6_UNICAST_HOPS, &s->ttl, sizeof(s->ttl)) < 0)
695
    return "IPV6_UNICAST_HOPS";
696
#else
697
  if (setsockopt(s->fd, SOL_IP, IP_TTL, &s->ttl, sizeof(s->ttl)) < 0)
698
    return "IP_TTL";
699
#ifdef CONFIG_UNIX_DONTROUTE
700
  if (s->ttl == 1 && setsockopt(s->fd, SOL_SOCKET, SO_DONTROUTE, &one, sizeof(one)) < 0)
701
    return "SO_DONTROUTE";
702
#endif 
703
#endif
704
  return NULL;
705
}
706

    
707
/* ERR(x): store x in the local variable `err' and jump to the local label `bad' (both must exist in the caller) */
#define ERR(x) do { err = x; goto bad; } while(0)
/* WARN(x): log a non-fatal failure of operation x together with the errno text */
#define WARN(x) log(L_WARN "sk_setup: %s: %m", x)
709

    
710
static char *
711
sk_setup(sock *s)
712
{
713
  int fd = s->fd;
714
  char *err;
715

    
716
  if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
717
    ERR("fcntl(O_NONBLOCK)");
718
  if (s->type == SK_UNIX)
719
    return NULL;
720
#ifndef IPV6
721
  if ((s->tos >= 0) && setsockopt(fd, SOL_IP, IP_TOS, &s->tos, sizeof(s->tos)) < 0)
722
    WARN("IP_TOS");
723
#endif
724

    
725
#ifdef IPV6
726
  int v = 1;
727
  if ((s->flags & SKF_V6ONLY) && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &v, sizeof(v)) < 0)
728
    WARN("IPV6_V6ONLY");
729
#endif
730

    
731
  if (s->ttl >= 0)
732
    err = sk_set_ttl_int(s);
733
  else
734
    err = NULL;
735

    
736
bad:
737
  return err;
738
}
739

    
740
/**
741
 * sk_set_ttl - set TTL for given socket.
742
 * @s: socket
743
 * @ttl: TTL value
744
 *
745
 * Set TTL for already opened connections when TTL was not set before.
746
 * Useful for accepted connections when different ones should have 
747
 * different TTL.
748
 *
749
 * Result: 0 for success, -1 for an error.
750
 */
751

    
752
int
753
sk_set_ttl(sock *s, int ttl)
754
{
755
  char *err;
756

    
757
  s->ttl = ttl;
758
  if (err = sk_set_ttl_int(s))
759
    log(L_ERR "sk_set_ttl: %s: %m", err);
760

    
761
  return (err ? -1 : 0);
762
}
763

    
764

    
765
/**
766
 * sk_set_md5_auth - add / remove MD5 security association for given socket.
767
 * @s: socket
768
 * @a: IP address of the other side
769
 * @passwd: password used for MD5 authentication
770
 *
771
 * In TCP MD5 handling code in kernel, there is a set of pairs
772
 * (address, password) used to choose password according to
773
 * address of the other side. This function is useful for
774
 * listening socket, for active sockets it is enough to set
775
 * s->password field.
776
 *
777
 * When called with passwd != NULL, the new pair is added,
778
 * When called with passwd == NULL, the existing pair is removed.
779
 *
780
 * Result: 0 for success, -1 for an error.
781
 */
782

    
783
int
784
sk_set_md5_auth(sock *s, ip_addr a, char *passwd)
785
{
786
  sockaddr sa;
787
  fill_in_sockaddr(&sa, a, 0);
788
  return sk_set_md5_auth_int(s, &sa, passwd);
789
}
790

    
791

    
792
static void
793
sk_tcp_connected(sock *s)
794
{
795
  s->type = SK_TCP;
796
  sk_alloc_bufs(s);
797
  s->tx_hook(s);
798
}
799

    
800
static int
801
sk_passive_connected(sock *s, struct sockaddr *sa, int al, int type)
802
{
803
  int fd = accept(s->fd, sa, &al);
804
  if (fd >= 0)
805
    {
806
      sock *t = sk_new(s->pool);
807
      char *err;
808
      t->type = type;
809
      t->fd = fd;
810
      t->ttl = s->ttl;
811
      t->tos = s->tos;
812
      t->rbsize = s->rbsize;
813
      t->tbsize = s->tbsize;
814
      if (type == SK_TCP)
815
        get_sockaddr((sockaddr *) sa, &t->daddr, &t->dport, 1);
816
      sk_insert(t);
817
      if (err = sk_setup(t))
818
        {
819
          log(L_ERR "Incoming connection: %s: %m", err);
820
          rfree(t);
821
          return 1;
822
        }
823
      sk_alloc_bufs(t);
824
      s->rx_hook(t, 0);
825
      return 1;
826
    }
827
  else if (errno != EINTR && errno != EAGAIN)
828
    {
829
      log(L_ERR "accept: %m");
830
      s->err_hook(s, errno);
831
    }
832
  return 0;
833
}
834

    
835
/**
836
 * sk_open - open a socket
837
 * @s: socket
838
 *
839
 * This function takes a socket resource created by sk_new() and
840
 * initialized by the user and binds a corresponding network connection
841
 * to it.
842
 *
843
 * Result: 0 for success, -1 for an error.
844
 */
845
int
846
sk_open(sock *s)
847
{
848
  int fd;
849
  sockaddr sa;
850
  int one = 1;
851
  int type = s->type;
852
  int has_src = ipa_nonzero(s->saddr) || s->sport;
853
  char *err;
854

    
855
  switch (type)
856
    {
857
    case SK_TCP_ACTIVE:
858
      s->ttx = "";                        /* Force s->ttx != s->tpos */
859
      /* Fall thru */
860
    case SK_TCP_PASSIVE:
861
      fd = socket(BIRD_PF, SOCK_STREAM, IPPROTO_TCP);
862
      break;
863
    case SK_UDP:
864
    case SK_UDP_MC:
865
      fd = socket(BIRD_PF, SOCK_DGRAM, IPPROTO_UDP);
866
      break;
867
    case SK_IP:
868
    case SK_IP_MC:
869
      fd = socket(BIRD_PF, SOCK_RAW, s->dport);
870
      break;
871
    case SK_MAGIC:
872
      fd = s->fd;
873
      break;
874
    default:
875
      bug("sk_open() called for invalid sock type %d", type);
876
    }
877
  if (fd < 0)
878
    die("sk_open: socket: %m");
879
  s->fd = fd;
880

    
881
  if (err = sk_setup(s))
882
    goto bad;
883

    
884
  switch (type)
885
    {
886
    case SK_UDP:
887
    case SK_IP:
888
#ifndef IPV6
889
      if (s->iface)                        /* It's a broadcast socket */
890
        if (setsockopt(fd, SOL_SOCKET, SO_BROADCAST, &one, sizeof(one)) < 0)
891
          ERR("SO_BROADCAST");
892
#endif
893
      break;
894
    case SK_UDP_MC:
895
    case SK_IP_MC:
896
      {
897
#ifdef IPV6
898
        /* Fortunately, IPv6 socket interface is recent enough and therefore standardized */
899
        ASSERT(s->iface && s->iface->addr);
900
        if (ipa_nonzero(s->daddr))
901
          {
902
            int t = s->iface->index;
903
            int zero = 0;
904
            if (setsockopt(fd, SOL_IPV6, IPV6_MULTICAST_HOPS, &s->ttl, sizeof(s->ttl)) < 0)
905
              ERR("IPV6_MULTICAST_HOPS");
906
            if (setsockopt(fd, SOL_IPV6, IPV6_MULTICAST_LOOP, &zero, sizeof(zero)) < 0)
907
              ERR("IPV6_MULTICAST_LOOP");
908
            if (setsockopt(fd, SOL_IPV6, IPV6_MULTICAST_IF, &t, sizeof(t)) < 0)
909
              ERR("IPV6_MULTICAST_IF");
910
          }
911
        if (has_src)
912
          {
913
            struct ipv6_mreq mreq;
914
            set_inaddr(&mreq.ipv6mr_multiaddr, s->daddr);
915
#ifdef CONFIG_IPV6_GLIBC_20
916
            mreq.ipv6mr_ifindex = s->iface->index;
917
#else
918
            mreq.ipv6mr_interface = s->iface->index;
919
#endif /* CONFIG_IPV6_GLIBC_20 */
920
            if (setsockopt(fd, SOL_IPV6, IPV6_ADD_MEMBERSHIP, &mreq, sizeof(mreq)) < 0)
921
              ERR("IPV6_ADD_MEMBERSHIP");
922
          }
923
#else /* IPv4 */
924
        /* With IPv4 there are zillions of different socket interface variants. Ugh. */
925
        ASSERT(s->iface && s->iface->addr);
926
        if (err = sysio_mcast_join(s))
927
          goto bad;
928
#endif /* IPV6 */
929
      break;
930
      }
931
    }
932
  if (has_src)
933
    {
934
      int port;
935

    
936
      if (type == SK_IP || type == SK_IP_MC)
937
        port = 0;
938
      else
939
        {
940
          port = s->sport;
941
          if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) < 0)
942
            ERR("SO_REUSEADDR");
943
        }
944
      fill_in_sockaddr(&sa, s->saddr, port);
945
      fill_in_sockifa(&sa, s->iface);
946
#ifdef CONFIG_SKIP_MC_BIND
947
      if ((type != SK_UDP_MC) && (type != SK_IP_MC) &&
948
          bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0)
949
#else
950
      if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0)
951
#endif
952
        ERR("bind");
953
    }
954
  fill_in_sockaddr(&sa, s->daddr, s->dport);
955

    
956
  if (s->password)
957
    {
958
      int rv = sk_set_md5_auth_int(s, &sa, s->password);
959
      if (rv < 0)
960
        goto bad_no_log;
961
    }
962

    
963
  switch (type)
964
    {
965
    case SK_TCP_ACTIVE:
966
      if (connect(fd, (struct sockaddr *) &sa, sizeof(sa)) >= 0)
967
        sk_tcp_connected(s);
968
      else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS &&
969
               errno != ECONNREFUSED && errno != EHOSTUNREACH)
970
        ERR("connect");
971
      break;
972
    case SK_TCP_PASSIVE:
973
      if (listen(fd, 8))
974
        ERR("listen");
975
      break;
976
    case SK_MAGIC:
977
      break;
978
    default:
979
      sk_alloc_bufs(s);
980
#ifdef IPV6
981
#ifdef IPV6_MTU_DISCOVER
982
      {
983
        int dont = IPV6_PMTUDISC_DONT;
984
        if (setsockopt(fd, SOL_IPV6, IPV6_MTU_DISCOVER, &dont, sizeof(dont)) < 0)
985
          ERR("IPV6_MTU_DISCOVER");
986
      }
987
#endif
988
#else
989
#ifdef IP_PMTUDISC
990
      {
991
        int dont = IP_PMTUDISC_DONT;
992
        if (setsockopt(fd, SOL_IP, IP_PMTUDISC, &dont, sizeof(dont)) < 0)
993
          ERR("IP_PMTUDISC");
994
      }
995
#endif
996
#endif
997
    }
998

    
999
  sk_insert(s);
1000
  return 0;
1001

    
1002
bad:
1003
  log(L_ERR "sk_open: %s: %m", err);
1004
bad_no_log:
1005
  close(fd);
1006
  s->fd = -1;
1007
  return -1;
1008
}
1009

    
1010
int
1011
sk_open_unix(sock *s, char *name)
1012
{
1013
  int fd;
1014
  struct sockaddr_un sa;
1015
  char *err;
1016

    
1017
  fd = socket(AF_UNIX, SOCK_STREAM, 0);
1018
  if (fd < 0)
1019
    die("sk_open_unix: socket: %m");
1020
  s->fd = fd;
1021
  if (err = sk_setup(s))
1022
    goto bad;
1023
  unlink(name);
1024
 
1025
  if (strlen(name) >= sizeof(sa.sun_path))
1026
    die("sk_open_unix: path too long");
1027

    
1028
  sa.sun_family = AF_UNIX;
1029
  strcpy(sa.sun_path, name);
1030
  if (bind(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) < 0)
1031
    ERR("bind");
1032
  if (listen(fd, 8))
1033
    ERR("listen");
1034
  sk_insert(s);
1035
  return 0;
1036

    
1037
bad:
1038
  log(L_ERR "sk_open_unix: %s: %m", err);
1039
  close(fd);
1040
  s->fd = -1;
1041
  return -1;
1042
}
1043

    
1044
static int
1045
sk_maybe_write(sock *s)
1046
{
1047
  int e;
1048

    
1049
  switch (s->type)
1050
    {
1051
    case SK_TCP:
1052
    case SK_MAGIC:
1053
    case SK_UNIX:
1054
      while (s->ttx != s->tpos)
1055
        {
1056
          e = write(s->fd, s->ttx, s->tpos - s->ttx);
1057
          if (e < 0)
1058
            {
1059
              if (errno != EINTR && errno != EAGAIN)
1060
                {
1061
                  s->ttx = s->tpos;        /* empty tx buffer */
1062
                  s->err_hook(s, errno);
1063
                  return -1;
1064
                }
1065
              return 0;
1066
            }
1067
          s->ttx += e;
1068
        }
1069
      s->ttx = s->tpos = s->tbuf;
1070
      return 1;
1071
    case SK_UDP:
1072
    case SK_UDP_MC:
1073
    case SK_IP:
1074
    case SK_IP_MC:
1075
      {
1076
        sockaddr sa;
1077

    
1078
        if (s->tbuf == s->tpos)
1079
          return 1;
1080

    
1081
        fill_in_sockaddr(&sa, s->faddr, s->fport);
1082
        fill_in_sockifa(&sa, s->iface);
1083
        e = sendto(s->fd, s->tbuf, s->tpos - s->tbuf, 0, (struct sockaddr *) &sa, sizeof(sa));
1084
        if (e < 0)
1085
          {
1086
            if (errno != EINTR && errno != EAGAIN)
1087
              {
1088
                s->ttx = s->tpos;        /* empty tx buffer */
1089
                s->err_hook(s, errno);
1090
                return -1;
1091
              }
1092
            return 0;
1093
          }
1094
        s->tpos = s->tbuf;
1095
        return 1;
1096
      }
1097
    default:
1098
      bug("sk_maybe_write: unknown socket type %d", s->type);
1099
    }
1100
}
1101

    
1102
/**
1103
 * sk_send - send data to a socket
1104
 * @s: socket
1105
 * @len: number of bytes to send
1106
 *
1107
 * This function sends @len bytes of data prepared in the
1108
 * transmit buffer of the socket @s to the network connection.
1109
 * If the packet can be sent immediately, it does so and returns
1110
 * 1, else it queues the packet for later processing, returns 0
1111
 * and calls the @tx_hook of the socket when the transmission
1112
 * takes place.
1113
 */
1114
int
1115
sk_send(sock *s, unsigned len)
1116
{
1117
  s->faddr = s->daddr;
1118
  s->fport = s->dport;
1119
  s->ttx = s->tbuf;
1120
  s->tpos = s->tbuf + len;
1121
  return sk_maybe_write(s);
1122
}
1123

    
1124
/**
1125
 * sk_send_to - send data to a specific destination
1126
 * @s: socket
1127
 * @len: number of bytes to send
1128
 * @addr: IP address to send the packet to
1129
 * @port: port to send the packet to
1130
 *
1131
 * This is a sk_send() replacement for connection-less packet sockets
1132
 * which allows destination of the packet to be chosen dynamically.
1133
 */
1134
int
1135
sk_send_to(sock *s, unsigned len, ip_addr addr, unsigned port)
1136
{
1137
  s->faddr = addr;
1138
  s->fport = port;
1139
  s->ttx = s->tbuf;
1140
  s->tpos = s->tbuf + len;
1141
  return sk_maybe_write(s);
1142
}
1143

    
1144
static int
1145
sk_read(sock *s)
1146
{
1147
  switch (s->type)
1148
    {
1149
    case SK_TCP_PASSIVE:
1150
      {
1151
        sockaddr sa;
1152
        return sk_passive_connected(s, (struct sockaddr *) &sa, sizeof(sa), SK_TCP);
1153
      }
1154
    case SK_UNIX_PASSIVE:
1155
      {
1156
        struct sockaddr_un sa;
1157
        return sk_passive_connected(s, (struct sockaddr *) &sa, sizeof(sa), SK_UNIX);
1158
      }
1159
    case SK_TCP:
1160
    case SK_UNIX:
1161
      {
1162
        int c = read(s->fd, s->rpos, s->rbuf + s->rbsize - s->rpos);
1163

    
1164
        if (c < 0)
1165
          {
1166
            if (errno != EINTR && errno != EAGAIN)
1167
              s->err_hook(s, errno);
1168
          }
1169
        else if (!c)
1170
          s->err_hook(s, 0);
1171
        else
1172
          {
1173
            s->rpos += c;
1174
            if (s->rx_hook(s, s->rpos - s->rbuf))
1175
              {
1176
                /* We need to be careful since the socket could have been deleted by the hook */
1177
                if (current_sock == s)
1178
                  s->rpos = s->rbuf;
1179
              }
1180
            return 1;
1181
          }
1182
        return 0;
1183
      }
1184
    case SK_MAGIC:
1185
      return s->rx_hook(s, 0);
1186
    default:
1187
      {
1188
        sockaddr sa;
1189
        int al = sizeof(sa);
1190
        int e = recvfrom(s->fd, s->rbuf, s->rbsize, 0, (struct sockaddr *) &sa, &al);
1191

    
1192
        if (e < 0)
1193
          {
1194
            if (errno != EINTR && errno != EAGAIN)
1195
              s->err_hook(s, errno);
1196
            return 0;
1197
          }
1198
        s->rpos = s->rbuf + e;
1199
        get_sockaddr(&sa, &s->faddr, &s->fport, 1);
1200
        s->rx_hook(s, e);
1201
        return 1;
1202
      }
1203
    }
1204
}
1205

    
1206
static int
1207
sk_write(sock *s)
1208
{
1209
  switch (s->type)
1210
    {
1211
    case SK_TCP_ACTIVE:
1212
      {
1213
        sockaddr sa;
1214
        fill_in_sockaddr(&sa, s->daddr, s->dport);
1215
        if (connect(s->fd, (struct sockaddr *) &sa, sizeof(sa)) >= 0 || errno == EISCONN)
1216
          sk_tcp_connected(s);
1217
        else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS)
1218
          s->err_hook(s, errno);
1219
        return 0;
1220
      }
1221
    default:
1222
      if (s->ttx != s->tpos && sk_maybe_write(s) > 0)
1223
        {
1224
          s->tx_hook(s);
1225
          return 1;
1226
        }
1227
      return 0;
1228
    }
1229
}
1230

    
1231
void
1232
sk_dump_all(void)
1233
{
1234
  node *n;
1235
  sock *s;
1236

    
1237
  debug("Open sockets:\n");
1238
  WALK_LIST(n, sock_list)
1239
    {
1240
      s = SKIP_BACK(sock, n, n);
1241
      debug("%p ", s);
1242
      sk_dump(&s->r);
1243
    }
1244
  debug("\n");
1245
}
1246

    
1247
#undef ERR
1248
#undef WARN
1249

    
1250
/*
 *        Main I/O Loop
 */
1253

    
1254
/*
 * Request flag for an asynchronous reconfiguration. io_loop() tests it
 * before entering select(), calls async_config() and then clears it.
 * NOTE(review): presumably raised from a signal handler -- confirm.
 */
volatile int async_config_flag;
1255
/*
 * Request flag for an asynchronous dump. io_loop() tests it before
 * entering select(), calls async_dump() and then clears it.
 * NOTE(review): presumably raised from a signal handler -- confirm.
 */
volatile int async_dump_flag;
1256

    
1257
void
1258
io_init(void)
1259
{
1260
  init_list(&near_timers);
1261
  init_list(&far_timers);
1262
  init_list(&sock_list);
1263
  init_list(&global_event_list);
1264
  krt_io_init();
1265
  init_times();
1266
  update_times();
1267
  srandom((int) now_real);
1268
}
1269

    
1270
void
1271
io_loop(void)
1272
{
1273
  fd_set rd, wr;
1274
  struct timeval timo;
1275
  time_t tout;
1276
  int hi, events;
1277
  sock *s;
1278
  node *n;
1279

    
1280
  sock_recalc_fdsets_p = 1;
1281
  for(;;)
1282
    {
1283
      events = ev_run_list(&global_event_list);
1284
      update_times();
1285
      tout = tm_first_shot();
1286
      if (tout <= now)
1287
        {
1288
          tm_shot();
1289
          continue;
1290
        }
1291
      timo.tv_sec = events ? 0 : tout - now;
1292
      timo.tv_usec = 0;
1293

    
1294
      if (sock_recalc_fdsets_p)
1295
        {
1296
          sock_recalc_fdsets_p = 0;
1297
          FD_ZERO(&rd);
1298
          FD_ZERO(&wr);
1299
        }
1300

    
1301
      hi = 0;
1302
      WALK_LIST(n, sock_list)
1303
        {
1304
          s = SKIP_BACK(sock, n, n);
1305
          if (s->rx_hook)
1306
            {
1307
              FD_SET(s->fd, &rd);
1308
              if (s->fd > hi)
1309
                hi = s->fd;
1310
            }
1311
          else
1312
            FD_CLR(s->fd, &rd);
1313
          if (s->tx_hook && s->ttx != s->tpos)
1314
            {
1315
              FD_SET(s->fd, &wr);
1316
              if (s->fd > hi)
1317
                hi = s->fd;
1318
            }
1319
          else
1320
            FD_CLR(s->fd, &wr);
1321
        }
1322

    
1323
      /*
1324
       * Yes, this is racy. But even if the signal comes before this test
1325
       * and entering select(), it gets caught on the next timer tick.
1326
       */
1327

    
1328
      if (async_config_flag)
1329
        {
1330
          async_config();
1331
          async_config_flag = 0;
1332
          continue;
1333
        }
1334
      if (async_dump_flag)
1335
        {
1336
          async_dump();
1337
          async_dump_flag = 0;
1338
          continue;
1339
        }
1340
      if (async_shutdown_flag)
1341
        {
1342
          async_shutdown();
1343
          async_shutdown_flag = 0;
1344
          continue;
1345
        }
1346

    
1347
      /* And finally enter select() to find active sockets */
1348

    
1349
      hi = select(hi+1, &rd, &wr, NULL, &timo);
1350
      if (hi < 0)
1351
        {
1352
          if (errno == EINTR || errno == EAGAIN)
1353
            continue;
1354
          die("select: %m");
1355
        }
1356
      if (hi)
1357
        {
1358
          current_sock = SKIP_BACK(sock, n, HEAD(sock_list));        /* guaranteed to be non-empty */
1359
          while (current_sock)
1360
            {
1361
              sock *s = current_sock;
1362
              int e;
1363
              int steps = MAX_STEPS;
1364
              if (FD_ISSET(s->fd, &rd) && s->rx_hook)
1365
                do
1366
                  {
1367
                    steps--;
1368
                    e = sk_read(s);
1369
                    if (s != current_sock)
1370
                      goto next;
1371
                  }
1372
                while (e && s->rx_hook && steps);
1373

    
1374
              steps = MAX_STEPS;
1375
              if (FD_ISSET(s->fd, &wr))
1376
                do
1377
                  {
1378
                    steps--;
1379
                    e = sk_write(s);
1380
                    if (s != current_sock)
1381
                      goto next;
1382
                  }
1383
                while (e && steps);
1384
              current_sock = sk_next(s);
1385
            next: ;
1386
            }
1387
        }
1388
    }
1389
}
1390

    
1391
/*
 * test_old_bird - refuse to start when something already answers on @path.
 *
 * Creates a UNIX-domain stream socket and tries to connect to @path.
 * A successful connect ends the process via die(); otherwise the probe
 * socket is closed and the function returns.
 *
 * die() is also called when the socket cannot be created or when @path
 * (with its terminating NUL) does not fit into sockaddr_un.sun_path.
 */
void
test_old_bird(char *path)
{
  int fd;
  struct sockaddr_un sa;
  size_t len = strlen(path);

  /* sun_path is a small fixed-size array; an unchecked copy would overflow it */
  if (len >= sizeof(sa.sun_path))
    die("Control socket path too long: %s", path);

  fd = socket(AF_UNIX, SOCK_STREAM, 0);

  if (fd < 0)
    die("Cannot create socket: %m");
  memset(&sa, 0, sizeof(sa));
  sa.sun_family = AF_UNIX;
  memcpy(sa.sun_path, path, len + 1);
  if (connect(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) == 0)
    die("I found another BIRD running.");
  close(fd);
}
1408

    
1409