Statistics
| Branch: | Revision:

iof-bird-daemon / sysdep / unix / io.c @ 35164c50

History | View | Annotate | Download (29 KB)

1
/*
2
 *        BIRD Internet Routing Daemon -- Unix I/O
3
 *
4
 *        (c) 1998--2004 Martin Mares <mj@ucw.cz>
5
 *      (c) 2004       Ondrej Filip <feela@network.cz>
6
 *
7
 *        Can be freely distributed and used under the terms of the GNU GPL.
8
 */
9

    
10
#include <stdio.h>
11
#include <stdlib.h>
12
#include <time.h>
13
#include <sys/time.h>
14
#include <sys/types.h>
15
#include <sys/socket.h>
16
#include <sys/fcntl.h>
17
#include <sys/un.h>
18
#include <unistd.h>
19
#include <errno.h>
20

    
21
#include "nest/bird.h"
22
#include "lib/lists.h"
23
#include "lib/resource.h"
24
#include "lib/timer.h"
25
#include "lib/socket.h"
26
#include "lib/event.h"
27
#include "lib/string.h"
28
#include "nest/iface.h"
29

    
30
#include "lib/unix.h"
31
#include "lib/sysio.h"
32

    
33
/*
34
 *        Tracked Files
35
 */
36

    
37
struct rfile {
38
  resource r;
39
  FILE *f;
40
};
41

    
42
static void
43
rf_free(resource *r)
44
{
45
  struct rfile *a = (struct rfile *) r;
46

    
47
  fclose(a->f);
48
}
49

    
50
static void
51
rf_dump(resource *r)
52
{
53
  struct rfile *a = (struct rfile *) r;
54

    
55
  debug("(FILE *%p)\n", a->f);
56
}
57

    
58
static struct resclass rf_class = {
59
  "FILE",
60
  sizeof(struct rfile),
61
  rf_free,
62
  rf_dump
63
};
64

    
65
void *
66
tracked_fopen(pool *p, char *name, char *mode)
67
{
68
  FILE *f = fopen(name, mode);
69

    
70
  if (f)
71
    {
72
      struct rfile *r = ralloc(p, &rf_class);
73
      r->f = f;
74
    }
75
  return f;
76
}
77

    
78
/**
79
 * DOC: Timers
80
 *
81
 * Timers are resources which represent a wish of a module to call
82
 * a function at the specified time. The platform dependent code
83
 * doesn't guarantee exact timing, only that a timer function
84
 * won't be called before the requested time.
85
 *
86
 * In BIRD, time is represented by values of the &bird_clock_t type
87
 * which are integral numbers interpreted as a relative number of seconds since
88
 * some fixed time point in past. The current time can be read
89
 * from variable @now with reasonable accuracy and is monotonic. There is also
90
 * a current 'absolute' time in variable @now_real reported by OS.
91
 *
92
 * Each timer is described by a &timer structure containing a pointer
93
 * to the handler function (@hook), data private to this function (@data),
94
 * time the function should be called at (@expires, 0 for inactive timers),
95
 * for the other fields see |timer.h|.
96
 */
97

    
98
#define NEAR_TIMER_LIMIT 4
99

    
100
static list near_timers, far_timers;
101
static bird_clock_t first_far_timer = TIME_INFINITY;
102

    
103
bird_clock_t now, now_real;
104

    
105
static void
106
update_times_plain(void)
107
{
108
  bird_clock_t new_time = time(NULL);
109
  int delta = new_time - now_real;
110

    
111
  if ((delta >= 0) && (delta < 60))
112
    now += delta;
113
  else if (now_real != 0)
114
   log(L_WARN "Time jump, delta %d s", delta);
115

    
116
  now_real = new_time;
117
}
118

    
119
static void
120
update_times_gettime(void)
121
{
122
  struct timespec ts;
123
  int rv;
124

    
125
  rv = clock_gettime(CLOCK_MONOTONIC, &ts);
126
  if (rv != 0)
127
    die("clock_gettime: %m");
128

    
129
  if (ts.tv_sec != now) {
130
    if (ts.tv_sec < now)
131
      log(L_ERR "Monotonic timer is broken");
132

    
133
    now = ts.tv_sec;
134
    now_real = time(NULL);
135
  }
136
}
137

    
138
static int clock_monotonic_available;
139

    
140
static inline void
141
update_times(void)
142
{
143
  if (clock_monotonic_available)
144
    update_times_gettime();
145
  else
146
    update_times_plain();
147
}
148

    
149
static inline void
150
init_times(void)
151
{
152
 struct timespec ts;
153
 clock_monotonic_available = (clock_gettime(CLOCK_MONOTONIC, &ts) == 0);
154
 if (!clock_monotonic_available)
155
   log(L_WARN "Monotonic timer is missing");
156
}
157

    
158

    
159
static void
160
tm_free(resource *r)
161
{
162
  timer *t = (timer *) r;
163

    
164
  tm_stop(t);
165
}
166

    
167
static void
168
tm_dump(resource *r)
169
{
170
  timer *t = (timer *) r;
171

    
172
  debug("(code %p, data %p, ", t->hook, t->data);
173
  if (t->randomize)
174
    debug("rand %d, ", t->randomize);
175
  if (t->recurrent)
176
    debug("recur %d, ", t->recurrent);
177
  if (t->expires)
178
    debug("expires in %d sec)\n", t->expires - now);
179
  else
180
    debug("inactive)\n");
181
}
182

    
183
static struct resclass tm_class = {
184
  "Timer",
185
  sizeof(timer),
186
  tm_free,
187
  tm_dump
188
};
189

    
190
/**
191
 * tm_new - create a timer
192
 * @p: pool
193
 *
194
 * This function creates a new timer resource and returns
195
 * a pointer to it. To use the timer, you need to fill in
196
 * the structure fields and call tm_start() to start timing.
197
 */
198
timer *
199
tm_new(pool *p)
200
{
201
  timer *t = ralloc(p, &tm_class);
202
  t->hook = NULL;
203
  t->data = NULL;
204
  t->randomize = 0;
205
  t->expires = 0;
206
  return t;
207
}
208

    
209
static inline void
210
tm_insert_near(timer *t)
211
{
212
  node *n = HEAD(near_timers);
213

    
214
  while (n->next && (SKIP_BACK(timer, n, n)->expires < t->expires))
215
    n = n->next;
216
  insert_node(&t->n, n->prev);
217
}
218

    
219
/**
220
 * tm_start - start a timer
221
 * @t: timer
222
 * @after: number of seconds the timer should be run after
223
 *
224
 * This function schedules the hook function of the timer to
225
 * be called after @after seconds. If the timer has been already
226
 * started, its @expires time is replaced by the new value.
227
 *
228
 * If you have set the @randomize field of @t, the timeout
229
 * will be increased by a random number of seconds chosen
230
 * uniformly from range 0 .. @randomize.
231
 *
232
 * You can call tm_start() from the handler function of the timer
233
 * to request another run of the timer. Also, you can set the @recurrent
234
 * field to have the timer re-added automatically with the same timeout.
235
 */
236
void
237
tm_start(timer *t, unsigned after)
238
{
239
  bird_clock_t when;
240

    
241
  if (t->randomize)
242
    after += random() % (t->randomize + 1);
243
  when = now + after;
244
  if (t->expires == when)
245
    return;
246
  if (t->expires)
247
    rem_node(&t->n);
248
  t->expires = when;
249
  if (after <= NEAR_TIMER_LIMIT)
250
    tm_insert_near(t);
251
  else
252
    {
253
      if (!first_far_timer || first_far_timer > when)
254
        first_far_timer = when;
255
      add_tail(&far_timers, &t->n);
256
    }
257
}
258

    
259
/**
260
 * tm_stop - stop a timer
261
 * @t: timer
262
 *
263
 * This function stops a timer. If the timer is already stopped,
264
 * nothing happens.
265
 */
266
void
267
tm_stop(timer *t)
268
{
269
  if (t->expires)
270
    {
271
      rem_node(&t->n);
272
      t->expires = 0;
273
    }
274
}
275

    
276
static void
277
tm_dump_them(char *name, list *l)
278
{
279
  node *n;
280
  timer *t;
281

    
282
  debug("%s timers:\n", name);
283
  WALK_LIST(n, *l)
284
    {
285
      t = SKIP_BACK(timer, n, n);
286
      debug("%p ", t);
287
      tm_dump(&t->r);
288
    }
289
  debug("\n");
290
}
291

    
292
void
293
tm_dump_all(void)
294
{
295
  tm_dump_them("Near", &near_timers);
296
  tm_dump_them("Far", &far_timers);
297
}
298

    
299
static inline time_t
300
tm_first_shot(void)
301
{
302
  time_t x = first_far_timer;
303

    
304
  if (!EMPTY_LIST(near_timers))
305
    {
306
      timer *t = SKIP_BACK(timer, n, HEAD(near_timers));
307
      if (t->expires < x)
308
        x = t->expires;
309
    }
310
  return x;
311
}
312

    
313
static void
314
tm_shot(void)
315
{
316
  timer *t;
317
  node *n, *m;
318

    
319
  if (first_far_timer <= now)
320
    {
321
      bird_clock_t limit = now + NEAR_TIMER_LIMIT;
322
      first_far_timer = TIME_INFINITY;
323
      n = HEAD(far_timers);
324
      while (m = n->next)
325
        {
326
          t = SKIP_BACK(timer, n, n);
327
          if (t->expires <= limit)
328
            {
329
              rem_node(n);
330
              tm_insert_near(t);
331
            }
332
          else if (t->expires < first_far_timer)
333
            first_far_timer = t->expires;
334
          n = m;
335
        }
336
    }
337
  while ((n = HEAD(near_timers)) -> next)
338
    {
339
      int delay;
340
      t = SKIP_BACK(timer, n, n);
341
      if (t->expires > now)
342
        break;
343
      rem_node(n);
344
      delay = t->expires - now;
345
      t->expires = 0;
346
      if (t->recurrent)
347
        {
348
          int i = t->recurrent - delay;
349
          if (i < 0)
350
            i = 0;
351
          tm_start(t, i);
352
        }
353
      t->hook(t);
354
    }
355
}
356

    
357
/**
358
 * tm_parse_datetime - parse a date and time
359
 * @x: datetime string
360
 *
361
 * tm_parse_datetime() takes a textual representation of
362
 * a date and time (dd-mm-yyyy hh:mm:ss)
363
 * and converts it to the corresponding value of type &bird_clock_t.
364
 */
365
bird_clock_t
366
tm_parse_datetime(char *x)
367
{
368
  struct tm tm;
369
  int n;
370
  time_t t;
371

    
372
  if (sscanf(x, "%d-%d-%d %d:%d:%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &tm.tm_hour, &tm.tm_min, &tm.tm_sec, &n) != 6 || x[n])
373
    return tm_parse_date(x);
374
  tm.tm_mon--;
375
  tm.tm_year -= 1900;
376
  t = mktime(&tm);
377
  if (t == (time_t) -1)
378
    return 0;
379
  return t;
380
}
381
/**
382
 * tm_parse_date - parse a date
383
 * @x: date string
384
 *
385
 * tm_parse_date() takes a textual representation of a date (dd-mm-yyyy)
386
 * and converts it to the corresponding value of type &bird_clock_t.
387
 */
388
bird_clock_t
389
tm_parse_date(char *x)
390
{
391
  struct tm tm;
392
  int n;
393
  time_t t;
394

    
395
  if (sscanf(x, "%d-%d-%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &n) != 3 || x[n])
396
    return 0;
397
  tm.tm_mon--;
398
  tm.tm_year -= 1900;
399
  tm.tm_hour = tm.tm_min = tm.tm_sec = 0;
400
  t = mktime(&tm);
401
  if (t == (time_t) -1)
402
    return 0;
403
  return t;
404
}
405

    
406
/**
407
 * tm_format_date - convert date to textual representation
408
 * @x: destination buffer of size %TM_DATE_BUFFER_SIZE
409
 * @t: time
410
 *
411
 * This function formats the given relative time value @t to a textual
412
 * date representation (dd-mm-yyyy) in real time.
413
 */
414
void
415
tm_format_date(char *x, bird_clock_t t)
416
{
417
  struct tm *tm;
418

    
419
  tm = localtime(&t);
420
  bsprintf(x, "%02d-%02d-%04d", tm->tm_mday, tm->tm_mon+1, tm->tm_year+1900);
421
}
422

    
423
/**
424
 * tm_format_datetime - convert date and time to textual representation
425
 * @x: destination buffer of size %TM_DATETIME_BUFFER_SIZE
426
 * @t: time
427
 *
428
 * This function formats the given relative time value @t to a textual
429
 * date/time representation (dd-mm-yyyy hh:mm:ss) in real time.
430
 */
431
void
432
tm_format_datetime(char *x, bird_clock_t t)
433
{
434
  struct tm *tm;
435
  bird_clock_t delta = now - t;
436
  t = now_real - delta;
437
  tm = localtime(&t);
438
  if (strftime(x, TM_DATETIME_BUFFER_SIZE, "%d-%m-%Y %H:%M:%S", tm) == TM_DATETIME_BUFFER_SIZE)
439
    strcpy(x, "<too-long>");
440
}
441

    
442
/**
443
 * tm_format_reltime - convert date and time to relative textual representation
444
 * @x: destination buffer of size %TM_RELTIME_BUFFER_SIZE
445
 * @t: time
446
 *
447
 * This function formats the given relative time value @t to a short
448
 * textual representation in real time, relative to the current time.
449
 */
450
void
451
tm_format_reltime(char *x, bird_clock_t t)
452
{
453
  struct tm *tm;
454
  static char *month_names[12] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
455

    
456
  bird_clock_t delta = now - t;
457
  t = now_real - delta;
458
  tm = localtime(&t);
459
  if (delta < 20*3600)
460
    bsprintf(x, "%02d:%02d", tm->tm_hour, tm->tm_min);
461
  else if (delta < 360*86400)
462
    bsprintf(x, "%s%02d", month_names[tm->tm_mon], tm->tm_mday);
463
  else
464
    bsprintf(x, "%d", tm->tm_year+1900);
465
}
466

    
467
/**
468
 * DOC: Sockets
469
 *
470
 * Socket resources represent network connections. Their data structure (&socket)
471
 * contains a lot of fields defining the exact type of the socket, the local and
472
 * remote addresses and ports, pointers to socket buffers and finally pointers to
473
 * hook functions to be called when new data have arrived to the receive buffer
474
 * (@rx_hook), when the contents of the transmit buffer have been transmitted
475
 * (@tx_hook) and when an error or connection close occurs (@err_hook).
476
 *
477
 * Freeing of sockets from inside socket hooks is perfectly safe.
478
 */
479

    
480
#ifndef SOL_IP
481
#define SOL_IP IPPROTO_IP
482
#endif
483

    
484
#ifndef SOL_IPV6
485
#define SOL_IPV6 IPPROTO_IPV6
486
#endif
487

    
488
#ifndef IPV6_ADD_MEMBERSHIP
489
#define IPV6_ADD_MEMBERSHIP IP_ADD_MEMBERSHIP
490
#endif
491

    
492
static list sock_list;
493
static struct birdsock *current_sock;
494
static int sock_recalc_fdsets_p;
495

    
496
/* Return the socket following @s in its list, or NULL when @s is the last one. */
static inline sock *
sk_next(sock *s)
{
  node *succ = s->n.next;

  /* A node whose own next pointer is NULL is the list's tail sentinel */
  if (succ->next)
    return SKIP_BACK(sock, n, succ);
  return NULL;
}
504

    
505
static void
506
sk_alloc_bufs(sock *s)
507
{
508
  if (!s->rbuf && s->rbsize)
509
    s->rbuf = s->rbuf_alloc = xmalloc(s->rbsize);
510
  s->rpos = s->rbuf;
511
  if (!s->tbuf && s->tbsize)
512
    s->tbuf = s->tbuf_alloc = xmalloc(s->tbsize);
513
  s->tpos = s->ttx = s->tbuf;
514
}
515

    
516
static void
517
sk_free_bufs(sock *s)
518
{
519
  if (s->rbuf_alloc)
520
    {
521
      xfree(s->rbuf_alloc);
522
      s->rbuf = s->rbuf_alloc = NULL;
523
    }
524
  if (s->tbuf_alloc)
525
    {
526
      xfree(s->tbuf_alloc);
527
      s->tbuf = s->tbuf_alloc = NULL;
528
    }
529
}
530

    
531
static void
532
sk_free(resource *r)
533
{
534
  sock *s = (sock *) r;
535

    
536
  sk_free_bufs(s);
537
  if (s->fd >= 0)
538
    {
539
      close(s->fd);
540
      if (s == current_sock)
541
        current_sock = sk_next(s);
542
      rem_node(&s->n);
543
      sock_recalc_fdsets_p = 1;
544
    }
545
}
546

    
547
void
548
sk_reallocate(sock *s)
549
{
550
  sk_free_bufs(s);
551
  sk_alloc_bufs(s);
552
}
553

    
554
static void
555
sk_dump(resource *r)
556
{
557
  sock *s = (sock *) r;
558
  static char *sk_type_names[] = { "TCP<", "TCP>", "TCP", "UDP", "UDP/MC", "IP", "IP/MC", "MAGIC", "UNIX<", "UNIX", "DEL!" };
559

    
560
  debug("(%s, ud=%p, sa=%08x, sp=%d, da=%08x, dp=%d, tos=%d, ttl=%d, if=%s)\n",
561
        sk_type_names[s->type],
562
        s->data,
563
        s->saddr,
564
        s->sport,
565
        s->daddr,
566
        s->dport,
567
        s->tos,
568
        s->ttl,
569
        s->iface ? s->iface->name : "none");
570
}
571

    
572
static struct resclass sk_class = {
573
  "Socket",
574
  sizeof(sock),
575
  sk_free,
576
  sk_dump
577
};
578

    
579
/**
580
 * sk_new - create a socket
581
 * @p: pool
582
 *
583
 * This function creates a new socket resource. If you want to use it,
584
 * you need to fill in all the required fields of the structure and
585
 * call sk_open() to do the actual opening of the socket.
586
 */
587
sock *
588
sk_new(pool *p)
589
{
590
  sock *s = ralloc(p, &sk_class);
591
  s->pool = p;
592
  s->data = NULL;
593
  s->saddr = s->daddr = IPA_NONE;
594
  s->sport = s->dport = 0;
595
  s->tos = s->ttl = -1;
596
  s->iface = NULL;
597
  s->rbuf = NULL;
598
  s->rx_hook = NULL;
599
  s->rbsize = 0;
600
  s->tbuf = NULL;
601
  s->tx_hook = NULL;
602
  s->tbsize = 0;
603
  s->err_hook = NULL;
604
  s->fd = -1;
605
  s->rbuf_alloc = s->tbuf_alloc = NULL;
606
  s->password = NULL;
607
  return s;
608
}
609

    
610
static void
611
sk_insert(sock *s)
612
{
613
  add_tail(&sock_list, &s->n);
614
  sock_recalc_fdsets_p = 1;
615
}
616

    
617
#ifdef IPV6
618

    
619
void
620
fill_in_sockaddr(sockaddr *sa, ip_addr a, unsigned port)
621
{
622
  memset (sa, 0, sizeof (struct sockaddr_in6));
623
  sa->sin6_family = AF_INET6;
624
  sa->sin6_port = htons(port);
625
  sa->sin6_flowinfo = 0;
626
#ifdef HAVE_SIN_LEN
627
  sa->sin6_len = sizeof(struct sockaddr_in6);
628
#endif
629
  set_inaddr(&sa->sin6_addr, a);
630
}
631

    
632
void
633
get_sockaddr(struct sockaddr_in6 *sa, ip_addr *a, unsigned *port, int check)
634
{
635
  if (check && sa->sin6_family != AF_INET6)
636
    bug("get_sockaddr called for wrong address family (%d)", sa->sin6_family);
637
  if (port)
638
    *port = ntohs(sa->sin6_port);
639
  memcpy(a, &sa->sin6_addr, sizeof(*a));
640
  ipa_ntoh(*a);
641
}
642

    
643
#else
644

    
645
void
646
fill_in_sockaddr(sockaddr *sa, ip_addr a, unsigned port)
647
{
648
  memset (sa, 0, sizeof (struct sockaddr_in));
649
  sa->sin_family = AF_INET;
650
  sa->sin_port = htons(port);
651
#ifdef HAVE_SIN_LEN
652
  sa->sin_len = sizeof(struct sockaddr_in);
653
#endif
654
  set_inaddr(&sa->sin_addr, a);
655
}
656

    
657
void
658
get_sockaddr(struct sockaddr_in *sa, ip_addr *a, unsigned *port, int check)
659
{
660
  if (check && sa->sin_family != AF_INET)
661
    bug("get_sockaddr called for wrong address family (%d)", sa->sin_family);
662
  if (port)
663
    *port = ntohs(sa->sin_port);
664
  memcpy(a, &sa->sin_addr.s_addr, sizeof(*a));
665
  ipa_ntoh(*a);
666
}
667

    
668
#endif
669

    
670
static char *
671
sk_set_ttl_int(sock *s)
672
{
673
  int one = 1;
674
#ifdef IPV6
675
  if (s->type != SK_UDP_MC && s->type != SK_IP_MC &&
676
      setsockopt(s->fd, SOL_IPV6, IPV6_UNICAST_HOPS, &s->ttl, sizeof(s->ttl)) < 0)
677
    return "IPV6_UNICAST_HOPS";
678
#else
679
  if (setsockopt(s->fd, SOL_IP, IP_TTL, &s->ttl, sizeof(s->ttl)) < 0)
680
    return "IP_TTL";
681
#ifdef CONFIG_UNIX_DONTROUTE
682
  if (s->ttl == 1 && setsockopt(s->fd, SOL_SOCKET, SO_DONTROUTE, &one, sizeof(one)) < 0)
683
    return "SO_DONTROUTE";
684
#endif 
685
#endif
686
  return NULL;
687
}
688

    
689
#define ERR(x) do { err = x; goto bad; } while(0)
690
#define WARN(x) log(L_WARN "sk_setup: %s: %m", x)
691

    
692
static char *
693
sk_setup(sock *s)
694
{
695
  int fd = s->fd;
696
  char *err;
697

    
698
  if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
699
    ERR("fcntl(O_NONBLOCK)");
700
  if (s->type == SK_UNIX)
701
    return NULL;
702
#ifndef IPV6
703
  if ((s->tos >= 0) && setsockopt(fd, SOL_IP, IP_TOS, &s->tos, sizeof(s->tos)) < 0)
704
    WARN("IP_TOS");
705
#endif
706
  
707
  if (s->ttl >= 0)
708
    err = sk_set_ttl_int(s);
709
  else
710
    err = NULL;
711

    
712
bad:
713
  return err;
714
}
715

    
716
/**
717
 * sk_set_ttl - set TTL for given socket.
718
 * @s: socket
719
 * @ttl: TTL value
720
 *
721
 * Set TTL for already opened connections when TTL was not set before.
722
 * Useful for accepted connections when different ones should have 
723
 * different TTL.
724
 *
725
 * Result: 0 for success, -1 for an error.
726
 */
727

    
728
int
729
sk_set_ttl(sock *s, int ttl)
730
{
731
  char *err;
732

    
733
  s->ttl = ttl;
734
  if (err = sk_set_ttl_int(s))
735
    log(L_ERR "sk_set_ttl: %s: %m", err);
736

    
737
  return (err ? -1 : 0);
738
}
739

    
740

    
741
/* FIXME: check portability  */
742

    
743
static int
744
sk_set_md5_auth_int(sock *s, sockaddr *sa, char *passwd)
745
{
746
  struct tcp_md5sig md5;
747

    
748
  memset(&md5, 0, sizeof(md5));
749
  memcpy(&md5.tcpm_addr, (struct sockaddr *) sa, sizeof(*sa));
750

    
751
  if (passwd)
752
    {
753
      int len = strlen(passwd);
754

    
755
      if (len > TCP_MD5SIG_MAXKEYLEN)
756
        {
757
          log(L_ERR "MD5 password too long");
758
          return -1;
759
        }
760

    
761
      md5.tcpm_keylen = len;
762
      memcpy(&md5.tcpm_key, passwd, len);
763
    }
764

    
765
  int rv = setsockopt(s->fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
766

    
767
  if (rv < 0) 
768
    {
769
      if (errno == ENOPROTOOPT)
770
        log(L_ERR "Kernel does not support TCP MD5 signatures");
771
      else
772
        log(L_ERR "sk_set_md5_auth_int: setsockopt: %m");
773
    }
774

    
775
  return rv;
776
}
777

    
778
/**
779
 * sk_set_md5_auth - add / remove MD5 security association for given socket.
780
 * @s: socket
781
 * @a: IP address of the other side
782
 * @passwd: password used for MD5 authentication
783
 *
784
 * In TCP MD5 handling code in kernel, there is a set of pairs
785
 * (address, password) used to choose password according to
786
 * address of the other side. This function is useful for
787
 * listening socket, for active sockets it is enough to set
788
 * s->password field.
789
 *
790
 * When called with passwd != NULL, the new pair is added,
791
 * When called with passwd == NULL, the existing pair is removed.
792
 *
793
 * Result: 0 for success, -1 for an error.
794
 */
795

    
796
int
797
sk_set_md5_auth(sock *s, ip_addr a, char *passwd)
798
{
799
  sockaddr sa;
800
  fill_in_sockaddr(&sa, a, 0);
801
  return sk_set_md5_auth_int(s, &sa, passwd);
802
}
803

    
804

    
805
static void
806
sk_tcp_connected(sock *s)
807
{
808
  s->type = SK_TCP;
809
  sk_alloc_bufs(s);
810
  s->tx_hook(s);
811
}
812

    
813
static int
814
sk_passive_connected(sock *s, struct sockaddr *sa, int al, int type)
815
{
816
  int fd = accept(s->fd, sa, &al);
817
  if (fd >= 0)
818
    {
819
      sock *t = sk_new(s->pool);
820
      char *err;
821
      t->type = type;
822
      t->fd = fd;
823
      t->ttl = s->ttl;
824
      t->tos = s->tos;
825
      t->rbsize = s->rbsize;
826
      t->tbsize = s->tbsize;
827
      if (type == SK_TCP)
828
        get_sockaddr((sockaddr *) sa, &t->daddr, &t->dport, 1);
829
      sk_insert(t);
830
      if (err = sk_setup(t))
831
        {
832
          log(L_ERR "Incoming connection: %s: %m", err);
833
          rfree(t);
834
          return 1;
835
        }
836
      sk_alloc_bufs(t);
837
      s->rx_hook(t, 0);
838
      return 1;
839
    }
840
  else if (errno != EINTR && errno != EAGAIN)
841
    {
842
      log(L_ERR "accept: %m");
843
      s->err_hook(s, errno);
844
    }
845
  return 0;
846
}
847

    
848
/**
849
 * sk_open - open a socket
850
 * @s: socket
851
 *
852
 * This function takes a socket resource created by sk_new() and
853
 * initialized by the user and binds a corresponding network connection
854
 * to it.
855
 *
856
 * Result: 0 for success, -1 for an error.
857
 */
858
int
859
sk_open(sock *s)
860
{
861
  int fd;
862
  sockaddr sa;
863
  int one = 1;
864
  int type = s->type;
865
  int has_src = ipa_nonzero(s->saddr) || s->sport;
866
  char *err;
867

    
868
  switch (type)
869
    {
870
    case SK_TCP_ACTIVE:
871
      s->ttx = "";                        /* Force s->ttx != s->tpos */
872
      /* Fall thru */
873
    case SK_TCP_PASSIVE:
874
      fd = socket(BIRD_PF, SOCK_STREAM, IPPROTO_TCP);
875
      break;
876
    case SK_UDP:
877
    case SK_UDP_MC:
878
      fd = socket(BIRD_PF, SOCK_DGRAM, IPPROTO_UDP);
879
      break;
880
    case SK_IP:
881
    case SK_IP_MC:
882
      fd = socket(BIRD_PF, SOCK_RAW, s->dport);
883
      break;
884
    case SK_MAGIC:
885
      fd = s->fd;
886
      break;
887
    default:
888
      bug("sk_open() called for invalid sock type %d", type);
889
    }
890
  if (fd < 0)
891
    die("sk_open: socket: %m");
892
  s->fd = fd;
893

    
894
  if (err = sk_setup(s))
895
    goto bad;
896

    
897
  switch (type)
898
    {
899
    case SK_UDP:
900
    case SK_IP:
901
      if (s->iface)                        /* It's a broadcast socket */
902
#ifdef IPV6
903
        bug("IPv6 has no broadcasts");
904
#else
905
        if (setsockopt(fd, SOL_SOCKET, SO_BROADCAST, &one, sizeof(one)) < 0)
906
          ERR("SO_BROADCAST");
907
#endif
908
      break;
909
    case SK_UDP_MC:
910
    case SK_IP_MC:
911
      {
912
#ifdef IPV6
913
        /* Fortunately, IPv6 socket interface is recent enough and therefore standardized */
914
        ASSERT(s->iface && s->iface->addr);
915
        if (ipa_nonzero(s->daddr))
916
          {
917
            int t = s->iface->index;
918
            int zero = 0;
919
            if (setsockopt(fd, SOL_IPV6, IPV6_MULTICAST_HOPS, &s->ttl, sizeof(s->ttl)) < 0)
920
              ERR("IPV6_MULTICAST_HOPS");
921
            if (setsockopt(fd, SOL_IPV6, IPV6_MULTICAST_LOOP, &zero, sizeof(zero)) < 0)
922
              ERR("IPV6_MULTICAST_LOOP");
923
            if (setsockopt(fd, SOL_IPV6, IPV6_MULTICAST_IF, &t, sizeof(t)) < 0)
924
              ERR("IPV6_MULTICAST_IF");
925
          }
926
        if (has_src)
927
          {
928
            struct ipv6_mreq mreq;
929
            set_inaddr(&mreq.ipv6mr_multiaddr, s->daddr);
930
#ifdef CONFIG_IPV6_GLIBC_20
931
            mreq.ipv6mr_ifindex = s->iface->index;
932
#else
933
            mreq.ipv6mr_interface = s->iface->index;
934
#endif /* CONFIG_IPV6_GLIBC_20 */
935
            if (setsockopt(fd, SOL_IPV6, IPV6_ADD_MEMBERSHIP, &mreq, sizeof(mreq)) < 0)
936
              ERR("IPV6_ADD_MEMBERSHIP");
937
          }
938
#else
939
        /* With IPv4 there are zillions of different socket interface variants. Ugh. */
940
        ASSERT(s->iface && s->iface->addr);
941
        if (err = sysio_mcast_join(s))
942
          goto bad;
943
#endif /* IPV6 */
944
      break;
945
      }
946
    }
947
  if (has_src)
948
    {
949
      int port;
950

    
951
      if (type == SK_IP || type == SK_IP_MC)
952
        port = 0;
953
      else
954
        {
955
          port = s->sport;
956
          if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) < 0)
957
            ERR("SO_REUSEADDR");
958
        }
959
      fill_in_sockaddr(&sa, s->saddr, port);
960
#ifdef CONFIG_SKIP_MC_BIND
961
      if (type == SK_IP && bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0)
962
#else
963
      if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0)
964
#endif
965
        ERR("bind");
966
    }
967
  fill_in_sockaddr(&sa, s->daddr, s->dport);
968

    
969
  if (s->password)
970
    {
971
      int rv = sk_set_md5_auth_int(s, &sa, s->password);
972
      if (rv < 0)
973
        goto bad_no_log;
974
    }
975

    
976
  switch (type)
977
    {
978
    case SK_TCP_ACTIVE:
979
      if (connect(fd, (struct sockaddr *) &sa, sizeof(sa)) >= 0)
980
        sk_tcp_connected(s);
981
      else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS &&
982
               errno != ECONNREFUSED && errno != EHOSTUNREACH)
983
        ERR("connect");
984
      break;
985
    case SK_TCP_PASSIVE:
986
      if (listen(fd, 8))
987
        ERR("listen");
988
      break;
989
    case SK_MAGIC:
990
      break;
991
    default:
992
      sk_alloc_bufs(s);
993
#ifdef IPV6
994
#ifdef IPV6_MTU_DISCOVER
995
      {
996
        int dont = IPV6_PMTUDISC_DONT;
997
        if (setsockopt(fd, SOL_IPV6, IPV6_MTU_DISCOVER, &dont, sizeof(dont)) < 0)
998
          ERR("IPV6_MTU_DISCOVER");
999
      }
1000
#endif
1001
#else
1002
#ifdef IP_PMTUDISC
1003
      {
1004
        int dont = IP_PMTUDISC_DONT;
1005
        if (setsockopt(fd, SOL_IP, IP_PMTUDISC, &dont, sizeof(dont)) < 0)
1006
          ERR("IP_PMTUDISC");
1007
      }
1008
#endif
1009
#endif
1010
    }
1011

    
1012
  sk_insert(s);
1013
  return 0;
1014

    
1015
bad:
1016
  log(L_ERR "sk_open: %s: %m", err);
1017
bad_no_log:
1018
  close(fd);
1019
  s->fd = -1;
1020
  return -1;
1021
}
1022

    
1023
int
1024
sk_open_unix(sock *s, char *name)
1025
{
1026
  int fd;
1027
  struct sockaddr_un sa;
1028
  char *err;
1029

    
1030
  fd = socket(AF_UNIX, SOCK_STREAM, 0);
1031
  if (fd < 0)
1032
    die("sk_open_unix: socket: %m");
1033
  s->fd = fd;
1034
  if (err = sk_setup(s))
1035
    goto bad;
1036
  unlink(name);
1037
 
1038
  if (strlen(name) >= sizeof(sa.sun_path))
1039
    die("sk_open_unix: path too long");
1040

    
1041
  sa.sun_family = AF_UNIX;
1042
  strcpy(sa.sun_path, name);
1043
  if (bind(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) < 0)
1044
    ERR("bind");
1045
  if (listen(fd, 8))
1046
    ERR("listen");
1047
  sk_insert(s);
1048
  return 0;
1049

    
1050
bad:
1051
  log(L_ERR "sk_open_unix: %s: %m", err);
1052
  close(fd);
1053
  s->fd = -1;
1054
  return -1;
1055
}
1056

    
1057
static int
1058
sk_maybe_write(sock *s)
1059
{
1060
  int e;
1061

    
1062
  switch (s->type)
1063
    {
1064
    case SK_TCP:
1065
    case SK_MAGIC:
1066
    case SK_UNIX:
1067
      while (s->ttx != s->tpos)
1068
        {
1069
          e = write(s->fd, s->ttx, s->tpos - s->ttx);
1070
          if (e < 0)
1071
            {
1072
              if (errno != EINTR && errno != EAGAIN)
1073
                {
1074
                  s->ttx = s->tpos;        /* empty tx buffer */
1075
                  s->err_hook(s, errno);
1076
                  return -1;
1077
                }
1078
              return 0;
1079
            }
1080
          s->ttx += e;
1081
        }
1082
      s->ttx = s->tpos = s->tbuf;
1083
      return 1;
1084
    case SK_UDP:
1085
    case SK_UDP_MC:
1086
    case SK_IP:
1087
    case SK_IP_MC:
1088
      {
1089
        sockaddr sa;
1090

    
1091
        if (s->tbuf == s->tpos)
1092
          return 1;
1093
        fill_in_sockaddr(&sa, s->faddr, s->fport);
1094

    
1095
        e = sendto(s->fd, s->tbuf, s->tpos - s->tbuf, 0, (struct sockaddr *) &sa, sizeof(sa));
1096
        if (e < 0)
1097
          {
1098
            if (errno != EINTR && errno != EAGAIN)
1099
              {
1100
                s->ttx = s->tpos;        /* empty tx buffer */
1101
                s->err_hook(s, errno);
1102
                return -1;
1103
              }
1104
            return 0;
1105
          }
1106
        s->tpos = s->tbuf;
1107
        return 1;
1108
      }
1109
    default:
1110
      bug("sk_maybe_write: unknown socket type %d", s->type);
1111
    }
1112
}
1113

    
1114
/**
1115
 * sk_send - send data to a socket
1116
 * @s: socket
1117
 * @len: number of bytes to send
1118
 *
1119
 * This function sends @len bytes of data prepared in the
1120
 * transmit buffer of the socket @s to the network connection.
1121
 * If the packet can be sent immediately, it does so and returns
1122
 * 1, else it queues the packet for later processing, returns 0
1123
 * and calls the @tx_hook of the socket when the transmission
1124
 * takes place.
1125
 */
1126
int
1127
sk_send(sock *s, unsigned len)
1128
{
1129
  s->faddr = s->daddr;
1130
  s->fport = s->dport;
1131
  s->ttx = s->tbuf;
1132
  s->tpos = s->tbuf + len;
1133
  return sk_maybe_write(s);
1134
}
1135

    
1136
/**
1137
 * sk_send_to - send data to a specific destination
1138
 * @s: socket
1139
 * @len: number of bytes to send
1140
 * @addr: IP address to send the packet to
1141
 * @port: port to send the packet to
1142
 *
1143
 * This is a sk_send() replacement for connection-less packet sockets
1144
 * which allows destination of the packet to be chosen dynamically.
1145
 */
1146
int
1147
sk_send_to(sock *s, unsigned len, ip_addr addr, unsigned port)
1148
{
1149
  s->faddr = addr;
1150
  s->fport = port;
1151
  s->ttx = s->tbuf;
1152
  s->tpos = s->tbuf + len;
1153
  return sk_maybe_write(s);
1154
}
1155

    
1156
static int
1157
sk_read(sock *s)
1158
{
1159
  switch (s->type)
1160
    {
1161
    case SK_TCP_PASSIVE:
1162
      {
1163
        sockaddr sa;
1164
        return sk_passive_connected(s, (struct sockaddr *) &sa, sizeof(sa), SK_TCP);
1165
      }
1166
    case SK_UNIX_PASSIVE:
1167
      {
1168
        struct sockaddr_un sa;
1169
        return sk_passive_connected(s, (struct sockaddr *) &sa, sizeof(sa), SK_UNIX);
1170
      }
1171
    case SK_TCP:
1172
    case SK_UNIX:
1173
      {
1174
        int c = read(s->fd, s->rpos, s->rbuf + s->rbsize - s->rpos);
1175

    
1176
        if (c < 0)
1177
          {
1178
            if (errno != EINTR && errno != EAGAIN)
1179
              s->err_hook(s, errno);
1180
          }
1181
        else if (!c)
1182
          s->err_hook(s, 0);
1183
        else
1184
          {
1185
            s->rpos += c;
1186
            if (s->rx_hook(s, s->rpos - s->rbuf))
1187
              {
1188
                /* We need to be careful since the socket could have been deleted by the hook */
1189
                if (current_sock == s)
1190
                  s->rpos = s->rbuf;
1191
              }
1192
            return 1;
1193
          }
1194
        return 0;
1195
      }
1196
    case SK_MAGIC:
1197
      return s->rx_hook(s, 0);
1198
    default:
1199
      {
1200
        sockaddr sa;
1201
        int al = sizeof(sa);
1202
        int e = recvfrom(s->fd, s->rbuf, s->rbsize, 0, (struct sockaddr *) &sa, &al);
1203

    
1204
        if (e < 0)
1205
          {
1206
            if (errno != EINTR && errno != EAGAIN)
1207
              s->err_hook(s, errno);
1208
            return 0;
1209
          }
1210
        s->rpos = s->rbuf + e;
1211
        get_sockaddr(&sa, &s->faddr, &s->fport, 1);
1212
        s->rx_hook(s, e);
1213
        return 1;
1214
      }
1215
    }
1216
}
1217

    
1218
static int
1219
sk_write(sock *s)
1220
{
1221
  switch (s->type)
1222
    {
1223
    case SK_TCP_ACTIVE:
1224
      {
1225
        sockaddr sa;
1226
        fill_in_sockaddr(&sa, s->daddr, s->dport);
1227
        if (connect(s->fd, (struct sockaddr *) &sa, sizeof(sa)) >= 0 || errno == EISCONN)
1228
          sk_tcp_connected(s);
1229
        else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS)
1230
          s->err_hook(s, errno);
1231
        return 0;
1232
      }
1233
    default:
1234
      if (s->ttx != s->tpos && sk_maybe_write(s) > 0)
1235
        {
1236
          s->tx_hook(s);
1237
          return 1;
1238
        }
1239
      return 0;
1240
    }
1241
}
1242

    
1243
void
1244
sk_dump_all(void)
1245
{
1246
  node *n;
1247
  sock *s;
1248

    
1249
  debug("Open sockets:\n");
1250
  WALK_LIST(n, sock_list)
1251
    {
1252
      s = SKIP_BACK(sock, n, n);
1253
      debug("%p ", s);
1254
      sk_dump(&s->r);
1255
    }
1256
  debug("\n");
1257
}
1258

    
1259
#undef ERR
1260
#undef WARN
1261

    
1262
/*
1263
 *        Main I/O Loop
1264
 */
1265

    
1266
/*
 * Request flag polled by io_loop(): when it is non-zero, the loop calls
 * async_config() and resets the flag. Presumably set from a signal
 * handler (the loop's own comment talks about signals) -- confirm
 * against the code which sets it.
 */
volatile int async_config_flag;                /* Asynchronous reconfiguration/dump scheduled */
1267
/*
 * Request flag polled by io_loop(): when it is non-zero, the loop calls
 * async_dump() and resets the flag.
 */
volatile int async_dump_flag;
1268

    
1269
void
1270
io_init(void)
1271
{
1272
  init_list(&near_timers);
1273
  init_list(&far_timers);
1274
  init_list(&sock_list);
1275
  init_list(&global_event_list);
1276
  krt_io_init();
1277
  init_times();
1278
  update_times();
1279
  srandom((int) now_real);
1280
}
1281

    
1282
void
1283
io_loop(void)
1284
{
1285
  fd_set rd, wr;
1286
  struct timeval timo;
1287
  time_t tout;
1288
  int hi, events;
1289
  sock *s;
1290
  node *n;
1291

    
1292
  sock_recalc_fdsets_p = 1;
1293
  for(;;)
1294
    {
1295
      events = ev_run_list(&global_event_list);
1296
      update_times();
1297
      tout = tm_first_shot();
1298
      if (tout <= now)
1299
        {
1300
          tm_shot();
1301
          continue;
1302
        }
1303
      timo.tv_sec = events ? 0 : tout - now;
1304
      timo.tv_usec = 0;
1305

    
1306
      if (sock_recalc_fdsets_p)
1307
        {
1308
          sock_recalc_fdsets_p = 0;
1309
          FD_ZERO(&rd);
1310
          FD_ZERO(&wr);
1311
        }
1312

    
1313
      hi = 0;
1314
      WALK_LIST(n, sock_list)
1315
        {
1316
          s = SKIP_BACK(sock, n, n);
1317
          if (s->rx_hook)
1318
            {
1319
              FD_SET(s->fd, &rd);
1320
              if (s->fd > hi)
1321
                hi = s->fd;
1322
            }
1323
          else
1324
            FD_CLR(s->fd, &rd);
1325
          if (s->tx_hook && s->ttx != s->tpos)
1326
            {
1327
              FD_SET(s->fd, &wr);
1328
              if (s->fd > hi)
1329
                hi = s->fd;
1330
            }
1331
          else
1332
            FD_CLR(s->fd, &wr);
1333
        }
1334

    
1335
      /*
1336
       * Yes, this is racy. But even if the signal comes before this test
1337
       * and entering select(), it gets caught on the next timer tick.
1338
       */
1339

    
1340
      if (async_config_flag)
1341
        {
1342
          async_config();
1343
          async_config_flag = 0;
1344
          continue;
1345
        }
1346
      if (async_dump_flag)
1347
        {
1348
          async_dump();
1349
          async_dump_flag = 0;
1350
          continue;
1351
        }
1352
      if (async_shutdown_flag)
1353
        {
1354
          async_shutdown();
1355
          async_shutdown_flag = 0;
1356
          continue;
1357
        }
1358

    
1359
      /* And finally enter select() to find active sockets */
1360

    
1361
      hi = select(hi+1, &rd, &wr, NULL, &timo);
1362
      if (hi < 0)
1363
        {
1364
          if (errno == EINTR || errno == EAGAIN)
1365
            continue;
1366
          die("select: %m");
1367
        }
1368
      if (hi)
1369
        {
1370
          current_sock = SKIP_BACK(sock, n, HEAD(sock_list));        /* guaranteed to be non-empty */
1371
          while (current_sock)
1372
            {
1373
              sock *s = current_sock;
1374
              int e;
1375
              if (FD_ISSET(s->fd, &rd) && s->rx_hook)
1376
                do
1377
                  {
1378
                    e = sk_read(s);
1379
                    if (s != current_sock)
1380
                      goto next;
1381
                  }
1382
                while (e && s->rx_hook);
1383
              if (FD_ISSET(s->fd, &wr))
1384
                do
1385
                  {
1386
                    e = sk_write(s);
1387
                    if (s != current_sock)
1388
                      goto next;
1389
                  }
1390
                while (e);
1391
              current_sock = sk_next(s);
1392
            next: ;
1393
            }
1394
        }
1395
    }
1396
}
1397

    
1398
/*
 * test_old_bird - check whether another BIRD is listening on a socket
 * @path: file system path of the UNIX-domain socket to probe
 *
 * Tries to connect to the stream socket at @path. If the connection
 * succeeds, somebody is already listening there and the function
 * terminates the program via die(). It also dies when @path does not
 * fit into a UNIX socket address or when the probing socket cannot be
 * created. Otherwise the probing socket is closed and the function
 * returns.
 */
void
test_old_bird(char *path)
{
  int fd;
  struct sockaddr_un sa;

  /* sun_path is a fixed-size array; refuse paths which would overflow it */
  if (strlen(path) >= sizeof(sa.sun_path))
    die("Socket path too long");

  fd = socket(AF_UNIX, SOCK_STREAM, 0);

  if (fd < 0)
    die("Cannot create socket: %m");
  bzero(&sa, sizeof(sa));
  sa.sun_family = AF_UNIX;
  strcpy(sa.sun_path, path);
  if (connect(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) == 0)
    die("I found another BIRD running.");
  close(fd);
}
1415

    
1416