Statistics
| Branch: | Revision:

iof-bird-daemon / sysdep / unix / io.c @ 1389f369

History | View | Annotate | Download (27.8 KB)

1
/*
2
 *        BIRD Internet Routing Daemon -- Unix I/O
3
 *
4
 *        (c) 1998--2004 Martin Mares <mj@ucw.cz>
5
 *      (c) 2004       Ondrej Filip <feela@network.cz>
6
 *
7
 *        Can be freely distributed and used under the terms of the GNU GPL.
8
 */
9

    
10
#include <stdio.h>
11
#include <stdlib.h>
12
#include <time.h>
13
#include <sys/time.h>
14
#include <sys/types.h>
15
#include <sys/socket.h>
16
#include <sys/fcntl.h>
17
#include <sys/un.h>
18
#include <unistd.h>
19
#include <errno.h>
20

    
21
#include "nest/bird.h"
22
#include "lib/lists.h"
23
#include "lib/resource.h"
24
#include "lib/timer.h"
25
#include "lib/socket.h"
26
#include "lib/event.h"
27
#include "lib/string.h"
28
#include "nest/iface.h"
29

    
30
#include "lib/unix.h"
31
#include "lib/sysio.h"
32

    
33
/*
34
 *        Tracked Files
35
 */
36

    
37
struct rfile {
38
  resource r;
39
  FILE *f;
40
};
41

    
42
static void
43
rf_free(resource *r)
44
{
45
  struct rfile *a = (struct rfile *) r;
46

    
47
  fclose(a->f);
48
}
49

    
50
static void
51
rf_dump(resource *r)
52
{
53
  struct rfile *a = (struct rfile *) r;
54

    
55
  debug("(FILE *%p)\n", a->f);
56
}
57

    
58
static struct resclass rf_class = {
59
  "FILE",
60
  sizeof(struct rfile),
61
  rf_free,
62
  rf_dump
63
};
64

    
65
void *
66
tracked_fopen(pool *p, char *name, char *mode)
67
{
68
  FILE *f = fopen(name, mode);
69

    
70
  if (f)
71
    {
72
      struct rfile *r = ralloc(p, &rf_class);
73
      r->f = f;
74
    }
75
  return f;
76
}
77

    
78
/**
79
 * DOC: Timers
80
 *
81
 * Timers are resources which represent a wish of a module to call
82
 * a function at the specified time. The platform dependent code
83
 * doesn't guarantee exact timing, only that a timer function
84
 * won't be called before the requested time.
85
 *
86
 * In BIRD, real time is represented by values of the &bird_clock_t type
87
 * which are integral numbers interpreted as a number of seconds since
88
 * a fixed (but platform dependent) epoch. The current time can be read
89
 * from a variable @now with reasonable accuracy.
90
 *
91
 * Each timer is described by a &timer structure containing a pointer
92
 * to the handler function (@hook), data private to this function (@data),
93
 * time the function should be called at (@expires, 0 for inactive timers),
94
 * for the other fields see |timer.h|.
95
 */
96

    
97
#define NEAR_TIMER_LIMIT 4
98

    
99
static list near_timers, far_timers;
100
static bird_clock_t first_far_timer = TIME_INFINITY;
101

    
102
bird_clock_t now;
103

    
104
static void
105
tm_free(resource *r)
106
{
107
  timer *t = (timer *) r;
108

    
109
  tm_stop(t);
110
}
111

    
112
static void
113
tm_dump(resource *r)
114
{
115
  timer *t = (timer *) r;
116

    
117
  debug("(code %p, data %p, ", t->hook, t->data);
118
  if (t->randomize)
119
    debug("rand %d, ", t->randomize);
120
  if (t->recurrent)
121
    debug("recur %d, ", t->recurrent);
122
  if (t->expires)
123
    debug("expires in %d sec)\n", t->expires - now);
124
  else
125
    debug("inactive)\n");
126
}
127

    
128
static struct resclass tm_class = {
129
  "Timer",
130
  sizeof(timer),
131
  tm_free,
132
  tm_dump
133
};
134

    
135
/**
136
 * tm_new - create a timer
137
 * @p: pool
138
 *
139
 * This function creates a new timer resource and returns
140
 * a pointer to it. To use the timer, you need to fill in
141
 * the structure fields and call tm_start() to start timing.
142
 */
143
timer *
144
tm_new(pool *p)
145
{
146
  timer *t = ralloc(p, &tm_class);
147
  t->hook = NULL;
148
  t->data = NULL;
149
  t->randomize = 0;
150
  t->expires = 0;
151
  return t;
152
}
153

    
154
static inline void
155
tm_insert_near(timer *t)
156
{
157
  node *n = HEAD(near_timers);
158

    
159
  while (n->next && (SKIP_BACK(timer, n, n)->expires < t->expires))
160
    n = n->next;
161
  insert_node(&t->n, n->prev);
162
}
163

    
164
/**
165
 * tm_start - start a timer
166
 * @t: timer
167
 * @after: number of seconds the timer should be run after
168
 *
169
 * This function schedules the hook function of the timer to
170
 * be called after @after seconds. If the timer has been already
171
 * started, it's @expire time is replaced by the new value.
172
 *
173
 * You can have set the @randomize field of @t, the timeout
174
 * will be increased by a random number of seconds chosen
175
 * uniformly from range 0 .. @randomize.
176
 *
177
 * You can call tm_start() from the handler function of the timer
178
 * to request another run of the timer. Also, you can set the @recurrent
179
 * field to have the timer re-added automatically with the same timeout.
180
 */
181
void
182
tm_start(timer *t, unsigned after)
183
{
184
  bird_clock_t when;
185

    
186
  if (t->randomize)
187
    after += random() % (t->randomize + 1);
188
  when = now + after;
189
  if (t->expires == when)
190
    return;
191
  if (t->expires)
192
    rem_node(&t->n);
193
  t->expires = when;
194
  if (after <= NEAR_TIMER_LIMIT)
195
    tm_insert_near(t);
196
  else
197
    {
198
      if (!first_far_timer || first_far_timer > when)
199
        first_far_timer = when;
200
      add_tail(&far_timers, &t->n);
201
    }
202
}
203

    
204
/**
205
 * tm_stop - stop a timer
206
 * @t: timer
207
 *
208
 * This function stops a timer. If the timer is already stopped,
209
 * nothing happens.
210
 */
211
void
212
tm_stop(timer *t)
213
{
214
  if (t->expires)
215
    {
216
      rem_node(&t->n);
217
      t->expires = 0;
218
    }
219
}
220

    
221
static void
222
tm_dump_them(char *name, list *l)
223
{
224
  node *n;
225
  timer *t;
226

    
227
  debug("%s timers:\n", name);
228
  WALK_LIST(n, *l)
229
    {
230
      t = SKIP_BACK(timer, n, n);
231
      debug("%p ", t);
232
      tm_dump(&t->r);
233
    }
234
  debug("\n");
235
}
236

    
237
void
238
tm_dump_all(void)
239
{
240
  tm_dump_them("Near", &near_timers);
241
  tm_dump_them("Far", &far_timers);
242
}
243

    
244
static inline time_t
245
tm_first_shot(void)
246
{
247
  time_t x = first_far_timer;
248

    
249
  if (!EMPTY_LIST(near_timers))
250
    {
251
      timer *t = SKIP_BACK(timer, n, HEAD(near_timers));
252
      if (t->expires < x)
253
        x = t->expires;
254
    }
255
  return x;
256
}
257

    
258
static void
259
tm_shot(void)
260
{
261
  timer *t;
262
  node *n, *m;
263

    
264
  if (first_far_timer <= now)
265
    {
266
      bird_clock_t limit = now + NEAR_TIMER_LIMIT;
267
      first_far_timer = TIME_INFINITY;
268
      n = HEAD(far_timers);
269
      while (m = n->next)
270
        {
271
          t = SKIP_BACK(timer, n, n);
272
          if (t->expires <= limit)
273
            {
274
              rem_node(n);
275
              tm_insert_near(t);
276
            }
277
          else if (t->expires < first_far_timer)
278
            first_far_timer = t->expires;
279
          n = m;
280
        }
281
    }
282
  while ((n = HEAD(near_timers)) -> next)
283
    {
284
      int delay;
285
      t = SKIP_BACK(timer, n, n);
286
      if (t->expires > now)
287
        break;
288
      rem_node(n);
289
      delay = t->expires - now;
290
      t->expires = 0;
291
      if (t->recurrent)
292
        {
293
          int i = t->recurrent - delay;
294
          if (i < 0)
295
            i = 0;
296
          tm_start(t, i);
297
        }
298
      t->hook(t);
299
    }
300
}
301

    
302
/**
303
 * tm_parse_datetime - parse a date and time
304
 * @x: datetime string
305
 *
306
 * tm_parse_datetime() takes a textual representation of
307
 * a date and time (dd-mm-yyyy hh:mm:ss)
308
 * and converts it to the corresponding value of type &bird_clock_t.
309
 */
310
bird_clock_t
311
tm_parse_datetime(char *x)
312
{
313
  struct tm tm;
314
  int n;
315
  time_t t;
316

    
317
  if (sscanf(x, "%d-%d-%d %d:%d:%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &tm.tm_hour, &tm.tm_min, &tm.tm_sec, &n) != 6 || x[n])
318
    return tm_parse_date(x);
319
  tm.tm_mon--;
320
  tm.tm_year -= 1900;
321
  t = mktime(&tm);
322
  if (t == (time_t) -1)
323
    return 0;
324
  return t;
325
}
326
/**
327
 * tm_parse_date - parse a date
328
 * @x: date string
329
 *
330
 * tm_parse_date() takes a textual representation of a date (dd-mm-yyyy)
331
 * and converts it to the corresponding value of type &bird_clock_t.
332
 */
333
bird_clock_t
334
tm_parse_date(char *x)
335
{
336
  struct tm tm;
337
  int n;
338
  time_t t;
339

    
340
  if (sscanf(x, "%d-%d-%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &n) != 3 || x[n])
341
    return 0;
342
  tm.tm_mon--;
343
  tm.tm_year -= 1900;
344
  tm.tm_hour = tm.tm_min = tm.tm_sec = 0;
345
  t = mktime(&tm);
346
  if (t == (time_t) -1)
347
    return 0;
348
  return t;
349
}
350

    
351
/**
352
 * tm_format_date - convert date to textual representation
353
 * @x: destination buffer of size %TM_DATE_BUFFER_SIZE
354
 * @t: time
355
 *
356
 * This function formats the given time value @t to a textual
357
 * date representation (dd-mm-yyyy).
358
 */
359
void
360
tm_format_date(char *x, bird_clock_t t)
361
{
362
  struct tm *tm;
363

    
364
  tm = localtime(&t);
365
  bsprintf(x, "%02d-%02d-%04d", tm->tm_mday, tm->tm_mon+1, tm->tm_year+1900);
366
}
367

    
368
/**
369
 * tm_format_datetime - convert date and time to textual representation
370
 * @x: destination buffer of size %TM_DATETIME_BUFFER_SIZE
371
 * @t: time
372
 *
373
 * This function formats the given time value @t to a textual
374
 * date/time representation (dd-mm-yyyy hh:mm:ss).
375
 */
376
void
377
tm_format_datetime(char *x, bird_clock_t t)
378
{
379
  struct tm *tm;
380

    
381
  tm = localtime(&t);
382
  if (strftime(x, TM_DATETIME_BUFFER_SIZE, "%d-%m-%Y %H:%M:%S", tm) == TM_DATETIME_BUFFER_SIZE)
383
    strcpy(x, "<too-long>");
384
}
385

    
386
/**
387
 * tm_format_reltime - convert date and time to relative textual representation
388
 * @x: destination buffer of size %TM_RELTIME_BUFFER_SIZE
389
 * @t: time
390
 *
391
 * This function formats the given time value @t to a short
392
 * textual representation relative to the current time.
393
 */
394
void
395
tm_format_reltime(char *x, bird_clock_t t)
396
{
397
  struct tm *tm;
398
  bird_clock_t delta = (t < now) ? (now - t) : (t - now);
399
  static char *month_names[12] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
400

    
401
  tm = localtime(&t);
402
  if (delta < 20*3600)
403
    bsprintf(x, "%02d:%02d", tm->tm_hour, tm->tm_min);
404
  else if (delta < 360*86400)
405
    bsprintf(x, "%s%02d", month_names[tm->tm_mon], tm->tm_mday);
406
  else
407
    bsprintf(x, "%d", tm->tm_year+1900);
408
}
409

    
410
/**
411
 * DOC: Sockets
412
 *
413
 * Socket resources represent network connections. Their data structure (&socket)
414
 * contains a lot of fields defining the exact type of the socket, the local and
415
 * remote addresses and ports, pointers to socket buffers and finally pointers to
416
 * hook functions to be called when new data have arrived to the receive buffer
417
 * (@rx_hook), when the contents of the transmit buffer have been transmitted
418
 * (@tx_hook) and when an error or connection close occurs (@err_hook).
419
 *
420
 * Freeing of sockets from inside socket hooks is perfectly safe.
421
 */
422

    
423
#ifndef SOL_IP
424
#define SOL_IP IPPROTO_IP
425
#endif
426

    
427
#ifndef SOL_IPV6
428
#define SOL_IPV6 IPPROTO_IPV6
429
#endif
430

    
431
#ifndef IPV6_ADD_MEMBERSHIP
432
#define IPV6_ADD_MEMBERSHIP IP_ADD_MEMBERSHIP
433
#endif
434

    
435
static list sock_list;
436
static struct birdsock *current_sock;
437
static int sock_recalc_fdsets_p;
438

    
439
static inline sock *
440
sk_next(sock *s)
441
{
442
  if (!s->n.next->next)
443
    return NULL;
444
  else
445
    return SKIP_BACK(sock, n, s->n.next);
446
}
447

    
448
static void
449
sk_alloc_bufs(sock *s)
450
{
451
  if (!s->rbuf && s->rbsize)
452
    s->rbuf = s->rbuf_alloc = xmalloc(s->rbsize);
453
  s->rpos = s->rbuf;
454
  if (!s->tbuf && s->tbsize)
455
    s->tbuf = s->tbuf_alloc = xmalloc(s->tbsize);
456
  s->tpos = s->ttx = s->tbuf;
457
}
458

    
459
static void
460
sk_free_bufs(sock *s)
461
{
462
  if (s->rbuf_alloc)
463
    {
464
      xfree(s->rbuf_alloc);
465
      s->rbuf = s->rbuf_alloc = NULL;
466
    }
467
  if (s->tbuf_alloc)
468
    {
469
      xfree(s->tbuf_alloc);
470
      s->tbuf = s->tbuf_alloc = NULL;
471
    }
472
}
473

    
474
static void
475
sk_free(resource *r)
476
{
477
  sock *s = (sock *) r;
478

    
479
  sk_free_bufs(s);
480
  if (s->fd >= 0)
481
    {
482
      close(s->fd);
483
      if (s == current_sock)
484
        current_sock = sk_next(s);
485
      rem_node(&s->n);
486
      sock_recalc_fdsets_p = 1;
487
    }
488
}
489

    
490
void
491
sk_reallocate(sock *s)
492
{
493
  sk_free_bufs(s);
494
  sk_alloc_bufs(s);
495
}
496

    
497
static void
498
sk_dump(resource *r)
499
{
500
  sock *s = (sock *) r;
501
  static char *sk_type_names[] = { "TCP<", "TCP>", "TCP", "UDP", "UDP/MC", "IP", "IP/MC", "MAGIC", "UNIX<", "UNIX", "DEL!" };
502

    
503
  debug("(%s, ud=%p, sa=%08x, sp=%d, da=%08x, dp=%d, tos=%d, ttl=%d, if=%s)\n",
504
        sk_type_names[s->type],
505
        s->data,
506
        s->saddr,
507
        s->sport,
508
        s->daddr,
509
        s->dport,
510
        s->tos,
511
        s->ttl,
512
        s->iface ? s->iface->name : "none");
513
}
514

    
515
static struct resclass sk_class = {
516
  "Socket",
517
  sizeof(sock),
518
  sk_free,
519
  sk_dump
520
};
521

    
522
/**
523
 * sk_new - create a socket
524
 * @p: pool
525
 *
526
 * This function creates a new socket resource. If you want to use it,
527
 * you need to fill in all the required fields of the structure and
528
 * call sk_open() to do the actual opening of the socket.
529
 */
530
sock *
531
sk_new(pool *p)
532
{
533
  sock *s = ralloc(p, &sk_class);
534
  s->pool = p;
535
  s->data = NULL;
536
  s->saddr = s->daddr = IPA_NONE;
537
  s->sport = s->dport = 0;
538
  s->tos = s->ttl = -1;
539
  s->iface = NULL;
540
  s->rbuf = NULL;
541
  s->rx_hook = NULL;
542
  s->rbsize = 0;
543
  s->tbuf = NULL;
544
  s->tx_hook = NULL;
545
  s->tbsize = 0;
546
  s->err_hook = NULL;
547
  s->fd = -1;
548
  s->rbuf_alloc = s->tbuf_alloc = NULL;
549
  s->password = NULL;
550
  return s;
551
}
552

    
553
static void
554
sk_insert(sock *s)
555
{
556
  add_tail(&sock_list, &s->n);
557
  sock_recalc_fdsets_p = 1;
558
}
559

    
560
#ifdef IPV6
561

    
562
void
563
fill_in_sockaddr(sockaddr *sa, ip_addr a, unsigned port)
564
{
565
  memset (sa, 0, sizeof (struct sockaddr_in6));
566
  sa->sin6_family = AF_INET6;
567
  sa->sin6_port = htons(port);
568
  sa->sin6_flowinfo = 0;
569
#ifdef HAVE_SIN_LEN
570
  sa->sin6_len = sizeof(struct sockaddr_in6);
571
#endif
572
  set_inaddr(&sa->sin6_addr, a);
573
}
574

    
575
void
576
get_sockaddr(struct sockaddr_in6 *sa, ip_addr *a, unsigned *port, int check)
577
{
578
  if (check && sa->sin6_family != AF_INET6)
579
    bug("get_sockaddr called for wrong address family (%d)", sa->sin6_family);
580
  if (port)
581
    *port = ntohs(sa->sin6_port);
582
  memcpy(a, &sa->sin6_addr, sizeof(*a));
583
  ipa_ntoh(*a);
584
}
585

    
586
#else
587

    
588
void
589
fill_in_sockaddr(sockaddr *sa, ip_addr a, unsigned port)
590
{
591
  memset (sa, 0, sizeof (struct sockaddr_in));
592
  sa->sin_family = AF_INET;
593
  sa->sin_port = htons(port);
594
#ifdef HAVE_SIN_LEN
595
  sa->sin_len = sizeof(struct sockaddr_in);
596
#endif
597
  set_inaddr(&sa->sin_addr, a);
598
}
599

    
600
void
601
get_sockaddr(struct sockaddr_in *sa, ip_addr *a, unsigned *port, int check)
602
{
603
  if (check && sa->sin_family != AF_INET)
604
    bug("get_sockaddr called for wrong address family (%d)", sa->sin_family);
605
  if (port)
606
    *port = ntohs(sa->sin_port);
607
  memcpy(a, &sa->sin_addr.s_addr, sizeof(*a));
608
  ipa_ntoh(*a);
609
}
610

    
611
#endif
612

    
613
static char *
614
sk_set_ttl_int(sock *s)
615
{
616
  int one = 1;
617
#ifdef IPV6
618
  if (s->type != SK_UDP_MC && s->type != SK_IP_MC &&
619
      setsockopt(s->fd, SOL_IPV6, IPV6_UNICAST_HOPS, &s->ttl, sizeof(s->ttl)) < 0)
620
    return "IPV6_UNICAST_HOPS";
621
#else
622
  if (setsockopt(s->fd, SOL_IP, IP_TTL, &s->ttl, sizeof(s->ttl)) < 0)
623
    return "IP_TTL";
624
#ifdef CONFIG_UNIX_DONTROUTE
625
  if (s->ttl == 1 && setsockopt(s->fd, SOL_SOCKET, SO_DONTROUTE, &one, sizeof(one)) < 0)
626
    return "SO_DONTROUTE";
627
#endif 
628
#endif
629
  return NULL;
630
}
631

    
/* ERR() expects a local `char *err' and a `bad:' label in the enclosing function */
#define ERR(x) do { err = x; goto bad; } while(0)
/* WARN() only logs the failed operation together with the errno text */
#define WARN(x) log(L_WARN "sk_setup: %s: %m", x)
634

    
635
static char *
636
sk_setup(sock *s)
637
{
638
  int fd = s->fd;
639
  char *err;
640

    
641
  if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
642
    ERR("fcntl(O_NONBLOCK)");
643
  if (s->type == SK_UNIX)
644
    return NULL;
645
#ifndef IPV6
646
  if ((s->tos >= 0) && setsockopt(fd, SOL_IP, IP_TOS, &s->tos, sizeof(s->tos)) < 0)
647
    WARN("IP_TOS");
648
#endif
649
  
650
  if (s->ttl >= 0)
651
    err = sk_set_ttl_int(s);
652
  else
653
    err = NULL;
654

    
655
bad:
656
  return err;
657
}
658

    
659
/**
660
 * sk_set_ttl - set TTL for given socket.
661
 * @s: socket
662
 * @ttl: TTL value
663
 *
664
 * Set TTL for already opened connections when TTL was not set before.
665
 * Useful for accepted connections when different ones should have 
666
 * different TTL.
667
 *
668
 * Result: 0 for success, -1 for an error.
669
 */
670

    
671
int
672
sk_set_ttl(sock *s, int ttl)
673
{
674
  char *err;
675

    
676
  s->ttl = ttl;
677
  if (err = sk_set_ttl_int(s))
678
    log(L_ERR "sk_set_ttl: %s: %m", err);
679

    
680
  return (err ? -1 : 0);
681
}
682

    
683

    
684
/* FIXME: check portability  */
685

    
686
static int
687
sk_set_md5_auth_int(sock *s, sockaddr *sa, char *passwd)
688
{
689
  struct tcp_md5sig md5;
690

    
691
  memset(&md5, 0, sizeof(md5));
692
  memcpy(&md5.tcpm_addr, (struct sockaddr *) sa, sizeof(*sa));
693

    
694
  if (passwd)
695
    {
696
      int len = strlen(passwd);
697

    
698
      if (len > TCP_MD5SIG_MAXKEYLEN)
699
        {
700
          log(L_ERR "MD5 password too long");
701
          return -1;
702
        }
703

    
704
      md5.tcpm_keylen = len;
705
      memcpy(&md5.tcpm_key, passwd, len);
706
    }
707

    
708
  int rv = setsockopt(s->fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
709

    
710
  if (rv < 0) 
711
    {
712
      if (errno == ENOPROTOOPT)
713
        log(L_ERR "Kernel does not support TCP MD5 signatures");
714
      else
715
        log(L_ERR "sk_set_md5_auth_int: setsockopt: %m");
716
    }
717

    
718
  return rv;
719
}
720

    
721
/**
722
 * sk_set_md5_auth - add / remove MD5 security association for given socket.
723
 * @s: socket
724
 * @a: IP address of the other side
725
 * @passwd: password used for MD5 authentication
726
 *
727
 * In TCP MD5 handling code in kernel, there is a set of pairs
728
 * (address, password) used to choose password according to
729
 * address of the other side. This function is useful for
730
 * listening socket, for active sockets it is enough to set
731
 * s->password field.
732
 *
733
 * When called with passwd != NULL, the new pair is added,
734
 * When called with passwd == NULL, the existing pair is removed.
735
 *
736
 * Result: 0 for success, -1 for an error.
737
 */
738

    
739
int
740
sk_set_md5_auth(sock *s, ip_addr a, char *passwd)
741
{
742
  sockaddr sa;
743
  fill_in_sockaddr(&sa, a, 0);
744
  return sk_set_md5_auth_int(s, &sa, passwd);
745
}
746

    
747

    
748
static void
749
sk_tcp_connected(sock *s)
750
{
751
  s->type = SK_TCP;
752
  sk_alloc_bufs(s);
753
  s->tx_hook(s);
754
}
755

    
756
static int
757
sk_passive_connected(sock *s, struct sockaddr *sa, int al, int type)
758
{
759
  int fd = accept(s->fd, sa, &al);
760
  if (fd >= 0)
761
    {
762
      sock *t = sk_new(s->pool);
763
      char *err;
764
      t->type = type;
765
      t->fd = fd;
766
      t->ttl = s->ttl;
767
      t->tos = s->tos;
768
      t->rbsize = s->rbsize;
769
      t->tbsize = s->tbsize;
770
      if (type == SK_TCP)
771
        get_sockaddr((sockaddr *) sa, &t->daddr, &t->dport, 1);
772
      sk_insert(t);
773
      if (err = sk_setup(t))
774
        {
775
          log(L_ERR "Incoming connection: %s: %m", err);
776
          rfree(t);
777
          return 1;
778
        }
779
      sk_alloc_bufs(t);
780
      s->rx_hook(t, 0);
781
      return 1;
782
    }
783
  else if (errno != EINTR && errno != EAGAIN)
784
    {
785
      log(L_ERR "accept: %m");
786
      s->err_hook(s, errno);
787
    }
788
  return 0;
789
}
790

    
791
/**
792
 * sk_open - open a socket
793
 * @s: socket
794
 *
795
 * This function takes a socket resource created by sk_new() and
796
 * initialized by the user and binds a corresponding network connection
797
 * to it.
798
 *
799
 * Result: 0 for success, -1 for an error.
800
 */
801
int
802
sk_open(sock *s)
803
{
804
  int fd;
805
  sockaddr sa;
806
  int one = 1;
807
  int type = s->type;
808
  int has_src = ipa_nonzero(s->saddr) || s->sport;
809
  char *err;
810

    
811
  switch (type)
812
    {
813
    case SK_TCP_ACTIVE:
814
      s->ttx = "";                        /* Force s->ttx != s->tpos */
815
      /* Fall thru */
816
    case SK_TCP_PASSIVE:
817
      fd = socket(BIRD_PF, SOCK_STREAM, IPPROTO_TCP);
818
      break;
819
    case SK_UDP:
820
    case SK_UDP_MC:
821
      fd = socket(BIRD_PF, SOCK_DGRAM, IPPROTO_UDP);
822
      break;
823
    case SK_IP:
824
    case SK_IP_MC:
825
      fd = socket(BIRD_PF, SOCK_RAW, s->dport);
826
      break;
827
    case SK_MAGIC:
828
      fd = s->fd;
829
      break;
830
    default:
831
      bug("sk_open() called for invalid sock type %d", type);
832
    }
833
  if (fd < 0)
834
    die("sk_open: socket: %m");
835
  s->fd = fd;
836

    
837
  if (err = sk_setup(s))
838
    goto bad;
839

    
840
  switch (type)
841
    {
842
    case SK_UDP:
843
    case SK_IP:
844
      if (s->iface)                        /* It's a broadcast socket */
845
#ifdef IPV6
846
        bug("IPv6 has no broadcasts");
847
#else
848
        if (setsockopt(fd, SOL_SOCKET, SO_BROADCAST, &one, sizeof(one)) < 0)
849
          ERR("SO_BROADCAST");
850
#endif
851
      break;
852
    case SK_UDP_MC:
853
    case SK_IP_MC:
854
      {
855
#ifdef IPV6
856
        /* Fortunately, IPv6 socket interface is recent enough and therefore standardized */
857
        ASSERT(s->iface && s->iface->addr);
858
        if (ipa_nonzero(s->daddr))
859
          {
860
            int t = s->iface->index;
861
            int zero = 0;
862
            if (setsockopt(fd, SOL_IPV6, IPV6_MULTICAST_HOPS, &s->ttl, sizeof(s->ttl)) < 0)
863
              ERR("IPV6_MULTICAST_HOPS");
864
            if (setsockopt(fd, SOL_IPV6, IPV6_MULTICAST_LOOP, &zero, sizeof(zero)) < 0)
865
              ERR("IPV6_MULTICAST_LOOP");
866
            if (setsockopt(fd, SOL_IPV6, IPV6_MULTICAST_IF, &t, sizeof(t)) < 0)
867
              ERR("IPV6_MULTICAST_IF");
868
          }
869
        if (has_src)
870
          {
871
            struct ipv6_mreq mreq;
872
            set_inaddr(&mreq.ipv6mr_multiaddr, s->daddr);
873
#ifdef CONFIG_IPV6_GLIBC_20
874
            mreq.ipv6mr_ifindex = s->iface->index;
875
#else
876
            mreq.ipv6mr_interface = s->iface->index;
877
#endif /* CONFIG_IPV6_GLIBC_20 */
878
            if (setsockopt(fd, SOL_IPV6, IPV6_ADD_MEMBERSHIP, &mreq, sizeof(mreq)) < 0)
879
              ERR("IPV6_ADD_MEMBERSHIP");
880
          }
881
#else
882
        /* With IPv4 there are zillions of different socket interface variants. Ugh. */
883
        ASSERT(s->iface && s->iface->addr);
884
        if (err = sysio_mcast_join(s))
885
          goto bad;
886
#endif /* IPV6 */
887
      break;
888
      }
889
    }
890
  if (has_src)
891
    {
892
      int port;
893

    
894
      if (type == SK_IP || type == SK_IP_MC)
895
        port = 0;
896
      else
897
        {
898
          port = s->sport;
899
          if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) < 0)
900
            ERR("SO_REUSEADDR");
901
        }
902
      fill_in_sockaddr(&sa, s->saddr, port);
903
#ifdef CONFIG_SKIP_MC_BIND
904
      if (type == SK_IP && bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0)
905
#else
906
      if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0)
907
#endif
908
        ERR("bind");
909
    }
910
  fill_in_sockaddr(&sa, s->daddr, s->dport);
911

    
912
  if (s->password)
913
    {
914
      int rv = sk_set_md5_auth_int(s, &sa, s->password);
915
      if (rv < 0)
916
        goto bad_no_log;
917
    }
918

    
919
  switch (type)
920
    {
921
    case SK_TCP_ACTIVE:
922
      if (connect(fd, (struct sockaddr *) &sa, sizeof(sa)) >= 0)
923
        sk_tcp_connected(s);
924
      else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS &&
925
               errno != ECONNREFUSED && errno != EHOSTUNREACH)
926
        ERR("connect");
927
      break;
928
    case SK_TCP_PASSIVE:
929
      if (listen(fd, 8))
930
        ERR("listen");
931
      break;
932
    case SK_MAGIC:
933
      break;
934
    default:
935
      sk_alloc_bufs(s);
936
#ifdef IPV6
937
#ifdef IPV6_MTU_DISCOVER
938
      {
939
        int dont = IPV6_PMTUDISC_DONT;
940
        if (setsockopt(fd, SOL_IPV6, IPV6_MTU_DISCOVER, &dont, sizeof(dont)) < 0)
941
          ERR("IPV6_MTU_DISCOVER");
942
      }
943
#endif
944
#else
945
#ifdef IP_PMTUDISC
946
      {
947
        int dont = IP_PMTUDISC_DONT;
948
        if (setsockopt(fd, SOL_IP, IP_PMTUDISC, &dont, sizeof(dont)) < 0)
949
          ERR("IP_PMTUDISC");
950
      }
951
#endif
952
#endif
953
    }
954

    
955
  sk_insert(s);
956
  return 0;
957

    
958
bad:
959
  log(L_ERR "sk_open: %s: %m", err);
960
bad_no_log:
961
  close(fd);
962
  s->fd = -1;
963
  return -1;
964
}
965

    
966
int
967
sk_open_unix(sock *s, char *name)
968
{
969
  int fd;
970
  struct sockaddr_un sa;
971
  char *err;
972

    
973
  fd = socket(AF_UNIX, SOCK_STREAM, 0);
974
  if (fd < 0)
975
    die("sk_open_unix: socket: %m");
976
  s->fd = fd;
977
  if (err = sk_setup(s))
978
    goto bad;
979
  unlink(name);
980
 
981
  if (strlen(name) >= sizeof(sa.sun_path))
982
    die("sk_open_unix: path too long");
983

    
984
  sa.sun_family = AF_UNIX;
985
  strcpy(sa.sun_path, name);
986
  if (bind(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) < 0)
987
    ERR("bind");
988
  if (listen(fd, 8))
989
    ERR("listen");
990
  sk_insert(s);
991
  return 0;
992

    
993
bad:
994
  log(L_ERR "sk_open_unix: %s: %m", err);
995
  close(fd);
996
  s->fd = -1;
997
  return -1;
998
}
999

    
1000
static int
1001
sk_maybe_write(sock *s)
1002
{
1003
  int e;
1004

    
1005
  switch (s->type)
1006
    {
1007
    case SK_TCP:
1008
    case SK_MAGIC:
1009
    case SK_UNIX:
1010
      while (s->ttx != s->tpos)
1011
        {
1012
          e = write(s->fd, s->ttx, s->tpos - s->ttx);
1013
          if (e < 0)
1014
            {
1015
              if (errno != EINTR && errno != EAGAIN)
1016
                {
1017
                  s->ttx = s->tpos;        /* empty tx buffer */
1018
                  s->err_hook(s, errno);
1019
                  return -1;
1020
                }
1021
              return 0;
1022
            }
1023
          s->ttx += e;
1024
        }
1025
      s->ttx = s->tpos = s->tbuf;
1026
      return 1;
1027
    case SK_UDP:
1028
    case SK_UDP_MC:
1029
    case SK_IP:
1030
    case SK_IP_MC:
1031
      {
1032
        sockaddr sa;
1033

    
1034
        if (s->tbuf == s->tpos)
1035
          return 1;
1036
        fill_in_sockaddr(&sa, s->faddr, s->fport);
1037

    
1038
        e = sendto(s->fd, s->tbuf, s->tpos - s->tbuf, 0, (struct sockaddr *) &sa, sizeof(sa));
1039
        if (e < 0)
1040
          {
1041
            if (errno != EINTR && errno != EAGAIN)
1042
              {
1043
                s->ttx = s->tpos;        /* empty tx buffer */
1044
                s->err_hook(s, errno);
1045
                return -1;
1046
              }
1047
            return 0;
1048
          }
1049
        s->tpos = s->tbuf;
1050
        return 1;
1051
      }
1052
    default:
1053
      bug("sk_maybe_write: unknown socket type %d", s->type);
1054
    }
1055
}
1056

    
1057
/**
1058
 * sk_send - send data to a socket
1059
 * @s: socket
1060
 * @len: number of bytes to send
1061
 *
1062
 * This function sends @len bytes of data prepared in the
1063
 * transmit buffer of the socket @s to the network connection.
1064
 * If the packet can be sent immediately, it does so and returns
1065
 * 1, else it queues the packet for later processing, returns 0
1066
 * and calls the @tx_hook of the socket when the tranmission
1067
 * takes place.
1068
 */
1069
int
1070
sk_send(sock *s, unsigned len)
1071
{
1072
  s->faddr = s->daddr;
1073
  s->fport = s->dport;
1074
  s->ttx = s->tbuf;
1075
  s->tpos = s->tbuf + len;
1076
  return sk_maybe_write(s);
1077
}
1078

    
1079
/**
1080
 * sk_send_to - send data to a specific destination
1081
 * @s: socket
1082
 * @len: number of bytes to send
1083
 * @addr: IP address to send the packet to
1084
 * @port: port to send the packet to
1085
 *
1086
 * This is a sk_send() replacement for connection-less packet sockets
1087
 * which allows destination of the packet to be chosen dynamically.
1088
 */
1089
int
1090
sk_send_to(sock *s, unsigned len, ip_addr addr, unsigned port)
1091
{
1092
  s->faddr = addr;
1093
  s->fport = port;
1094
  s->ttx = s->tbuf;
1095
  s->tpos = s->tbuf + len;
1096
  return sk_maybe_write(s);
1097
}
1098

    
1099
static int
1100
sk_read(sock *s)
1101
{
1102
  switch (s->type)
1103
    {
1104
    case SK_TCP_PASSIVE:
1105
      {
1106
        sockaddr sa;
1107
        return sk_passive_connected(s, (struct sockaddr *) &sa, sizeof(sa), SK_TCP);
1108
      }
1109
    case SK_UNIX_PASSIVE:
1110
      {
1111
        struct sockaddr_un sa;
1112
        return sk_passive_connected(s, (struct sockaddr *) &sa, sizeof(sa), SK_UNIX);
1113
      }
1114
    case SK_TCP:
1115
    case SK_UNIX:
1116
      {
1117
        int c = read(s->fd, s->rpos, s->rbuf + s->rbsize - s->rpos);
1118

    
1119
        if (c < 0)
1120
          {
1121
            if (errno != EINTR && errno != EAGAIN)
1122
              s->err_hook(s, errno);
1123
          }
1124
        else if (!c)
1125
          s->err_hook(s, 0);
1126
        else
1127
          {
1128
            s->rpos += c;
1129
            if (s->rx_hook(s, s->rpos - s->rbuf))
1130
              {
1131
                /* We need to be careful since the socket could have been deleted by the hook */
1132
                if (current_sock == s)
1133
                  s->rpos = s->rbuf;
1134
              }
1135
            return 1;
1136
          }
1137
        return 0;
1138
      }
1139
    case SK_MAGIC:
1140
      return s->rx_hook(s, 0);
1141
    default:
1142
      {
1143
        sockaddr sa;
1144
        int al = sizeof(sa);
1145
        int e = recvfrom(s->fd, s->rbuf, s->rbsize, 0, (struct sockaddr *) &sa, &al);
1146

    
1147
        if (e < 0)
1148
          {
1149
            if (errno != EINTR && errno != EAGAIN)
1150
              s->err_hook(s, errno);
1151
            return 0;
1152
          }
1153
        s->rpos = s->rbuf + e;
1154
        get_sockaddr(&sa, &s->faddr, &s->fport, 1);
1155
        s->rx_hook(s, e);
1156
        return 1;
1157
      }
1158
    }
1159
}
1160

    
1161
static int
1162
sk_write(sock *s)
1163
{
1164
  switch (s->type)
1165
    {
1166
    case SK_TCP_ACTIVE:
1167
      {
1168
        sockaddr sa;
1169
        fill_in_sockaddr(&sa, s->daddr, s->dport);
1170
        if (connect(s->fd, (struct sockaddr *) &sa, sizeof(sa)) >= 0 || errno == EISCONN)
1171
          sk_tcp_connected(s);
1172
        else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS)
1173
          s->err_hook(s, errno);
1174
        return 0;
1175
      }
1176
    default:
1177
      if (s->ttx != s->tpos && sk_maybe_write(s) > 0)
1178
        {
1179
          s->tx_hook(s);
1180
          return 1;
1181
        }
1182
      return 0;
1183
    }
1184
}
1185

    
1186
void
1187
sk_dump_all(void)
1188
{
1189
  node *n;
1190
  sock *s;
1191

    
1192
  debug("Open sockets:\n");
1193
  WALK_LIST(n, sock_list)
1194
    {
1195
      s = SKIP_BACK(sock, n, n);
1196
      debug("%p ", s);
1197
      sk_dump(&s->r);
1198
    }
1199
  debug("\n");
1200
}
1201

    
1202
#undef ERR
1203
#undef WARN
1204

    
1205
/*
1206
 *        Main I/O Loop
1207
 */
1208

    
1209
volatile int async_config_flag;                /* Non-zero when an asynchronous reconfiguration is scheduled; io_loop() calls async_config() and clears it */
1210
volatile int async_dump_flag;                /* Non-zero when an asynchronous dump is scheduled; io_loop() calls async_dump() and clears it */
1211

    
1212
void
1213
io_init(void)
1214
{
1215
  init_list(&near_timers);
1216
  init_list(&far_timers);
1217
  init_list(&sock_list);
1218
  init_list(&global_event_list);
1219
  krt_io_init();
1220
  now = time(NULL);
1221
  srandom((int) now);
1222
}
1223

    
1224
void
1225
io_loop(void)
1226
{
1227
  fd_set rd, wr;
1228
  struct timeval timo;
1229
  time_t tout;
1230
  int hi, events;
1231
  sock *s;
1232
  node *n;
1233

    
1234
  sock_recalc_fdsets_p = 1;
1235
  for(;;)
1236
    {
1237
      events = ev_run_list(&global_event_list);
1238
      now = time(NULL);
1239
      tout = tm_first_shot();
1240
      if (tout <= now)
1241
        {
1242
          tm_shot();
1243
          continue;
1244
        }
1245
      timo.tv_sec = events ? 0 : tout - now;
1246
      timo.tv_usec = 0;
1247

    
1248
      if (sock_recalc_fdsets_p)
1249
        {
1250
          sock_recalc_fdsets_p = 0;
1251
          FD_ZERO(&rd);
1252
          FD_ZERO(&wr);
1253
        }
1254

    
1255
      hi = 0;
1256
      WALK_LIST(n, sock_list)
1257
        {
1258
          s = SKIP_BACK(sock, n, n);
1259
          if (s->rx_hook)
1260
            {
1261
              FD_SET(s->fd, &rd);
1262
              if (s->fd > hi)
1263
                hi = s->fd;
1264
            }
1265
          else
1266
            FD_CLR(s->fd, &rd);
1267
          if (s->tx_hook && s->ttx != s->tpos)
1268
            {
1269
              FD_SET(s->fd, &wr);
1270
              if (s->fd > hi)
1271
                hi = s->fd;
1272
            }
1273
          else
1274
            FD_CLR(s->fd, &wr);
1275
        }
1276

    
1277
      /*
1278
       * Yes, this is racy. But even if the signal comes before this test
1279
       * and entering select(), it gets caught on the next timer tick.
1280
       */
1281

    
1282
      if (async_config_flag)
1283
        {
1284
          async_config();
1285
          async_config_flag = 0;
1286
          continue;
1287
        }
1288
      if (async_dump_flag)
1289
        {
1290
          async_dump();
1291
          async_dump_flag = 0;
1292
          continue;
1293
        }
1294
      if (async_shutdown_flag)
1295
        {
1296
          async_shutdown();
1297
          async_shutdown_flag = 0;
1298
          continue;
1299
        }
1300

    
1301
      /* And finally enter select() to find active sockets */
1302

    
1303
      hi = select(hi+1, &rd, &wr, NULL, &timo);
1304
      if (hi < 0)
1305
        {
1306
          if (errno == EINTR || errno == EAGAIN)
1307
            continue;
1308
          die("select: %m");
1309
        }
1310
      if (hi)
1311
        {
1312
          current_sock = SKIP_BACK(sock, n, HEAD(sock_list));        /* guaranteed to be non-empty */
1313
          while (current_sock)
1314
            {
1315
              sock *s = current_sock;
1316
              int e;
1317
              if (FD_ISSET(s->fd, &rd))
1318
                do
1319
                  {
1320
                    e = sk_read(s);
1321
                    if (s != current_sock)
1322
                      goto next;
1323
                  }
1324
                while (e);
1325
              if (FD_ISSET(s->fd, &wr))
1326
                do
1327
                  {
1328
                    e = sk_write(s);
1329
                    if (s != current_sock)
1330
                      goto next;
1331
                  }
1332
                while (e);
1333
              current_sock = sk_next(s);
1334
            next: ;
1335
            }
1336
        }
1337
    }
1338
}
1339

    
1340
/*
 * test_old_bird - check that no other BIRD is listening on a UNIX socket
 * @path: file system path of the UNIX domain socket to probe
 *
 * Tries to connect to the socket at @path. If the connection succeeds,
 * the function calls die(). If it fails, the probing socket is closed
 * and the function returns.
 *
 * die() is also called when @path does not fit into a UNIX socket
 * address or when the probing socket cannot be created.
 */
void
test_old_bird(char *path)
{
  int fd;
  struct sockaddr_un sa;

  /* sun_path is a fixed-size array; refuse anything strcpy() would overflow */
  if (strlen(path) >= sizeof(sa.sun_path))
    die("Socket path too long");

  fd = socket(AF_UNIX, SOCK_STREAM, 0);

  if (fd < 0)
    die("Cannot create socket: %m");
  bzero(&sa, sizeof(sa));
  sa.sun_family = AF_UNIX;
  strcpy(sa.sun_path, path);
  if (connect(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) == 0)
    die("I found another BIRD running.");
  close(fd);
}
1357

    
1358