Statistics
| Branch: | Revision:

iof-bird-daemon / sysdep / unix / io.c @ 7152e5ef

History | View | Annotate | Download (45.5 KB)

1 b5d9ee5c Martin Mares
/*
2
 *        BIRD Internet Routing Daemon -- Unix I/O
3
 *
4 38a608c5 Martin Mares
 *        (c) 1998--2004 Martin Mares <mj@ucw.cz>
5 b1a1faba Ondrej Filip
 *      (c) 2004       Ondrej Filip <feela@network.cz>
6 b5d9ee5c Martin Mares
 *
7
 *        Can be freely distributed and used under the terms of the GNU GPL.
8
 */
9
10 607d9914 Ondrej Zajicek
/* Unfortunately, some glibc versions hide parts of RFC 3542 API
11
   if _GNU_SOURCE is not defined. */
12
#define _GNU_SOURCE 1
13
14 b5d9ee5c Martin Mares
#include <stdio.h>
15
#include <stdlib.h>
16 01b776e1 Martin Mares
#include <time.h>
17 b5d9ee5c Martin Mares
#include <sys/time.h>
18
#include <sys/types.h>
19
#include <sys/socket.h>
20 46a82e9c Ondrej Zajicek
#include <sys/uio.h>
21 b93abffa Martin Mares
#include <sys/un.h>
22 b5d9ee5c Martin Mares
#include <unistd.h>
23 a0b176e3 Ondrej Zajicek
#include <fcntl.h>
24 b5d9ee5c Martin Mares
#include <errno.h>
25 05476c4d Ondrej Zajicek
#include <net/if.h>
26 d0e9b36d Ondrej Zajicek
#include <netinet/in.h>
27 48e5f32d Ondrej Zajicek
#include <netinet/tcp.h>
28
#include <netinet/udp.h>
29 93e868c7 Ondrej Zajicek
#include <netinet/icmp6.h>
30 b5d9ee5c Martin Mares
31
#include "nest/bird.h"
32
#include "lib/lists.h"
33
#include "lib/resource.h"
34 7152e5ef Jan Moskyto Matejka
#include "sysdep/unix/timer.h"
35 b5d9ee5c Martin Mares
#include "lib/socket.h"
36 e8f73195 Martin Mares
#include "lib/event.h"
37 afa8937a Martin Mares
#include "lib/string.h"
38 b5d9ee5c Martin Mares
#include "nest/iface.h"
39
40 7152e5ef Jan Moskyto Matejka
#include "sysdep/unix/unix.h"
41
#include CONFIG_INCLUDE_SYSIO_H
42 b5d9ee5c Martin Mares
43 ea89da38 Ondrej Zajicek
/* Maximum number of calls of tx handler for one socket in one
44 4323099d Ondrej Zajicek
 * select iteration. Should be small enough to not monopolize CPU by
45
 * one protocol instance.
46
 */
47
#define MAX_STEPS 4
48
49 ea89da38 Ondrej Zajicek
/* Maximum number of calls of rx handler for all sockets in one select
50
   iteration. RX callbacks are often much more costly so we limit
51
   this to get small latencies */
52
#define MAX_RX_STEPS 4
53
54 b5d9ee5c Martin Mares
/*
55 a9c986f9 Martin Mares
 *        Tracked Files
56
 */
57
58
struct rfile {
59
  resource r;
60
  FILE *f;
61
};
62
63
static void
64
rf_free(resource *r)
65
{
66
  struct rfile *a = (struct rfile *) r;
67
68
  fclose(a->f);
69
}
70
71
static void
72
rf_dump(resource *r)
73
{
74
  struct rfile *a = (struct rfile *) r;
75
76
  debug("(FILE *%p)\n", a->f);
77
}
78
79
static struct resclass rf_class = {
80
  "FILE",
81
  sizeof(struct rfile),
82
  rf_free,
83 e81b440f Ondrej Zajicek
  rf_dump,
84 acb60628 Ondrej Zajicek
  NULL,
85 e81b440f Ondrej Zajicek
  NULL
86 a9c986f9 Martin Mares
};
87
88
void *
89 f78056fb Martin Mares
tracked_fopen(pool *p, char *name, char *mode)
90 a9c986f9 Martin Mares
{
91
  FILE *f = fopen(name, mode);
92
93
  if (f)
94
    {
95
      struct rfile *r = ralloc(p, &rf_class);
96
      r->f = f;
97
    }
98
  return f;
99
}
100
101 525fa2c1 Martin Mares
/**
102
 * DOC: Timers
103
 *
104
 * Timers are resources which represent a wish of a module to call
105
 * a function at the specified time. The platform dependent code
106 58f7d004 Martin Mares
 * doesn't guarantee exact timing, only that a timer function
107 525fa2c1 Martin Mares
 * won't be called before the requested time.
108
 *
109 fd91ae33 Ondrej Zajicek
 * In BIRD, time is represented by values of the &bird_clock_t type
110
 * which are integral numbers interpreted as a relative number of seconds since
111
 * some fixed time point in the past. The current time can be read
112
 * from variable @now with reasonable accuracy and is monotonic. There is also
113
 * a current 'absolute' time in variable @now_real reported by OS.
114 525fa2c1 Martin Mares
 *
115
 * Each timer is described by a &timer structure containing a pointer
116
 * to the handler function (@hook), data private to this function (@data),
117
 * time the function should be called at (@expires, 0 for inactive timers),
118
 * for the other fields see |timer.h|.
119 b5d9ee5c Martin Mares
 */
120
121
#define NEAR_TIMER_LIMIT 4
122
123
static list near_timers, far_timers;
124
static bird_clock_t first_far_timer = TIME_INFINITY;
125
126 002b6423 Ondrej Zajicek
/* now must be different from 0, because 0 is a special value in timer->expires */
127 a92cf57d Ondrej Zajicek
bird_clock_t now = 1, now_real, boot_time;
128 fd91ae33 Ondrej Zajicek
129
static void
130
update_times_plain(void)
131
{
132
  bird_clock_t new_time = time(NULL);
133
  int delta = new_time - now_real;
134
135
  if ((delta >= 0) && (delta < 60))
136
    now += delta;
137
  else if (now_real != 0)
138
   log(L_WARN "Time jump, delta %d s", delta);
139
140
  now_real = new_time;
141
}
142
143
static void
144
update_times_gettime(void)
145
{
146
  struct timespec ts;
147
  int rv;
148
149
  rv = clock_gettime(CLOCK_MONOTONIC, &ts);
150
  if (rv != 0)
151
    die("clock_gettime: %m");
152
153
  if (ts.tv_sec != now) {
154
    if (ts.tv_sec < now)
155
      log(L_ERR "Monotonic timer is broken");
156
157
    now = ts.tv_sec;
158
    now_real = time(NULL);
159
  }
160
}
161
162
/* Nonzero when clock_gettime(CLOCK_MONOTONIC) worked in init_times() */
static int clock_monotonic_available;

/* Refresh @now and @now_real using whichever clock source is available. */
static inline void
update_times(void)
{
  if (!clock_monotonic_available)
    update_times_plain();
  else
    update_times_gettime();
}
172
173
static inline void
174
init_times(void)
175
{
176
 struct timespec ts;
177
 clock_monotonic_available = (clock_gettime(CLOCK_MONOTONIC, &ts) == 0);
178
 if (!clock_monotonic_available)
179
   log(L_WARN "Monotonic timer is missing");
180
}
181
182 b5d9ee5c Martin Mares
183
static void
184
tm_free(resource *r)
185
{
186
  timer *t = (timer *) r;
187
188
  tm_stop(t);
189
}
190
191
static void
192
tm_dump(resource *r)
193
{
194
  timer *t = (timer *) r;
195
196 e8f73195 Martin Mares
  debug("(code %p, data %p, ", t->hook, t->data);
197 af847acc Martin Mares
  if (t->randomize)
198
    debug("rand %d, ", t->randomize);
199
  if (t->recurrent)
200
    debug("recur %d, ", t->recurrent);
201 b5d9ee5c Martin Mares
  if (t->expires)
202
    debug("expires in %d sec)\n", t->expires - now);
203
  else
204
    debug("inactive)\n");
205
}
206
207
static struct resclass tm_class = {
208
  "Timer",
209
  sizeof(timer),
210
  tm_free,
211 e81b440f Ondrej Zajicek
  tm_dump,
212 acb60628 Ondrej Zajicek
  NULL,
213 e81b440f Ondrej Zajicek
  NULL
214 b5d9ee5c Martin Mares
};
215
216 525fa2c1 Martin Mares
/**
217
 * tm_new - create a timer
218
 * @p: pool
219
 *
220
 * This function creates a new timer resource and returns
221
 * a pointer to it. To use the timer, you need to fill in
222
 * the structure fields and call tm_start() to start timing.
223
 */
224 b5d9ee5c Martin Mares
timer *
225
tm_new(pool *p)
226
{
227
  timer *t = ralloc(p, &tm_class);
228
  return t;
229
}
230
231
static inline void
232
tm_insert_near(timer *t)
233
{
234
  node *n = HEAD(near_timers);
235
236
  while (n->next && (SKIP_BACK(timer, n, n)->expires < t->expires))
237
    n = n->next;
238
  insert_node(&t->n, n->prev);
239
}
240
241 525fa2c1 Martin Mares
/**
242
 * tm_start - start a timer
243
 * @t: timer
244
 * @after: number of seconds the timer should be run after
245
 *
246
 * This function schedules the hook function of the timer to
247
 * be called after @after seconds. If the timer has been already
248
 * started, it's @expire time is replaced by the new value.
249
 *
250
 * You can have set the @randomize field of @t, the timeout
251
 * will be increased by a random number of seconds chosen
252
 * uniformly from range 0 .. @randomize.
253
 *
254
 * You can call tm_start() from the handler function of the timer
255
 * to request another run of the timer. Also, you can set the @recurrent
256
 * field to have the timer re-added automatically with the same timeout.
257
 */
258 b5d9ee5c Martin Mares
void
259
tm_start(timer *t, unsigned after)
260
{
261
  bird_clock_t when;
262
263
  if (t->randomize)
264 af847acc Martin Mares
    after += random() % (t->randomize + 1);
265 b5d9ee5c Martin Mares
  when = now + after;
266
  if (t->expires == when)
267
    return;
268
  if (t->expires)
269
    rem_node(&t->n);
270
  t->expires = when;
271
  if (after <= NEAR_TIMER_LIMIT)
272
    tm_insert_near(t);
273
  else
274
    {
275
      if (!first_far_timer || first_far_timer > when)
276
        first_far_timer = when;
277
      add_tail(&far_timers, &t->n);
278
    }
279
}
280
281 525fa2c1 Martin Mares
/**
282
 * tm_stop - stop a timer
283
 * @t: timer
284
 *
285
 * This function stops a timer. If the timer is already stopped,
286
 * nothing happens.
287
 */
288 b5d9ee5c Martin Mares
void
289
tm_stop(timer *t)
290
{
291
  if (t->expires)
292
    {
293
      rem_node(&t->n);
294
      t->expires = 0;
295
    }
296
}
297
298
static void
299
tm_dump_them(char *name, list *l)
300
{
301
  node *n;
302
  timer *t;
303
304
  debug("%s timers:\n", name);
305
  WALK_LIST(n, *l)
306
    {
307
      t = SKIP_BACK(timer, n, n);
308
      debug("%p ", t);
309
      tm_dump(&t->r);
310
    }
311
  debug("\n");
312
}
313
314
void
315
tm_dump_all(void)
316
{
317
  tm_dump_them("Near", &near_timers);
318
  tm_dump_them("Far", &far_timers);
319
}
320
321
static inline time_t
322
tm_first_shot(void)
323
{
324
  time_t x = first_far_timer;
325
326
  if (!EMPTY_LIST(near_timers))
327
    {
328
      timer *t = SKIP_BACK(timer, n, HEAD(near_timers));
329
      if (t->expires < x)
330
        x = t->expires;
331
    }
332
  return x;
333
}
334
335 8bcb5fb1 Ondrej Zajicek
void io_log_event(void *hook, void *data);
336
337 b5d9ee5c Martin Mares
static void
338
tm_shot(void)
339
{
340
  timer *t;
341
  node *n, *m;
342
343
  if (first_far_timer <= now)
344
    {
345 28a9a189 Martin Mares
      bird_clock_t limit = now + NEAR_TIMER_LIMIT;
346 b5d9ee5c Martin Mares
      first_far_timer = TIME_INFINITY;
347
      n = HEAD(far_timers);
348
      while (m = n->next)
349
        {
350
          t = SKIP_BACK(timer, n, n);
351
          if (t->expires <= limit)
352
            {
353
              rem_node(n);
354
              tm_insert_near(t);
355
            }
356
          else if (t->expires < first_far_timer)
357
            first_far_timer = t->expires;
358
          n = m;
359
        }
360
    }
361
  while ((n = HEAD(near_timers)) -> next)
362
    {
363 af847acc Martin Mares
      int delay;
364 b5d9ee5c Martin Mares
      t = SKIP_BACK(timer, n, n);
365
      if (t->expires > now)
366
        break;
367
      rem_node(n);
368 af847acc Martin Mares
      delay = t->expires - now;
369 b5d9ee5c Martin Mares
      t->expires = 0;
370 af847acc Martin Mares
      if (t->recurrent)
371
        {
372
          int i = t->recurrent - delay;
373
          if (i < 0)
374
            i = 0;
375
          tm_start(t, i);
376
        }
377 8bcb5fb1 Ondrej Zajicek
      io_log_event(t->hook, t->data);
378 b5d9ee5c Martin Mares
      t->hook(t);
379
    }
380
}
381
382 525fa2c1 Martin Mares
/**
383 0d3effcf Ondrej Filip
 * tm_parse_datetime - parse a date and time
384
 * @x: datetime string
385
 *
386
 * tm_parse_datetime() takes a textual representation of
387
 * a date and time (dd-mm-yyyy hh:mm:ss)
388
 * and converts it to the corresponding value of type &bird_clock_t.
389
 */
390
bird_clock_t
391
tm_parse_datetime(char *x)
392
{
393
  struct tm tm;
394
  int n;
395
  time_t t;
396
397
  if (sscanf(x, "%d-%d-%d %d:%d:%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &tm.tm_hour, &tm.tm_min, &tm.tm_sec, &n) != 6 || x[n])
398
    return tm_parse_date(x);
399
  tm.tm_mon--;
400
  tm.tm_year -= 1900;
401
  t = mktime(&tm);
402
  if (t == (time_t) -1)
403
    return 0;
404
  return t;
405
}
406
/**
407 525fa2c1 Martin Mares
 * tm_parse_date - parse a date
408
 * @x: date string
409
 *
410
 * tm_parse_date() takes a textual representation of a date (dd-mm-yyyy)
411
 * and converts it to the corresponding value of type &bird_clock_t.
412
 */
413 913f7dc9 Martin Mares
bird_clock_t
414
tm_parse_date(char *x)
415
{
416
  struct tm tm;
417
  int n;
418
  time_t t;
419
420
  if (sscanf(x, "%d-%d-%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &n) != 3 || x[n])
421
    return 0;
422
  tm.tm_mon--;
423
  tm.tm_year -= 1900;
424
  tm.tm_hour = tm.tm_min = tm.tm_sec = 0;
425
  t = mktime(&tm);
426
  if (t == (time_t) -1)
427
    return 0;
428
  return t;
429
}
430
431 c37e7851 Ondrej Zajicek
static void
432
tm_format_reltime(char *x, struct tm *tm, bird_clock_t delta)
433 913f7dc9 Martin Mares
{
434 c37e7851 Ondrej Zajicek
  static char *month_names[12] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun",
435
                                   "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
436 913f7dc9 Martin Mares
437 c37e7851 Ondrej Zajicek
  if (delta < 20*3600)
438
    bsprintf(x, "%02d:%02d", tm->tm_hour, tm->tm_min);
439
  else if (delta < 360*86400)
440
    bsprintf(x, "%s%02d", month_names[tm->tm_mon], tm->tm_mday);
441
  else
442
    bsprintf(x, "%d", tm->tm_year+1900);
443 913f7dc9 Martin Mares
}
444
445 c37e7851 Ondrej Zajicek
#include "conf/conf.h"
446
447 525fa2c1 Martin Mares
/**
448
 * tm_format_datetime - convert date and time to textual representation
449
 * @x: destination buffer of size %TM_DATETIME_BUFFER_SIZE
450
 * @t: time
451
 *
452 fd91ae33 Ondrej Zajicek
 * This function formats the given relative time value @t to a textual
453
 * date/time representation (dd-mm-yyyy hh:mm:ss) in real time.
454 525fa2c1 Martin Mares
 */
455 afa8937a Martin Mares
void
456 c37e7851 Ondrej Zajicek
tm_format_datetime(char *x, struct timeformat *fmt_spec, bird_clock_t t)
457 7a88832e Martin Mares
{
458 c37e7851 Ondrej Zajicek
  const char *fmt_used;
459 7a88832e Martin Mares
  struct tm *tm;
460 fd91ae33 Ondrej Zajicek
  bird_clock_t delta = now - t;
461
  t = now_real - delta;
462 7a88832e Martin Mares
  tm = localtime(&t);
463
464 c37e7851 Ondrej Zajicek
  if (fmt_spec->fmt1 == NULL)
465
    return tm_format_reltime(x, tm, delta);
466 afa8937a Martin Mares
467 c37e7851 Ondrej Zajicek
  if ((fmt_spec->limit == 0) || (delta < fmt_spec->limit))
468
    fmt_used = fmt_spec->fmt1;
469 afa8937a Martin Mares
  else
470 c37e7851 Ondrej Zajicek
    fmt_used = fmt_spec->fmt2;
471
472
  int rv = strftime(x, TM_DATETIME_BUFFER_SIZE, fmt_used, tm);
473
  if (((rv == 0) && fmt_used[0]) || (rv == TM_DATETIME_BUFFER_SIZE))
474
    strcpy(x, "<too-long>");
475 afa8937a Martin Mares
}
476
477 05476c4d Ondrej Zajicek
478 525fa2c1 Martin Mares
/**
479
 * DOC: Sockets
480
 *
481
 * Socket resources represent network connections. Their data structure (&socket)
482
 * contains a lot of fields defining the exact type of the socket, the local and
483
 * remote addresses and ports, pointers to socket buffers and finally pointers to
484
 * hook functions to be called when new data have arrived to the receive buffer
485
 * (@rx_hook), when the contents of the transmit buffer have been transmitted
486
 * (@tx_hook) and when an error or connection close occurs (@err_hook).
487
 *
488 38a608c5 Martin Mares
 * Freeing of sockets from inside socket hooks is perfectly safe.
489 b5d9ee5c Martin Mares
 */
490
491 abae6e9c Martin Mares
#ifndef SOL_IP
492
#define SOL_IP IPPROTO_IP
493
#endif
494
495 b1a1faba Ondrej Filip
#ifndef SOL_IPV6
496
#define SOL_IPV6 IPPROTO_IPV6
497
#endif
498
499 48e5f32d Ondrej Zajicek
#ifndef SOL_ICMPV6
500
#define SOL_ICMPV6 IPPROTO_ICMPV6
501
#endif
502
503
504 05476c4d Ondrej Zajicek
/*
505
 *        Sockaddr helper functions
506
 */
507 38a608c5 Martin Mares
508 05476c4d Ondrej Zajicek
/* Size of the socket address structure for @af; anything but AF_INET is sized as IPv6. */
static inline int sockaddr_length(int af)
{
  if (af == AF_INET)
    return sizeof(struct sockaddr_in);

  return sizeof(struct sockaddr_in6);
}
510
511
static inline void
512
sockaddr_fill4(struct sockaddr_in *sa, ip_addr a, struct iface *ifa, uint port)
513 38a608c5 Martin Mares
{
514 05476c4d Ondrej Zajicek
  memset(sa, 0, sizeof(struct sockaddr_in));
515
#ifdef HAVE_SIN_LEN
516
  sa->sin_len = sizeof(struct sockaddr_in);
517
#endif
518
  sa->sin_family = AF_INET;
519
  sa->sin_port = htons(port);
520
  sa->sin_addr = ipa_to_in4(a);
521 38a608c5 Martin Mares
}
522 b5d9ee5c Martin Mares
523 05476c4d Ondrej Zajicek
static inline void
524
sockaddr_fill6(struct sockaddr_in6 *sa, ip_addr a, struct iface *ifa, uint port)
525 b5d9ee5c Martin Mares
{
526 05476c4d Ondrej Zajicek
  memset(sa, 0, sizeof(struct sockaddr_in6));
527
#ifdef SIN6_LEN
528
  sa->sin6_len = sizeof(struct sockaddr_in6);
529
#endif
530
  sa->sin6_family = AF_INET6;
531
  sa->sin6_port = htons(port);
532
  sa->sin6_flowinfo = 0;
533
  sa->sin6_addr = ipa_to_in6(a);
534
535
  if (ifa && ipa_is_link_local(a))
536
    sa->sin6_scope_id = ifa->index;
537 4da25acb Martin Mares
}
538 b5d9ee5c Martin Mares
539 05476c4d Ondrej Zajicek
void
540
sockaddr_fill(sockaddr *sa, int af, ip_addr a, struct iface *ifa, uint port)
541 4da25acb Martin Mares
{
542 05476c4d Ondrej Zajicek
  if (af == AF_INET)
543
    sockaddr_fill4((struct sockaddr_in *) sa, a, ifa, port);
544
  else if (af == AF_INET6)
545
    sockaddr_fill6((struct sockaddr_in6 *) sa, a, ifa, port);
546
  else
547
    bug("Unknown AF");
548 4da25acb Martin Mares
}
549
550 05476c4d Ondrej Zajicek
static inline void
551
sockaddr_read4(struct sockaddr_in *sa, ip_addr *a, struct iface **ifa, uint *port)
552 4da25acb Martin Mares
{
553 05476c4d Ondrej Zajicek
  *port = ntohs(sa->sin_port);
554
  *a = ipa_from_in4(sa->sin_addr);
555 b5d9ee5c Martin Mares
}
556
557 05476c4d Ondrej Zajicek
static inline void
558
sockaddr_read6(struct sockaddr_in6 *sa, ip_addr *a, struct iface **ifa, uint *port)
559 48e5f32d Ondrej Zajicek
{
560 05476c4d Ondrej Zajicek
  *port = ntohs(sa->sin6_port);
561
  *a = ipa_from_in6(sa->sin6_addr);
562 48e5f32d Ondrej Zajicek
563 05476c4d Ondrej Zajicek
  if (ifa && ipa_is_link_local(*a))
564
    *ifa = if_find_by_index(sa->sin6_scope_id);
565 48e5f32d Ondrej Zajicek
}
566
567 05476c4d Ondrej Zajicek
int
568
sockaddr_read(sockaddr *sa, int af, ip_addr *a, struct iface **ifa, uint *port)
569 48e5f32d Ondrej Zajicek
{
570 05476c4d Ondrej Zajicek
  if (sa->sa.sa_family != af)
571
    goto fail;
572 48e5f32d Ondrej Zajicek
573 05476c4d Ondrej Zajicek
  if (af == AF_INET)
574
    sockaddr_read4((struct sockaddr_in *) sa, a, ifa, port);
575
  else if (af == AF_INET6)
576
    sockaddr_read6((struct sockaddr_in6 *) sa, a, ifa, port);
577
  else
578
    goto fail;
579 48e5f32d Ondrej Zajicek
580 05476c4d Ondrej Zajicek
  return 0;
581 48e5f32d Ondrej Zajicek
582 05476c4d Ondrej Zajicek
 fail:
583
  *a = IPA_NONE;
584
  *port = 0;
585
  return -1;
586 48e5f32d Ondrej Zajicek
}
587
588 6ffa8f53 Jan Moskyto Matejka
/* Translation table from SK_FAM_* constants to the matching AF_* values */
const int fam_to_af[] = {
  [SK_FAM_IPV4] = AF_INET,
  [SK_FAM_IPV6] = AF_INET6
};
589 48e5f32d Ondrej Zajicek
590 05476c4d Ondrej Zajicek
/*
591
 *        IPv6 multicast syscalls
592
 */
593 4da25acb Martin Mares
594 05476c4d Ondrej Zajicek
/* Fortunately standardized in RFC 3493 */
595 b5d9ee5c Martin Mares
596 05476c4d Ondrej Zajicek
/*
 * Initializer for struct ipv6_mreq: group address @maddr on the interface
 * @ifa (a pointer to a structure with an @index field). The @ifa argument
 * is parenthesized so that any pointer expression can be passed.
 */
#define INIT_MREQ6(maddr,ifa) \
  { .ipv6mr_multiaddr = ipa_to_in6(maddr), .ipv6mr_interface = (ifa)->index }
598 b5d9ee5c Martin Mares
599 05476c4d Ondrej Zajicek
static inline int
600
sk_setup_multicast6(sock *s)
601 b5d9ee5c Martin Mares
{
602 05476c4d Ondrej Zajicek
  int index = s->iface->index;
603
  int ttl = s->ttl;
604
  int n = 0;
605 b5d9ee5c Martin Mares
606 05476c4d Ondrej Zajicek
  if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_IF, &index, sizeof(index)) < 0)
607
    ERR("IPV6_MULTICAST_IF");
608 b5d9ee5c Martin Mares
609 05476c4d Ondrej Zajicek
  if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_HOPS, &ttl, sizeof(ttl)) < 0)
610
    ERR("IPV6_MULTICAST_HOPS");
611 4f22c981 Martin Mares
612 05476c4d Ondrej Zajicek
  if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_LOOP, &n, sizeof(n)) < 0)
613
    ERR("IPV6_MULTICAST_LOOP");
614 4f22c981 Martin Mares
615 05476c4d Ondrej Zajicek
  return 0;
616 061ab802 Ondrej Zajicek
}
617
618 05476c4d Ondrej Zajicek
static inline int
619
sk_join_group6(sock *s, ip_addr maddr)
620 4f22c981 Martin Mares
{
621 05476c4d Ondrej Zajicek
  struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface);
622 eb1451a3 Ondrej Zajicek
623 05476c4d Ondrej Zajicek
  if (setsockopt(s->fd, SOL_IPV6, IPV6_JOIN_GROUP, &mr, sizeof(mr)) < 0)
624
    ERR("IPV6_JOIN_GROUP");
625 4f22c981 Martin Mares
626 05476c4d Ondrej Zajicek
  return 0;
627 b5d9ee5c Martin Mares
}
628
629 05476c4d Ondrej Zajicek
static inline int
630
sk_leave_group6(sock *s, ip_addr maddr)
631 b5d9ee5c Martin Mares
{
632 05476c4d Ondrej Zajicek
  struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface);
633 b5d9ee5c Martin Mares
634 05476c4d Ondrej Zajicek
  if (setsockopt(s->fd, SOL_IPV6, IPV6_LEAVE_GROUP, &mr, sizeof(mr)) < 0)
635
    ERR("IPV6_LEAVE_GROUP");
636
637
  return 0;
638
}
639 4f22c981 Martin Mares
640 bed41728 Ondrej Zajicek
641 05476c4d Ondrej Zajicek
/*
642
 *        IPv6 packet control messages
643
 */
644 bed41728 Ondrej Zajicek
645 05476c4d Ondrej Zajicek
/* Also standardized, in RFC 3542 */
646 bed41728 Ondrej Zajicek
647 dcc60494 Ondrej Zajicek
/*
648
 * RFC 2292 uses IPV6_PKTINFO for both the socket option and the cmsg
649
 * type, RFC 3542 changed the socket option to IPV6_RECVPKTINFO. If we
650
 * don't have IPV6_RECVPKTINFO we suppose the OS implements the older
651
 * RFC and we use IPV6_PKTINFO.
652
 */
653
#ifndef IPV6_RECVPKTINFO
654
#define IPV6_RECVPKTINFO IPV6_PKTINFO
655
#endif
656 70e212f9 Ondrej Zajicek
/*
657
 * Same goes for IPV6_HOPLIMIT -> IPV6_RECVHOPLIMIT.
658
 */
659
#ifndef IPV6_RECVHOPLIMIT
660
#define IPV6_RECVHOPLIMIT IPV6_HOPLIMIT
661
#endif
662 dcc60494 Ondrej Zajicek
663 70e212f9 Ondrej Zajicek
664 05476c4d Ondrej Zajicek
#define CMSG6_SPACE_PKTINFO CMSG_SPACE(sizeof(struct in6_pktinfo))
665
#define CMSG6_SPACE_TTL CMSG_SPACE(sizeof(int))
666 bed41728 Ondrej Zajicek
667 05476c4d Ondrej Zajicek
static inline int
668
sk_request_cmsg6_pktinfo(sock *s)
669
{
670
  int y = 1;
671 70e212f9 Ondrej Zajicek
672 05476c4d Ondrej Zajicek
  if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVPKTINFO, &y, sizeof(y)) < 0)
673
    ERR("IPV6_RECVPKTINFO");
674
675
  return 0;
676 bed41728 Ondrej Zajicek
}
677
678 05476c4d Ondrej Zajicek
static inline int
679
sk_request_cmsg6_ttl(sock *s)
680 bed41728 Ondrej Zajicek
{
681 05476c4d Ondrej Zajicek
  int y = 1;
682 bed41728 Ondrej Zajicek
683 05476c4d Ondrej Zajicek
  if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVHOPLIMIT, &y, sizeof(y)) < 0)
684
    ERR("IPV6_RECVHOPLIMIT");
685 70e212f9 Ondrej Zajicek
686 05476c4d Ondrej Zajicek
  return 0;
687
}
688 70e212f9 Ondrej Zajicek
689 05476c4d Ondrej Zajicek
static inline void
690
sk_process_cmsg6_pktinfo(sock *s, struct cmsghdr *cm)
691
{
692
  if (cm->cmsg_type == IPV6_PKTINFO)
693 70e212f9 Ondrej Zajicek
  {
694 05476c4d Ondrej Zajicek
    struct in6_pktinfo *pi = (struct in6_pktinfo *) CMSG_DATA(cm);
695
    s->laddr = ipa_from_in6(pi->ipi6_addr);
696
    s->lifindex = pi->ipi6_ifindex;
697 70e212f9 Ondrej Zajicek
  }
698 05476c4d Ondrej Zajicek
}
699 70e212f9 Ondrej Zajicek
700 05476c4d Ondrej Zajicek
static inline void
701
sk_process_cmsg6_ttl(sock *s, struct cmsghdr *cm)
702
{
703
  if (cm->cmsg_type == IPV6_HOPLIMIT)
704
    s->rcv_ttl = * (int *) CMSG_DATA(cm);
705 bed41728 Ondrej Zajicek
}
706
707 05476c4d Ondrej Zajicek
static inline void
708
sk_prepare_cmsgs6(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen)
709 bed41728 Ondrej Zajicek
{
710
  struct cmsghdr *cm;
711
  struct in6_pktinfo *pi;
712 8945f73d Ondrej Zajicek
  int controllen = 0;
713 bed41728 Ondrej Zajicek
714
  msg->msg_control = cbuf;
715
  msg->msg_controllen = cbuflen;
716
717
  cm = CMSG_FIRSTHDR(msg);
718 48e5f32d Ondrej Zajicek
  cm->cmsg_level = SOL_IPV6;
719 bed41728 Ondrej Zajicek
  cm->cmsg_type = IPV6_PKTINFO;
720
  cm->cmsg_len = CMSG_LEN(sizeof(*pi));
721 8945f73d Ondrej Zajicek
  controllen += CMSG_SPACE(sizeof(*pi));
722 bed41728 Ondrej Zajicek
723
  pi = (struct in6_pktinfo *) CMSG_DATA(cm);
724
  pi->ipi6_ifindex = s->iface ? s->iface->index : 0;
725 05476c4d Ondrej Zajicek
  pi->ipi6_addr = ipa_to_in6(s->saddr);
726 bed41728 Ondrej Zajicek
727 8945f73d Ondrej Zajicek
  msg->msg_controllen = controllen;
728 bed41728 Ondrej Zajicek
}
729 48e5f32d Ondrej Zajicek
730 bed41728 Ondrej Zajicek
731 05476c4d Ondrej Zajicek
/*
732
 *        Miscellaneous socket syscalls
733
 */
734
735
static inline int
736
sk_set_ttl4(sock *s, int ttl)
737 a39b165e Ondrej Zajicek
{
738 05476c4d Ondrej Zajicek
  if (setsockopt(s->fd, SOL_IP, IP_TTL, &ttl, sizeof(ttl)) < 0)
739
    ERR("IP_TTL");
740
741
  return 0;
742 a39b165e Ondrej Zajicek
}
743
744 05476c4d Ondrej Zajicek
static inline int
745
sk_set_ttl6(sock *s, int ttl)
746
{
747
  if (setsockopt(s->fd, SOL_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) < 0)
748
    ERR("IPV6_UNICAST_HOPS");
749 38a608c5 Martin Mares
750 05476c4d Ondrej Zajicek
  return 0;
751
}
752
753
static inline int
754
sk_set_tos4(sock *s, int tos)
755 b5d9ee5c Martin Mares
{
756 05476c4d Ondrej Zajicek
  if (setsockopt(s->fd, SOL_IP, IP_TOS, &tos, sizeof(tos)) < 0)
757
    ERR("IP_TOS");
758 b5d9ee5c Martin Mares
759 05476c4d Ondrej Zajicek
  return 0;
760
}
761 ef4a50be Ondrej Zajicek
762 05476c4d Ondrej Zajicek
static inline int
763
sk_set_tos6(sock *s, int tos)
764
{
765
  if (setsockopt(s->fd, SOL_IPV6, IPV6_TCLASS, &tos, sizeof(tos)) < 0)
766
    ERR("IPV6_TCLASS");
767 48e5f32d Ondrej Zajicek
768 05476c4d Ondrej Zajicek
  return 0;
769
}
770 48e5f32d Ondrej Zajicek
771 b867a87c Ondrej Zajicek
static inline int
772
sk_set_high_port(sock *s)
773
{
774
  /* Port range setting is optional, ignore it if not supported */
775
776
#ifdef IP_PORTRANGE
777
  if (sk_is_ipv4(s))
778
  {
779
    int range = IP_PORTRANGE_HIGH;
780
    if (setsockopt(s->fd, SOL_IP, IP_PORTRANGE, &range, sizeof(range)) < 0)
781
      ERR("IP_PORTRANGE");
782
  }
783
#endif
784
785
#ifdef IPV6_PORTRANGE
786
  if (sk_is_ipv6(s))
787
  {
788
    int range = IPV6_PORTRANGE_HIGH;
789
    if (setsockopt(s->fd, SOL_IPV6, IPV6_PORTRANGE, &range, sizeof(range)) < 0)
790
      ERR("IPV6_PORTRANGE");
791
  }
792
#endif
793
794
  return 0;
795
}
796
797 88a183c6 Ondrej Zajicek
/*
 * Skip the IPv4 header at the start of packet @pkt of length *@len.
 *
 * Returns a pointer just behind the header and reduces *@len by the
 * header length. NULL is returned (and *@len kept) when the packet is
 * shorter than 20 bytes, the version nibble is not 4, or the header
 * length field is below 20 bytes or exceeds the packet length.
 */
static inline byte *
sk_skip_ip_header(byte *pkt, int *len)
{
  int total = *len;

  if (total < 20)
    return NULL;

  /* High nibble of the first byte holds the IP version */
  if ((pkt[0] & 0xf0) != 0x40)
    return NULL;

  /* Low nibble holds the header length in 32-bit words */
  int hlen = (pkt[0] & 0x0f) * 4;
  if ((hlen < 20) || (hlen > total))
    return NULL;

  *len = total - hlen;
  return pkt + hlen;
}
810
811
byte *
812
sk_rx_buffer(sock *s, int *len)
813
{
814
  if (sk_is_ipv4(s) && (s->type == SK_IP))
815
    return sk_skip_ip_header(s->rbuf, len);
816
  else
817
    return s->rbuf;
818
}
819
820 48e5f32d Ondrej Zajicek
821 05476c4d Ondrej Zajicek
/*
822
 *        Public socket functions
823
 */
824 48e5f32d Ondrej Zajicek
825 05476c4d Ondrej Zajicek
/**
826
 * sk_setup_multicast - enable multicast for given socket
827
 * @s: socket
828
 *
829
 * Prepare transmission of multicast packets for given datagram socket.
830
 * The socket must have defined @iface.
831
 *
832
 * Result: 0 for success, -1 for an error.
833
 */
834 48e5f32d Ondrej Zajicek
835 05476c4d Ondrej Zajicek
int
836
sk_setup_multicast(sock *s)
837
{
838
  ASSERT(s->iface);
839 48e5f32d Ondrej Zajicek
840 05476c4d Ondrej Zajicek
  if (sk_is_ipv4(s))
841
    return sk_setup_multicast4(s);
842
  else
843
    return sk_setup_multicast6(s);
844
}
845 48e5f32d Ondrej Zajicek
846 05476c4d Ondrej Zajicek
/**
847
 * sk_join_group - join multicast group for given socket
848
 * @s: socket
849
 * @maddr: multicast address
850
 *
851
 * Join multicast group for given datagram socket and associated interface.
852
 * The socket must have defined @iface.
853
 *
854
 * Result: 0 for success, -1 for an error.
855
 */
856 789772ed Ondrej Zajicek
857 05476c4d Ondrej Zajicek
int
858
sk_join_group(sock *s, ip_addr maddr)
859
{
860
  if (sk_is_ipv4(s))
861
    return sk_join_group4(s, maddr);
862
  else
863
    return sk_join_group6(s, maddr);
864
}
865 ef4a50be Ondrej Zajicek
866 05476c4d Ondrej Zajicek
/**
867
 * sk_leave_group - leave multicast group for given socket
868
 * @s: socket
869
 * @maddr: multicast address
870
 *
871
 * Leave multicast group for given datagram socket and associated interface.
872
 * The socket must have defined @iface.
873
 *
874
 * Result: 0 for success, -1 for an error.
875
 */
876 789772ed Ondrej Zajicek
877 05476c4d Ondrej Zajicek
int
878
sk_leave_group(sock *s, ip_addr maddr)
879
{
880
  if (sk_is_ipv4(s))
881
    return sk_leave_group4(s, maddr);
882
  else
883
    return sk_leave_group6(s, maddr);
884 b5d9ee5c Martin Mares
}
885
886 a39b165e Ondrej Zajicek
/**
887 05476c4d Ondrej Zajicek
 * sk_setup_broadcast - enable broadcast for given socket
888
 * @s: socket
889
 *
890
 * Allow reception and transmission of broadcast packets for given datagram
891
 * socket. The socket must have defined @iface. For transmission, packets should
892
 * be send to @brd address of @iface.
893
 *
894
 * Result: 0 for success, -1 for an error.
895
 */
896
897
int
898
sk_setup_broadcast(sock *s)
899
{
900
  int y = 1;
901
902
  if (setsockopt(s->fd, SOL_SOCKET, SO_BROADCAST, &y, sizeof(y)) < 0)
903
    ERR("SO_BROADCAST");
904
905
  return 0;
906
}
907
908
/**
909
 * sk_set_ttl - set transmit TTL for given socket
910 a39b165e Ondrej Zajicek
 * @s: socket
911
 * @ttl: TTL value
912
 *
913 05476c4d Ondrej Zajicek
 * Set TTL for already opened connections when TTL was not set before. Useful
914
 * for accepted connections when different ones should have different TTL.
915 a39b165e Ondrej Zajicek
 *
916
 * Result: 0 for success, -1 for an error.
917
 */
918
919
int
920
sk_set_ttl(sock *s, int ttl)
921
{
922
  s->ttl = ttl;
923
924 05476c4d Ondrej Zajicek
  if (sk_is_ipv4(s))
925
    return sk_set_ttl4(s, ttl);
926
  else
927
    return sk_set_ttl6(s, ttl);
928 a39b165e Ondrej Zajicek
}
929
930 b1b19433 Ondrej Zajicek
/**
931 05476c4d Ondrej Zajicek
 * sk_set_min_ttl - set minimal accepted TTL for given socket
932 b1b19433 Ondrej Zajicek
 * @s: socket
933
 * @ttl: TTL value
934
 *
935 05476c4d Ondrej Zajicek
 * Set minimal accepted TTL for given socket. Can be used for TTL security.
936
 * implementations.
937 b1b19433 Ondrej Zajicek
 *
938
 * Result: 0 for success, -1 for an error.
939
 */
940
941
int
942
sk_set_min_ttl(sock *s, int ttl)
943
{
944 05476c4d Ondrej Zajicek
  if (sk_is_ipv4(s))
945
    return sk_set_min_ttl4(s, ttl);
946
  else
947
    return sk_set_min_ttl6(s, ttl);
948 b1b19433 Ondrej Zajicek
}
949 d51aa281 Ondrej Zajicek
950 05476c4d Ondrej Zajicek
#if 0
951 d51aa281 Ondrej Zajicek
/**
952 05476c4d Ondrej Zajicek
 * sk_set_md5_auth - add / remove MD5 security association for given socket
953 d51aa281 Ondrej Zajicek
 * @s: socket
954
 * @a: IP address of the other side
955 eb1451a3 Ondrej Zajicek
 * @ifa: Interface for link-local IP address
956 d51aa281 Ondrej Zajicek
 * @passwd: password used for MD5 authentication
957
 *
958 05476c4d Ondrej Zajicek
 * In TCP MD5 handling code in kernel, there is a set of pairs (address,
959
 * password) used to choose password according to address of the other side.
960
 * This function is useful for listening socket, for active sockets it is enough
961
 * to set s->password field.
962 d51aa281 Ondrej Zajicek
 *
963
 * When called with passwd != NULL, the new pair is added,
964
 * When called with passwd == NULL, the existing pair is removed.
965
 *
966
 * Result: 0 for success, -1 for an error.
967
 */
968

969
int
970 eb1451a3 Ondrej Zajicek
sk_set_md5_auth(sock *s, ip_addr a, struct iface *ifa, char *passwd)
971 05476c4d Ondrej Zajicek
{ DUMMY; }
972
#endif
973 f9c799a0 Ondrej Zajicek
974 05476c4d Ondrej Zajicek
/**
975
 * sk_set_ipv6_checksum - specify IPv6 checksum offset for given socket
976
 * @s: socket
977
 * @offset: offset
978
 *
979
 * Specify IPv6 checksum field offset for given raw IPv6 socket. After that, the
980
 * kernel will automatically fill it for outgoing packets and check it for
981
 * incoming packets. Should not be used on ICMPv6 sockets, where the position is
982
 * known to the kernel.
983
 *
984
 * Result: 0 for success, -1 for an error.
985
 */
986 f9c799a0 Ondrej Zajicek
987
int
988 4ac7c834 Ondrej Zajicek
sk_set_ipv6_checksum(sock *s, int offset)
989
{
990 48e5f32d Ondrej Zajicek
  if (setsockopt(s->fd, SOL_IPV6, IPV6_CHECKSUM, &offset, sizeof(offset)) < 0)
991 05476c4d Ondrej Zajicek
    ERR("IPV6_CHECKSUM");
992 4ac7c834 Ondrej Zajicek
993
  return 0;
994
}
995
996
int
997 05476c4d Ondrej Zajicek
sk_set_icmp6_filter(sock *s, int p1, int p2)
998 93e868c7 Ondrej Zajicek
{
999
  /* a bit of lame interface, but it is here only for Radv */
1000
  struct icmp6_filter f;
1001
1002
  ICMP6_FILTER_SETBLOCKALL(&f);
1003
  ICMP6_FILTER_SETPASS(p1, &f);
1004
  ICMP6_FILTER_SETPASS(p2, &f);
1005
1006 48e5f32d Ondrej Zajicek
  if (setsockopt(s->fd, SOL_ICMPV6, ICMP6_FILTER, &f, sizeof(f)) < 0)
1007 05476c4d Ondrej Zajicek
    ERR("ICMP6_FILTER");
1008 93e868c7 Ondrej Zajicek
1009
  return 0;
1010
}
1011
1012 05476c4d Ondrej Zajicek
void
1013
sk_log_error(sock *s, const char *p)
1014
{
1015
  log(L_ERR "%s: Socket error: %s%#m", p, s->err);
1016
}
1017
1018
1019
/*
1020
 *        Actual struct birdsock code
1021
 */
1022
1023
/* List of sockets registered via sk_insert(); walked by sk_dump_all() */
static list sock_list;
1024
/* Socket pointers kept valid across deletion: sk_free() moves each of them to
   the following socket when the socket it points to is being freed */
static struct birdsock *current_sock;
1025
static struct birdsock *stored_sock;
1026
/* Set to 1 whenever a socket is inserted into or removed from sock_list */
static int sock_recalc_fdsets_p;
1027
1028
/*
 * Return the socket following @s in its list, or NULL when the following
 * node has no successor of its own (i.e. @s is the last real entry).
 */
static inline sock *
sk_next(sock *s)
{
  node *succ = s->n.next;

  return succ->next ? SKIP_BACK(sock, n, succ) : NULL;
}
1036
1037
static void
1038
sk_alloc_bufs(sock *s)
1039
{
1040
  if (!s->rbuf && s->rbsize)
1041
    s->rbuf = s->rbuf_alloc = xmalloc(s->rbsize);
1042
  s->rpos = s->rbuf;
1043
  if (!s->tbuf && s->tbsize)
1044
    s->tbuf = s->tbuf_alloc = xmalloc(s->tbsize);
1045
  s->tpos = s->ttx = s->tbuf;
1046
}
1047
1048
static void
1049
sk_free_bufs(sock *s)
1050
{
1051
  if (s->rbuf_alloc)
1052
  {
1053
    xfree(s->rbuf_alloc);
1054
    s->rbuf = s->rbuf_alloc = NULL;
1055
  }
1056
  if (s->tbuf_alloc)
1057
  {
1058
    xfree(s->tbuf_alloc);
1059
    s->tbuf = s->tbuf_alloc = NULL;
1060
  }
1061
}
1062
1063
static void
1064
sk_free(resource *r)
1065
{
1066
  sock *s = (sock *) r;
1067
1068
  sk_free_bufs(s);
1069
  if (s->fd >= 0)
1070
  {
1071
    close(s->fd);
1072
1073
    /* FIXME: we should call sk_stop() for SKF_THREAD sockets */
1074
    if (s->flags & SKF_THREAD)
1075
      return;
1076
1077
    if (s == current_sock)
1078
      current_sock = sk_next(s);
1079
    if (s == stored_sock)
1080
      stored_sock = sk_next(s);
1081
    rem_node(&s->n);
1082
    sock_recalc_fdsets_p = 1;
1083
  }
1084
}
1085
1086
void
1087
sk_set_rbsize(sock *s, uint val)
1088
{
1089
  ASSERT(s->rbuf_alloc == s->rbuf);
1090
1091
  if (s->rbsize == val)
1092
    return;
1093
1094
  s->rbsize = val;
1095
  xfree(s->rbuf_alloc);
1096
  s->rbuf_alloc = xmalloc(val);
1097
  s->rpos = s->rbuf = s->rbuf_alloc;
1098
}
1099
1100
void
1101
sk_set_tbsize(sock *s, uint val)
1102
{
1103
  ASSERT(s->tbuf_alloc == s->tbuf);
1104
1105
  if (s->tbsize == val)
1106
    return;
1107
1108
  byte *old_tbuf = s->tbuf;
1109
1110
  s->tbsize = val;
1111
  s->tbuf = s->tbuf_alloc = xrealloc(s->tbuf_alloc, val);
1112
  s->tpos = s->tbuf + (s->tpos - old_tbuf);
1113
  s->ttx  = s->tbuf + (s->ttx  - old_tbuf);
1114
}
1115
1116
void
1117
sk_set_tbuf(sock *s, void *tbuf)
1118
{
1119
  s->tbuf = tbuf ?: s->tbuf_alloc;
1120
  s->ttx = s->tpos = s->tbuf;
1121
}
1122
1123
void
1124
sk_reallocate(sock *s)
1125
{
1126
  sk_free_bufs(s);
1127
  sk_alloc_bufs(s);
1128
}
1129
1130
static void
1131
sk_dump(resource *r)
1132
{
1133
  sock *s = (sock *) r;
1134
  static char *sk_type_names[] = { "TCP<", "TCP>", "TCP", "UDP", NULL, "IP", NULL, "MAGIC", "UNIX<", "UNIX", "DEL!" };
1135
1136 af454f9b Ondrej Zajicek
  debug("(%s, ud=%p, sa=%I, sp=%d, da=%I, dp=%d, tos=%d, ttl=%d, if=%s)\n",
1137 05476c4d Ondrej Zajicek
        sk_type_names[s->type],
1138
        s->data,
1139
        s->saddr,
1140
        s->sport,
1141
        s->daddr,
1142
        s->dport,
1143
        s->tos,
1144
        s->ttl,
1145
        s->iface ? s->iface->name : "none");
1146
}
1147
1148
/* Resource class describing sockets: class name, instance size, the free and
   dump hooks defined above; the remaining two hooks are left unset */
static struct resclass sk_class = {
1149
  "Socket",
1150
  sizeof(sock),
1151
  sk_free,
1152
  sk_dump,
1153
  NULL,
1154
  NULL
1155
};
1156
1157
/**
1158
 * sk_new - create a socket
1159
 * @p: pool
1160
 *
1161
 * This function creates a new socket resource. If you want to use it,
1162
 * you need to fill in all the required fields of the structure and
1163
 * call sk_open() to do the actual opening of the socket.
1164
 *
1165
 * The real function name is sock_new(), sk_new() is a macro wrapper
1166
 * to avoid collision with OpenSSL.
1167
 */
1168
sock *
1169
sock_new(pool *p)
1170
{
1171
  sock *s = ralloc(p, &sk_class);
1172
  s->pool = p;
1173
  // s->saddr = s->daddr = IPA_NONE;
1174
  s->tos = s->priority = s->ttl = -1;
1175
  s->fd = -1;
1176
  return s;
1177
}
1178
1179
static int
1180
sk_setup(sock *s)
1181 f9c799a0 Ondrej Zajicek
{
1182 05476c4d Ondrej Zajicek
  int y = 1;
1183
  int fd = s->fd;
1184 f9c799a0 Ondrej Zajicek
1185 05476c4d Ondrej Zajicek
  if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
1186
    ERR("O_NONBLOCK");
1187 f9c799a0 Ondrej Zajicek
1188 6ffa8f53 Jan Moskyto Matejka
  if (!s->fam)
1189 05476c4d Ondrej Zajicek
    return 0;
1190 f9c799a0 Ondrej Zajicek
1191 05476c4d Ondrej Zajicek
  if (ipa_nonzero(s->saddr) && !(s->flags & SKF_BIND))
1192
    s->flags |= SKF_PKTINFO;
1193 f9c799a0 Ondrej Zajicek
1194 05476c4d Ondrej Zajicek
#ifdef CONFIG_USE_HDRINCL
1195
  if (sk_is_ipv4(s) && (s->type == SK_IP) && (s->flags & SKF_PKTINFO))
1196
  {
1197
    s->flags &= ~SKF_PKTINFO;
1198
    s->flags |= SKF_HDRINCL;
1199
    if (setsockopt(fd, SOL_IP, IP_HDRINCL, &y, sizeof(y)) < 0)
1200
      ERR("IP_HDRINCL");
1201
  }
1202 48e5f32d Ondrej Zajicek
#endif
1203
1204 05476c4d Ondrej Zajicek
  if (s->iface)
1205
  {
1206
#ifdef SO_BINDTODEVICE
1207 966ca614 Pavel Tvrdík
    struct ifreq ifr = {};
1208 05476c4d Ondrej Zajicek
    strcpy(ifr.ifr_name, s->iface->name);
1209
    if (setsockopt(s->fd, SOL_SOCKET, SO_BINDTODEVICE, &ifr, sizeof(ifr)) < 0)
1210
      ERR("SO_BINDTODEVICE");
1211
#endif
1212 f1aceff5 Ondrej Zajicek
1213 05476c4d Ondrej Zajicek
#ifdef CONFIG_UNIX_DONTROUTE
1214
    if (setsockopt(s->fd, SOL_SOCKET, SO_DONTROUTE, &y, sizeof(y)) < 0)
1215
      ERR("SO_DONTROUTE");
1216
#endif
1217
  }
1218 f9c799a0 Ondrej Zajicek
1219 05476c4d Ondrej Zajicek
  if (s->priority >= 0)
1220
    if (sk_set_priority(s, s->priority) < 0)
1221 f9c799a0 Ondrej Zajicek
      return -1;
1222
1223 05476c4d Ondrej Zajicek
  if (sk_is_ipv4(s))
1224
  {
1225
    if (s->flags & SKF_LADDR_RX)
1226
      if (sk_request_cmsg4_pktinfo(s) < 0)
1227
        return -1;
1228 f9c799a0 Ondrej Zajicek
1229 05476c4d Ondrej Zajicek
    if (s->flags & SKF_TTL_RX)
1230
      if (sk_request_cmsg4_ttl(s) < 0)
1231
        return -1;
1232 f9c799a0 Ondrej Zajicek
1233 05476c4d Ondrej Zajicek
    if ((s->type == SK_UDP) || (s->type == SK_IP))
1234
      if (sk_disable_mtu_disc4(s) < 0)
1235
        return -1;
1236 f9c799a0 Ondrej Zajicek
1237 05476c4d Ondrej Zajicek
    if (s->ttl >= 0)
1238
      if (sk_set_ttl4(s, s->ttl) < 0)
1239
        return -1;
1240 f9c799a0 Ondrej Zajicek
1241 05476c4d Ondrej Zajicek
    if (s->tos >= 0)
1242
      if (sk_set_tos4(s, s->tos) < 0)
1243
        return -1;
1244
  }
1245 f9c799a0 Ondrej Zajicek
1246 05476c4d Ondrej Zajicek
  if (sk_is_ipv6(s))
1247
  {
1248
    if (s->flags & SKF_V6ONLY)
1249
      if (setsockopt(fd, SOL_IPV6, IPV6_V6ONLY, &y, sizeof(y)) < 0)
1250
        ERR("IPV6_V6ONLY");
1251 f9c799a0 Ondrej Zajicek
1252 05476c4d Ondrej Zajicek
    if (s->flags & SKF_LADDR_RX)
1253
      if (sk_request_cmsg6_pktinfo(s) < 0)
1254
        return -1;
1255 f9c799a0 Ondrej Zajicek
1256 05476c4d Ondrej Zajicek
    if (s->flags & SKF_TTL_RX)
1257
      if (sk_request_cmsg6_ttl(s) < 0)
1258
        return -1;
1259 f9c799a0 Ondrej Zajicek
1260 05476c4d Ondrej Zajicek
    if ((s->type == SK_UDP) || (s->type == SK_IP))
1261
      if (sk_disable_mtu_disc6(s) < 0)
1262
        return -1;
1263 f9c799a0 Ondrej Zajicek
1264 05476c4d Ondrej Zajicek
    if (s->ttl >= 0)
1265
      if (sk_set_ttl6(s, s->ttl) < 0)
1266
        return -1;
1267 f9c799a0 Ondrej Zajicek
1268 05476c4d Ondrej Zajicek
    if (s->tos >= 0)
1269
      if (sk_set_tos6(s, s->tos) < 0)
1270
        return -1;
1271
  }
1272 f9c799a0 Ondrej Zajicek
1273
  return 0;
1274
}
1275
1276 05476c4d Ondrej Zajicek
static void
1277
sk_insert(sock *s)
1278 f9c799a0 Ondrej Zajicek
{
1279 05476c4d Ondrej Zajicek
  add_tail(&sock_list, &s->n);
1280
  sock_recalc_fdsets_p = 1;
1281 f9c799a0 Ondrej Zajicek
}
1282
1283 b5d9ee5c Martin Mares
static void
1284
sk_tcp_connected(sock *s)
1285
{
1286 05476c4d Ondrej Zajicek
  sockaddr sa;
1287
  int sa_len = sizeof(sa);
1288
1289
  if ((getsockname(s->fd, &sa.sa, &sa_len) < 0) ||
1290 6ffa8f53 Jan Moskyto Matejka
      (sockaddr_read(&sa, fam_to_af[s->fam], &s->saddr, &s->iface, &s->sport) < 0))
1291 05476c4d Ondrej Zajicek
    log(L_WARN "SOCK: Cannot get local IP address for TCP>");
1292 9be9a264 Ondrej Zajicek
1293 b5d9ee5c Martin Mares
  s->type = SK_TCP;
1294
  sk_alloc_bufs(s);
1295 320f4173 Martin Mares
  s->tx_hook(s);
1296 b5d9ee5c Martin Mares
}
1297
1298 b93abffa Martin Mares
static int
1299 05476c4d Ondrej Zajicek
sk_passive_connected(sock *s, int type)
1300 b93abffa Martin Mares
{
1301 05476c4d Ondrej Zajicek
  sockaddr loc_sa, rem_sa;
1302
  int loc_sa_len = sizeof(loc_sa);
1303
  int rem_sa_len = sizeof(rem_sa);
1304 cf31112f Ondrej Zajicek
1305 05476c4d Ondrej Zajicek
  int fd = accept(s->fd, ((type == SK_TCP) ? &rem_sa.sa : NULL), &rem_sa_len);
1306
  if (fd < 0)
1307
  {
1308
    if ((errno != EINTR) && (errno != EAGAIN))
1309 c025b852 Ondrej Filip
      s->err_hook(s, errno);
1310 05476c4d Ondrej Zajicek
    return 0;
1311
  }
1312
1313
  sock *t = sk_new(s->pool);
1314
  t->type = type;
1315 6ffa8f53 Jan Moskyto Matejka
  t->fam = s->fam;
1316 d7661fbe Jan Moskyto Matejka
  t->fd = fd;
1317 05476c4d Ondrej Zajicek
  t->ttl = s->ttl;
1318
  t->tos = s->tos;
1319
  t->rbsize = s->rbsize;
1320
  t->tbsize = s->tbsize;
1321
1322
  if (type == SK_TCP)
1323
  {
1324
    if ((getsockname(fd, &loc_sa.sa, &loc_sa_len) < 0) ||
1325 6ffa8f53 Jan Moskyto Matejka
        (sockaddr_read(&loc_sa, fam_to_af[s->fam], &t->saddr, &t->iface, &t->sport) < 0))
1326 05476c4d Ondrej Zajicek
      log(L_WARN "SOCK: Cannot get local IP address for TCP<");
1327
1328 6ffa8f53 Jan Moskyto Matejka
    if (sockaddr_read(&rem_sa, fam_to_af[s->fam], &t->daddr, &t->iface, &t->dport) < 0)
1329 05476c4d Ondrej Zajicek
      log(L_WARN "SOCK: Cannot get remote IP address for TCP<");
1330
  }
1331
1332 3aed0a6f Ondrej Zajicek (work)
  if (fd >= FD_SETSIZE)
1333
  {
1334
    /* FIXME: Call err_hook instead ? */
1335
    log(L_ERR "SOCK: Incoming connection from %I%J (port %d) %s",
1336
        t->daddr, ipa_is_link_local(t->daddr) ? t->iface : NULL,
1337
        t->dport, "rejected due to FD_SETSIZE limit");
1338
    close(fd);
1339
    t->fd = -1;
1340
    rfree(t);
1341
    return 1;
1342
  }
1343
1344 05476c4d Ondrej Zajicek
  if (sk_setup(t) < 0)
1345
  {
1346
    /* FIXME: Call err_hook instead ? */
1347
    log(L_ERR "SOCK: Incoming connection: %s%#m", t->err);
1348
1349
    /* FIXME: handle it better in rfree() */
1350 9c89560e Ondrej Zajicek
    close(t->fd);
1351 05476c4d Ondrej Zajicek
    t->fd = -1;
1352
    rfree(t);
1353
    return 1;
1354
  }
1355
1356
  sk_insert(t);
1357
  sk_alloc_bufs(t);
1358
  s->rx_hook(t, 0);
1359
  return 1;
1360 b93abffa Martin Mares
}
1361
1362 525fa2c1 Martin Mares
/**
1363
 * sk_open - open a socket
1364
 * @s: socket
1365
 *
1366
 * This function takes a socket resource created by sk_new() and
1367
 * initialized by the user and binds a corresponding network connection
1368
 * to it.
1369
 *
1370
 * Result: 0 for success, -1 for an error.
1371
 */
1372 b5d9ee5c Martin Mares
int
1373
sk_open(sock *s)
1374
{
1375 05476c4d Ondrej Zajicek
  int fd = -1;
1376 48e5f32d Ondrej Zajicek
  int do_bind = 0;
1377
  int bind_port = 0;
1378
  ip_addr bind_addr = IPA_NONE;
1379
  sockaddr sa;
1380 b5d9ee5c Martin Mares
1381 48e5f32d Ondrej Zajicek
  switch (s->type)
1382 05476c4d Ondrej Zajicek
  {
1383
  case SK_TCP_ACTIVE:
1384
    s->ttx = "";                        /* Force s->ttx != s->tpos */
1385
    /* Fall thru */
1386
  case SK_TCP_PASSIVE:
1387 6ffa8f53 Jan Moskyto Matejka
    fd = socket(fam_to_af[s->fam], SOCK_STREAM, IPPROTO_TCP);
1388 05476c4d Ondrej Zajicek
    bind_port = s->sport;
1389
    bind_addr = s->saddr;
1390
    do_bind = bind_port || ipa_nonzero(bind_addr);
1391
    break;
1392 9c89560e Ondrej Zajicek
1393 05476c4d Ondrej Zajicek
  case SK_UDP:
1394 6ffa8f53 Jan Moskyto Matejka
    fd = socket(fam_to_af[s->fam], SOCK_DGRAM, IPPROTO_UDP);
1395 05476c4d Ondrej Zajicek
    bind_port = s->sport;
1396
    bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE;
1397
    do_bind = 1;
1398
    break;
1399
1400
  case SK_IP:
1401 6ffa8f53 Jan Moskyto Matejka
    fd = socket(fam_to_af[s->fam], SOCK_RAW, s->dport);
1402 05476c4d Ondrej Zajicek
    bind_port = 0;
1403
    bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE;
1404
    do_bind = ipa_nonzero(bind_addr);
1405
    break;
1406
1407
  case SK_MAGIC:
1408 6ffa8f53 Jan Moskyto Matejka
    s->fam = SK_FAM_NONE;
1409 05476c4d Ondrej Zajicek
    fd = s->fd;
1410
    break;
1411
1412
  default:
1413
    bug("sk_open() called for invalid sock type %d", s->type);
1414
  }
1415
1416 b5d9ee5c Martin Mares
  if (fd < 0)
1417 05476c4d Ondrej Zajicek
    ERR("socket");
1418
1419 338f85ca Ondrej Zajicek (work)
  if (fd >= FD_SETSIZE)
1420
    ERR2("FD_SETSIZE limit reached");
1421
1422 b5d9ee5c Martin Mares
  s->fd = fd;
1423
1424 05476c4d Ondrej Zajicek
  if (sk_setup(s) < 0)
1425
    goto err;
1426 38a608c5 Martin Mares
1427 48e5f32d Ondrej Zajicek
  if (do_bind)
1428 05476c4d Ondrej Zajicek
  {
1429
    if (bind_port)
1430 b5d9ee5c Martin Mares
    {
1431 05476c4d Ondrej Zajicek
      int y = 1;
1432
1433
      if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &y, sizeof(y)) < 0)
1434
        ERR2("SO_REUSEADDR");
1435 48e5f32d Ondrej Zajicek
1436 8931425d Ondrej Zajicek
#ifdef CONFIG_NO_IFACE_BIND
1437 05476c4d Ondrej Zajicek
      /* Workaround missing ability to bind to an iface */
1438
      if ((s->type == SK_UDP) && s->iface && ipa_zero(bind_addr))
1439
      {
1440
        if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &y, sizeof(y)) < 0)
1441
          ERR2("SO_REUSEPORT");
1442
      }
1443 8931425d Ondrej Zajicek
#endif
1444 b5d9ee5c Martin Mares
    }
1445 b867a87c Ondrej Zajicek
    else
1446
      if (s->flags & SKF_HIGH_PORT)
1447
        if (sk_set_high_port(s) < 0)
1448
          log(L_WARN "Socket error: %s%#m", s->err);
1449 48e5f32d Ondrej Zajicek
1450 6ffa8f53 Jan Moskyto Matejka
    sockaddr_fill(&sa, fam_to_af[s->fam], bind_addr, s->iface, bind_port);
1451 05476c4d Ondrej Zajicek
    if (bind(fd, &sa.sa, SA_LEN(sa)) < 0)
1452
      ERR2("bind");
1453
  }
1454 d51aa281 Ondrej Zajicek
1455
  if (s->password)
1456 05476c4d Ondrej Zajicek
    if (sk_set_md5_auth(s, s->daddr, s->iface, s->password) < 0)
1457
      goto err;
1458 d51aa281 Ondrej Zajicek
1459 48e5f32d Ondrej Zajicek
  switch (s->type)
1460 05476c4d Ondrej Zajicek
  {
1461
  case SK_TCP_ACTIVE:
1462 6ffa8f53 Jan Moskyto Matejka
    sockaddr_fill(&sa, fam_to_af[s->fam], s->daddr, s->iface, s->dport);
1463 05476c4d Ondrej Zajicek
    if (connect(fd, &sa.sa, SA_LEN(sa)) >= 0)
1464
      sk_tcp_connected(s);
1465
    else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS &&
1466
             errno != ECONNREFUSED && errno != EHOSTUNREACH && errno != ENETUNREACH)
1467
      ERR2("connect");
1468
    break;
1469
1470
  case SK_TCP_PASSIVE:
1471
    if (listen(fd, 8) < 0)
1472
      ERR2("listen");
1473
    break;
1474
1475
  case SK_MAGIC:
1476
    break;
1477
1478
  default:
1479
    sk_alloc_bufs(s);
1480
  }
1481 b5d9ee5c Martin Mares
1482 bf139664 Ondrej Zajicek
  if (!(s->flags & SKF_THREAD))
1483
    sk_insert(s);
1484 b5d9ee5c Martin Mares
  return 0;
1485
1486 05476c4d Ondrej Zajicek
err:
1487 b5d9ee5c Martin Mares
  close(fd);
1488
  s->fd = -1;
1489
  return -1;
1490
}
1491
1492 05476c4d Ondrej Zajicek
int
1493 b93abffa Martin Mares
sk_open_unix(sock *s, char *name)
1494
{
1495
  struct sockaddr_un sa;
1496 05476c4d Ondrej Zajicek
  int fd;
1497
1498
  /* We are sloppy during error (leak fd and not set s->err), but we die anyway */
1499 b93abffa Martin Mares
1500
  fd = socket(AF_UNIX, SOCK_STREAM, 0);
1501
  if (fd < 0)
1502 05476c4d Ondrej Zajicek
    return -1;
1503
1504
  if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
1505
    return -1;
1506 68fa95cf Ondrej Zajicek
1507 97e46d28 Ondrej Zajicek
  /* Path length checked in test_old_bird() */
1508 b93abffa Martin Mares
  sa.sun_family = AF_UNIX;
1509 97c6fa02 Ondrej Filip
  strcpy(sa.sun_path, name);
1510 05476c4d Ondrej Zajicek
1511 0b3bf4b1 Martin Mares
  if (bind(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) < 0)
1512 05476c4d Ondrej Zajicek
    return -1;
1513
1514
  if (listen(fd, 8) < 0)
1515
    return -1;
1516
1517
  s->fd = fd;
1518 38a608c5 Martin Mares
  sk_insert(s);
1519 05476c4d Ondrej Zajicek
  return 0;
1520
}
1521
1522
1523
/* Control-message buffer sizes used by sk_recvmsg() / sk_sendmsg(): the RX
   buffer has room for pktinfo plus TTL, the TX buffer for pktinfo only; each
   takes the larger of the IPv4 and IPv6 requirement */
#define CMSG_RX_SPACE MAX(CMSG4_SPACE_PKTINFO+CMSG4_SPACE_TTL, \
1524
                          CMSG6_SPACE_PKTINFO+CMSG6_SPACE_TTL)
1525
#define CMSG_TX_SPACE MAX(CMSG4_SPACE_PKTINFO,CMSG6_SPACE_PKTINFO)
1526
1527
static void
1528
sk_prepare_cmsgs(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen)
1529
{
1530
  if (sk_is_ipv4(s))
1531
    sk_prepare_cmsgs4(s, msg, cbuf, cbuflen);
1532
  else
1533
    sk_prepare_cmsgs6(s, msg, cbuf, cbuflen);
1534
}
1535
1536
static void
1537
sk_process_cmsgs(sock *s, struct msghdr *msg)
1538
{
1539
  struct cmsghdr *cm;
1540
1541
  s->laddr = IPA_NONE;
1542
  s->lifindex = 0;
1543
  s->rcv_ttl = -1;
1544
1545
  for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm))
1546
  {
1547
    if ((cm->cmsg_level == SOL_IP) && sk_is_ipv4(s))
1548
    {
1549
      sk_process_cmsg4_pktinfo(s, cm);
1550
      sk_process_cmsg4_ttl(s, cm);
1551
    }
1552 b93abffa Martin Mares
1553 05476c4d Ondrej Zajicek
    if ((cm->cmsg_level == SOL_IPV6) && sk_is_ipv6(s))
1554
    {
1555
      sk_process_cmsg6_pktinfo(s, cm);
1556
      sk_process_cmsg6_ttl(s, cm);
1557
    }
1558
  }
1559 b93abffa Martin Mares
}
1560
1561 48e5f32d Ondrej Zajicek
1562
static inline int
1563
sk_sendmsg(sock *s)
1564
{
1565
  struct iovec iov = {s->tbuf, s->tpos - s->tbuf};
1566
  byte cmsg_buf[CMSG_TX_SPACE];
1567 966ca614 Pavel Tvrdík
  bzero(cmsg_buf, sizeof(cmsg_buf));
1568
  sockaddr dst = {};
1569 48e5f32d Ondrej Zajicek
1570 6ffa8f53 Jan Moskyto Matejka
  sockaddr_fill(&dst, fam_to_af[s->fam], s->daddr, s->iface, s->dport);
1571 48e5f32d Ondrej Zajicek
1572
  struct msghdr msg = {
1573 05476c4d Ondrej Zajicek
    .msg_name = &dst.sa,
1574
    .msg_namelen = SA_LEN(dst),
1575 48e5f32d Ondrej Zajicek
    .msg_iov = &iov,
1576
    .msg_iovlen = 1
1577
  };
1578
1579
#ifdef CONFIG_USE_HDRINCL
1580
  byte hdr[20];
1581
  struct iovec iov2[2] = { {hdr, 20}, iov };
1582
1583
  if (s->flags & SKF_HDRINCL)
1584
  {
1585 05476c4d Ondrej Zajicek
    sk_prepare_ip_header(s, hdr, iov.iov_len);
1586 48e5f32d Ondrej Zajicek
    msg.msg_iov = iov2;
1587
    msg.msg_iovlen = 2;
1588
  }
1589
#endif
1590
1591
  if (s->flags & SKF_PKTINFO)
1592 05476c4d Ondrej Zajicek
    sk_prepare_cmsgs(s, &msg, cmsg_buf, sizeof(cmsg_buf));
1593 48e5f32d Ondrej Zajicek
1594
  return sendmsg(s->fd, &msg, 0);
1595
}
1596
1597
static inline int
1598
sk_recvmsg(sock *s)
1599
{
1600
  struct iovec iov = {s->rbuf, s->rbsize};
1601
  byte cmsg_buf[CMSG_RX_SPACE];
1602
  sockaddr src;
1603
1604
  struct msghdr msg = {
1605 05476c4d Ondrej Zajicek
    .msg_name = &src.sa,
1606
    .msg_namelen = sizeof(src), // XXXX ??
1607 48e5f32d Ondrej Zajicek
    .msg_iov = &iov,
1608
    .msg_iovlen = 1,
1609
    .msg_control = cmsg_buf,
1610
    .msg_controllen = sizeof(cmsg_buf),
1611
    .msg_flags = 0
1612
  };
1613
1614
  int rv = recvmsg(s->fd, &msg, 0);
1615
  if (rv < 0)
1616
    return rv;
1617
1618
  //ifdef IPV4
1619
  //  if (cf_type == SK_IP)
1620
  //    rv = ipv4_skip_header(pbuf, rv);
1621
  //endif
1622
1623 6ffa8f53 Jan Moskyto Matejka
  sockaddr_read(&src, fam_to_af[s->fam], &s->faddr, NULL, &s->fport);
1624 05476c4d Ondrej Zajicek
  sk_process_cmsgs(s, &msg);
1625 48e5f32d Ondrej Zajicek
1626
  if (msg.msg_flags & MSG_TRUNC)
1627
    s->flags |= SKF_TRUNCATED;
1628
  else
1629
    s->flags &= ~SKF_TRUNCATED;
1630
1631
  return rv;
1632
}
1633
1634
1635 353729f5 Ondrej Zajicek
/* Mark the transmit buffer as empty: both TX positions go back to its start */
static inline void
reset_tx_buffer(sock *s)
{
  s->tpos = s->tbuf;
  s->ttx = s->tbuf;
}
1636
1637 b5d9ee5c Martin Mares
static int
1638
sk_maybe_write(sock *s)
1639
{
1640
  int e;
1641
1642
  switch (s->type)
1643 05476c4d Ondrej Zajicek
  {
1644
  case SK_TCP:
1645
  case SK_MAGIC:
1646
  case SK_UNIX:
1647
    while (s->ttx != s->tpos)
1648 b5d9ee5c Martin Mares
    {
1649 05476c4d Ondrej Zajicek
      e = write(s->fd, s->ttx, s->tpos - s->ttx);
1650
1651
      if (e < 0)
1652
      {
1653
        if (errno != EINTR && errno != EAGAIN)
1654 b5d9ee5c Martin Mares
        {
1655 05476c4d Ondrej Zajicek
          reset_tx_buffer(s);
1656
          /* EPIPE is just a connection close notification during TX */
1657
          s->err_hook(s, (errno != EPIPE) ? errno : 0);
1658
          return -1;
1659 b5d9ee5c Martin Mares
        }
1660 05476c4d Ondrej Zajicek
        return 0;
1661
      }
1662
      s->ttx += e;
1663
    }
1664
    reset_tx_buffer(s);
1665
    return 1;
1666
1667
  case SK_UDP:
1668
  case SK_IP:
1669
    {
1670
      if (s->tbuf == s->tpos)
1671 b5d9ee5c Martin Mares
        return 1;
1672 05476c4d Ondrej Zajicek
1673
      e = sk_sendmsg(s);
1674
1675
      if (e < 0)
1676
      {
1677
        if (errno != EINTR && errno != EAGAIN)
1678
        {
1679
          reset_tx_buffer(s);
1680
          s->err_hook(s, errno);
1681
          return -1;
1682
        }
1683
1684
        if (!s->tx_hook)
1685
          reset_tx_buffer(s);
1686
        return 0;
1687 b5d9ee5c Martin Mares
      }
1688 05476c4d Ondrej Zajicek
      reset_tx_buffer(s);
1689
      return 1;
1690 b5d9ee5c Martin Mares
    }
1691 05476c4d Ondrej Zajicek
  default:
1692
    bug("sk_maybe_write: unknown socket type %d", s->type);
1693
  }
1694 b5d9ee5c Martin Mares
}
1695
1696 ea89da38 Ondrej Zajicek
int
1697
sk_rx_ready(sock *s)
1698
{
1699
  fd_set rd, wr;
1700
  struct timeval timo;
1701
  int rv;
1702
1703
  FD_ZERO(&rd);
1704
  FD_ZERO(&wr);
1705
  FD_SET(s->fd, &rd);
1706
1707
  timo.tv_sec = 0;
1708
  timo.tv_usec = 0;
1709
1710
 redo:
1711
  rv = select(s->fd+1, &rd, &wr, NULL, &timo);
1712 9c89560e Ondrej Zajicek
1713 ea89da38 Ondrej Zajicek
  if ((rv < 0) && (errno == EINTR || errno == EAGAIN))
1714
    goto redo;
1715
1716
  return rv;
1717
}
1718
1719 525fa2c1 Martin Mares
/**
1720
 * sk_send - send data to a socket
1721
 * @s: socket
1722
 * @len: number of bytes to send
1723
 *
1724
 * This function sends @len bytes of data prepared in the
1725
 * transmit buffer of the socket @s to the network connection.
1726
 * If the packet can be sent immediately, it does so and returns
1727
 * 1, else it queues the packet for later processing, returns 0
1728
 * and calls the @tx_hook of the socket when the tranmission
1729
 * takes place.
1730
 */
1731 b5d9ee5c Martin Mares
int
1732
sk_send(sock *s, unsigned len)
1733
{
1734
  s->ttx = s->tbuf;
1735
  s->tpos = s->tbuf + len;
1736
  return sk_maybe_write(s);
1737
}
1738
1739 525fa2c1 Martin Mares
/**
1740
 * sk_send_to - send data to a specific destination
1741
 * @s: socket
1742
 * @len: number of bytes to send
1743
 * @addr: IP address to send the packet to
1744
 * @port: port to send the packet to
1745
 *
1746 2e9b2421 Martin Mares
 * This is a sk_send() replacement for connection-less packet sockets
1747 525fa2c1 Martin Mares
 * which allows destination of the packet to be chosen dynamically.
1748 48e5f32d Ondrej Zajicek
 * Raw IP sockets should use 0 for @port.
1749 525fa2c1 Martin Mares
 */
1750 b5d9ee5c Martin Mares
int
1751
sk_send_to(sock *s, unsigned len, ip_addr addr, unsigned port)
1752
{
1753 353729f5 Ondrej Zajicek
  s->daddr = addr;
1754 48e5f32d Ondrej Zajicek
  if (port)
1755
    s->dport = port;
1756
1757 b5d9ee5c Martin Mares
  s->ttx = s->tbuf;
1758
  s->tpos = s->tbuf + len;
1759
  return sk_maybe_write(s);
1760
}
1761
1762 353729f5 Ondrej Zajicek
/*
1763
int
1764
sk_send_full(sock *s, unsigned len, struct iface *ifa,
1765
             ip_addr saddr, ip_addr daddr, unsigned dport)
1766
{
1767
  s->iface = ifa;
1768
  s->saddr = saddr;
1769
  s->daddr = daddr;
1770
  s->dport = dport;
1771
  s->ttx = s->tbuf;
1772
  s->tpos = s->tbuf + len;
1773
  return sk_maybe_write(s);
1774
}
1775
*/
1776
1777 6a8d3f1c Ondrej Zajicek
 /* sk_read() and sk_write() are called from BFD's event loop */
1778
1779
int
1780 b5d9ee5c Martin Mares
sk_read(sock *s)
1781
{
1782
  switch (s->type)
1783 05476c4d Ondrej Zajicek
  {
1784
  case SK_TCP_PASSIVE:
1785
    return sk_passive_connected(s, SK_TCP);
1786
1787
  case SK_UNIX_PASSIVE:
1788
    return sk_passive_connected(s, SK_UNIX);
1789
1790
  case SK_TCP:
1791
  case SK_UNIX:
1792 b5d9ee5c Martin Mares
    {
1793 05476c4d Ondrej Zajicek
      int c = read(s->fd, s->rpos, s->rbuf + s->rbsize - s->rpos);
1794
1795
      if (c < 0)
1796 b93abffa Martin Mares
      {
1797 05476c4d Ondrej Zajicek
        if (errno != EINTR && errno != EAGAIN)
1798
          s->err_hook(s, errno);
1799 b5d9ee5c Martin Mares
      }
1800 05476c4d Ondrej Zajicek
      else if (!c)
1801
        s->err_hook(s, 0);
1802
      else
1803 b5d9ee5c Martin Mares
      {
1804 05476c4d Ondrej Zajicek
        s->rpos += c;
1805
        if (s->rx_hook(s, s->rpos - s->rbuf))
1806
        {
1807
          /* We need to be careful since the socket could have been deleted by the hook */
1808
          if (current_sock == s)
1809
            s->rpos = s->rbuf;
1810
        }
1811
        return 1;
1812 b5d9ee5c Martin Mares
      }
1813 05476c4d Ondrej Zajicek
      return 0;
1814
    }
1815 353729f5 Ondrej Zajicek
1816 05476c4d Ondrej Zajicek
  case SK_MAGIC:
1817
    return s->rx_hook(s, 0);
1818 b5d9ee5c Martin Mares
1819 05476c4d Ondrej Zajicek
  default:
1820
    {
1821
      int e = sk_recvmsg(s);
1822 353729f5 Ondrej Zajicek
1823 05476c4d Ondrej Zajicek
      if (e < 0)
1824
      {
1825
        if (errno != EINTR && errno != EAGAIN)
1826
          s->err_hook(s, errno);
1827
        return 0;
1828 b5d9ee5c Martin Mares
      }
1829 05476c4d Ondrej Zajicek
1830
      s->rpos = s->rbuf + e;
1831
      s->rx_hook(s, e);
1832
      return 1;
1833 b5d9ee5c Martin Mares
    }
1834 05476c4d Ondrej Zajicek
  }
1835 b5d9ee5c Martin Mares
}
1836
1837 6a8d3f1c Ondrej Zajicek
int
1838 b5d9ee5c Martin Mares
sk_write(sock *s)
1839
{
1840 320f4173 Martin Mares
  switch (s->type)
1841 05476c4d Ondrej Zajicek
  {
1842
  case SK_TCP_ACTIVE:
1843 320f4173 Martin Mares
    {
1844 05476c4d Ondrej Zajicek
      sockaddr sa;
1845 6ffa8f53 Jan Moskyto Matejka
      sockaddr_fill(&sa, fam_to_af[s->fam], s->daddr, s->iface, s->dport);
1846 05476c4d Ondrej Zajicek
1847
      if (connect(s->fd, &sa.sa, SA_LEN(sa)) >= 0 || errno == EISCONN)
1848
        sk_tcp_connected(s);
1849
      else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS)
1850
        s->err_hook(s, errno);
1851 38a608c5 Martin Mares
      return 0;
1852 320f4173 Martin Mares
    }
1853 05476c4d Ondrej Zajicek
1854
  default:
1855
    if (s->ttx != s->tpos && sk_maybe_write(s) > 0)
1856
    {
1857
      if (s->tx_hook)
1858
        s->tx_hook(s);
1859
      return 1;
1860
    }
1861
    return 0;
1862
  }
1863 b5d9ee5c Martin Mares
}
1864
1865 70b90dde Jan Moskyto Matejka
/* Non-zero iff the address family of the socket is IPv4 */
int
sk_is_ipv4(sock *s)
{
  return (s->fam == SK_FAM_IPV4);
}
1867 70b90dde Jan Moskyto Matejka
1868
/* Non-zero iff the address family of the socket is IPv6 */
int
sk_is_ipv6(sock *s)
{
  return (s->fam == SK_FAM_IPV6);
}
1870 70b90dde Jan Moskyto Matejka
1871 b5d9ee5c Martin Mares
void
1872
sk_dump_all(void)
1873
{
1874
  node *n;
1875
  sock *s;
1876
1877
  debug("Open sockets:\n");
1878
  WALK_LIST(n, sock_list)
1879 05476c4d Ondrej Zajicek
  {
1880
    s = SKIP_BACK(sock, n, n);
1881
    debug("%p ", s);
1882
    sk_dump(&s->r);
1883
  }
1884 b5d9ee5c Martin Mares
  debug("\n");
1885
}
1886
1887
1888
/*
1889 8bcb5fb1 Ondrej Zajicek
 *        Internal event log and watchdog
1890
 */
1891
1892
/* Number of entries in the circular event log */
#define EVENT_LOG_LENGTH 32
1893
1894
/* One logged event: the hook and data pointers given to io_log_event(), the
   time it was logged and how long it took (0 until filled in) */
struct event_log_entry
1895
{
1896
  void *hook;
1897
  void *data;
1898
  btime timestamp;
1899
  btime duration;
1900
};
1901
1902
/* The circular log itself */
static struct event_log_entry event_log[EVENT_LOG_LENGTH];
1903
/* Entry whose duration is still to be filled in by io_update_time(), or NULL */
static struct event_log_entry *event_open;
1904
/* event_log_pos: index of the next entry to write; event_log_num: events
   logged since watchdog_start(); watchdog_active: set while alarm() is armed */
static int event_log_pos, event_log_num, watchdog_active;
1905
/* Time stamp taken by the latest io_update_time() */
static btime last_time;
1906
/* Value of last_time when the watchdog was last started */
static btime loop_time;
1907
1908
static void
1909
io_update_time(void)
1910
{
1911
  struct timespec ts;
1912
  int rv;
1913
1914
  if (!clock_monotonic_available)
1915
    return;
1916
1917
  /*
1918
   * This is third time-tracking procedure (after update_times() above and
1919
   * times_update() in BFD), dedicated to internal event log and latency
1920
   * tracking. Hopefully, we consolidate these sometimes.
1921
   */
1922
1923
  rv = clock_gettime(CLOCK_MONOTONIC, &ts);
1924
  if (rv < 0)
1925
    die("clock_gettime: %m");
1926
1927
  last_time = ((s64) ts.tv_sec S) + (ts.tv_nsec / 1000);
1928
1929
  if (event_open)
1930
  {
1931
    event_open->duration = last_time - event_open->timestamp;
1932
1933
    if (event_open->duration > config->latency_limit)
1934
      log(L_WARN "Event 0x%p 0x%p took %d ms",
1935
          event_open->hook, event_open->data, (int) (event_open->duration TO_MS));
1936
1937
    event_open = NULL;
1938
  }
1939
}
1940
1941
/**
1942
 * io_log_event - mark approaching event into event log
1943
 * @hook: event hook address
1944
 * @data: event data address
1945
 *
1946
 * Store info (hook, data, timestamp) about the following internal event into
1947
 * a circular event log (@event_log). When latency tracking is enabled, the log
1948
 * entry is kept open (in @event_open) so the duration can be filled later.
1949
 */
1950
void
1951
io_log_event(void *hook, void *data)
1952
{
1953
  if (config->latency_debug)
1954
    io_update_time();
1955
1956
  struct event_log_entry *en = event_log + event_log_pos;
1957
1958
  en->hook = hook;
1959
  en->data = data;
1960
  en->timestamp = last_time;
1961
  en->duration = 0;
1962
1963
  event_log_num++;
1964
  event_log_pos++;
1965
  event_log_pos %= EVENT_LOG_LENGTH;
1966
1967
  event_open = config->latency_debug ? en : NULL;
1968
}
1969
1970
static inline void
1971
io_close_event(void)
1972
{
1973
  if (event_open)
1974
    io_update_time();
1975
}
1976
1977
void
1978
io_log_dump(void)
1979
{
1980
  int i;
1981
1982
  log(L_DEBUG "Event log:");
1983
  for (i = 0; i < EVENT_LOG_LENGTH; i++)
1984
  {
1985
    struct event_log_entry *en = event_log + (event_log_pos + i) % EVENT_LOG_LENGTH;
1986
    if (en->hook)
1987
      log(L_DEBUG "  Event 0x%p 0x%p at %8d for %d ms", en->hook, en->data,
1988
          (int) ((last_time - en->timestamp) TO_MS), (int) (en->duration TO_MS));
1989
  }
1990
}
1991
1992
void
1993
watchdog_sigalrm(int sig UNUSED)
1994
{
1995
  /* Update last_time and duration, but skip latency check */
1996
  config->latency_limit = 0xffffffff;
1997
  io_update_time();
1998
1999
  /* We want core dump */
2000
  abort();
2001
}
2002
2003
static inline void
2004
watchdog_start1(void)
2005
{
2006
  io_update_time();
2007
2008
  loop_time = last_time;
2009
}
2010
2011
static inline void
2012
watchdog_start(void)
2013
{
2014
  io_update_time();
2015
2016
  loop_time = last_time;
2017
  event_log_num = 0;
2018
2019
  if (config->watchdog_timeout)
2020
  {
2021
    alarm(config->watchdog_timeout);
2022
    watchdog_active = 1;
2023
  }
2024
}
2025
2026
static inline void
2027
watchdog_stop(void)
2028
{
2029
  io_update_time();
2030
2031
  if (watchdog_active)
2032
  {
2033
    alarm(0);
2034
    watchdog_active = 0;
2035
  }
2036
2037
  btime duration = last_time - loop_time;
2038
  if (duration > config->watchdog_warning)
2039
    log(L_WARN "I/O loop cycle took %d ms for %d events",
2040
        (int) (duration TO_MS), event_log_num);
2041
}
2042
2043
2044
/*
 *        Main I/O Loop
 */
2047
2048 4c9dd1e4 Martin Mares
/*
 * Request flags polled by io_loop(): when one is non-zero, the loop calls the
 * matching async_*() function and then clears the flag.
 */
volatile int async_config_flag;                /* io_loop() calls async_config() */
volatile int async_dump_flag;                  /* io_loop() calls async_dump() */
2050
2051 b5d9ee5c Martin Mares
void
2052
io_init(void)
2053
{
2054
  init_list(&near_timers);
2055
  init_list(&far_timers);
2056
  init_list(&sock_list);
2057 e8f73195 Martin Mares
  init_list(&global_event_list);
2058 7e5f5ffd Martin Mares
  krt_io_init();
2059 fd91ae33 Ondrej Zajicek
  init_times();
2060
  update_times();
2061 a92cf57d Ondrej Zajicek
  boot_time = now;
2062 fd91ae33 Ondrej Zajicek
  srandom((int) now_real);
2063 b5d9ee5c Martin Mares
}
2064
2065 ea89da38 Ondrej Zajicek
/* Number of consecutive io_loop() passes that skipped the second socket sweep */
static int short_loops = 0;
/* The sweep is skipped only while short_loops stays below this bound */
#define SHORT_LOOP_MAX 10
2067
2068 b5d9ee5c Martin Mares
/*
 * io_loop - the main loop; it never returns (for(;;) without a break).
 *
 * Each pass: run the global event list, update the clocks, fire timers that
 * are due, refresh the read/write fd sets from sock_list, serve one pending
 * async_*_flag request, and finally select() on the sockets. If select()
 * reports activity, the sockets are served in two sweeps (see below).
 */
void
2069
io_loop(void)
2070
{
2071
  fd_set rd, wr;
2072
  struct timeval timo;
2073
  time_t tout;
2074 30770df2 Martin Mares
  int hi, events;
2075 b5d9ee5c Martin Mares
  sock *s;
2076 38a608c5 Martin Mares
  node *n;
2077 b5d9ee5c Martin Mares
2078 8bcb5fb1 Ondrej Zajicek
  watchdog_start1();
2079 38a608c5 Martin Mares
  /* Forces FD_ZERO of both sets on the first pass */
  sock_recalc_fdsets_p = 1;
2080 b5d9ee5c Martin Mares
  for(;;)
2081
    {
2082 30770df2 Martin Mares
      /* A non-zero result makes the select() below non-blocking */
      events = ev_run_list(&global_event_list);
2083 fd91ae33 Ondrej Zajicek
      update_times();
2084 b5d9ee5c Martin Mares
      tout = tm_first_shot();
2085
      /* The first timer is already due: fire timers and restart the pass */
      if (tout <= now)
2086
        {
2087
          tm_shot();
2088
          continue;
2089
        }
2090 a92cf57d Ondrej Zajicek
      /* Wait until the first timer, but never longer than 3 seconds */
      timo.tv_sec = events ? 0 : MIN(tout - now, 3);
2091 30770df2 Martin Mares
      timo.tv_usec = 0;
2092 b5d9ee5c Martin Mares
2093 8bcb5fb1 Ondrej Zajicek
      io_close_event();
2094
2095 38a608c5 Martin Mares
      if (sock_recalc_fdsets_p)
2096
        {
2097
          sock_recalc_fdsets_p = 0;
2098
          FD_ZERO(&rd);
2099
          FD_ZERO(&wr);
2100
        }
2101
2102 b5d9ee5c Martin Mares
      /* Until select() is called, hi is the highest fd placed in either set */
      hi = 0;
2103
      WALK_LIST(n, sock_list)
2104
        {
2105
          s = SKIP_BACK(sock, n, n);
2106
          /* Watch for readability only while the socket has an rx hook */
          if (s->rx_hook)
2107
            {
2108
              FD_SET(s->fd, &rd);
2109
              if (s->fd > hi)
2110
                hi = s->fd;
2111
            }
2112 38a608c5 Martin Mares
          else
2113
            FD_CLR(s->fd, &rd);
2114 b5d9ee5c Martin Mares
          /* Watch for writability only with a tx hook and ttx != tpos */
          if (s->tx_hook && s->ttx != s->tpos)
2115
            {
2116
              FD_SET(s->fd, &wr);
2117
              if (s->fd > hi)
2118
                hi = s->fd;
2119
            }
2120 38a608c5 Martin Mares
          else
2121
            FD_CLR(s->fd, &wr);
2122 b5d9ee5c Martin Mares
        }
2123
2124 4c9dd1e4 Martin Mares
      /*
2125
       * Yes, this is racy. But even if the signal comes before this test
2126
       * and entering select(), it gets caught on the next timer tick.
2127
       */
2128
2129
      /* At most one request is served per pass; each one restarts the pass */
      if (async_config_flag)
2130
        {
2131 8bcb5fb1 Ondrej Zajicek
          io_log_event(async_config, NULL);
2132 4c9dd1e4 Martin Mares
          async_config();
2133
          async_config_flag = 0;
2134 f4aabcee Martin Mares
          continue;
2135 4c9dd1e4 Martin Mares
        }
2136
      if (async_dump_flag)
2137
        {
2138 8bcb5fb1 Ondrej Zajicek
          io_log_event(async_dump, NULL);
2139 4c9dd1e4 Martin Mares
          async_dump();
2140
          async_dump_flag = 0;
2141 f4aabcee Martin Mares
          continue;
2142
        }
2143
      if (async_shutdown_flag)
2144
        {
2145 8bcb5fb1 Ondrej Zajicek
          io_log_event(async_shutdown, NULL);
2146 f4aabcee Martin Mares
          async_shutdown();
2147
          async_shutdown_flag = 0;
2148
          continue;
2149 4c9dd1e4 Martin Mares
        }
2150
2151
      /* And finally enter select() to find active sockets */
2152 8bcb5fb1 Ondrej Zajicek
      watchdog_stop();
2153 b5d9ee5c Martin Mares
      /* From here on hi holds the select() result, not the highest fd */
      hi = select(hi+1, &rd, &wr, NULL, &timo);
2154 8bcb5fb1 Ondrej Zajicek
      watchdog_start();
2155 ea89da38 Ondrej Zajicek
2156 b5d9ee5c Martin Mares
      if (hi < 0)
2157
        {
2158
          /* EINTR and EAGAIN just restart the pass; other errors are fatal */
          if (errno == EINTR || errno == EAGAIN)
2159
            continue;
2160
          die("select: %m");
2161
        }
2162
      if (hi)
2163
        {
2164 ea89da38 Ondrej Zajicek
          /* guaranteed to be non-empty */
2165
          current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
2166
2167 38a608c5 Martin Mares
          /*
           * First sweep: rx for sockets with type >= SK_MAGIC and tx for all
           * sockets, each limited to MAX_STEPS calls per socket.
           */
          while (current_sock)
2168 b5d9ee5c Martin Mares
            {
2169 38a608c5 Martin Mares
              sock *s = current_sock;
2170
              int e;
2171 ea89da38 Ondrej Zajicek
              int steps;
2172
2173
              steps = MAX_STEPS;
2174
              if ((s->type >= SK_MAGIC) && FD_ISSET(s->fd, &rd) && s->rx_hook)
2175 38a608c5 Martin Mares
                do
2176
                  {
2177 4323099d Ondrej Zajicek
                    steps--;
2178 8bcb5fb1 Ondrej Zajicek
                    io_log_event(s->rx_hook, s->data);
2179 38a608c5 Martin Mares
                    e = sk_read(s);
2180
                    /*
                     * current_sock changed during the call (presumably the
                     * socket was closed or removed -- TODO confirm); s is not
                     * touched again and the sweep continues from the new value.
                     */
                    if (s != current_sock)
2181
                      goto next;
2182
                  }
2183 4323099d Ondrej Zajicek
                while (e && s->rx_hook && steps);
2184
2185
              steps = MAX_STEPS;
2186 38a608c5 Martin Mares
              if (FD_ISSET(s->fd, &wr))
2187
                do
2188
                  {
2189 4323099d Ondrej Zajicek
                    steps--;
2190 8bcb5fb1 Ondrej Zajicek
                    io_log_event(s->tx_hook, s->data);
2191 38a608c5 Martin Mares
                    e = sk_write(s);
2192
                    if (s != current_sock)
2193
                      goto next;
2194
                  }
2195 4323099d Ondrej Zajicek
                while (e && steps);
2196 38a608c5 Martin Mares
              current_sock = sk_next(s);
2197
            next: ;
2198 b5d9ee5c Martin Mares
            }
2199 ea89da38 Ondrej Zajicek
2200
          /*
           * While ev_run_list() keeps returning non-zero, the second sweep is
           * skipped, but for fewer than SHORT_LOOP_MAX passes in a row.
           */
          short_loops++;
2201
          if (events && (short_loops < SHORT_LOOP_MAX))
2202
            continue;
2203
          short_loops = 0;
2204
2205
          /*
           * Second sweep: one read per readable socket with type < SK_MAGIC,
           * at most MAX_RX_STEPS reads in total. It resumes at stored_sock,
           * or at the head of sock_list when stored_sock is NULL.
           */
          int count = 0;
2206
          current_sock = stored_sock;
2207
          if (current_sock == NULL)
2208
            current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
2209
2210
          while (current_sock && count < MAX_RX_STEPS)
2211
            {
2212
              sock *s = current_sock;
2213 0479b443 Ondrej Zajicek
              int e UNUSED;
2214 ea89da38 Ondrej Zajicek
2215
              if ((s->type < SK_MAGIC) && FD_ISSET(s->fd, &rd) && s->rx_hook)
2216
                {
2217
                  count++;
2218 8bcb5fb1 Ondrej Zajicek
                  io_log_event(s->rx_hook, s->data);
2219 ea89da38 Ondrej Zajicek
                  e = sk_read(s);
2220
                  if (s != current_sock)
2221
                      goto next2;
2222
                }
2223
              current_sock = sk_next(s);
2224
            next2: ;
2225
            }
2226
2227
          /* Remember where this sweep stopped, for the next one to resume */
          stored_sock = current_sock;
2228 b5d9ee5c Martin Mares
        }
2229
    }
2230
}
2231 41c8976e Ondrej Filip
2232
/*
 * test_old_bird - refuse to continue if something accepts connections on @path
 * @path: filesystem path of the UNIX-domain stream socket to probe
 *
 * Tries to connect to @path. A successful connect is taken as proof that
 * another BIRD is running and ends in die(). A failed connect is not an
 * error; the probe socket is closed and the function returns. die() is also
 * called when the socket cannot be created or @path does not fit into
 * sun_path together with its terminating NUL.
 */
void
test_old_bird(char *path)
{
  struct sockaddr_un addr;
  int sk = socket(AF_UNIX, SOCK_STREAM, 0);

  if (sk < 0)
    die("Cannot create socket: %m");

  /* The length check is what makes the strcpy() below safe */
  if (strlen(path) >= sizeof(addr.sun_path))
    die("Socket path too long");

  bzero(&addr, sizeof(addr));
  addr.sun_family = AF_UNIX;
  strcpy(addr.sun_path, path);

  if (connect(sk, (struct sockaddr *) &addr, SUN_LEN(&addr)) == 0)
    die("I found another BIRD running.");

  close(sk);
}