Statistics
| Branch: | Revision:

iof-bird-daemon / sysdep / unix / io.c @ c8cafc8e

History | View | Annotate | Download (46.3 KB)

1 b5d9ee5c Martin Mares
/*
2
 *        BIRD Internet Routing Daemon -- Unix I/O
3
 *
4 38a608c5 Martin Mares
 *        (c) 1998--2004 Martin Mares <mj@ucw.cz>
5 b1a1faba Ondrej Filip
 *      (c) 2004       Ondrej Filip <feela@network.cz>
6 b5d9ee5c Martin Mares
 *
7
 *        Can be freely distributed and used under the terms of the GNU GPL.
8
 */
9
10 607d9914 Ondrej Zajicek
/* Unfortunately, some glibc versions hide parts of RFC 3542 API
11
   if _GNU_SOURCE is not defined. */
12 c8cafc8e Ondrej Zajicek (work)
#ifndef _GNU_SOURCE
13
#define _GNU_SOURCE
14
#endif
15 607d9914 Ondrej Zajicek
16 b5d9ee5c Martin Mares
#include <stdio.h>
17
#include <stdlib.h>
18 01b776e1 Martin Mares
#include <time.h>
19 b5d9ee5c Martin Mares
#include <sys/time.h>
20
#include <sys/types.h>
21
#include <sys/socket.h>
22 46a82e9c Ondrej Zajicek
#include <sys/uio.h>
23 b93abffa Martin Mares
#include <sys/un.h>
24 e1c13a5a Jan Moskyto Matejka
#include <poll.h>
25 b5d9ee5c Martin Mares
#include <unistd.h>
26 a0b176e3 Ondrej Zajicek
#include <fcntl.h>
27 b5d9ee5c Martin Mares
#include <errno.h>
28 05476c4d Ondrej Zajicek
#include <net/if.h>
29 d0e9b36d Ondrej Zajicek
#include <netinet/in.h>
30 48e5f32d Ondrej Zajicek
#include <netinet/tcp.h>
31
#include <netinet/udp.h>
32 93e868c7 Ondrej Zajicek
#include <netinet/icmp6.h>
33 b5d9ee5c Martin Mares
34
#include "nest/bird.h"
35
#include "lib/lists.h"
36
#include "lib/resource.h"
37
#include "lib/timer.h"
38
#include "lib/socket.h"
39 e8f73195 Martin Mares
#include "lib/event.h"
40 afa8937a Martin Mares
#include "lib/string.h"
41 b5d9ee5c Martin Mares
#include "nest/iface.h"
42
43
#include "lib/unix.h"
44 a2867cd9 Martin Mares
#include "lib/sysio.h"
45 b5d9ee5c Martin Mares
46 ea89da38 Ondrej Zajicek
/* Maximum number of calls of tx handler for one socket in one
47 e1c13a5a Jan Moskyto Matejka
 * poll iteration. Should be small enough to not monopolize CPU by
48 4323099d Ondrej Zajicek
 * one protocol instance.
49
 */
50
#define MAX_STEPS 4
51
52 e1c13a5a Jan Moskyto Matejka
/* Maximum number of calls of rx handler for all sockets in one poll
53 ea89da38 Ondrej Zajicek
   iteration. RX callbacks are often much more costly so we limit
54
   this to get small latencies */
55
#define MAX_RX_STEPS 4
56
57 b5d9ee5c Martin Mares
/*
58 a9c986f9 Martin Mares
 *        Tracked Files
59
 */
60
61
struct rfile {
62
  resource r;
63
  FILE *f;
64
};
65
66
static void
67
rf_free(resource *r)
68
{
69
  struct rfile *a = (struct rfile *) r;
70
71
  fclose(a->f);
72
}
73
74
static void
75
rf_dump(resource *r)
76
{
77
  struct rfile *a = (struct rfile *) r;
78
79
  debug("(FILE *%p)\n", a->f);
80
}
81
82
static struct resclass rf_class = {
83
  "FILE",
84
  sizeof(struct rfile),
85
  rf_free,
86 e81b440f Ondrej Zajicek
  rf_dump,
87 acb60628 Ondrej Zajicek
  NULL,
88 e81b440f Ondrej Zajicek
  NULL
89 a9c986f9 Martin Mares
};
90
91
void *
92 f78056fb Martin Mares
tracked_fopen(pool *p, char *name, char *mode)
93 a9c986f9 Martin Mares
{
94
  FILE *f = fopen(name, mode);
95
96
  if (f)
97
    {
98
      struct rfile *r = ralloc(p, &rf_class);
99
      r->f = f;
100
    }
101
  return f;
102
}
103
104 525fa2c1 Martin Mares
/**
105
 * DOC: Timers
106
 *
107
 * Timers are resources which represent a wish of a module to call
108
 * a function at the specified time. The platform dependent code
109 58f7d004 Martin Mares
 * doesn't guarantee exact timing, only that a timer function
110 525fa2c1 Martin Mares
 * won't be called before the requested time.
111
 *
112 fd91ae33 Ondrej Zajicek
 * In BIRD, time is represented by values of the &bird_clock_t type
113
 * which are integral numbers interpreted as a relative number of seconds since
114
 * some fixed time point in past. The current time can be read
115
 * from variable @now with reasonable accuracy and is monotonic. There is also
116
 * a current 'absolute' time in variable @now_real reported by OS.
117 525fa2c1 Martin Mares
 *
118
 * Each timer is described by a &timer structure containing a pointer
119
 * to the handler function (@hook), data private to this function (@data),
120
 * time the function should be called at (@expires, 0 for inactive timers),
121
 * for the other fields see |timer.h|.
122 b5d9ee5c Martin Mares
 */
123
124
#define NEAR_TIMER_LIMIT 4
125
126
static list near_timers, far_timers;
127
static bird_clock_t first_far_timer = TIME_INFINITY;
128
129 002b6423 Ondrej Zajicek
/* now must be different from 0, because 0 is a special value in timer->expires */
130 a92cf57d Ondrej Zajicek
bird_clock_t now = 1, now_real, boot_time;
131 fd91ae33 Ondrej Zajicek
132
static void
133
update_times_plain(void)
134
{
135
  bird_clock_t new_time = time(NULL);
136
  int delta = new_time - now_real;
137
138
  if ((delta >= 0) && (delta < 60))
139
    now += delta;
140
  else if (now_real != 0)
141
   log(L_WARN "Time jump, delta %d s", delta);
142
143
  now_real = new_time;
144
}
145
146
static void
147
update_times_gettime(void)
148
{
149
  struct timespec ts;
150
  int rv;
151
152
  rv = clock_gettime(CLOCK_MONOTONIC, &ts);
153
  if (rv != 0)
154
    die("clock_gettime: %m");
155
156
  if (ts.tv_sec != now) {
157
    if (ts.tv_sec < now)
158
      log(L_ERR "Monotonic timer is broken");
159
160
    now = ts.tv_sec;
161
    now_real = time(NULL);
162
  }
163
}
164
165
static int clock_monotonic_available;
166
167
static inline void
168
update_times(void)
169
{
170
  if (clock_monotonic_available)
171
    update_times_gettime();
172
  else
173
    update_times_plain();
174
}
175
176
static inline void
177
init_times(void)
178
{
179
 struct timespec ts;
180
 clock_monotonic_available = (clock_gettime(CLOCK_MONOTONIC, &ts) == 0);
181
 if (!clock_monotonic_available)
182
   log(L_WARN "Monotonic timer is missing");
183
}
184
185 b5d9ee5c Martin Mares
186
static void
187
tm_free(resource *r)
188
{
189
  timer *t = (timer *) r;
190
191
  tm_stop(t);
192
}
193
194
static void
195
tm_dump(resource *r)
196
{
197
  timer *t = (timer *) r;
198
199 e8f73195 Martin Mares
  debug("(code %p, data %p, ", t->hook, t->data);
200 af847acc Martin Mares
  if (t->randomize)
201
    debug("rand %d, ", t->randomize);
202
  if (t->recurrent)
203
    debug("recur %d, ", t->recurrent);
204 b5d9ee5c Martin Mares
  if (t->expires)
205
    debug("expires in %d sec)\n", t->expires - now);
206
  else
207
    debug("inactive)\n");
208
}
209
210
static struct resclass tm_class = {
211
  "Timer",
212
  sizeof(timer),
213
  tm_free,
214 e81b440f Ondrej Zajicek
  tm_dump,
215 acb60628 Ondrej Zajicek
  NULL,
216 e81b440f Ondrej Zajicek
  NULL
217 b5d9ee5c Martin Mares
};
218
219 525fa2c1 Martin Mares
/**
220
 * tm_new - create a timer
221
 * @p: pool
222
 *
223
 * This function creates a new timer resource and returns
224
 * a pointer to it. To use the timer, you need to fill in
225
 * the structure fields and call tm_start() to start timing.
226
 */
227 b5d9ee5c Martin Mares
timer *
228
tm_new(pool *p)
229
{
230
  timer *t = ralloc(p, &tm_class);
231
  return t;
232
}
233
234
static inline void
235
tm_insert_near(timer *t)
236
{
237
  node *n = HEAD(near_timers);
238
239
  while (n->next && (SKIP_BACK(timer, n, n)->expires < t->expires))
240
    n = n->next;
241
  insert_node(&t->n, n->prev);
242
}
243
244 525fa2c1 Martin Mares
/**
245
 * tm_start - start a timer
246
 * @t: timer
247
 * @after: number of seconds the timer should be run after
248
 *
249
 * This function schedules the hook function of the timer to
250
 * be called after @after seconds. If the timer has been already
251
 * started, its @expires time is replaced by the new value.
252
 *
253
 * If you have set the @randomize field of @t, the timeout
254
 * will be increased by a random number of seconds chosen
255
 * uniformly from range 0 .. @randomize.
256
 *
257
 * You can call tm_start() from the handler function of the timer
258
 * to request another run of the timer. Also, you can set the @recurrent
259
 * field to have the timer re-added automatically with the same timeout.
260
 */
261 b5d9ee5c Martin Mares
void
262
tm_start(timer *t, unsigned after)
263
{
264
  bird_clock_t when;
265
266
  if (t->randomize)
267 af847acc Martin Mares
    after += random() % (t->randomize + 1);
268 b5d9ee5c Martin Mares
  when = now + after;
269
  if (t->expires == when)
270
    return;
271
  if (t->expires)
272
    rem_node(&t->n);
273
  t->expires = when;
274
  if (after <= NEAR_TIMER_LIMIT)
275
    tm_insert_near(t);
276
  else
277
    {
278
      if (!first_far_timer || first_far_timer > when)
279
        first_far_timer = when;
280
      add_tail(&far_timers, &t->n);
281
    }
282
}
283
284 525fa2c1 Martin Mares
/**
285
 * tm_stop - stop a timer
286
 * @t: timer
287
 *
288
 * This function stops a timer. If the timer is already stopped,
289
 * nothing happens.
290
 */
291 b5d9ee5c Martin Mares
void
292
tm_stop(timer *t)
293
{
294
  if (t->expires)
295
    {
296
      rem_node(&t->n);
297
      t->expires = 0;
298
    }
299
}
300
301
static void
302
tm_dump_them(char *name, list *l)
303
{
304
  node *n;
305
  timer *t;
306
307
  debug("%s timers:\n", name);
308
  WALK_LIST(n, *l)
309
    {
310
      t = SKIP_BACK(timer, n, n);
311
      debug("%p ", t);
312
      tm_dump(&t->r);
313
    }
314
  debug("\n");
315
}
316
317
void
318
tm_dump_all(void)
319
{
320
  tm_dump_them("Near", &near_timers);
321
  tm_dump_them("Far", &far_timers);
322
}
323
324
static inline time_t
325
tm_first_shot(void)
326
{
327
  time_t x = first_far_timer;
328
329
  if (!EMPTY_LIST(near_timers))
330
    {
331
      timer *t = SKIP_BACK(timer, n, HEAD(near_timers));
332
      if (t->expires < x)
333
        x = t->expires;
334
    }
335
  return x;
336
}
337
338 8bcb5fb1 Ondrej Zajicek
void io_log_event(void *hook, void *data);
339
340 b5d9ee5c Martin Mares
static void
341
tm_shot(void)
342
{
343
  timer *t;
344
  node *n, *m;
345
346
  if (first_far_timer <= now)
347
    {
348 28a9a189 Martin Mares
      bird_clock_t limit = now + NEAR_TIMER_LIMIT;
349 b5d9ee5c Martin Mares
      first_far_timer = TIME_INFINITY;
350
      n = HEAD(far_timers);
351
      while (m = n->next)
352
        {
353
          t = SKIP_BACK(timer, n, n);
354
          if (t->expires <= limit)
355
            {
356
              rem_node(n);
357
              tm_insert_near(t);
358
            }
359
          else if (t->expires < first_far_timer)
360
            first_far_timer = t->expires;
361
          n = m;
362
        }
363
    }
364
  while ((n = HEAD(near_timers)) -> next)
365
    {
366 af847acc Martin Mares
      int delay;
367 b5d9ee5c Martin Mares
      t = SKIP_BACK(timer, n, n);
368
      if (t->expires > now)
369
        break;
370
      rem_node(n);
371 af847acc Martin Mares
      delay = t->expires - now;
372 b5d9ee5c Martin Mares
      t->expires = 0;
373 af847acc Martin Mares
      if (t->recurrent)
374
        {
375
          int i = t->recurrent - delay;
376
          if (i < 0)
377
            i = 0;
378
          tm_start(t, i);
379
        }
380 8bcb5fb1 Ondrej Zajicek
      io_log_event(t->hook, t->data);
381 b5d9ee5c Martin Mares
      t->hook(t);
382
    }
383
}
384
385 525fa2c1 Martin Mares
/**
386 0d3effcf Ondrej Filip
 * tm_parse_datetime - parse a date and time
387
 * @x: datetime string
388
 *
389
 * tm_parse_datetime() takes a textual representation of
390
 * a date and time (dd-mm-yyyy hh:mm:ss)
391
 * and converts it to the corresponding value of type &bird_clock_t.
392
 */
393
bird_clock_t
394
tm_parse_datetime(char *x)
395
{
396
  struct tm tm;
397
  int n;
398
  time_t t;
399
400
  if (sscanf(x, "%d-%d-%d %d:%d:%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &tm.tm_hour, &tm.tm_min, &tm.tm_sec, &n) != 6 || x[n])
401
    return tm_parse_date(x);
402
  tm.tm_mon--;
403
  tm.tm_year -= 1900;
404
  t = mktime(&tm);
405
  if (t == (time_t) -1)
406
    return 0;
407
  return t;
408
}
409
/**
410 525fa2c1 Martin Mares
 * tm_parse_date - parse a date
411
 * @x: date string
412
 *
413
 * tm_parse_date() takes a textual representation of a date (dd-mm-yyyy)
414
 * and converts it to the corresponding value of type &bird_clock_t.
415
 */
416 913f7dc9 Martin Mares
bird_clock_t
417
tm_parse_date(char *x)
418
{
419
  struct tm tm;
420
  int n;
421
  time_t t;
422
423
  if (sscanf(x, "%d-%d-%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &n) != 3 || x[n])
424
    return 0;
425
  tm.tm_mon--;
426
  tm.tm_year -= 1900;
427
  tm.tm_hour = tm.tm_min = tm.tm_sec = 0;
428
  t = mktime(&tm);
429
  if (t == (time_t) -1)
430
    return 0;
431
  return t;
432
}
433
434 c37e7851 Ondrej Zajicek
static void
435
tm_format_reltime(char *x, struct tm *tm, bird_clock_t delta)
436 913f7dc9 Martin Mares
{
437 c37e7851 Ondrej Zajicek
  static char *month_names[12] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun",
438
                                   "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
439 913f7dc9 Martin Mares
440 c37e7851 Ondrej Zajicek
  if (delta < 20*3600)
441
    bsprintf(x, "%02d:%02d", tm->tm_hour, tm->tm_min);
442
  else if (delta < 360*86400)
443
    bsprintf(x, "%s%02d", month_names[tm->tm_mon], tm->tm_mday);
444
  else
445
    bsprintf(x, "%d", tm->tm_year+1900);
446 913f7dc9 Martin Mares
}
447
448 c37e7851 Ondrej Zajicek
#include "conf/conf.h"
449
450 525fa2c1 Martin Mares
/**
451
 * tm_format_datetime - convert date and time to textual representation
452
 * @x: destination buffer of size %TM_DATETIME_BUFFER_SIZE
453 8e433d6a Pavel Tvrdik
 * @fmt_spec: specification of resulting textual representation of the time
454 525fa2c1 Martin Mares
 * @t: time
455
 *
456 fd91ae33 Ondrej Zajicek
 * This function formats the given relative time value @t to a textual
457
 * date/time representation (dd-mm-yyyy hh:mm:ss) in real time.
458 525fa2c1 Martin Mares
 */
459 afa8937a Martin Mares
void
460 c37e7851 Ondrej Zajicek
tm_format_datetime(char *x, struct timeformat *fmt_spec, bird_clock_t t)
461 7a88832e Martin Mares
{
462 c37e7851 Ondrej Zajicek
  const char *fmt_used;
463 7a88832e Martin Mares
  struct tm *tm;
464 fd91ae33 Ondrej Zajicek
  bird_clock_t delta = now - t;
465
  t = now_real - delta;
466 7a88832e Martin Mares
  tm = localtime(&t);
467
468 c37e7851 Ondrej Zajicek
  if (fmt_spec->fmt1 == NULL)
469
    return tm_format_reltime(x, tm, delta);
470 afa8937a Martin Mares
471 c37e7851 Ondrej Zajicek
  if ((fmt_spec->limit == 0) || (delta < fmt_spec->limit))
472
    fmt_used = fmt_spec->fmt1;
473 afa8937a Martin Mares
  else
474 c37e7851 Ondrej Zajicek
    fmt_used = fmt_spec->fmt2;
475
476
  int rv = strftime(x, TM_DATETIME_BUFFER_SIZE, fmt_used, tm);
477
  if (((rv == 0) && fmt_used[0]) || (rv == TM_DATETIME_BUFFER_SIZE))
478
    strcpy(x, "<too-long>");
479 afa8937a Martin Mares
}
480
481 05476c4d Ondrej Zajicek
482 525fa2c1 Martin Mares
/**
483
 * DOC: Sockets
484
 *
485
 * Socket resources represent network connections. Their data structure (&socket)
486
 * contains a lot of fields defining the exact type of the socket, the local and
487
 * remote addresses and ports, pointers to socket buffers and finally pointers to
488
 * hook functions to be called when new data have arrived to the receive buffer
489
 * (@rx_hook), when the contents of the transmit buffer have been transmitted
490
 * (@tx_hook) and when an error or connection close occurs (@err_hook).
491
 *
492 38a608c5 Martin Mares
 * Freeing of sockets from inside socket hooks is perfectly safe.
493 b5d9ee5c Martin Mares
 */
494
495 abae6e9c Martin Mares
#ifndef SOL_IP
496
#define SOL_IP IPPROTO_IP
497
#endif
498
499 b1a1faba Ondrej Filip
#ifndef SOL_IPV6
500
#define SOL_IPV6 IPPROTO_IPV6
501
#endif
502
503 48e5f32d Ondrej Zajicek
#ifndef SOL_ICMPV6
504
#define SOL_ICMPV6 IPPROTO_ICMPV6
505
#endif
506
507
508 05476c4d Ondrej Zajicek
/*
509
 *        Sockaddr helper functions
510
 */
511 38a608c5 Martin Mares
512 3e236955 Jan Moskyto Matejka
/* Size of the sockaddr structure for @af; anything but AF_INET is treated as IPv6. */
static inline int UNUSED sockaddr_length(int af)
{
  if (af == AF_INET)
    return sizeof(struct sockaddr_in);

  return sizeof(struct sockaddr_in6);
}
514
515
static inline void
516 3e236955 Jan Moskyto Matejka
sockaddr_fill4(struct sockaddr_in *sa, ip_addr a, uint port)
517 38a608c5 Martin Mares
{
518 05476c4d Ondrej Zajicek
  memset(sa, 0, sizeof(struct sockaddr_in));
519
#ifdef HAVE_SIN_LEN
520
  sa->sin_len = sizeof(struct sockaddr_in);
521
#endif
522
  sa->sin_family = AF_INET;
523
  sa->sin_port = htons(port);
524
  sa->sin_addr = ipa_to_in4(a);
525 38a608c5 Martin Mares
}
526 b5d9ee5c Martin Mares
527 05476c4d Ondrej Zajicek
static inline void
528
sockaddr_fill6(struct sockaddr_in6 *sa, ip_addr a, struct iface *ifa, uint port)
529 b5d9ee5c Martin Mares
{
530 05476c4d Ondrej Zajicek
  memset(sa, 0, sizeof(struct sockaddr_in6));
531
#ifdef SIN6_LEN
532
  sa->sin6_len = sizeof(struct sockaddr_in6);
533
#endif
534
  sa->sin6_family = AF_INET6;
535
  sa->sin6_port = htons(port);
536
  sa->sin6_flowinfo = 0;
537
  sa->sin6_addr = ipa_to_in6(a);
538
539
  if (ifa && ipa_is_link_local(a))
540
    sa->sin6_scope_id = ifa->index;
541 4da25acb Martin Mares
}
542 b5d9ee5c Martin Mares
543 05476c4d Ondrej Zajicek
void
544
sockaddr_fill(sockaddr *sa, int af, ip_addr a, struct iface *ifa, uint port)
545 4da25acb Martin Mares
{
546 05476c4d Ondrej Zajicek
  if (af == AF_INET)
547 3e236955 Jan Moskyto Matejka
    sockaddr_fill4((struct sockaddr_in *) sa, a, port);
548 05476c4d Ondrej Zajicek
  else if (af == AF_INET6)
549
    sockaddr_fill6((struct sockaddr_in6 *) sa, a, ifa, port);
550
  else
551
    bug("Unknown AF");
552 4da25acb Martin Mares
}
553
554 05476c4d Ondrej Zajicek
static inline void
555 3e236955 Jan Moskyto Matejka
sockaddr_read4(struct sockaddr_in *sa, ip_addr *a, uint *port)
556 4da25acb Martin Mares
{
557 05476c4d Ondrej Zajicek
  *port = ntohs(sa->sin_port);
558
  *a = ipa_from_in4(sa->sin_addr);
559 b5d9ee5c Martin Mares
}
560
561 05476c4d Ondrej Zajicek
static inline void
562
sockaddr_read6(struct sockaddr_in6 *sa, ip_addr *a, struct iface **ifa, uint *port)
563 48e5f32d Ondrej Zajicek
{
564 05476c4d Ondrej Zajicek
  *port = ntohs(sa->sin6_port);
565
  *a = ipa_from_in6(sa->sin6_addr);
566 48e5f32d Ondrej Zajicek
567 05476c4d Ondrej Zajicek
  if (ifa && ipa_is_link_local(*a))
568
    *ifa = if_find_by_index(sa->sin6_scope_id);
569 48e5f32d Ondrej Zajicek
}
570
571 05476c4d Ondrej Zajicek
int
572
sockaddr_read(sockaddr *sa, int af, ip_addr *a, struct iface **ifa, uint *port)
573 48e5f32d Ondrej Zajicek
{
574 05476c4d Ondrej Zajicek
  if (sa->sa.sa_family != af)
575
    goto fail;
576 48e5f32d Ondrej Zajicek
577 05476c4d Ondrej Zajicek
  if (af == AF_INET)
578 3e236955 Jan Moskyto Matejka
    sockaddr_read4((struct sockaddr_in *) sa, a, port);
579 05476c4d Ondrej Zajicek
  else if (af == AF_INET6)
580
    sockaddr_read6((struct sockaddr_in6 *) sa, a, ifa, port);
581
  else
582
    goto fail;
583 48e5f32d Ondrej Zajicek
584 05476c4d Ondrej Zajicek
  return 0;
585 48e5f32d Ondrej Zajicek
586 05476c4d Ondrej Zajicek
 fail:
587
  *a = IPA_NONE;
588
  *port = 0;
589
  return -1;
590 48e5f32d Ondrej Zajicek
}
591
592
593 05476c4d Ondrej Zajicek
/*
594
 *        IPv6 multicast syscalls
595
 */
596 4da25acb Martin Mares
597 05476c4d Ondrej Zajicek
/* Fortunately standardized in RFC 3493 */
598 b5d9ee5c Martin Mares
599 05476c4d Ondrej Zajicek
/* Initializer for struct ipv6_mreq: group @maddr on interface @ifa (a struct iface pointer) */
#define INIT_MREQ6(maddr,ifa) \
  { .ipv6mr_multiaddr = ipa_to_in6(maddr), .ipv6mr_interface = (ifa)->index }
601 b5d9ee5c Martin Mares
602 05476c4d Ondrej Zajicek
static inline int
603
sk_setup_multicast6(sock *s)
604 b5d9ee5c Martin Mares
{
605 05476c4d Ondrej Zajicek
  int index = s->iface->index;
606
  int ttl = s->ttl;
607
  int n = 0;
608 b5d9ee5c Martin Mares
609 05476c4d Ondrej Zajicek
  if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_IF, &index, sizeof(index)) < 0)
610
    ERR("IPV6_MULTICAST_IF");
611 b5d9ee5c Martin Mares
612 05476c4d Ondrej Zajicek
  if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_HOPS, &ttl, sizeof(ttl)) < 0)
613
    ERR("IPV6_MULTICAST_HOPS");
614 4f22c981 Martin Mares
615 05476c4d Ondrej Zajicek
  if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_LOOP, &n, sizeof(n)) < 0)
616
    ERR("IPV6_MULTICAST_LOOP");
617 4f22c981 Martin Mares
618 05476c4d Ondrej Zajicek
  return 0;
619 061ab802 Ondrej Zajicek
}
620
621 05476c4d Ondrej Zajicek
static inline int
622
sk_join_group6(sock *s, ip_addr maddr)
623 4f22c981 Martin Mares
{
624 05476c4d Ondrej Zajicek
  struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface);
625 eb1451a3 Ondrej Zajicek
626 05476c4d Ondrej Zajicek
  if (setsockopt(s->fd, SOL_IPV6, IPV6_JOIN_GROUP, &mr, sizeof(mr)) < 0)
627
    ERR("IPV6_JOIN_GROUP");
628 4f22c981 Martin Mares
629 05476c4d Ondrej Zajicek
  return 0;
630 b5d9ee5c Martin Mares
}
631
632 05476c4d Ondrej Zajicek
static inline int
633
sk_leave_group6(sock *s, ip_addr maddr)
634 b5d9ee5c Martin Mares
{
635 05476c4d Ondrej Zajicek
  struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface);
636 b5d9ee5c Martin Mares
637 05476c4d Ondrej Zajicek
  if (setsockopt(s->fd, SOL_IPV6, IPV6_LEAVE_GROUP, &mr, sizeof(mr)) < 0)
638
    ERR("IPV6_LEAVE_GROUP");
639
640
  return 0;
641
}
642 4f22c981 Martin Mares
643 bed41728 Ondrej Zajicek
644 05476c4d Ondrej Zajicek
/*
645
 *        IPv6 packet control messages
646
 */
647 bed41728 Ondrej Zajicek
648 05476c4d Ondrej Zajicek
/* Also standardized, in RFC 3542 */
649 bed41728 Ondrej Zajicek
650 dcc60494 Ondrej Zajicek
/*
651
 * RFC 2292 uses IPV6_PKTINFO for both the socket option and the cmsg
652
 * type, RFC 3542 changed the socket option to IPV6_RECVPKTINFO. If we
653
 * don't have IPV6_RECVPKTINFO we suppose the OS implements the older
654
 * RFC and we use IPV6_PKTINFO.
655
 */
656
#ifndef IPV6_RECVPKTINFO
657
#define IPV6_RECVPKTINFO IPV6_PKTINFO
658
#endif
659 70e212f9 Ondrej Zajicek
/*
660
 * Same goes for IPV6_HOPLIMIT -> IPV6_RECVHOPLIMIT.
661
 */
662
#ifndef IPV6_RECVHOPLIMIT
663
#define IPV6_RECVHOPLIMIT IPV6_HOPLIMIT
664
#endif
665 dcc60494 Ondrej Zajicek
666 70e212f9 Ondrej Zajicek
667 05476c4d Ondrej Zajicek
#define CMSG6_SPACE_PKTINFO CMSG_SPACE(sizeof(struct in6_pktinfo))
668
#define CMSG6_SPACE_TTL CMSG_SPACE(sizeof(int))
669 bed41728 Ondrej Zajicek
670 05476c4d Ondrej Zajicek
static inline int
671
sk_request_cmsg6_pktinfo(sock *s)
672
{
673
  int y = 1;
674 70e212f9 Ondrej Zajicek
675 05476c4d Ondrej Zajicek
  if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVPKTINFO, &y, sizeof(y)) < 0)
676
    ERR("IPV6_RECVPKTINFO");
677
678
  return 0;
679 bed41728 Ondrej Zajicek
}
680
681 05476c4d Ondrej Zajicek
static inline int
682
sk_request_cmsg6_ttl(sock *s)
683 bed41728 Ondrej Zajicek
{
684 05476c4d Ondrej Zajicek
  int y = 1;
685 bed41728 Ondrej Zajicek
686 05476c4d Ondrej Zajicek
  if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVHOPLIMIT, &y, sizeof(y)) < 0)
687
    ERR("IPV6_RECVHOPLIMIT");
688 70e212f9 Ondrej Zajicek
689 05476c4d Ondrej Zajicek
  return 0;
690
}
691 70e212f9 Ondrej Zajicek
692 05476c4d Ondrej Zajicek
static inline void
693
sk_process_cmsg6_pktinfo(sock *s, struct cmsghdr *cm)
694
{
695
  if (cm->cmsg_type == IPV6_PKTINFO)
696 70e212f9 Ondrej Zajicek
  {
697 05476c4d Ondrej Zajicek
    struct in6_pktinfo *pi = (struct in6_pktinfo *) CMSG_DATA(cm);
698
    s->laddr = ipa_from_in6(pi->ipi6_addr);
699
    s->lifindex = pi->ipi6_ifindex;
700 70e212f9 Ondrej Zajicek
  }
701 05476c4d Ondrej Zajicek
}
702 70e212f9 Ondrej Zajicek
703 05476c4d Ondrej Zajicek
static inline void
704
sk_process_cmsg6_ttl(sock *s, struct cmsghdr *cm)
705
{
706
  if (cm->cmsg_type == IPV6_HOPLIMIT)
707
    s->rcv_ttl = * (int *) CMSG_DATA(cm);
708 bed41728 Ondrej Zajicek
}
709
710 05476c4d Ondrej Zajicek
static inline void
711
sk_prepare_cmsgs6(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen)
712 bed41728 Ondrej Zajicek
{
713
  struct cmsghdr *cm;
714
  struct in6_pktinfo *pi;
715 8945f73d Ondrej Zajicek
  int controllen = 0;
716 bed41728 Ondrej Zajicek
717
  msg->msg_control = cbuf;
718
  msg->msg_controllen = cbuflen;
719
720
  cm = CMSG_FIRSTHDR(msg);
721 48e5f32d Ondrej Zajicek
  cm->cmsg_level = SOL_IPV6;
722 bed41728 Ondrej Zajicek
  cm->cmsg_type = IPV6_PKTINFO;
723
  cm->cmsg_len = CMSG_LEN(sizeof(*pi));
724 8945f73d Ondrej Zajicek
  controllen += CMSG_SPACE(sizeof(*pi));
725 bed41728 Ondrej Zajicek
726
  pi = (struct in6_pktinfo *) CMSG_DATA(cm);
727
  pi->ipi6_ifindex = s->iface ? s->iface->index : 0;
728 05476c4d Ondrej Zajicek
  pi->ipi6_addr = ipa_to_in6(s->saddr);
729 bed41728 Ondrej Zajicek
730 8945f73d Ondrej Zajicek
  msg->msg_controllen = controllen;
731 bed41728 Ondrej Zajicek
}
732 48e5f32d Ondrej Zajicek
733 bed41728 Ondrej Zajicek
734 05476c4d Ondrej Zajicek
/*
735
 *        Miscellaneous socket syscalls
736
 */
737
738
static inline int
739
sk_set_ttl4(sock *s, int ttl)
740 a39b165e Ondrej Zajicek
{
741 05476c4d Ondrej Zajicek
  if (setsockopt(s->fd, SOL_IP, IP_TTL, &ttl, sizeof(ttl)) < 0)
742
    ERR("IP_TTL");
743
744
  return 0;
745 a39b165e Ondrej Zajicek
}
746
747 05476c4d Ondrej Zajicek
static inline int
748
sk_set_ttl6(sock *s, int ttl)
749
{
750
  if (setsockopt(s->fd, SOL_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) < 0)
751
    ERR("IPV6_UNICAST_HOPS");
752 38a608c5 Martin Mares
753 05476c4d Ondrej Zajicek
  return 0;
754
}
755
756
static inline int
757
sk_set_tos4(sock *s, int tos)
758 b5d9ee5c Martin Mares
{
759 05476c4d Ondrej Zajicek
  if (setsockopt(s->fd, SOL_IP, IP_TOS, &tos, sizeof(tos)) < 0)
760
    ERR("IP_TOS");
761 b5d9ee5c Martin Mares
762 05476c4d Ondrej Zajicek
  return 0;
763
}
764 ef4a50be Ondrej Zajicek
765 05476c4d Ondrej Zajicek
static inline int
766
sk_set_tos6(sock *s, int tos)
767
{
768
  if (setsockopt(s->fd, SOL_IPV6, IPV6_TCLASS, &tos, sizeof(tos)) < 0)
769
    ERR("IPV6_TCLASS");
770 48e5f32d Ondrej Zajicek
771 05476c4d Ondrej Zajicek
  return 0;
772
}
773 48e5f32d Ondrej Zajicek
774 b867a87c Ondrej Zajicek
static inline int
775 3e236955 Jan Moskyto Matejka
sk_set_high_port(sock *s UNUSED)
776 b867a87c Ondrej Zajicek
{
777
  /* Port range setting is optional, ignore it if not supported */
778
779
#ifdef IP_PORTRANGE
780
  if (sk_is_ipv4(s))
781
  {
782
    int range = IP_PORTRANGE_HIGH;
783
    if (setsockopt(s->fd, SOL_IP, IP_PORTRANGE, &range, sizeof(range)) < 0)
784
      ERR("IP_PORTRANGE");
785
  }
786
#endif
787
788
#ifdef IPV6_PORTRANGE
789
  if (sk_is_ipv6(s))
790
  {
791
    int range = IPV6_PORTRANGE_HIGH;
792
    if (setsockopt(s->fd, SOL_IPV6, IPV6_PORTRANGE, &range, sizeof(range)) < 0)
793
      ERR("IPV6_PORTRANGE");
794
  }
795
#endif
796
797
  return 0;
798
}
799
800 88a183c6 Ondrej Zajicek
/*
 * Skip the IPv4 header at the start of @pkt. *@len is the packet length
 * on input and is reduced by the header length on success. Returns a
 * pointer just past the header, or NULL (leaving *@len unchanged) when the
 * packet is shorter than 20 bytes, the version nibble is not 4, or the
 * header length is below 20 or exceeds *@len.
 */
static inline byte *
sk_skip_ip_header(byte *pkt, int *len)
{
  int total = *len;
  int hlen;

  if (total < 20)
    return NULL;

  /* High nibble of the first byte is the IP version */
  if ((pkt[0] & 0xf0) != 0x40)
    return NULL;

  /* Low nibble is the header length in 32-bit words */
  hlen = (pkt[0] & 0x0f) * 4;
  if ((hlen < 20) || (hlen > total))
    return NULL;

  *len = total - hlen;
  return pkt + hlen;
}
813
814
byte *
815
sk_rx_buffer(sock *s, int *len)
816
{
817
  if (sk_is_ipv4(s) && (s->type == SK_IP))
818
    return sk_skip_ip_header(s->rbuf, len);
819
  else
820
    return s->rbuf;
821
}
822
823 48e5f32d Ondrej Zajicek
824 05476c4d Ondrej Zajicek
/*
825
 *        Public socket functions
826
 */
827 48e5f32d Ondrej Zajicek
828 05476c4d Ondrej Zajicek
/**
829
 * sk_setup_multicast - enable multicast for given socket
830
 * @s: socket
831
 *
832
 * Prepare transmission of multicast packets for given datagram socket.
833
 * The socket must have defined @iface.
834
 *
835
 * Result: 0 for success, -1 for an error.
836
 */
837 48e5f32d Ondrej Zajicek
838 05476c4d Ondrej Zajicek
int
839
sk_setup_multicast(sock *s)
840
{
841
  ASSERT(s->iface);
842 48e5f32d Ondrej Zajicek
843 05476c4d Ondrej Zajicek
  if (sk_is_ipv4(s))
844
    return sk_setup_multicast4(s);
845
  else
846
    return sk_setup_multicast6(s);
847
}
848 48e5f32d Ondrej Zajicek
849 05476c4d Ondrej Zajicek
/**
850
 * sk_join_group - join multicast group for given socket
851
 * @s: socket
852
 * @maddr: multicast address
853
 *
854
 * Join multicast group for given datagram socket and associated interface.
855
 * The socket must have defined @iface.
856
 *
857
 * Result: 0 for success, -1 for an error.
858
 */
859 789772ed Ondrej Zajicek
860 05476c4d Ondrej Zajicek
int
861
sk_join_group(sock *s, ip_addr maddr)
862
{
863
  if (sk_is_ipv4(s))
864
    return sk_join_group4(s, maddr);
865
  else
866
    return sk_join_group6(s, maddr);
867
}
868 ef4a50be Ondrej Zajicek
869 05476c4d Ondrej Zajicek
/**
870
 * sk_leave_group - leave multicast group for given socket
871
 * @s: socket
872
 * @maddr: multicast address
873
 *
874
 * Leave multicast group for given datagram socket and associated interface.
875
 * The socket must have defined @iface.
876
 *
877
 * Result: 0 for success, -1 for an error.
878
 */
879 789772ed Ondrej Zajicek
880 05476c4d Ondrej Zajicek
int
881
sk_leave_group(sock *s, ip_addr maddr)
882
{
883
  if (sk_is_ipv4(s))
884
    return sk_leave_group4(s, maddr);
885
  else
886
    return sk_leave_group6(s, maddr);
887 b5d9ee5c Martin Mares
}
888
889 a39b165e Ondrej Zajicek
/**
890 05476c4d Ondrej Zajicek
 * sk_setup_broadcast - enable broadcast for given socket
891
 * @s: socket
892
 *
893
 * Allow reception and transmission of broadcast packets for given datagram
894
 * socket. The socket must have defined @iface. For transmission, packets should
895
 * be sent to @brd address of @iface.
896
 *
897
 * Result: 0 for success, -1 for an error.
898
 */
899
900
int
901
sk_setup_broadcast(sock *s)
902
{
903
  int y = 1;
904
905
  if (setsockopt(s->fd, SOL_SOCKET, SO_BROADCAST, &y, sizeof(y)) < 0)
906
    ERR("SO_BROADCAST");
907
908
  return 0;
909
}
910
911
/**
912
 * sk_set_ttl - set transmit TTL for given socket
913 a39b165e Ondrej Zajicek
 * @s: socket
914
 * @ttl: TTL value
915
 *
916 05476c4d Ondrej Zajicek
 * Set TTL for already opened connections when TTL was not set before. Useful
917
 * for accepted connections when different ones should have different TTL.
918 a39b165e Ondrej Zajicek
 *
919
 * Result: 0 for success, -1 for an error.
920
 */
921
922
int
923
sk_set_ttl(sock *s, int ttl)
924
{
925
  s->ttl = ttl;
926
927 05476c4d Ondrej Zajicek
  if (sk_is_ipv4(s))
928
    return sk_set_ttl4(s, ttl);
929
  else
930
    return sk_set_ttl6(s, ttl);
931 a39b165e Ondrej Zajicek
}
932
933 b1b19433 Ondrej Zajicek
/**
934 05476c4d Ondrej Zajicek
 * sk_set_min_ttl - set minimal accepted TTL for given socket
935 b1b19433 Ondrej Zajicek
 * @s: socket
936
 * @ttl: TTL value
937
 *
938 05476c4d Ondrej Zajicek
 * Set minimal accepted TTL for given socket. Can be used for TTL security
939
 * implementations.
940 b1b19433 Ondrej Zajicek
 *
941
 * Result: 0 for success, -1 for an error.
942
 */
943
944
int
945
sk_set_min_ttl(sock *s, int ttl)
946
{
947 05476c4d Ondrej Zajicek
  if (sk_is_ipv4(s))
948
    return sk_set_min_ttl4(s, ttl);
949
  else
950
    return sk_set_min_ttl6(s, ttl);
951 b1b19433 Ondrej Zajicek
}
952 d51aa281 Ondrej Zajicek
953 05476c4d Ondrej Zajicek
#if 0
954 d51aa281 Ondrej Zajicek
/**
955 05476c4d Ondrej Zajicek
 * sk_set_md5_auth - add / remove MD5 security association for given socket
956 d51aa281 Ondrej Zajicek
 * @s: socket
957 a7baa098 Ondrej Zajicek (work)
 * @local: IP address of local side
958
 * @remote: IP address of remote side
959 eb1451a3 Ondrej Zajicek
 * @ifa: Interface for link-local IP address
960 a7baa098 Ondrej Zajicek (work)
 * @passwd: Password used for MD5 authentication
961
 * @setkey: Update also system SA/SP database
962 d51aa281 Ondrej Zajicek
 *
963 a7baa098 Ondrej Zajicek (work)
 * In TCP MD5 handling code in kernel, there is a set of security associations
964
 * used for choosing password and other authentication parameters according to
965
 * the local and remote address. This function is useful for listening socket,
966
 * for active sockets it may be enough to set s->password field.
967 d51aa281 Ondrej Zajicek
 *
968
 * When called with passwd != NULL, the new pair is added,
969
 * When called with passwd == NULL, the existing pair is removed.
970
 *
971 a7baa098 Ondrej Zajicek (work)
 * Note that while in Linux, the MD5 SAs are specific to socket, in BSD they are
972
 * stored in global SA/SP database (but the behavior also must be enabled on
973
 * per-socket basis). In case of multiple sockets to the same neighbor, the
974
 * socket-specific state must be configured for each socket while global state
975
 * just once per src-dst pair. The @setkey argument controls whether the global
976
 * state (SA/SP database) is also updated.
977
 *
978 d51aa281 Ondrej Zajicek
 * Result: 0 for success, -1 for an error.
979
 */
980

981
int
982 a7baa098 Ondrej Zajicek (work)
sk_set_md5_auth(sock *s, ip_addr local, ip_addr remote, struct iface *ifa, char *passwd, int setkey)
983 05476c4d Ondrej Zajicek
{ DUMMY; }
984
#endif
985 f9c799a0 Ondrej Zajicek
986 05476c4d Ondrej Zajicek
/**
987
 * sk_set_ipv6_checksum - specify IPv6 checksum offset for given socket
988
 * @s: socket
989
 * @offset: offset
990
 *
991
 * Specify IPv6 checksum field offset for given raw IPv6 socket. After that, the
992
 * kernel will automatically fill it for outgoing packets and check it for
993
 * incoming packets. Should not be used on ICMPv6 sockets, where the position is
994
 * known to the kernel.
995
 *
996
 * Result: 0 for success, -1 for an error.
997
 */
998 f9c799a0 Ondrej Zajicek
999
int
1000 4ac7c834 Ondrej Zajicek
sk_set_ipv6_checksum(sock *s, int offset)
1001
{
1002 48e5f32d Ondrej Zajicek
  if (setsockopt(s->fd, SOL_IPV6, IPV6_CHECKSUM, &offset, sizeof(offset)) < 0)
1003 05476c4d Ondrej Zajicek
    ERR("IPV6_CHECKSUM");
1004 4ac7c834 Ondrej Zajicek
1005
  return 0;
1006
}
1007
1008
int
1009 05476c4d Ondrej Zajicek
sk_set_icmp6_filter(sock *s, int p1, int p2)
1010 93e868c7 Ondrej Zajicek
{
1011
  /* a bit of lame interface, but it is here only for Radv */
1012
  struct icmp6_filter f;
1013
1014
  ICMP6_FILTER_SETBLOCKALL(&f);
1015
  ICMP6_FILTER_SETPASS(p1, &f);
1016
  ICMP6_FILTER_SETPASS(p2, &f);
1017
1018 48e5f32d Ondrej Zajicek
  if (setsockopt(s->fd, SOL_ICMPV6, ICMP6_FILTER, &f, sizeof(f)) < 0)
1019 05476c4d Ondrej Zajicek
    ERR("ICMP6_FILTER");
1020 93e868c7 Ondrej Zajicek
1021
  return 0;
1022
}
1023
1024 05476c4d Ondrej Zajicek
void
1025
sk_log_error(sock *s, const char *p)
1026
{
1027
  log(L_ERR "%s: Socket error: %s%#m", p, s->err);
1028
}
1029
1030
1031
/*
1032
 *        Actual struct birdsock code
1033
 */
1034
1035
static list sock_list;
1036
static struct birdsock *current_sock;
1037
static struct birdsock *stored_sock;
1038
1039
/* Return the socket following @s in its list, or NULL when the node after @s
   has no successor of its own (i.e. @s is the last real entry) */
static inline sock *
sk_next(sock *s)
{
  return s->n.next->next ? SKIP_BACK(sock, n, s->n.next) : NULL;
}
1047
1048
static void
1049
sk_alloc_bufs(sock *s)
1050
{
1051
  if (!s->rbuf && s->rbsize)
1052
    s->rbuf = s->rbuf_alloc = xmalloc(s->rbsize);
1053
  s->rpos = s->rbuf;
1054
  if (!s->tbuf && s->tbsize)
1055
    s->tbuf = s->tbuf_alloc = xmalloc(s->tbsize);
1056
  s->tpos = s->ttx = s->tbuf;
1057
}
1058
1059
static void
1060
sk_free_bufs(sock *s)
1061
{
1062
  if (s->rbuf_alloc)
1063
  {
1064
    xfree(s->rbuf_alloc);
1065
    s->rbuf = s->rbuf_alloc = NULL;
1066
  }
1067
  if (s->tbuf_alloc)
1068
  {
1069
    xfree(s->tbuf_alloc);
1070
    s->tbuf = s->tbuf_alloc = NULL;
1071
  }
1072
}
1073
1074
static void
1075
sk_free(resource *r)
1076
{
1077
  sock *s = (sock *) r;
1078
1079
  sk_free_bufs(s);
1080
  if (s->fd >= 0)
1081
  {
1082
    close(s->fd);
1083
1084
    /* FIXME: we should call sk_stop() for SKF_THREAD sockets */
1085
    if (s->flags & SKF_THREAD)
1086
      return;
1087
1088
    if (s == current_sock)
1089
      current_sock = sk_next(s);
1090
    if (s == stored_sock)
1091
      stored_sock = sk_next(s);
1092
    rem_node(&s->n);
1093
  }
1094
}
1095
1096
void
1097
sk_set_rbsize(sock *s, uint val)
1098
{
1099
  ASSERT(s->rbuf_alloc == s->rbuf);
1100
1101
  if (s->rbsize == val)
1102
    return;
1103
1104
  s->rbsize = val;
1105
  xfree(s->rbuf_alloc);
1106
  s->rbuf_alloc = xmalloc(val);
1107
  s->rpos = s->rbuf = s->rbuf_alloc;
1108
}
1109
1110
void
1111
sk_set_tbsize(sock *s, uint val)
1112
{
1113
  ASSERT(s->tbuf_alloc == s->tbuf);
1114
1115
  if (s->tbsize == val)
1116
    return;
1117
1118
  byte *old_tbuf = s->tbuf;
1119
1120
  s->tbsize = val;
1121
  s->tbuf = s->tbuf_alloc = xrealloc(s->tbuf_alloc, val);
1122
  s->tpos = s->tbuf + (s->tpos - old_tbuf);
1123
  s->ttx  = s->tbuf + (s->ttx  - old_tbuf);
1124
}
1125
1126
void
1127
sk_set_tbuf(sock *s, void *tbuf)
1128
{
1129
  s->tbuf = tbuf ?: s->tbuf_alloc;
1130
  s->ttx = s->tpos = s->tbuf;
1131
}
1132
1133
void
1134
sk_reallocate(sock *s)
1135
{
1136
  sk_free_bufs(s);
1137
  sk_alloc_bufs(s);
1138
}
1139
1140
static void
1141
sk_dump(resource *r)
1142
{
1143
  sock *s = (sock *) r;
1144
  static char *sk_type_names[] = { "TCP<", "TCP>", "TCP", "UDP", NULL, "IP", NULL, "MAGIC", "UNIX<", "UNIX", "DEL!" };
1145
1146 af454f9b Ondrej Zajicek
  debug("(%s, ud=%p, sa=%I, sp=%d, da=%I, dp=%d, tos=%d, ttl=%d, if=%s)\n",
1147 05476c4d Ondrej Zajicek
        sk_type_names[s->type],
1148
        s->data,
1149
        s->saddr,
1150
        s->sport,
1151
        s->daddr,
1152
        s->dport,
1153
        s->tos,
1154
        s->ttl,
1155
        s->iface ? s->iface->name : "none");
1156
}
1157
1158
static struct resclass sk_class = {
1159
  "Socket",
1160
  sizeof(sock),
1161
  sk_free,
1162
  sk_dump,
1163
  NULL,
1164
  NULL
1165
};
1166
1167
/**
1168
 * sk_new - create a socket
1169
 * @p: pool
1170
 *
1171
 * This function creates a new socket resource. If you want to use it,
1172
 * you need to fill in all the required fields of the structure and
1173
 * call sk_open() to do the actual opening of the socket.
1174
 *
1175
 * The real function name is sock_new(), sk_new() is a macro wrapper
1176
 * to avoid collision with OpenSSL.
1177
 */
1178
sock *
1179
sock_new(pool *p)
1180
{
1181
  sock *s = ralloc(p, &sk_class);
1182
  s->pool = p;
1183
  // s->saddr = s->daddr = IPA_NONE;
1184
  s->tos = s->priority = s->ttl = -1;
1185
  s->fd = -1;
1186
  return s;
1187
}
1188
1189
static int
1190
sk_setup(sock *s)
1191 f9c799a0 Ondrej Zajicek
{
1192 05476c4d Ondrej Zajicek
  int y = 1;
1193
  int fd = s->fd;
1194 f9c799a0 Ondrej Zajicek
1195 05476c4d Ondrej Zajicek
  if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
1196
    ERR("O_NONBLOCK");
1197 f9c799a0 Ondrej Zajicek
1198 05476c4d Ondrej Zajicek
  if (!s->af)
1199
    return 0;
1200 f9c799a0 Ondrej Zajicek
1201 05476c4d Ondrej Zajicek
  if (ipa_nonzero(s->saddr) && !(s->flags & SKF_BIND))
1202
    s->flags |= SKF_PKTINFO;
1203 f9c799a0 Ondrej Zajicek
1204 05476c4d Ondrej Zajicek
#ifdef CONFIG_USE_HDRINCL
1205
  if (sk_is_ipv4(s) && (s->type == SK_IP) && (s->flags & SKF_PKTINFO))
1206
  {
1207
    s->flags &= ~SKF_PKTINFO;
1208
    s->flags |= SKF_HDRINCL;
1209
    if (setsockopt(fd, SOL_IP, IP_HDRINCL, &y, sizeof(y)) < 0)
1210
      ERR("IP_HDRINCL");
1211
  }
1212 48e5f32d Ondrej Zajicek
#endif
1213
1214 05476c4d Ondrej Zajicek
  if (s->iface)
1215
  {
1216
#ifdef SO_BINDTODEVICE
1217 f7a99acb Pavel Tvrdik
    struct ifreq ifr = {};
1218 05476c4d Ondrej Zajicek
    strcpy(ifr.ifr_name, s->iface->name);
1219
    if (setsockopt(s->fd, SOL_SOCKET, SO_BINDTODEVICE, &ifr, sizeof(ifr)) < 0)
1220
      ERR("SO_BINDTODEVICE");
1221
#endif
1222 f1aceff5 Ondrej Zajicek
1223 05476c4d Ondrej Zajicek
#ifdef CONFIG_UNIX_DONTROUTE
1224
    if (setsockopt(s->fd, SOL_SOCKET, SO_DONTROUTE, &y, sizeof(y)) < 0)
1225
      ERR("SO_DONTROUTE");
1226
#endif
1227
  }
1228 f9c799a0 Ondrej Zajicek
1229 05476c4d Ondrej Zajicek
  if (s->priority >= 0)
1230
    if (sk_set_priority(s, s->priority) < 0)
1231 f9c799a0 Ondrej Zajicek
      return -1;
1232
1233 05476c4d Ondrej Zajicek
  if (sk_is_ipv4(s))
1234
  {
1235
    if (s->flags & SKF_LADDR_RX)
1236
      if (sk_request_cmsg4_pktinfo(s) < 0)
1237
        return -1;
1238 f9c799a0 Ondrej Zajicek
1239 05476c4d Ondrej Zajicek
    if (s->flags & SKF_TTL_RX)
1240
      if (sk_request_cmsg4_ttl(s) < 0)
1241
        return -1;
1242 f9c799a0 Ondrej Zajicek
1243 05476c4d Ondrej Zajicek
    if ((s->type == SK_UDP) || (s->type == SK_IP))
1244
      if (sk_disable_mtu_disc4(s) < 0)
1245
        return -1;
1246 f9c799a0 Ondrej Zajicek
1247 05476c4d Ondrej Zajicek
    if (s->ttl >= 0)
1248
      if (sk_set_ttl4(s, s->ttl) < 0)
1249
        return -1;
1250 f9c799a0 Ondrej Zajicek
1251 05476c4d Ondrej Zajicek
    if (s->tos >= 0)
1252
      if (sk_set_tos4(s, s->tos) < 0)
1253
        return -1;
1254
  }
1255 f9c799a0 Ondrej Zajicek
1256 05476c4d Ondrej Zajicek
  if (sk_is_ipv6(s))
1257
  {
1258
    if (s->flags & SKF_V6ONLY)
1259
      if (setsockopt(fd, SOL_IPV6, IPV6_V6ONLY, &y, sizeof(y)) < 0)
1260
        ERR("IPV6_V6ONLY");
1261 f9c799a0 Ondrej Zajicek
1262 05476c4d Ondrej Zajicek
    if (s->flags & SKF_LADDR_RX)
1263
      if (sk_request_cmsg6_pktinfo(s) < 0)
1264
        return -1;
1265 f9c799a0 Ondrej Zajicek
1266 05476c4d Ondrej Zajicek
    if (s->flags & SKF_TTL_RX)
1267
      if (sk_request_cmsg6_ttl(s) < 0)
1268
        return -1;
1269 f9c799a0 Ondrej Zajicek
1270 05476c4d Ondrej Zajicek
    if ((s->type == SK_UDP) || (s->type == SK_IP))
1271
      if (sk_disable_mtu_disc6(s) < 0)
1272
        return -1;
1273 f9c799a0 Ondrej Zajicek
1274 05476c4d Ondrej Zajicek
    if (s->ttl >= 0)
1275
      if (sk_set_ttl6(s, s->ttl) < 0)
1276
        return -1;
1277 f9c799a0 Ondrej Zajicek
1278 05476c4d Ondrej Zajicek
    if (s->tos >= 0)
1279
      if (sk_set_tos6(s, s->tos) < 0)
1280
        return -1;
1281
  }
1282 f9c799a0 Ondrej Zajicek
1283
  return 0;
1284
}
1285
1286 05476c4d Ondrej Zajicek
static void
1287
sk_insert(sock *s)
1288 f9c799a0 Ondrej Zajicek
{
1289 05476c4d Ondrej Zajicek
  add_tail(&sock_list, &s->n);
1290 f9c799a0 Ondrej Zajicek
}
1291
1292 b5d9ee5c Martin Mares
static void
1293
sk_tcp_connected(sock *s)
1294
{
1295 05476c4d Ondrej Zajicek
  sockaddr sa;
1296
  int sa_len = sizeof(sa);
1297
1298
  if ((getsockname(s->fd, &sa.sa, &sa_len) < 0) ||
1299
      (sockaddr_read(&sa, s->af, &s->saddr, &s->iface, &s->sport) < 0))
1300
    log(L_WARN "SOCK: Cannot get local IP address for TCP>");
1301 9be9a264 Ondrej Zajicek
1302 b5d9ee5c Martin Mares
  s->type = SK_TCP;
1303
  sk_alloc_bufs(s);
1304 320f4173 Martin Mares
  s->tx_hook(s);
1305 b5d9ee5c Martin Mares
}
1306
1307 b93abffa Martin Mares
static int
1308 05476c4d Ondrej Zajicek
sk_passive_connected(sock *s, int type)
1309 b93abffa Martin Mares
{
1310 05476c4d Ondrej Zajicek
  sockaddr loc_sa, rem_sa;
1311
  int loc_sa_len = sizeof(loc_sa);
1312
  int rem_sa_len = sizeof(rem_sa);
1313 cf31112f Ondrej Zajicek
1314 05476c4d Ondrej Zajicek
  int fd = accept(s->fd, ((type == SK_TCP) ? &rem_sa.sa : NULL), &rem_sa_len);
1315
  if (fd < 0)
1316
  {
1317
    if ((errno != EINTR) && (errno != EAGAIN))
1318 c025b852 Ondrej Filip
      s->err_hook(s, errno);
1319 05476c4d Ondrej Zajicek
    return 0;
1320
  }
1321
1322
  sock *t = sk_new(s->pool);
1323
  t->type = type;
1324
  t->fd = fd;
1325
  t->af = s->af;
1326
  t->ttl = s->ttl;
1327
  t->tos = s->tos;
1328
  t->rbsize = s->rbsize;
1329
  t->tbsize = s->tbsize;
1330
1331
  if (type == SK_TCP)
1332
  {
1333
    if ((getsockname(fd, &loc_sa.sa, &loc_sa_len) < 0) ||
1334
        (sockaddr_read(&loc_sa, s->af, &t->saddr, &t->iface, &t->sport) < 0))
1335
      log(L_WARN "SOCK: Cannot get local IP address for TCP<");
1336
1337
    if (sockaddr_read(&rem_sa, s->af, &t->daddr, &t->iface, &t->dport) < 0)
1338
      log(L_WARN "SOCK: Cannot get remote IP address for TCP<");
1339
  }
1340
1341
  if (sk_setup(t) < 0)
1342
  {
1343
    /* FIXME: Call err_hook instead ? */
1344
    log(L_ERR "SOCK: Incoming connection: %s%#m", t->err);
1345
1346
    /* FIXME: handle it better in rfree() */
1347 9c89560e Ondrej Zajicek
    close(t->fd);
1348 05476c4d Ondrej Zajicek
    t->fd = -1;
1349
    rfree(t);
1350
    return 1;
1351
  }
1352
1353
  sk_insert(t);
1354
  sk_alloc_bufs(t);
1355
  s->rx_hook(t, 0);
1356
  return 1;
1357 b93abffa Martin Mares
}
1358
1359 525fa2c1 Martin Mares
/**
1360
 * sk_open - open a socket
1361
 * @s: socket
1362
 *
1363
 * This function takes a socket resource created by sk_new() and
1364
 * initialized by the user and binds a corresponding network connection
1365
 * to it.
1366
 *
1367
 * Result: 0 for success, -1 for an error.
1368
 */
1369 b5d9ee5c Martin Mares
int
1370
sk_open(sock *s)
1371
{
1372 05476c4d Ondrej Zajicek
  int af = BIRD_AF;
1373
  int fd = -1;
1374 48e5f32d Ondrej Zajicek
  int do_bind = 0;
1375
  int bind_port = 0;
1376
  ip_addr bind_addr = IPA_NONE;
1377
  sockaddr sa;
1378 b5d9ee5c Martin Mares
1379 48e5f32d Ondrej Zajicek
  switch (s->type)
1380 05476c4d Ondrej Zajicek
  {
1381
  case SK_TCP_ACTIVE:
1382
    s->ttx = "";                        /* Force s->ttx != s->tpos */
1383
    /* Fall thru */
1384
  case SK_TCP_PASSIVE:
1385
    fd = socket(af, SOCK_STREAM, IPPROTO_TCP);
1386
    bind_port = s->sport;
1387
    bind_addr = s->saddr;
1388
    do_bind = bind_port || ipa_nonzero(bind_addr);
1389
    break;
1390 9c89560e Ondrej Zajicek
1391 05476c4d Ondrej Zajicek
  case SK_UDP:
1392
    fd = socket(af, SOCK_DGRAM, IPPROTO_UDP);
1393
    bind_port = s->sport;
1394
    bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE;
1395
    do_bind = 1;
1396
    break;
1397
1398
  case SK_IP:
1399
    fd = socket(af, SOCK_RAW, s->dport);
1400
    bind_port = 0;
1401
    bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE;
1402
    do_bind = ipa_nonzero(bind_addr);
1403
    break;
1404
1405
  case SK_MAGIC:
1406
    af = 0;
1407
    fd = s->fd;
1408
    break;
1409
1410
  default:
1411
    bug("sk_open() called for invalid sock type %d", s->type);
1412
  }
1413
1414 b5d9ee5c Martin Mares
  if (fd < 0)
1415 05476c4d Ondrej Zajicek
    ERR("socket");
1416
1417
  s->af = af;
1418 b5d9ee5c Martin Mares
  s->fd = fd;
1419
1420 05476c4d Ondrej Zajicek
  if (sk_setup(s) < 0)
1421
    goto err;
1422 38a608c5 Martin Mares
1423 48e5f32d Ondrej Zajicek
  if (do_bind)
1424 05476c4d Ondrej Zajicek
  {
1425
    if (bind_port)
1426 b5d9ee5c Martin Mares
    {
1427 05476c4d Ondrej Zajicek
      int y = 1;
1428
1429
      if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &y, sizeof(y)) < 0)
1430
        ERR2("SO_REUSEADDR");
1431 48e5f32d Ondrej Zajicek
1432 8931425d Ondrej Zajicek
#ifdef CONFIG_NO_IFACE_BIND
1433 05476c4d Ondrej Zajicek
      /* Workaround missing ability to bind to an iface */
1434
      if ((s->type == SK_UDP) && s->iface && ipa_zero(bind_addr))
1435
      {
1436
        if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &y, sizeof(y)) < 0)
1437
          ERR2("SO_REUSEPORT");
1438
      }
1439 8931425d Ondrej Zajicek
#endif
1440 b5d9ee5c Martin Mares
    }
1441 b867a87c Ondrej Zajicek
    else
1442
      if (s->flags & SKF_HIGH_PORT)
1443
        if (sk_set_high_port(s) < 0)
1444
          log(L_WARN "Socket error: %s%#m", s->err);
1445 48e5f32d Ondrej Zajicek
1446 05476c4d Ondrej Zajicek
    sockaddr_fill(&sa, af, bind_addr, s->iface, bind_port);
1447
    if (bind(fd, &sa.sa, SA_LEN(sa)) < 0)
1448
      ERR2("bind");
1449
  }
1450 d51aa281 Ondrej Zajicek
1451
  if (s->password)
1452 a7baa098 Ondrej Zajicek (work)
    if (sk_set_md5_auth(s, s->saddr, s->daddr, s->iface, s->password, 0) < 0)
1453 05476c4d Ondrej Zajicek
      goto err;
1454 d51aa281 Ondrej Zajicek
1455 48e5f32d Ondrej Zajicek
  switch (s->type)
1456 05476c4d Ondrej Zajicek
  {
1457
  case SK_TCP_ACTIVE:
1458
    sockaddr_fill(&sa, af, s->daddr, s->iface, s->dport);
1459
    if (connect(fd, &sa.sa, SA_LEN(sa)) >= 0)
1460
      sk_tcp_connected(s);
1461
    else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS &&
1462
             errno != ECONNREFUSED && errno != EHOSTUNREACH && errno != ENETUNREACH)
1463
      ERR2("connect");
1464
    break;
1465
1466
  case SK_TCP_PASSIVE:
1467
    if (listen(fd, 8) < 0)
1468
      ERR2("listen");
1469
    break;
1470
1471
  case SK_MAGIC:
1472
    break;
1473
1474
  default:
1475
    sk_alloc_bufs(s);
1476
  }
1477 b5d9ee5c Martin Mares
1478 bf139664 Ondrej Zajicek
  if (!(s->flags & SKF_THREAD))
1479
    sk_insert(s);
1480 b5d9ee5c Martin Mares
  return 0;
1481
1482 05476c4d Ondrej Zajicek
err:
1483 b5d9ee5c Martin Mares
  close(fd);
1484
  s->fd = -1;
1485
  return -1;
1486
}
1487
1488 05476c4d Ondrej Zajicek
int
1489 b93abffa Martin Mares
sk_open_unix(sock *s, char *name)
1490
{
1491
  struct sockaddr_un sa;
1492 05476c4d Ondrej Zajicek
  int fd;
1493
1494
  /* We are sloppy during error (leak fd and not set s->err), but we die anyway */
1495 b93abffa Martin Mares
1496
  fd = socket(AF_UNIX, SOCK_STREAM, 0);
1497
  if (fd < 0)
1498 05476c4d Ondrej Zajicek
    return -1;
1499
1500
  if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
1501
    return -1;
1502 68fa95cf Ondrej Zajicek
1503 97e46d28 Ondrej Zajicek
  /* Path length checked in test_old_bird() */
1504 b93abffa Martin Mares
  sa.sun_family = AF_UNIX;
1505 97c6fa02 Ondrej Filip
  strcpy(sa.sun_path, name);
1506 05476c4d Ondrej Zajicek
1507 0b3bf4b1 Martin Mares
  if (bind(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) < 0)
1508 05476c4d Ondrej Zajicek
    return -1;
1509
1510
  if (listen(fd, 8) < 0)
1511
    return -1;
1512
1513
  s->fd = fd;
1514 38a608c5 Martin Mares
  sk_insert(s);
1515 05476c4d Ondrej Zajicek
  return 0;
1516
}
1517
1518
1519
#define CMSG_RX_SPACE MAX(CMSG4_SPACE_PKTINFO+CMSG4_SPACE_TTL, \
1520
                          CMSG6_SPACE_PKTINFO+CMSG6_SPACE_TTL)
1521
#define CMSG_TX_SPACE MAX(CMSG4_SPACE_PKTINFO,CMSG6_SPACE_PKTINFO)
1522
1523
static void
1524
sk_prepare_cmsgs(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen)
1525
{
1526
  if (sk_is_ipv4(s))
1527
    sk_prepare_cmsgs4(s, msg, cbuf, cbuflen);
1528
  else
1529
    sk_prepare_cmsgs6(s, msg, cbuf, cbuflen);
1530
}
1531
1532
static void
1533
sk_process_cmsgs(sock *s, struct msghdr *msg)
1534
{
1535
  struct cmsghdr *cm;
1536
1537
  s->laddr = IPA_NONE;
1538
  s->lifindex = 0;
1539
  s->rcv_ttl = -1;
1540
1541
  for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm))
1542
  {
1543
    if ((cm->cmsg_level == SOL_IP) && sk_is_ipv4(s))
1544
    {
1545
      sk_process_cmsg4_pktinfo(s, cm);
1546
      sk_process_cmsg4_ttl(s, cm);
1547
    }
1548 b93abffa Martin Mares
1549 05476c4d Ondrej Zajicek
    if ((cm->cmsg_level == SOL_IPV6) && sk_is_ipv6(s))
1550
    {
1551
      sk_process_cmsg6_pktinfo(s, cm);
1552
      sk_process_cmsg6_ttl(s, cm);
1553
    }
1554
  }
1555 b93abffa Martin Mares
}
1556
1557 48e5f32d Ondrej Zajicek
1558
static inline int
1559
sk_sendmsg(sock *s)
1560
{
1561
  struct iovec iov = {s->tbuf, s->tpos - s->tbuf};
1562
  byte cmsg_buf[CMSG_TX_SPACE];
1563
  sockaddr dst;
1564
1565 05476c4d Ondrej Zajicek
  sockaddr_fill(&dst, s->af, s->daddr, s->iface, s->dport);
1566 48e5f32d Ondrej Zajicek
1567
  struct msghdr msg = {
1568 05476c4d Ondrej Zajicek
    .msg_name = &dst.sa,
1569
    .msg_namelen = SA_LEN(dst),
1570 48e5f32d Ondrej Zajicek
    .msg_iov = &iov,
1571
    .msg_iovlen = 1
1572
  };
1573
1574
#ifdef CONFIG_USE_HDRINCL
1575
  byte hdr[20];
1576
  struct iovec iov2[2] = { {hdr, 20}, iov };
1577
1578
  if (s->flags & SKF_HDRINCL)
1579
  {
1580 05476c4d Ondrej Zajicek
    sk_prepare_ip_header(s, hdr, iov.iov_len);
1581 48e5f32d Ondrej Zajicek
    msg.msg_iov = iov2;
1582
    msg.msg_iovlen = 2;
1583
  }
1584
#endif
1585
1586
  if (s->flags & SKF_PKTINFO)
1587 05476c4d Ondrej Zajicek
    sk_prepare_cmsgs(s, &msg, cmsg_buf, sizeof(cmsg_buf));
1588 48e5f32d Ondrej Zajicek
1589
  return sendmsg(s->fd, &msg, 0);
1590
}
1591
1592
static inline int
1593
sk_recvmsg(sock *s)
1594
{
1595
  struct iovec iov = {s->rbuf, s->rbsize};
1596
  byte cmsg_buf[CMSG_RX_SPACE];
1597
  sockaddr src;
1598
1599
  struct msghdr msg = {
1600 05476c4d Ondrej Zajicek
    .msg_name = &src.sa,
1601
    .msg_namelen = sizeof(src), // XXXX ??
1602 48e5f32d Ondrej Zajicek
    .msg_iov = &iov,
1603
    .msg_iovlen = 1,
1604
    .msg_control = cmsg_buf,
1605
    .msg_controllen = sizeof(cmsg_buf),
1606
    .msg_flags = 0
1607
  };
1608
1609
  int rv = recvmsg(s->fd, &msg, 0);
1610
  if (rv < 0)
1611
    return rv;
1612
1613
  //ifdef IPV4
1614
  //  if (cf_type == SK_IP)
1615
  //    rv = ipv4_skip_header(pbuf, rv);
1616
  //endif
1617
1618 05476c4d Ondrej Zajicek
  sockaddr_read(&src, s->af, &s->faddr, NULL, &s->fport);
1619
  sk_process_cmsgs(s, &msg);
1620 48e5f32d Ondrej Zajicek
1621
  if (msg.msg_flags & MSG_TRUNC)
1622
    s->flags |= SKF_TRUNCATED;
1623
  else
1624
    s->flags &= ~SKF_TRUNCATED;
1625
1626
  return rv;
1627
}
1628
1629
1630 353729f5 Ondrej Zajicek
/* Mark the transmit buffer as empty: both TX positions return to its start */
static inline void
reset_tx_buffer(sock *s)
{
  s->tpos = s->tbuf;
  s->ttx = s->tpos;
}
1631
1632 b5d9ee5c Martin Mares
static int
1633
sk_maybe_write(sock *s)
1634
{
1635
  int e;
1636
1637
  switch (s->type)
1638 05476c4d Ondrej Zajicek
  {
1639
  case SK_TCP:
1640
  case SK_MAGIC:
1641
  case SK_UNIX:
1642
    while (s->ttx != s->tpos)
1643 b5d9ee5c Martin Mares
    {
1644 05476c4d Ondrej Zajicek
      e = write(s->fd, s->ttx, s->tpos - s->ttx);
1645
1646
      if (e < 0)
1647
      {
1648
        if (errno != EINTR && errno != EAGAIN)
1649 b5d9ee5c Martin Mares
        {
1650 05476c4d Ondrej Zajicek
          reset_tx_buffer(s);
1651
          /* EPIPE is just a connection close notification during TX */
1652
          s->err_hook(s, (errno != EPIPE) ? errno : 0);
1653
          return -1;
1654 b5d9ee5c Martin Mares
        }
1655 05476c4d Ondrej Zajicek
        return 0;
1656
      }
1657
      s->ttx += e;
1658
    }
1659
    reset_tx_buffer(s);
1660
    return 1;
1661
1662
  case SK_UDP:
1663
  case SK_IP:
1664
    {
1665
      if (s->tbuf == s->tpos)
1666 b5d9ee5c Martin Mares
        return 1;
1667 05476c4d Ondrej Zajicek
1668
      e = sk_sendmsg(s);
1669
1670
      if (e < 0)
1671
      {
1672
        if (errno != EINTR && errno != EAGAIN)
1673
        {
1674
          reset_tx_buffer(s);
1675
          s->err_hook(s, errno);
1676
          return -1;
1677
        }
1678
1679
        if (!s->tx_hook)
1680
          reset_tx_buffer(s);
1681
        return 0;
1682 b5d9ee5c Martin Mares
      }
1683 05476c4d Ondrej Zajicek
      reset_tx_buffer(s);
1684
      return 1;
1685 b5d9ee5c Martin Mares
    }
1686 05476c4d Ondrej Zajicek
  default:
1687
    bug("sk_maybe_write: unknown socket type %d", s->type);
1688
  }
1689 b5d9ee5c Martin Mares
}
1690
1691 ea89da38 Ondrej Zajicek
int
1692
sk_rx_ready(sock *s)
1693
{
1694
  int rv;
1695 9c92f692 Jan Moskyto Matejka
  struct pollfd pfd = { .fd = s->fd };
1696
  pfd.events |= POLLIN;
1697 ea89da38 Ondrej Zajicek
1698
 redo:
1699 9c92f692 Jan Moskyto Matejka
  rv = poll(&pfd, 1, 0);
1700 9c89560e Ondrej Zajicek
1701 ea89da38 Ondrej Zajicek
  if ((rv < 0) && (errno == EINTR || errno == EAGAIN))
1702
    goto redo;
1703
1704
  return rv;
1705
}
1706
1707 525fa2c1 Martin Mares
/**
1708
 * sk_send - send data to a socket
1709
 * @s: socket
1710
 * @len: number of bytes to send
1711
 *
1712
 * This function sends @len bytes of data prepared in the
1713
 * transmit buffer of the socket @s to the network connection.
1714
 * If the packet can be sent immediately, it does so and returns
1715
 * 1, else it queues the packet for later processing, returns 0
1716
 * and calls the @tx_hook of the socket when the transmission
1717
 * takes place.
1718
 */
1719 b5d9ee5c Martin Mares
int
1720
sk_send(sock *s, unsigned len)
1721
{
1722
  s->ttx = s->tbuf;
1723
  s->tpos = s->tbuf + len;
1724
  return sk_maybe_write(s);
1725
}
1726
1727 525fa2c1 Martin Mares
/**
1728
 * sk_send_to - send data to a specific destination
1729
 * @s: socket
1730
 * @len: number of bytes to send
1731
 * @addr: IP address to send the packet to
1732
 * @port: port to send the packet to
1733
 *
1734 2e9b2421 Martin Mares
 * This is a sk_send() replacement for connection-less packet sockets
1735 525fa2c1 Martin Mares
 * which allows destination of the packet to be chosen dynamically.
1736 48e5f32d Ondrej Zajicek
 * Raw IP sockets should use 0 for @port.
1737 525fa2c1 Martin Mares
 */
1738 b5d9ee5c Martin Mares
int
1739
sk_send_to(sock *s, unsigned len, ip_addr addr, unsigned port)
1740
{
1741 353729f5 Ondrej Zajicek
  s->daddr = addr;
1742 48e5f32d Ondrej Zajicek
  if (port)
1743
    s->dport = port;
1744
1745 b5d9ee5c Martin Mares
  s->ttx = s->tbuf;
1746
  s->tpos = s->tbuf + len;
1747
  return sk_maybe_write(s);
1748
}
1749
1750 353729f5 Ondrej Zajicek
/*
1751
int
1752
sk_send_full(sock *s, unsigned len, struct iface *ifa,
1753
             ip_addr saddr, ip_addr daddr, unsigned dport)
1754
{
1755
  s->iface = ifa;
1756
  s->saddr = saddr;
1757
  s->daddr = daddr;
1758
  s->dport = dport;
1759
  s->ttx = s->tbuf;
1760
  s->tpos = s->tbuf + len;
1761
  return sk_maybe_write(s);
1762
}
1763
*/
1764
1765 6a8d3f1c Ondrej Zajicek
 /* sk_read() and sk_write() are called from BFD's event loop */
1766
1767
int
1768 fd926ed4 Jan Moskyto Matejka
sk_read(sock *s, int revents)
1769 b5d9ee5c Martin Mares
{
1770
  switch (s->type)
1771 05476c4d Ondrej Zajicek
  {
1772
  case SK_TCP_PASSIVE:
1773
    return sk_passive_connected(s, SK_TCP);
1774
1775
  case SK_UNIX_PASSIVE:
1776
    return sk_passive_connected(s, SK_UNIX);
1777
1778
  case SK_TCP:
1779
  case SK_UNIX:
1780 b5d9ee5c Martin Mares
    {
1781 05476c4d Ondrej Zajicek
      int c = read(s->fd, s->rpos, s->rbuf + s->rbsize - s->rpos);
1782
1783
      if (c < 0)
1784 b93abffa Martin Mares
      {
1785 05476c4d Ondrej Zajicek
        if (errno != EINTR && errno != EAGAIN)
1786
          s->err_hook(s, errno);
1787 fd926ed4 Jan Moskyto Matejka
        else if (errno == EAGAIN && !(revents & POLLIN))
1788
        {
1789
          log(L_ERR "Got EAGAIN from read when revents=%x (without POLLIN)", revents);
1790
          s->err_hook(s, 0);
1791
        }
1792 b5d9ee5c Martin Mares
      }
1793 05476c4d Ondrej Zajicek
      else if (!c)
1794
        s->err_hook(s, 0);
1795
      else
1796 b5d9ee5c Martin Mares
      {
1797 05476c4d Ondrej Zajicek
        s->rpos += c;
1798
        if (s->rx_hook(s, s->rpos - s->rbuf))
1799
        {
1800
          /* We need to be careful since the socket could have been deleted by the hook */
1801
          if (current_sock == s)
1802
            s->rpos = s->rbuf;
1803
        }
1804
        return 1;
1805 b5d9ee5c Martin Mares
      }
1806 05476c4d Ondrej Zajicek
      return 0;
1807
    }
1808 353729f5 Ondrej Zajicek
1809 05476c4d Ondrej Zajicek
  case SK_MAGIC:
1810
    return s->rx_hook(s, 0);
1811 b5d9ee5c Martin Mares
1812 05476c4d Ondrej Zajicek
  default:
1813
    {
1814
      int e = sk_recvmsg(s);
1815 353729f5 Ondrej Zajicek
1816 05476c4d Ondrej Zajicek
      if (e < 0)
1817
      {
1818
        if (errno != EINTR && errno != EAGAIN)
1819
          s->err_hook(s, errno);
1820
        return 0;
1821 b5d9ee5c Martin Mares
      }
1822 05476c4d Ondrej Zajicek
1823
      s->rpos = s->rbuf + e;
1824
      s->rx_hook(s, e);
1825
      return 1;
1826 b5d9ee5c Martin Mares
    }
1827 05476c4d Ondrej Zajicek
  }
1828 b5d9ee5c Martin Mares
}
1829
1830 6a8d3f1c Ondrej Zajicek
int
1831 b5d9ee5c Martin Mares
sk_write(sock *s)
1832
{
1833 320f4173 Martin Mares
  switch (s->type)
1834 05476c4d Ondrej Zajicek
  {
1835
  case SK_TCP_ACTIVE:
1836 320f4173 Martin Mares
    {
1837 05476c4d Ondrej Zajicek
      sockaddr sa;
1838
      sockaddr_fill(&sa, s->af, s->daddr, s->iface, s->dport);
1839
1840
      if (connect(s->fd, &sa.sa, SA_LEN(sa)) >= 0 || errno == EISCONN)
1841
        sk_tcp_connected(s);
1842
      else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS)
1843
        s->err_hook(s, errno);
1844 38a608c5 Martin Mares
      return 0;
1845 320f4173 Martin Mares
    }
1846 05476c4d Ondrej Zajicek
1847
  default:
1848
    if (s->ttx != s->tpos && sk_maybe_write(s) > 0)
1849
    {
1850
      if (s->tx_hook)
1851
        s->tx_hook(s);
1852
      return 1;
1853
    }
1854
    return 0;
1855
  }
1856 b5d9ee5c Martin Mares
}
1857
1858
void
1859 9dbcb11c Jan Moskyto Matejka
sk_err(sock *s, int revents)
1860
{
1861
  int se = 0, sse = sizeof(se);
1862 ccd2a3ed Jan Moskyto Matejka
  if ((s->type != SK_MAGIC) && (revents & POLLERR))
1863 9dbcb11c Jan Moskyto Matejka
    if (getsockopt(s->fd, SOL_SOCKET, SO_ERROR, &se, &sse) < 0)
1864
    {
1865
      log(L_ERR "IO: Socket error: SO_ERROR: %m");
1866
      se = 0;
1867
    }
1868
1869
  s->err_hook(s, se);
1870
}
1871
1872
void
1873 b5d9ee5c Martin Mares
sk_dump_all(void)
1874
{
1875
  node *n;
1876
  sock *s;
1877
1878
  debug("Open sockets:\n");
1879
  WALK_LIST(n, sock_list)
1880 05476c4d Ondrej Zajicek
  {
1881
    s = SKIP_BACK(sock, n, n);
1882
    debug("%p ", s);
1883
    sk_dump(&s->r);
1884
  }
1885 b5d9ee5c Martin Mares
  debug("\n");
1886
}
1887
1888
1889
/*
1890 8bcb5fb1 Ondrej Zajicek
 *        Internal event log and watchdog
1891
 */
1892
1893
#define EVENT_LOG_LENGTH 32
1894
1895
struct event_log_entry
1896
{
1897
  void *hook;
1898
  void *data;
1899
  btime timestamp;
1900
  btime duration;
1901
};
1902
1903
static struct event_log_entry event_log[EVENT_LOG_LENGTH];
1904
static struct event_log_entry *event_open;
1905
static int event_log_pos, event_log_num, watchdog_active;
1906
static btime last_time;
1907
static btime loop_time;
1908
1909
static void
1910
io_update_time(void)
1911
{
1912
  struct timespec ts;
1913
  int rv;
1914
1915
  if (!clock_monotonic_available)
1916
    return;
1917
1918
  /*
1919
   * This is third time-tracking procedure (after update_times() above and
1920
   * times_update() in BFD), dedicated to internal event log and latency
1921
   * tracking. Hopefully, we consolidate these sometimes.
1922
   */
1923
1924
  rv = clock_gettime(CLOCK_MONOTONIC, &ts);
1925
  if (rv < 0)
1926
    die("clock_gettime: %m");
1927
1928
  last_time = ((s64) ts.tv_sec S) + (ts.tv_nsec / 1000);
1929
1930
  if (event_open)
1931
  {
1932
    event_open->duration = last_time - event_open->timestamp;
1933
1934
    if (event_open->duration > config->latency_limit)
1935
      log(L_WARN "Event 0x%p 0x%p took %d ms",
1936
          event_open->hook, event_open->data, (int) (event_open->duration TO_MS));
1937
1938
    event_open = NULL;
1939
  }
1940
}
1941
1942
/**
1943
 * io_log_event - mark approaching event into event log
1944
 * @hook: event hook address
1945
 * @data: event data address
1946
 *
1947
 * Store info (hook, data, timestamp) about the following internal event into
1948
 * a circular event log (@event_log). When latency tracking is enabled, the log
1949
 * entry is kept open (in @event_open) so the duration can be filled later.
1950
 */
1951
void
1952
io_log_event(void *hook, void *data)
1953
{
1954
  if (config->latency_debug)
1955
    io_update_time();
1956
1957
  struct event_log_entry *en = event_log + event_log_pos;
1958
1959
  en->hook = hook;
1960
  en->data = data;
1961
  en->timestamp = last_time;
1962
  en->duration = 0;
1963
1964
  event_log_num++;
1965
  event_log_pos++;
1966
  event_log_pos %= EVENT_LOG_LENGTH;
1967
1968
  event_open = config->latency_debug ? en : NULL;
1969
}
1970
1971
static inline void
1972
io_close_event(void)
1973
{
1974
  if (event_open)
1975
    io_update_time();
1976
}
1977
1978
void
1979
io_log_dump(void)
1980
{
1981
  int i;
1982
1983
  log(L_DEBUG "Event log:");
1984
  for (i = 0; i < EVENT_LOG_LENGTH; i++)
1985
  {
1986
    struct event_log_entry *en = event_log + (event_log_pos + i) % EVENT_LOG_LENGTH;
1987
    if (en->hook)
1988
      log(L_DEBUG "  Event 0x%p 0x%p at %8d for %d ms", en->hook, en->data,
1989
          (int) ((last_time - en->timestamp) TO_MS), (int) (en->duration TO_MS));
1990
  }
1991
}
1992
1993
void
1994
watchdog_sigalrm(int sig UNUSED)
1995
{
1996
  /* Update last_time and duration, but skip latency check */
1997
  config->latency_limit = 0xffffffff;
1998
  io_update_time();
1999
2000
  /* We want core dump */
2001
  abort();
2002
}
2003
2004
static inline void
2005
watchdog_start1(void)
2006
{
2007
  io_update_time();
2008
2009
  loop_time = last_time;
2010
}
2011
2012
static inline void
2013
watchdog_start(void)
2014
{
2015
  io_update_time();
2016
2017
  loop_time = last_time;
2018
  event_log_num = 0;
2019
2020
  if (config->watchdog_timeout)
2021
  {
2022
    alarm(config->watchdog_timeout);
2023
    watchdog_active = 1;
2024
  }
2025
}
2026
2027
static inline void
2028
watchdog_stop(void)
2029
{
2030
  io_update_time();
2031
2032
  if (watchdog_active)
2033
  {
2034
    alarm(0);
2035
    watchdog_active = 0;
2036
  }
2037
2038
  btime duration = last_time - loop_time;
2039
  if (duration > config->watchdog_warning)
2040
    log(L_WARN "I/O loop cycle took %d ms for %d events",
2041
        (int) (duration TO_MS), event_log_num);
2042
}
2043
2044
2045
/*
2046 b5d9ee5c Martin Mares
 *        Main I/O Loop
2047
 */
2048
2049 4c9dd1e4 Martin Mares
/*
 * Requests for asynchronous actions. io_loop() tests each flag before
 * entering poll(); when a flag is nonzero it calls the matching async_*()
 * function, clears the flag and restarts the loop iteration.
 */
volatile int async_config_flag;         /* async_config() requested */
volatile int async_dump_flag;           /* async_dump() requested */
volatile int async_shutdown_flag;       /* async_shutdown() requested */
2052 4c9dd1e4 Martin Mares
2053 b5d9ee5c Martin Mares
void
2054
io_init(void)
2055
{
2056
  init_list(&near_timers);
2057
  init_list(&far_timers);
2058
  init_list(&sock_list);
2059 e8f73195 Martin Mares
  init_list(&global_event_list);
2060 7e5f5ffd Martin Mares
  krt_io_init();
2061 fd91ae33 Ondrej Zajicek
  init_times();
2062
  update_times();
2063 a92cf57d Ondrej Zajicek
  boot_time = now;
2064 fd91ae33 Ondrej Zajicek
  srandom((int) now_real);
2065 b5d9ee5c Martin Mares
}
2066
2067 ea89da38 Ondrej Zajicek
/*
 * Number of consecutive io_loop() passes that ended early (events still
 * pending) without running the second, non-fast_rx socket pass. Once it
 * reaches SHORT_LOOP_MAX that pass is run and the counter is reset.
 */
static int short_loops;         /* static storage: starts at 0 */
#define SHORT_LOOP_MAX 10
2069
2070 b5d9ee5c Martin Mares
void
2071
io_loop(void)
2072
{
2073 e1c13a5a Jan Moskyto Matejka
  int poll_tout;
2074 b5d9ee5c Martin Mares
  time_t tout;
2075 ea0a8be2 Jan Moskyto Matejka
  int nfds, events, pout;
2076 b5d9ee5c Martin Mares
  sock *s;
2077 38a608c5 Martin Mares
  node *n;
2078 e1c13a5a Jan Moskyto Matejka
  int fdmax = 256;
2079
  struct pollfd *pfd = xmalloc(fdmax * sizeof(struct pollfd));
2080 b5d9ee5c Martin Mares
2081 8bcb5fb1 Ondrej Zajicek
  watchdog_start1();
2082 b5d9ee5c Martin Mares
  for(;;)
2083
    {
2084 30770df2 Martin Mares
      events = ev_run_list(&global_event_list);
2085 bd22d7f4 Ondrej Zajicek (work)
    timers:
2086 fd91ae33 Ondrej Zajicek
      update_times();
2087 b5d9ee5c Martin Mares
      tout = tm_first_shot();
2088
      if (tout <= now)
2089
        {
2090
          tm_shot();
2091 bd22d7f4 Ondrej Zajicek (work)
          goto timers;
2092 b5d9ee5c Martin Mares
        }
2093 e1c13a5a Jan Moskyto Matejka
      poll_tout = (events ? 0 : MIN(tout - now, 3)) * 1000; /* Time in milliseconds */
2094 b5d9ee5c Martin Mares
2095 8bcb5fb1 Ondrej Zajicek
      io_close_event();
2096
2097 e1c13a5a Jan Moskyto Matejka
      nfds = 0;
2098 b5d9ee5c Martin Mares
      WALK_LIST(n, sock_list)
2099
        {
2100 e1c13a5a Jan Moskyto Matejka
          pfd[nfds] = (struct pollfd) { .fd = -1 }; /* everything other set to 0 by this */
2101 b5d9ee5c Martin Mares
          s = SKIP_BACK(sock, n, n);
2102
          if (s->rx_hook)
2103
            {
2104 e1c13a5a Jan Moskyto Matejka
              pfd[nfds].fd = s->fd;
2105
              pfd[nfds].events |= POLLIN;
2106 b5d9ee5c Martin Mares
            }
2107
          if (s->tx_hook && s->ttx != s->tpos)
2108
            {
2109 e1c13a5a Jan Moskyto Matejka
              pfd[nfds].fd = s->fd;
2110
              pfd[nfds].events |= POLLOUT;
2111
            }
2112
          if (pfd[nfds].fd != -1)
2113
            {
2114
              s->index = nfds;
2115
              nfds++;
2116 b5d9ee5c Martin Mares
            }
2117 38a608c5 Martin Mares
          else
2118 e1c13a5a Jan Moskyto Matejka
            s->index = -1;
2119
2120
          if (nfds >= fdmax)
2121
            {
2122
              fdmax *= 2;
2123
              pfd = xrealloc(pfd, fdmax * sizeof(struct pollfd));
2124
            }
2125 b5d9ee5c Martin Mares
        }
2126
2127 4c9dd1e4 Martin Mares
      /*
2128
       * Yes, this is racy. But even if the signal comes before this test
2129 e1c13a5a Jan Moskyto Matejka
       * and entering poll(), it gets caught on the next timer tick.
2130 4c9dd1e4 Martin Mares
       */
2131
2132
      if (async_config_flag)
2133
        {
2134 8bcb5fb1 Ondrej Zajicek
          io_log_event(async_config, NULL);
2135 4c9dd1e4 Martin Mares
          async_config();
2136
          async_config_flag = 0;
2137 f4aabcee Martin Mares
          continue;
2138 4c9dd1e4 Martin Mares
        }
2139
      if (async_dump_flag)
2140
        {
2141 8bcb5fb1 Ondrej Zajicek
          io_log_event(async_dump, NULL);
2142 4c9dd1e4 Martin Mares
          async_dump();
2143
          async_dump_flag = 0;
2144 f4aabcee Martin Mares
          continue;
2145
        }
2146
      if (async_shutdown_flag)
2147
        {
2148 8bcb5fb1 Ondrej Zajicek
          io_log_event(async_shutdown, NULL);
2149 f4aabcee Martin Mares
          async_shutdown();
2150
          async_shutdown_flag = 0;
2151
          continue;
2152 4c9dd1e4 Martin Mares
        }
2153
2154 e1c13a5a Jan Moskyto Matejka
      /* And finally enter poll() to find active sockets */
2155 8bcb5fb1 Ondrej Zajicek
      watchdog_stop();
2156 ea0a8be2 Jan Moskyto Matejka
      pout = poll(pfd, nfds, poll_tout);
2157 8bcb5fb1 Ondrej Zajicek
      watchdog_start();
2158 ea89da38 Ondrej Zajicek
2159 ea0a8be2 Jan Moskyto Matejka
      if (pout < 0)
2160 b5d9ee5c Martin Mares
        {
2161
          if (errno == EINTR || errno == EAGAIN)
2162
            continue;
2163 e1c13a5a Jan Moskyto Matejka
          die("poll: %m");
2164 b5d9ee5c Martin Mares
        }
2165 ea0a8be2 Jan Moskyto Matejka
      if (pout)
2166 b5d9ee5c Martin Mares
        {
2167 ea89da38 Ondrej Zajicek
          /* guaranteed to be non-empty */
2168
          current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
2169
2170 38a608c5 Martin Mares
          while (current_sock)
2171 b5d9ee5c Martin Mares
            {
2172 38a608c5 Martin Mares
              sock *s = current_sock;
2173 e1c13a5a Jan Moskyto Matejka
              if (s->index == -1)
2174
                {
2175
                  current_sock = sk_next(s);
2176
                  goto next;
2177
                }
2178
2179 38a608c5 Martin Mares
              int e;
2180 ea89da38 Ondrej Zajicek
              int steps;
2181
2182
              steps = MAX_STEPS;
2183 9dbcb11c Jan Moskyto Matejka
              if (s->fast_rx && (pfd[s->index].revents & POLLIN) && s->rx_hook)
2184 38a608c5 Martin Mares
                do
2185
                  {
2186 4323099d Ondrej Zajicek
                    steps--;
2187 8bcb5fb1 Ondrej Zajicek
                    io_log_event(s->rx_hook, s->data);
2188 fd926ed4 Jan Moskyto Matejka
                    e = sk_read(s, pfd[s->index].revents);
2189 38a608c5 Martin Mares
                    if (s != current_sock)
2190
                      goto next;
2191
                  }
2192 4323099d Ondrej Zajicek
                while (e && s->rx_hook && steps);
2193
2194
              steps = MAX_STEPS;
2195 e1c13a5a Jan Moskyto Matejka
              if (pfd[s->index].revents & POLLOUT)
2196 38a608c5 Martin Mares
                do
2197
                  {
2198 4323099d Ondrej Zajicek
                    steps--;
2199 8bcb5fb1 Ondrej Zajicek
                    io_log_event(s->tx_hook, s->data);
2200 38a608c5 Martin Mares
                    e = sk_write(s);
2201
                    if (s != current_sock)
2202
                      goto next;
2203
                  }
2204 4323099d Ondrej Zajicek
                while (e && steps);
2205 9dbcb11c Jan Moskyto Matejka
2206 38a608c5 Martin Mares
              current_sock = sk_next(s);
2207
            next: ;
2208 b5d9ee5c Martin Mares
            }
2209 ea89da38 Ondrej Zajicek
2210
          short_loops++;
2211
          if (events && (short_loops < SHORT_LOOP_MAX))
2212
            continue;
2213
          short_loops = 0;
2214
2215
          int count = 0;
2216
          current_sock = stored_sock;
2217
          if (current_sock == NULL)
2218
            current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
2219
2220
          while (current_sock && count < MAX_RX_STEPS)
2221
            {
2222
              sock *s = current_sock;
2223 e1c13a5a Jan Moskyto Matejka
              if (s->index == -1)
2224
                {
2225
                  current_sock = sk_next(s);
2226
                  goto next2;
2227
                }
2228 ea89da38 Ondrej Zajicek
2229 9dbcb11c Jan Moskyto Matejka
              if (!s->fast_rx && (pfd[s->index].revents & POLLIN) && s->rx_hook)
2230 ea89da38 Ondrej Zajicek
                {
2231
                  count++;
2232 8bcb5fb1 Ondrej Zajicek
                  io_log_event(s->rx_hook, s->data);
2233 fd926ed4 Jan Moskyto Matejka
                  sk_read(s, pfd[s->index].revents);
2234 ea89da38 Ondrej Zajicek
                  if (s != current_sock)
2235 9dbcb11c Jan Moskyto Matejka
                    goto next2;
2236
                }
2237
2238
              if (pfd[s->index].revents & (POLLHUP | POLLERR))
2239
                {
2240
                  sk_err(s, pfd[s->index].revents);
2241 33d22f0e Ondřej Surý
                  goto next2;
2242 ea89da38 Ondrej Zajicek
                }
2243 9dbcb11c Jan Moskyto Matejka
2244 ea89da38 Ondrej Zajicek
              current_sock = sk_next(s);
2245
            next2: ;
2246
            }
2247
2248 9dbcb11c Jan Moskyto Matejka
2249 ea89da38 Ondrej Zajicek
          stored_sock = current_sock;
2250 b5d9ee5c Martin Mares
        }
2251
    }
2252
}
2253 41c8976e Ondrej Filip
2254
/*
 * Check whether another BIRD instance is already listening on the UNIX
 * socket @path. Dies if the probe socket cannot be created, if @path does
 * not fit into sun_path, or if the connection attempt succeeds.
 */
void
test_old_bird(char *path)
{
  struct sockaddr_un sa;
  int fd = socket(AF_UNIX, SOCK_STREAM, 0);

  if (fd < 0)
    die("Cannot create socket: %m");
  if (strlen(path) >= sizeof(sa.sun_path))
    die("Socket path too long");

  /* The length check above guarantees that path and its NUL fit */
  bzero(&sa, sizeof(sa));
  sa.sun_family = AF_UNIX;
  strcpy(sa.sun_path, path);

  /* A successful connect means something is serving this socket */
  if (connect(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) == 0)
    die("I found another BIRD running.");

  close(fd);
}