Statistics
| Branch: | Revision:

iof-bird-daemon / sysdep / unix / io.c @ 7152e5ef

History | View | Annotate | Download (45.5 KB)

1
/*
2
 *        BIRD Internet Routing Daemon -- Unix I/O
3
 *
4
 *        (c) 1998--2004 Martin Mares <mj@ucw.cz>
5
 *      (c) 2004       Ondrej Filip <feela@network.cz>
6
 *
7
 *        Can be freely distributed and used under the terms of the GNU GPL.
8
 */
9

    
10
/* Unfortunately, some glibc versions hide parts of RFC 3542 API
11
   if _GNU_SOURCE is not defined. */
12
#define _GNU_SOURCE 1
13

    
14
#include <stdio.h>
15
#include <stdlib.h>
16
#include <time.h>
17
#include <sys/time.h>
18
#include <sys/types.h>
19
#include <sys/socket.h>
20
#include <sys/uio.h>
21
#include <sys/un.h>
22
#include <unistd.h>
23
#include <fcntl.h>
24
#include <errno.h>
25
#include <net/if.h>
26
#include <netinet/in.h>
27
#include <netinet/tcp.h>
28
#include <netinet/udp.h>
29
#include <netinet/icmp6.h>
30

    
31
#include "nest/bird.h"
32
#include "lib/lists.h"
33
#include "lib/resource.h"
34
#include "sysdep/unix/timer.h"
35
#include "lib/socket.h"
36
#include "lib/event.h"
37
#include "lib/string.h"
38
#include "nest/iface.h"
39

    
40
#include "sysdep/unix/unix.h"
41
#include CONFIG_INCLUDE_SYSIO_H
42

    
43
/* Maximum number of calls of tx handler for one socket in one
44
 * select iteration. Should be small enough to not monopolize CPU by
45
 * one protocol instance.
46
 */
47
#define MAX_STEPS 4
48

    
49
/* Maximum number of calls of rx handler for all sockets in one select
50
   iteration. RX callbacks are often much more costly so we limit
51
   this to get small latencies */
52
#define MAX_RX_STEPS 4
53

    
54
/*
55
 *        Tracked Files
56
 */
57

    
58
struct rfile {
59
  resource r;
60
  FILE *f;
61
};
62

    
63
static void
64
rf_free(resource *r)
65
{
66
  struct rfile *a = (struct rfile *) r;
67

    
68
  fclose(a->f);
69
}
70

    
71
static void
72
rf_dump(resource *r)
73
{
74
  struct rfile *a = (struct rfile *) r;
75

    
76
  debug("(FILE *%p)\n", a->f);
77
}
78

    
79
static struct resclass rf_class = {
80
  "FILE",
81
  sizeof(struct rfile),
82
  rf_free,
83
  rf_dump,
84
  NULL,
85
  NULL
86
};
87

    
88
void *
89
tracked_fopen(pool *p, char *name, char *mode)
90
{
91
  FILE *f = fopen(name, mode);
92

    
93
  if (f)
94
    {
95
      struct rfile *r = ralloc(p, &rf_class);
96
      r->f = f;
97
    }
98
  return f;
99
}
100

    
101
/**
102
 * DOC: Timers
103
 *
104
 * Timers are resources which represent a wish of a module to call
105
 * a function at the specified time. The platform dependent code
106
 * doesn't guarantee exact timing, only that a timer function
107
 * won't be called before the requested time.
108
 *
109
 * In BIRD, time is represented by values of the &bird_clock_t type
110
 * which are integral numbers interpreted as a relative number of seconds since
111
 * some fixed time point in the past. The current time can be read
112
 * from variable @now with reasonable accuracy and is monotonic. There is also
113
 * a current 'absolute' time in variable @now_real reported by OS.
114
 *
115
 * Each timer is described by a &timer structure containing a pointer
116
 * to the handler function (@hook), data private to this function (@data),
117
 * time the function should be called at (@expires, 0 for inactive timers),
118
 * for the other fields see |timer.h|.
119
 */
120

    
121
#define NEAR_TIMER_LIMIT 4
122

    
123
static list near_timers, far_timers;
124
static bird_clock_t first_far_timer = TIME_INFINITY;
125

    
126
/* now must be different from 0, because 0 is a special value in timer->expires */
127
bird_clock_t now = 1, now_real, boot_time;
128

    
129
static void
130
update_times_plain(void)
131
{
132
  bird_clock_t new_time = time(NULL);
133
  int delta = new_time - now_real;
134

    
135
  if ((delta >= 0) && (delta < 60))
136
    now += delta;
137
  else if (now_real != 0)
138
   log(L_WARN "Time jump, delta %d s", delta);
139

    
140
  now_real = new_time;
141
}
142

    
143
static void
144
update_times_gettime(void)
145
{
146
  struct timespec ts;
147
  int rv;
148

    
149
  rv = clock_gettime(CLOCK_MONOTONIC, &ts);
150
  if (rv != 0)
151
    die("clock_gettime: %m");
152

    
153
  if (ts.tv_sec != now) {
154
    if (ts.tv_sec < now)
155
      log(L_ERR "Monotonic timer is broken");
156

    
157
    now = ts.tv_sec;
158
    now_real = time(NULL);
159
  }
160
}
161

    
162
static int clock_monotonic_available;
163

    
164
static inline void
165
update_times(void)
166
{
167
  if (clock_monotonic_available)
168
    update_times_gettime();
169
  else
170
    update_times_plain();
171
}
172

    
173
static inline void
174
init_times(void)
175
{
176
 struct timespec ts;
177
 clock_monotonic_available = (clock_gettime(CLOCK_MONOTONIC, &ts) == 0);
178
 if (!clock_monotonic_available)
179
   log(L_WARN "Monotonic timer is missing");
180
}
181

    
182

    
183
static void
184
tm_free(resource *r)
185
{
186
  timer *t = (timer *) r;
187

    
188
  tm_stop(t);
189
}
190

    
191
static void
192
tm_dump(resource *r)
193
{
194
  timer *t = (timer *) r;
195

    
196
  debug("(code %p, data %p, ", t->hook, t->data);
197
  if (t->randomize)
198
    debug("rand %d, ", t->randomize);
199
  if (t->recurrent)
200
    debug("recur %d, ", t->recurrent);
201
  if (t->expires)
202
    debug("expires in %d sec)\n", t->expires - now);
203
  else
204
    debug("inactive)\n");
205
}
206

    
207
static struct resclass tm_class = {
208
  "Timer",
209
  sizeof(timer),
210
  tm_free,
211
  tm_dump,
212
  NULL,
213
  NULL
214
};
215

    
216
/**
217
 * tm_new - create a timer
218
 * @p: pool
219
 *
220
 * This function creates a new timer resource and returns
221
 * a pointer to it. To use the timer, you need to fill in
222
 * the structure fields and call tm_start() to start timing.
223
 */
224
timer *
225
tm_new(pool *p)
226
{
227
  timer *t = ralloc(p, &tm_class);
228
  return t;
229
}
230

    
231
static inline void
232
tm_insert_near(timer *t)
233
{
234
  node *n = HEAD(near_timers);
235

    
236
  while (n->next && (SKIP_BACK(timer, n, n)->expires < t->expires))
237
    n = n->next;
238
  insert_node(&t->n, n->prev);
239
}
240

    
241
/**
242
 * tm_start - start a timer
243
 * @t: timer
244
 * @after: number of seconds the timer should be run after
245
 *
246
 * This function schedules the hook function of the timer to
247
 * be called after @after seconds. If the timer has been already
248
 * started, it's @expire time is replaced by the new value.
249
 *
250
 * You can have set the @randomize field of @t, the timeout
251
 * will be increased by a random number of seconds chosen
252
 * uniformly from range 0 .. @randomize.
253
 *
254
 * You can call tm_start() from the handler function of the timer
255
 * to request another run of the timer. Also, you can set the @recurrent
256
 * field to have the timer re-added automatically with the same timeout.
257
 */
258
void
259
tm_start(timer *t, unsigned after)
260
{
261
  bird_clock_t when;
262

    
263
  if (t->randomize)
264
    after += random() % (t->randomize + 1);
265
  when = now + after;
266
  if (t->expires == when)
267
    return;
268
  if (t->expires)
269
    rem_node(&t->n);
270
  t->expires = when;
271
  if (after <= NEAR_TIMER_LIMIT)
272
    tm_insert_near(t);
273
  else
274
    {
275
      if (!first_far_timer || first_far_timer > when)
276
        first_far_timer = when;
277
      add_tail(&far_timers, &t->n);
278
    }
279
}
280

    
281
/**
282
 * tm_stop - stop a timer
283
 * @t: timer
284
 *
285
 * This function stops a timer. If the timer is already stopped,
286
 * nothing happens.
287
 */
288
void
289
tm_stop(timer *t)
290
{
291
  if (t->expires)
292
    {
293
      rem_node(&t->n);
294
      t->expires = 0;
295
    }
296
}
297

    
298
static void
299
tm_dump_them(char *name, list *l)
300
{
301
  node *n;
302
  timer *t;
303

    
304
  debug("%s timers:\n", name);
305
  WALK_LIST(n, *l)
306
    {
307
      t = SKIP_BACK(timer, n, n);
308
      debug("%p ", t);
309
      tm_dump(&t->r);
310
    }
311
  debug("\n");
312
}
313

    
314
void
315
tm_dump_all(void)
316
{
317
  tm_dump_them("Near", &near_timers);
318
  tm_dump_them("Far", &far_timers);
319
}
320

    
321
static inline time_t
322
tm_first_shot(void)
323
{
324
  time_t x = first_far_timer;
325

    
326
  if (!EMPTY_LIST(near_timers))
327
    {
328
      timer *t = SKIP_BACK(timer, n, HEAD(near_timers));
329
      if (t->expires < x)
330
        x = t->expires;
331
    }
332
  return x;
333
}
334

    
335
void io_log_event(void *hook, void *data);
336

    
337
static void
338
tm_shot(void)
339
{
340
  timer *t;
341
  node *n, *m;
342

    
343
  if (first_far_timer <= now)
344
    {
345
      bird_clock_t limit = now + NEAR_TIMER_LIMIT;
346
      first_far_timer = TIME_INFINITY;
347
      n = HEAD(far_timers);
348
      while (m = n->next)
349
        {
350
          t = SKIP_BACK(timer, n, n);
351
          if (t->expires <= limit)
352
            {
353
              rem_node(n);
354
              tm_insert_near(t);
355
            }
356
          else if (t->expires < first_far_timer)
357
            first_far_timer = t->expires;
358
          n = m;
359
        }
360
    }
361
  while ((n = HEAD(near_timers)) -> next)
362
    {
363
      int delay;
364
      t = SKIP_BACK(timer, n, n);
365
      if (t->expires > now)
366
        break;
367
      rem_node(n);
368
      delay = t->expires - now;
369
      t->expires = 0;
370
      if (t->recurrent)
371
        {
372
          int i = t->recurrent - delay;
373
          if (i < 0)
374
            i = 0;
375
          tm_start(t, i);
376
        }
377
      io_log_event(t->hook, t->data);
378
      t->hook(t);
379
    }
380
}
381

    
382
/**
383
 * tm_parse_datetime - parse a date and time
384
 * @x: datetime string
385
 *
386
 * tm_parse_datetime() takes a textual representation of
387
 * a date and time (dd-mm-yyyy hh:mm:ss)
388
 * and converts it to the corresponding value of type &bird_clock_t.
389
 */
390
bird_clock_t
391
tm_parse_datetime(char *x)
392
{
393
  struct tm tm;
394
  int n;
395
  time_t t;
396

    
397
  if (sscanf(x, "%d-%d-%d %d:%d:%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &tm.tm_hour, &tm.tm_min, &tm.tm_sec, &n) != 6 || x[n])
398
    return tm_parse_date(x);
399
  tm.tm_mon--;
400
  tm.tm_year -= 1900;
401
  t = mktime(&tm);
402
  if (t == (time_t) -1)
403
    return 0;
404
  return t;
405
}
406
/**
407
 * tm_parse_date - parse a date
408
 * @x: date string
409
 *
410
 * tm_parse_date() takes a textual representation of a date (dd-mm-yyyy)
411
 * and converts it to the corresponding value of type &bird_clock_t.
412
 */
413
bird_clock_t
414
tm_parse_date(char *x)
415
{
416
  struct tm tm;
417
  int n;
418
  time_t t;
419

    
420
  if (sscanf(x, "%d-%d-%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &n) != 3 || x[n])
421
    return 0;
422
  tm.tm_mon--;
423
  tm.tm_year -= 1900;
424
  tm.tm_hour = tm.tm_min = tm.tm_sec = 0;
425
  t = mktime(&tm);
426
  if (t == (time_t) -1)
427
    return 0;
428
  return t;
429
}
430

    
431
static void
432
tm_format_reltime(char *x, struct tm *tm, bird_clock_t delta)
433
{
434
  static char *month_names[12] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun",
435
                                   "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
436

    
437
  if (delta < 20*3600)
438
    bsprintf(x, "%02d:%02d", tm->tm_hour, tm->tm_min);
439
  else if (delta < 360*86400)
440
    bsprintf(x, "%s%02d", month_names[tm->tm_mon], tm->tm_mday);
441
  else
442
    bsprintf(x, "%d", tm->tm_year+1900);
443
}
444

    
445
#include "conf/conf.h"
446

    
447
/**
448
 * tm_format_datetime - convert date and time to textual representation
449
 * @x: destination buffer of size %TM_DATETIME_BUFFER_SIZE
450
 * @t: time
451
 *
452
 * This function formats the given relative time value @t to a textual
453
 * date/time representation (dd-mm-yyyy hh:mm:ss) in real time.
454
 */
455
void
456
tm_format_datetime(char *x, struct timeformat *fmt_spec, bird_clock_t t)
457
{
458
  const char *fmt_used;
459
  struct tm *tm;
460
  bird_clock_t delta = now - t;
461
  t = now_real - delta;
462
  tm = localtime(&t);
463

    
464
  if (fmt_spec->fmt1 == NULL)
465
    return tm_format_reltime(x, tm, delta);
466

    
467
  if ((fmt_spec->limit == 0) || (delta < fmt_spec->limit))
468
    fmt_used = fmt_spec->fmt1;
469
  else
470
    fmt_used = fmt_spec->fmt2;
471

    
472
  int rv = strftime(x, TM_DATETIME_BUFFER_SIZE, fmt_used, tm);
473
  if (((rv == 0) && fmt_used[0]) || (rv == TM_DATETIME_BUFFER_SIZE))
474
    strcpy(x, "<too-long>");
475
}
476

    
477

    
478
/**
479
 * DOC: Sockets
480
 *
481
 * Socket resources represent network connections. Their data structure (&socket)
482
 * contains a lot of fields defining the exact type of the socket, the local and
483
 * remote addresses and ports, pointers to socket buffers and finally pointers to
484
 * hook functions to be called when new data have arrived to the receive buffer
485
 * (@rx_hook), when the contents of the transmit buffer have been transmitted
486
 * (@tx_hook) and when an error or connection close occurs (@err_hook).
487
 *
488
 * Freeing of sockets from inside socket hooks is perfectly safe.
489
 */
490

    
491
#ifndef SOL_IP
492
#define SOL_IP IPPROTO_IP
493
#endif
494

    
495
#ifndef SOL_IPV6
496
#define SOL_IPV6 IPPROTO_IPV6
497
#endif
498

    
499
#ifndef SOL_ICMPV6
500
#define SOL_ICMPV6 IPPROTO_ICMPV6
501
#endif
502

    
503

    
504
/*
505
 *        Sockaddr helper functions
506
 */
507

    
508
/* Size of the sockaddr structure for address family @af; anything other
   than AF_INET is treated as IPv6. */
static inline int sockaddr_length(int af)
{
  if (af == AF_INET)
    return sizeof(struct sockaddr_in);

  return sizeof(struct sockaddr_in6);
}
510

    
511
static inline void
512
sockaddr_fill4(struct sockaddr_in *sa, ip_addr a, struct iface *ifa, uint port)
513
{
514
  memset(sa, 0, sizeof(struct sockaddr_in));
515
#ifdef HAVE_SIN_LEN
516
  sa->sin_len = sizeof(struct sockaddr_in);
517
#endif
518
  sa->sin_family = AF_INET;
519
  sa->sin_port = htons(port);
520
  sa->sin_addr = ipa_to_in4(a);
521
}
522

    
523
static inline void
524
sockaddr_fill6(struct sockaddr_in6 *sa, ip_addr a, struct iface *ifa, uint port)
525
{
526
  memset(sa, 0, sizeof(struct sockaddr_in6));
527
#ifdef SIN6_LEN
528
  sa->sin6_len = sizeof(struct sockaddr_in6);
529
#endif
530
  sa->sin6_family = AF_INET6;
531
  sa->sin6_port = htons(port);
532
  sa->sin6_flowinfo = 0;
533
  sa->sin6_addr = ipa_to_in6(a);
534

    
535
  if (ifa && ipa_is_link_local(a))
536
    sa->sin6_scope_id = ifa->index;
537
}
538

    
539
void
540
sockaddr_fill(sockaddr *sa, int af, ip_addr a, struct iface *ifa, uint port)
541
{
542
  if (af == AF_INET)
543
    sockaddr_fill4((struct sockaddr_in *) sa, a, ifa, port);
544
  else if (af == AF_INET6)
545
    sockaddr_fill6((struct sockaddr_in6 *) sa, a, ifa, port);
546
  else
547
    bug("Unknown AF");
548
}
549

    
550
static inline void
551
sockaddr_read4(struct sockaddr_in *sa, ip_addr *a, struct iface **ifa, uint *port)
552
{
553
  *port = ntohs(sa->sin_port);
554
  *a = ipa_from_in4(sa->sin_addr);
555
}
556

    
557
static inline void
558
sockaddr_read6(struct sockaddr_in6 *sa, ip_addr *a, struct iface **ifa, uint *port)
559
{
560
  *port = ntohs(sa->sin6_port);
561
  *a = ipa_from_in6(sa->sin6_addr);
562

    
563
  if (ifa && ipa_is_link_local(*a))
564
    *ifa = if_find_by_index(sa->sin6_scope_id);
565
}
566

    
567
int
568
sockaddr_read(sockaddr *sa, int af, ip_addr *a, struct iface **ifa, uint *port)
569
{
570
  if (sa->sa.sa_family != af)
571
    goto fail;
572

    
573
  if (af == AF_INET)
574
    sockaddr_read4((struct sockaddr_in *) sa, a, ifa, port);
575
  else if (af == AF_INET6)
576
    sockaddr_read6((struct sockaddr_in6 *) sa, a, ifa, port);
577
  else
578
    goto fail;
579

    
580
  return 0;
581

    
582
 fail:
583
  *a = IPA_NONE;
584
  *port = 0;
585
  return -1;
586
}
587

    
588
const int fam_to_af[] = { [SK_FAM_IPV4] = AF_INET, [SK_FAM_IPV6] = AF_INET6 };
589

    
590
/*
591
 *        IPv6 multicast syscalls
592
 */
593

    
594
/* Fortunately standardized in RFC 3493 */
595

    
596
/* Initializer for struct ipv6_mreq from multicast address @maddr and
   interface @ifa; @ifa is parenthesized so any pointer expression works. */
#define INIT_MREQ6(maddr,ifa) \
  { .ipv6mr_multiaddr = ipa_to_in6(maddr), .ipv6mr_interface = (ifa)->index }
598

    
599
static inline int
600
sk_setup_multicast6(sock *s)
601
{
602
  int index = s->iface->index;
603
  int ttl = s->ttl;
604
  int n = 0;
605

    
606
  if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_IF, &index, sizeof(index)) < 0)
607
    ERR("IPV6_MULTICAST_IF");
608

    
609
  if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_HOPS, &ttl, sizeof(ttl)) < 0)
610
    ERR("IPV6_MULTICAST_HOPS");
611

    
612
  if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_LOOP, &n, sizeof(n)) < 0)
613
    ERR("IPV6_MULTICAST_LOOP");
614

    
615
  return 0;
616
}
617

    
618
static inline int
619
sk_join_group6(sock *s, ip_addr maddr)
620
{
621
  struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface);
622

    
623
  if (setsockopt(s->fd, SOL_IPV6, IPV6_JOIN_GROUP, &mr, sizeof(mr)) < 0)
624
    ERR("IPV6_JOIN_GROUP");
625

    
626
  return 0;
627
}
628

    
629
static inline int
630
sk_leave_group6(sock *s, ip_addr maddr)
631
{
632
  struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface);
633

    
634
  if (setsockopt(s->fd, SOL_IPV6, IPV6_LEAVE_GROUP, &mr, sizeof(mr)) < 0)
635
    ERR("IPV6_LEAVE_GROUP");
636

    
637
  return 0;
638
}
639

    
640

    
641
/*
642
 *        IPv6 packet control messages
643
 */
644

    
645
/* Also standardized, in RFC 3542 */
646

    
647
/*
648
 * RFC 2292 uses IPV6_PKTINFO for both the socket option and the cmsg
649
 * type, RFC 3542 changed the socket option to IPV6_RECVPKTINFO. If we
650
 * don't have IPV6_RECVPKTINFO we suppose the OS implements the older
651
 * RFC and we use IPV6_PKTINFO.
652
 */
653
#ifndef IPV6_RECVPKTINFO
654
#define IPV6_RECVPKTINFO IPV6_PKTINFO
655
#endif
656
/*
657
 * Same goes for IPV6_HOPLIMIT -> IPV6_RECVHOPLIMIT.
658
 */
659
#ifndef IPV6_RECVHOPLIMIT
660
#define IPV6_RECVHOPLIMIT IPV6_HOPLIMIT
661
#endif
662

    
663

    
664
#define CMSG6_SPACE_PKTINFO CMSG_SPACE(sizeof(struct in6_pktinfo))
665
#define CMSG6_SPACE_TTL CMSG_SPACE(sizeof(int))
666

    
667
static inline int
668
sk_request_cmsg6_pktinfo(sock *s)
669
{
670
  int y = 1;
671

    
672
  if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVPKTINFO, &y, sizeof(y)) < 0)
673
    ERR("IPV6_RECVPKTINFO");
674

    
675
  return 0;
676
}
677

    
678
static inline int
679
sk_request_cmsg6_ttl(sock *s)
680
{
681
  int y = 1;
682

    
683
  if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVHOPLIMIT, &y, sizeof(y)) < 0)
684
    ERR("IPV6_RECVHOPLIMIT");
685

    
686
  return 0;
687
}
688

    
689
static inline void
690
sk_process_cmsg6_pktinfo(sock *s, struct cmsghdr *cm)
691
{
692
  if (cm->cmsg_type == IPV6_PKTINFO)
693
  {
694
    struct in6_pktinfo *pi = (struct in6_pktinfo *) CMSG_DATA(cm);
695
    s->laddr = ipa_from_in6(pi->ipi6_addr);
696
    s->lifindex = pi->ipi6_ifindex;
697
  }
698
}
699

    
700
static inline void
701
sk_process_cmsg6_ttl(sock *s, struct cmsghdr *cm)
702
{
703
  if (cm->cmsg_type == IPV6_HOPLIMIT)
704
    s->rcv_ttl = * (int *) CMSG_DATA(cm);
705
}
706

    
707
static inline void
708
sk_prepare_cmsgs6(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen)
709
{
710
  struct cmsghdr *cm;
711
  struct in6_pktinfo *pi;
712
  int controllen = 0;
713

    
714
  msg->msg_control = cbuf;
715
  msg->msg_controllen = cbuflen;
716

    
717
  cm = CMSG_FIRSTHDR(msg);
718
  cm->cmsg_level = SOL_IPV6;
719
  cm->cmsg_type = IPV6_PKTINFO;
720
  cm->cmsg_len = CMSG_LEN(sizeof(*pi));
721
  controllen += CMSG_SPACE(sizeof(*pi));
722

    
723
  pi = (struct in6_pktinfo *) CMSG_DATA(cm);
724
  pi->ipi6_ifindex = s->iface ? s->iface->index : 0;
725
  pi->ipi6_addr = ipa_to_in6(s->saddr);
726

    
727
  msg->msg_controllen = controllen;
728
}
729

    
730

    
731
/*
732
 *        Miscellaneous socket syscalls
733
 */
734

    
735
static inline int
736
sk_set_ttl4(sock *s, int ttl)
737
{
738
  if (setsockopt(s->fd, SOL_IP, IP_TTL, &ttl, sizeof(ttl)) < 0)
739
    ERR("IP_TTL");
740

    
741
  return 0;
742
}
743

    
744
static inline int
745
sk_set_ttl6(sock *s, int ttl)
746
{
747
  if (setsockopt(s->fd, SOL_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) < 0)
748
    ERR("IPV6_UNICAST_HOPS");
749

    
750
  return 0;
751
}
752

    
753
static inline int
754
sk_set_tos4(sock *s, int tos)
755
{
756
  if (setsockopt(s->fd, SOL_IP, IP_TOS, &tos, sizeof(tos)) < 0)
757
    ERR("IP_TOS");
758

    
759
  return 0;
760
}
761

    
762
static inline int
763
sk_set_tos6(sock *s, int tos)
764
{
765
  if (setsockopt(s->fd, SOL_IPV6, IPV6_TCLASS, &tos, sizeof(tos)) < 0)
766
    ERR("IPV6_TCLASS");
767

    
768
  return 0;
769
}
770

    
771
static inline int
772
sk_set_high_port(sock *s)
773
{
774
  /* Port range setting is optional, ignore it if not supported */
775

    
776
#ifdef IP_PORTRANGE
777
  if (sk_is_ipv4(s))
778
  {
779
    int range = IP_PORTRANGE_HIGH;
780
    if (setsockopt(s->fd, SOL_IP, IP_PORTRANGE, &range, sizeof(range)) < 0)
781
      ERR("IP_PORTRANGE");
782
  }
783
#endif
784

    
785
#ifdef IPV6_PORTRANGE
786
  if (sk_is_ipv6(s))
787
  {
788
    int range = IPV6_PORTRANGE_HIGH;
789
    if (setsockopt(s->fd, SOL_IPV6, IPV6_PORTRANGE, &range, sizeof(range)) < 0)
790
      ERR("IPV6_PORTRANGE");
791
  }
792
#endif
793

    
794
  return 0;
795
}
796

    
797
/*
 * Skip the IPv4 header at the start of packet @pkt of length *@len.
 *
 * Returns a pointer just past the header and reduces *@len by the header
 * length. Returns NULL (leaving *@len unchanged) when the packet is
 * shorter than 20 bytes, the version nibble is not 4, or the header
 * length field is below 20 bytes or exceeds the packet length.
 */
static inline byte *
sk_skip_ip_header(byte *pkt, int *len)
{
  int total = *len;
  int hlen;

  /* Length is checked first so that *pkt is only read from a plausible packet */
  if (total < 20)
    return NULL;

  if ((*pkt & 0xf0) != 0x40)
    return NULL;

  /* Low nibble is the header length in 32-bit words */
  hlen = (*pkt & 0x0f) * 4;
  if ((hlen < 20) || (hlen > total))
    return NULL;

  *len = total - hlen;
  return pkt + hlen;
}
810

    
811
byte *
812
sk_rx_buffer(sock *s, int *len)
813
{
814
  if (sk_is_ipv4(s) && (s->type == SK_IP))
815
    return sk_skip_ip_header(s->rbuf, len);
816
  else
817
    return s->rbuf;
818
}
819

    
820

    
821
/*
822
 *        Public socket functions
823
 */
824

    
825
/**
826
 * sk_setup_multicast - enable multicast for given socket
827
 * @s: socket
828
 *
829
 * Prepare transmission of multicast packets for given datagram socket.
830
 * The socket must have defined @iface.
831
 *
832
 * Result: 0 for success, -1 for an error.
833
 */
834

    
835
int
836
sk_setup_multicast(sock *s)
837
{
838
  ASSERT(s->iface);
839

    
840
  if (sk_is_ipv4(s))
841
    return sk_setup_multicast4(s);
842
  else
843
    return sk_setup_multicast6(s);
844
}
845

    
846
/**
847
 * sk_join_group - join multicast group for given socket
848
 * @s: socket
849
 * @maddr: multicast address
850
 *
851
 * Join multicast group for given datagram socket and associated interface.
852
 * The socket must have defined @iface.
853
 *
854
 * Result: 0 for success, -1 for an error.
855
 */
856

    
857
int
858
sk_join_group(sock *s, ip_addr maddr)
859
{
860
  if (sk_is_ipv4(s))
861
    return sk_join_group4(s, maddr);
862
  else
863
    return sk_join_group6(s, maddr);
864
}
865

    
866
/**
867
 * sk_leave_group - leave multicast group for given socket
868
 * @s: socket
869
 * @maddr: multicast address
870
 *
871
 * Leave multicast group for given datagram socket and associated interface.
872
 * The socket must have defined @iface.
873
 *
874
 * Result: 0 for success, -1 for an error.
875
 */
876

    
877
int
878
sk_leave_group(sock *s, ip_addr maddr)
879
{
880
  if (sk_is_ipv4(s))
881
    return sk_leave_group4(s, maddr);
882
  else
883
    return sk_leave_group6(s, maddr);
884
}
885

    
886
/**
887
 * sk_setup_broadcast - enable broadcast for given socket
888
 * @s: socket
889
 *
890
 * Allow reception and transmission of broadcast packets for given datagram
891
 * socket. The socket must have defined @iface. For transmission, packets should
892
 * be send to @brd address of @iface.
893
 *
894
 * Result: 0 for success, -1 for an error.
895
 */
896

    
897
int
898
sk_setup_broadcast(sock *s)
899
{
900
  int y = 1;
901

    
902
  if (setsockopt(s->fd, SOL_SOCKET, SO_BROADCAST, &y, sizeof(y)) < 0)
903
    ERR("SO_BROADCAST");
904

    
905
  return 0;
906
}
907

    
908
/**
909
 * sk_set_ttl - set transmit TTL for given socket
910
 * @s: socket
911
 * @ttl: TTL value
912
 *
913
 * Set TTL for already opened connections when TTL was not set before. Useful
914
 * for accepted connections when different ones should have different TTL.
915
 *
916
 * Result: 0 for success, -1 for an error.
917
 */
918

    
919
int
920
sk_set_ttl(sock *s, int ttl)
921
{
922
  s->ttl = ttl;
923

    
924
  if (sk_is_ipv4(s))
925
    return sk_set_ttl4(s, ttl);
926
  else
927
    return sk_set_ttl6(s, ttl);
928
}
929

    
930
/**
931
 * sk_set_min_ttl - set minimal accepted TTL for given socket
932
 * @s: socket
933
 * @ttl: TTL value
934
 *
935
 * Set minimal accepted TTL for given socket. Can be used for TTL security.
936
 * implementations.
937
 *
938
 * Result: 0 for success, -1 for an error.
939
 */
940

    
941
int
942
sk_set_min_ttl(sock *s, int ttl)
943
{
944
  if (sk_is_ipv4(s))
945
    return sk_set_min_ttl4(s, ttl);
946
  else
947
    return sk_set_min_ttl6(s, ttl);
948
}
949

    
950
#if 0
951
/**
952
 * sk_set_md5_auth - add / remove MD5 security association for given socket
953
 * @s: socket
954
 * @a: IP address of the other side
955
 * @ifa: Interface for link-local IP address
956
 * @passwd: password used for MD5 authentication
957
 *
958
 * In TCP MD5 handling code in kernel, there is a set of pairs (address,
959
 * password) used to choose password according to address of the other side.
960
 * This function is useful for listening socket, for active sockets it is enough
961
 * to set s->password field.
962
 *
963
 * When called with passwd != NULL, the new pair is added,
964
 * When called with passwd == NULL, the existing pair is removed.
965
 *
966
 * Result: 0 for success, -1 for an error.
967
 */
968

969
int
970
sk_set_md5_auth(sock *s, ip_addr a, struct iface *ifa, char *passwd)
971
{ DUMMY; }
972
#endif
973

    
974
/**
975
 * sk_set_ipv6_checksum - specify IPv6 checksum offset for given socket
976
 * @s: socket
977
 * @offset: offset
978
 *
979
 * Specify IPv6 checksum field offset for given raw IPv6 socket. After that, the
980
 * kernel will automatically fill it for outgoing packets and check it for
981
 * incoming packets. Should not be used on ICMPv6 sockets, where the position is
982
 * known to the kernel.
983
 *
984
 * Result: 0 for success, -1 for an error.
985
 */
986

    
987
int
988
sk_set_ipv6_checksum(sock *s, int offset)
989
{
990
  if (setsockopt(s->fd, SOL_IPV6, IPV6_CHECKSUM, &offset, sizeof(offset)) < 0)
991
    ERR("IPV6_CHECKSUM");
992

    
993
  return 0;
994
}
995

    
996
int
997
sk_set_icmp6_filter(sock *s, int p1, int p2)
998
{
999
  /* a bit of lame interface, but it is here only for Radv */
1000
  struct icmp6_filter f;
1001

    
1002
  ICMP6_FILTER_SETBLOCKALL(&f);
1003
  ICMP6_FILTER_SETPASS(p1, &f);
1004
  ICMP6_FILTER_SETPASS(p2, &f);
1005

    
1006
  if (setsockopt(s->fd, SOL_ICMPV6, ICMP6_FILTER, &f, sizeof(f)) < 0)
1007
    ERR("ICMP6_FILTER");
1008

    
1009
  return 0;
1010
}
1011

    
1012
void
1013
sk_log_error(sock *s, const char *p)
1014
{
1015
  log(L_ERR "%s: Socket error: %s%#m", p, s->err);
1016
}
1017

    
1018

    
1019
/*
1020
 *        Actual struct birdsock code
1021
 */
1022

    
1023
static list sock_list;
1024
static struct birdsock *current_sock;
1025
static struct birdsock *stored_sock;
1026
static int sock_recalc_fdsets_p;
1027

    
1028
/* Return the socket following @s in its list, or NULL when the node
   after @s has no successor (i.e. @s is the last socket). */
static inline sock *
sk_next(sock *s)
{
  node *succ = s->n.next;

  return succ->next ? SKIP_BACK(sock, n, succ) : NULL;
}
1036

    
1037
static void
1038
sk_alloc_bufs(sock *s)
1039
{
1040
  if (!s->rbuf && s->rbsize)
1041
    s->rbuf = s->rbuf_alloc = xmalloc(s->rbsize);
1042
  s->rpos = s->rbuf;
1043
  if (!s->tbuf && s->tbsize)
1044
    s->tbuf = s->tbuf_alloc = xmalloc(s->tbsize);
1045
  s->tpos = s->ttx = s->tbuf;
1046
}
1047

    
1048
static void
1049
sk_free_bufs(sock *s)
1050
{
1051
  if (s->rbuf_alloc)
1052
  {
1053
    xfree(s->rbuf_alloc);
1054
    s->rbuf = s->rbuf_alloc = NULL;
1055
  }
1056
  if (s->tbuf_alloc)
1057
  {
1058
    xfree(s->tbuf_alloc);
1059
    s->tbuf = s->tbuf_alloc = NULL;
1060
  }
1061
}
1062

    
1063
static void
1064
sk_free(resource *r)
1065
{
1066
  sock *s = (sock *) r;
1067

    
1068
  sk_free_bufs(s);
1069
  if (s->fd >= 0)
1070
  {
1071
    close(s->fd);
1072

    
1073
    /* FIXME: we should call sk_stop() for SKF_THREAD sockets */
1074
    if (s->flags & SKF_THREAD)
1075
      return;
1076

    
1077
    if (s == current_sock)
1078
      current_sock = sk_next(s);
1079
    if (s == stored_sock)
1080
      stored_sock = sk_next(s);
1081
    rem_node(&s->n);
1082
    sock_recalc_fdsets_p = 1;
1083
  }
1084
}
1085

    
1086
void
1087
sk_set_rbsize(sock *s, uint val)
1088
{
1089
  ASSERT(s->rbuf_alloc == s->rbuf);
1090

    
1091
  if (s->rbsize == val)
1092
    return;
1093

    
1094
  s->rbsize = val;
1095
  xfree(s->rbuf_alloc);
1096
  s->rbuf_alloc = xmalloc(val);
1097
  s->rpos = s->rbuf = s->rbuf_alloc;
1098
}
1099

    
1100
void
1101
sk_set_tbsize(sock *s, uint val)
1102
{
1103
  ASSERT(s->tbuf_alloc == s->tbuf);
1104

    
1105
  if (s->tbsize == val)
1106
    return;
1107

    
1108
  byte *old_tbuf = s->tbuf;
1109

    
1110
  s->tbsize = val;
1111
  s->tbuf = s->tbuf_alloc = xrealloc(s->tbuf_alloc, val);
1112
  s->tpos = s->tbuf + (s->tpos - old_tbuf);
1113
  s->ttx  = s->tbuf + (s->ttx  - old_tbuf);
1114
}
1115

    
1116
void
1117
sk_set_tbuf(sock *s, void *tbuf)
1118
{
1119
  s->tbuf = tbuf ?: s->tbuf_alloc;
1120
  s->ttx = s->tpos = s->tbuf;
1121
}
1122

    
1123
void
1124
sk_reallocate(sock *s)
1125
{
1126
  sk_free_bufs(s);
1127
  sk_alloc_bufs(s);
1128
}
1129

    
1130
static void
1131
sk_dump(resource *r)
1132
{
1133
  sock *s = (sock *) r;
1134
  static char *sk_type_names[] = { "TCP<", "TCP>", "TCP", "UDP", NULL, "IP", NULL, "MAGIC", "UNIX<", "UNIX", "DEL!" };
1135

    
1136
  debug("(%s, ud=%p, sa=%I, sp=%d, da=%I, dp=%d, tos=%d, ttl=%d, if=%s)\n",
1137
        sk_type_names[s->type],
1138
        s->data,
1139
        s->saddr,
1140
        s->sport,
1141
        s->daddr,
1142
        s->dport,
1143
        s->tos,
1144
        s->ttl,
1145
        s->iface ? s->iface->name : "none");
1146
}
1147

    
1148
static struct resclass sk_class = {
1149
  "Socket",
1150
  sizeof(sock),
1151
  sk_free,
1152
  sk_dump,
1153
  NULL,
1154
  NULL
1155
};
1156

    
1157
/**
1158
 * sk_new - create a socket
1159
 * @p: pool
1160
 *
1161
 * This function creates a new socket resource. If you want to use it,
1162
 * you need to fill in all the required fields of the structure and
1163
 * call sk_open() to do the actual opening of the socket.
1164
 *
1165
 * The real function name is sock_new(), sk_new() is a macro wrapper
1166
 * to avoid collision with OpenSSL.
1167
 */
1168
sock *
1169
sock_new(pool *p)
1170
{
1171
  sock *s = ralloc(p, &sk_class);
1172
  s->pool = p;
1173
  // s->saddr = s->daddr = IPA_NONE;
1174
  s->tos = s->priority = s->ttl = -1;
1175
  s->fd = -1;
1176
  return s;
1177
}
1178

    
1179
static int
1180
sk_setup(sock *s)
1181
{
1182
  int y = 1;
1183
  int fd = s->fd;
1184

    
1185
  if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
1186
    ERR("O_NONBLOCK");
1187

    
1188
  if (!s->fam)
1189
    return 0;
1190

    
1191
  if (ipa_nonzero(s->saddr) && !(s->flags & SKF_BIND))
1192
    s->flags |= SKF_PKTINFO;
1193

    
1194
#ifdef CONFIG_USE_HDRINCL
1195
  if (sk_is_ipv4(s) && (s->type == SK_IP) && (s->flags & SKF_PKTINFO))
1196
  {
1197
    s->flags &= ~SKF_PKTINFO;
1198
    s->flags |= SKF_HDRINCL;
1199
    if (setsockopt(fd, SOL_IP, IP_HDRINCL, &y, sizeof(y)) < 0)
1200
      ERR("IP_HDRINCL");
1201
  }
1202
#endif
1203

    
1204
  if (s->iface)
1205
  {
1206
#ifdef SO_BINDTODEVICE
1207
    struct ifreq ifr = {};
1208
    strcpy(ifr.ifr_name, s->iface->name);
1209
    if (setsockopt(s->fd, SOL_SOCKET, SO_BINDTODEVICE, &ifr, sizeof(ifr)) < 0)
1210
      ERR("SO_BINDTODEVICE");
1211
#endif
1212

    
1213
#ifdef CONFIG_UNIX_DONTROUTE
1214
    if (setsockopt(s->fd, SOL_SOCKET, SO_DONTROUTE, &y, sizeof(y)) < 0)
1215
      ERR("SO_DONTROUTE");
1216
#endif
1217
  }
1218

    
1219
  if (s->priority >= 0)
1220
    if (sk_set_priority(s, s->priority) < 0)
1221
      return -1;
1222

    
1223
  if (sk_is_ipv4(s))
1224
  {
1225
    if (s->flags & SKF_LADDR_RX)
1226
      if (sk_request_cmsg4_pktinfo(s) < 0)
1227
        return -1;
1228

    
1229
    if (s->flags & SKF_TTL_RX)
1230
      if (sk_request_cmsg4_ttl(s) < 0)
1231
        return -1;
1232

    
1233
    if ((s->type == SK_UDP) || (s->type == SK_IP))
1234
      if (sk_disable_mtu_disc4(s) < 0)
1235
        return -1;
1236

    
1237
    if (s->ttl >= 0)
1238
      if (sk_set_ttl4(s, s->ttl) < 0)
1239
        return -1;
1240

    
1241
    if (s->tos >= 0)
1242
      if (sk_set_tos4(s, s->tos) < 0)
1243
        return -1;
1244
  }
1245

    
1246
  if (sk_is_ipv6(s))
1247
  {
1248
    if (s->flags & SKF_V6ONLY)
1249
      if (setsockopt(fd, SOL_IPV6, IPV6_V6ONLY, &y, sizeof(y)) < 0)
1250
        ERR("IPV6_V6ONLY");
1251

    
1252
    if (s->flags & SKF_LADDR_RX)
1253
      if (sk_request_cmsg6_pktinfo(s) < 0)
1254
        return -1;
1255

    
1256
    if (s->flags & SKF_TTL_RX)
1257
      if (sk_request_cmsg6_ttl(s) < 0)
1258
        return -1;
1259

    
1260
    if ((s->type == SK_UDP) || (s->type == SK_IP))
1261
      if (sk_disable_mtu_disc6(s) < 0)
1262
        return -1;
1263

    
1264
    if (s->ttl >= 0)
1265
      if (sk_set_ttl6(s, s->ttl) < 0)
1266
        return -1;
1267

    
1268
    if (s->tos >= 0)
1269
      if (sk_set_tos6(s, s->tos) < 0)
1270
        return -1;
1271
  }
1272

    
1273
  return 0;
1274
}
1275

    
1276
static void
1277
sk_insert(sock *s)
1278
{
1279
  add_tail(&sock_list, &s->n);
1280
  sock_recalc_fdsets_p = 1;
1281
}
1282

    
1283
static void
1284
sk_tcp_connected(sock *s)
1285
{
1286
  sockaddr sa;
1287
  int sa_len = sizeof(sa);
1288

    
1289
  if ((getsockname(s->fd, &sa.sa, &sa_len) < 0) ||
1290
      (sockaddr_read(&sa, fam_to_af[s->fam], &s->saddr, &s->iface, &s->sport) < 0))
1291
    log(L_WARN "SOCK: Cannot get local IP address for TCP>");
1292

    
1293
  s->type = SK_TCP;
1294
  sk_alloc_bufs(s);
1295
  s->tx_hook(s);
1296
}
1297

    
1298
static int
1299
sk_passive_connected(sock *s, int type)
1300
{
1301
  sockaddr loc_sa, rem_sa;
1302
  int loc_sa_len = sizeof(loc_sa);
1303
  int rem_sa_len = sizeof(rem_sa);
1304

    
1305
  int fd = accept(s->fd, ((type == SK_TCP) ? &rem_sa.sa : NULL), &rem_sa_len);
1306
  if (fd < 0)
1307
  {
1308
    if ((errno != EINTR) && (errno != EAGAIN))
1309
      s->err_hook(s, errno);
1310
    return 0;
1311
  }
1312

    
1313
  sock *t = sk_new(s->pool);
1314
  t->type = type;
1315
  t->fam = s->fam;
1316
  t->fd = fd;
1317
  t->ttl = s->ttl;
1318
  t->tos = s->tos;
1319
  t->rbsize = s->rbsize;
1320
  t->tbsize = s->tbsize;
1321

    
1322
  if (type == SK_TCP)
1323
  {
1324
    if ((getsockname(fd, &loc_sa.sa, &loc_sa_len) < 0) ||
1325
        (sockaddr_read(&loc_sa, fam_to_af[s->fam], &t->saddr, &t->iface, &t->sport) < 0))
1326
      log(L_WARN "SOCK: Cannot get local IP address for TCP<");
1327

    
1328
    if (sockaddr_read(&rem_sa, fam_to_af[s->fam], &t->daddr, &t->iface, &t->dport) < 0)
1329
      log(L_WARN "SOCK: Cannot get remote IP address for TCP<");
1330
  }
1331

    
1332
  if (fd >= FD_SETSIZE)
1333
  {
1334
    /* FIXME: Call err_hook instead ? */
1335
    log(L_ERR "SOCK: Incoming connection from %I%J (port %d) %s",
1336
        t->daddr, ipa_is_link_local(t->daddr) ? t->iface : NULL,
1337
        t->dport, "rejected due to FD_SETSIZE limit");
1338
    close(fd);
1339
    t->fd = -1;
1340
    rfree(t);
1341
    return 1;
1342
  }
1343

    
1344
  if (sk_setup(t) < 0)
1345
  {
1346
    /* FIXME: Call err_hook instead ? */
1347
    log(L_ERR "SOCK: Incoming connection: %s%#m", t->err);
1348

    
1349
    /* FIXME: handle it better in rfree() */
1350
    close(t->fd);
1351
    t->fd = -1;
1352
    rfree(t);
1353
    return 1;
1354
  }
1355

    
1356
  sk_insert(t);
1357
  sk_alloc_bufs(t);
1358
  s->rx_hook(t, 0);
1359
  return 1;
1360
}
1361

    
1362
/**
1363
 * sk_open - open a socket
1364
 * @s: socket
1365
 *
1366
 * This function takes a socket resource created by sk_new() and
1367
 * initialized by the user and binds a corresponding network connection
1368
 * to it.
1369
 *
1370
 * Result: 0 for success, -1 for an error.
1371
 */
1372
int
1373
sk_open(sock *s)
1374
{
1375
  int fd = -1;
1376
  int do_bind = 0;
1377
  int bind_port = 0;
1378
  ip_addr bind_addr = IPA_NONE;
1379
  sockaddr sa;
1380

    
1381
  switch (s->type)
1382
  {
1383
  case SK_TCP_ACTIVE:
1384
    s->ttx = "";                        /* Force s->ttx != s->tpos */
1385
    /* Fall thru */
1386
  case SK_TCP_PASSIVE:
1387
    fd = socket(fam_to_af[s->fam], SOCK_STREAM, IPPROTO_TCP);
1388
    bind_port = s->sport;
1389
    bind_addr = s->saddr;
1390
    do_bind = bind_port || ipa_nonzero(bind_addr);
1391
    break;
1392

    
1393
  case SK_UDP:
1394
    fd = socket(fam_to_af[s->fam], SOCK_DGRAM, IPPROTO_UDP);
1395
    bind_port = s->sport;
1396
    bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE;
1397
    do_bind = 1;
1398
    break;
1399

    
1400
  case SK_IP:
1401
    fd = socket(fam_to_af[s->fam], SOCK_RAW, s->dport);
1402
    bind_port = 0;
1403
    bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE;
1404
    do_bind = ipa_nonzero(bind_addr);
1405
    break;
1406

    
1407
  case SK_MAGIC:
1408
    s->fam = SK_FAM_NONE;
1409
    fd = s->fd;
1410
    break;
1411

    
1412
  default:
1413
    bug("sk_open() called for invalid sock type %d", s->type);
1414
  }
1415

    
1416
  if (fd < 0)
1417
    ERR("socket");
1418

    
1419
  if (fd >= FD_SETSIZE)
1420
    ERR2("FD_SETSIZE limit reached");
1421

    
1422
  s->fd = fd;
1423

    
1424
  if (sk_setup(s) < 0)
1425
    goto err;
1426

    
1427
  if (do_bind)
1428
  {
1429
    if (bind_port)
1430
    {
1431
      int y = 1;
1432

    
1433
      if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &y, sizeof(y)) < 0)
1434
        ERR2("SO_REUSEADDR");
1435

    
1436
#ifdef CONFIG_NO_IFACE_BIND
1437
      /* Workaround missing ability to bind to an iface */
1438
      if ((s->type == SK_UDP) && s->iface && ipa_zero(bind_addr))
1439
      {
1440
        if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &y, sizeof(y)) < 0)
1441
          ERR2("SO_REUSEPORT");
1442
      }
1443
#endif
1444
    }
1445
    else
1446
      if (s->flags & SKF_HIGH_PORT)
1447
        if (sk_set_high_port(s) < 0)
1448
          log(L_WARN "Socket error: %s%#m", s->err);
1449

    
1450
    sockaddr_fill(&sa, fam_to_af[s->fam], bind_addr, s->iface, bind_port);
1451
    if (bind(fd, &sa.sa, SA_LEN(sa)) < 0)
1452
      ERR2("bind");
1453
  }
1454

    
1455
  if (s->password)
1456
    if (sk_set_md5_auth(s, s->daddr, s->iface, s->password) < 0)
1457
      goto err;
1458

    
1459
  switch (s->type)
1460
  {
1461
  case SK_TCP_ACTIVE:
1462
    sockaddr_fill(&sa, fam_to_af[s->fam], s->daddr, s->iface, s->dport);
1463
    if (connect(fd, &sa.sa, SA_LEN(sa)) >= 0)
1464
      sk_tcp_connected(s);
1465
    else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS &&
1466
             errno != ECONNREFUSED && errno != EHOSTUNREACH && errno != ENETUNREACH)
1467
      ERR2("connect");
1468
    break;
1469

    
1470
  case SK_TCP_PASSIVE:
1471
    if (listen(fd, 8) < 0)
1472
      ERR2("listen");
1473
    break;
1474

    
1475
  case SK_MAGIC:
1476
    break;
1477

    
1478
  default:
1479
    sk_alloc_bufs(s);
1480
  }
1481

    
1482
  if (!(s->flags & SKF_THREAD))
1483
    sk_insert(s);
1484
  return 0;
1485

    
1486
err:
1487
  close(fd);
1488
  s->fd = -1;
1489
  return -1;
1490
}
1491

    
1492
int
1493
sk_open_unix(sock *s, char *name)
1494
{
1495
  struct sockaddr_un sa;
1496
  int fd;
1497

    
1498
  /* We are sloppy during error (leak fd and not set s->err), but we die anyway */
1499

    
1500
  fd = socket(AF_UNIX, SOCK_STREAM, 0);
1501
  if (fd < 0)
1502
    return -1;
1503

    
1504
  if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
1505
    return -1;
1506

    
1507
  /* Path length checked in test_old_bird() */
1508
  sa.sun_family = AF_UNIX;
1509
  strcpy(sa.sun_path, name);
1510

    
1511
  if (bind(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) < 0)
1512
    return -1;
1513

    
1514
  if (listen(fd, 8) < 0)
1515
    return -1;
1516

    
1517
  s->fd = fd;
1518
  sk_insert(s);
1519
  return 0;
1520
}
1521

    
1522

    
/*
 * Sizes of the control-message buffers used by sk_recvmsg() and
 * sk_sendmsg(): the larger of the IPv4 and IPv6 requirements.
 */
#define CMSG_RX_SPACE MAX(CMSG4_SPACE_PKTINFO + CMSG4_SPACE_TTL, \
                          CMSG6_SPACE_PKTINFO + CMSG6_SPACE_TTL)
#define CMSG_TX_SPACE MAX(CMSG4_SPACE_PKTINFO, CMSG6_SPACE_PKTINFO)
1526

    
1527
static void
1528
sk_prepare_cmsgs(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen)
1529
{
1530
  if (sk_is_ipv4(s))
1531
    sk_prepare_cmsgs4(s, msg, cbuf, cbuflen);
1532
  else
1533
    sk_prepare_cmsgs6(s, msg, cbuf, cbuflen);
1534
}
1535

    
1536
static void
1537
sk_process_cmsgs(sock *s, struct msghdr *msg)
1538
{
1539
  struct cmsghdr *cm;
1540

    
1541
  s->laddr = IPA_NONE;
1542
  s->lifindex = 0;
1543
  s->rcv_ttl = -1;
1544

    
1545
  for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm))
1546
  {
1547
    if ((cm->cmsg_level == SOL_IP) && sk_is_ipv4(s))
1548
    {
1549
      sk_process_cmsg4_pktinfo(s, cm);
1550
      sk_process_cmsg4_ttl(s, cm);
1551
    }
1552

    
1553
    if ((cm->cmsg_level == SOL_IPV6) && sk_is_ipv6(s))
1554
    {
1555
      sk_process_cmsg6_pktinfo(s, cm);
1556
      sk_process_cmsg6_ttl(s, cm);
1557
    }
1558
  }
1559
}
1560

    
1561

    
1562
static inline int
1563
sk_sendmsg(sock *s)
1564
{
1565
  struct iovec iov = {s->tbuf, s->tpos - s->tbuf};
1566
  byte cmsg_buf[CMSG_TX_SPACE];
1567
  bzero(cmsg_buf, sizeof(cmsg_buf));
1568
  sockaddr dst = {};
1569

    
1570
  sockaddr_fill(&dst, fam_to_af[s->fam], s->daddr, s->iface, s->dport);
1571

    
1572
  struct msghdr msg = {
1573
    .msg_name = &dst.sa,
1574
    .msg_namelen = SA_LEN(dst),
1575
    .msg_iov = &iov,
1576
    .msg_iovlen = 1
1577
  };
1578

    
1579
#ifdef CONFIG_USE_HDRINCL
1580
  byte hdr[20];
1581
  struct iovec iov2[2] = { {hdr, 20}, iov };
1582

    
1583
  if (s->flags & SKF_HDRINCL)
1584
  {
1585
    sk_prepare_ip_header(s, hdr, iov.iov_len);
1586
    msg.msg_iov = iov2;
1587
    msg.msg_iovlen = 2;
1588
  }
1589
#endif
1590

    
1591
  if (s->flags & SKF_PKTINFO)
1592
    sk_prepare_cmsgs(s, &msg, cmsg_buf, sizeof(cmsg_buf));
1593

    
1594
  return sendmsg(s->fd, &msg, 0);
1595
}
1596

    
1597
static inline int
1598
sk_recvmsg(sock *s)
1599
{
1600
  struct iovec iov = {s->rbuf, s->rbsize};
1601
  byte cmsg_buf[CMSG_RX_SPACE];
1602
  sockaddr src;
1603

    
1604
  struct msghdr msg = {
1605
    .msg_name = &src.sa,
1606
    .msg_namelen = sizeof(src), // XXXX ??
1607
    .msg_iov = &iov,
1608
    .msg_iovlen = 1,
1609
    .msg_control = cmsg_buf,
1610
    .msg_controllen = sizeof(cmsg_buf),
1611
    .msg_flags = 0
1612
  };
1613

    
1614
  int rv = recvmsg(s->fd, &msg, 0);
1615
  if (rv < 0)
1616
    return rv;
1617

    
1618
  //ifdef IPV4
1619
  //  if (cf_type == SK_IP)
1620
  //    rv = ipv4_skip_header(pbuf, rv);
1621
  //endif
1622

    
1623
  sockaddr_read(&src, fam_to_af[s->fam], &s->faddr, NULL, &s->fport);
1624
  sk_process_cmsgs(s, &msg);
1625

    
1626
  if (msg.msg_flags & MSG_TRUNC)
1627
    s->flags |= SKF_TRUNCATED;
1628
  else
1629
    s->flags &= ~SKF_TRUNCATED;
1630

    
1631
  return rv;
1632
}
1633

    
1634

    
1635
static inline void reset_tx_buffer(sock *s) { s->ttx = s->tpos = s->tbuf; }
1636

    
1637
static int
1638
sk_maybe_write(sock *s)
1639
{
1640
  int e;
1641

    
1642
  switch (s->type)
1643
  {
1644
  case SK_TCP:
1645
  case SK_MAGIC:
1646
  case SK_UNIX:
1647
    while (s->ttx != s->tpos)
1648
    {
1649
      e = write(s->fd, s->ttx, s->tpos - s->ttx);
1650

    
1651
      if (e < 0)
1652
      {
1653
        if (errno != EINTR && errno != EAGAIN)
1654
        {
1655
          reset_tx_buffer(s);
1656
          /* EPIPE is just a connection close notification during TX */
1657
          s->err_hook(s, (errno != EPIPE) ? errno : 0);
1658
          return -1;
1659
        }
1660
        return 0;
1661
      }
1662
      s->ttx += e;
1663
    }
1664
    reset_tx_buffer(s);
1665
    return 1;
1666

    
1667
  case SK_UDP:
1668
  case SK_IP:
1669
    {
1670
      if (s->tbuf == s->tpos)
1671
        return 1;
1672

    
1673
      e = sk_sendmsg(s);
1674

    
1675
      if (e < 0)
1676
      {
1677
        if (errno != EINTR && errno != EAGAIN)
1678
        {
1679
          reset_tx_buffer(s);
1680
          s->err_hook(s, errno);
1681
          return -1;
1682
        }
1683

    
1684
        if (!s->tx_hook)
1685
          reset_tx_buffer(s);
1686
        return 0;
1687
      }
1688
      reset_tx_buffer(s);
1689
      return 1;
1690
    }
1691
  default:
1692
    bug("sk_maybe_write: unknown socket type %d", s->type);
1693
  }
1694
}
1695

    
1696
int
1697
sk_rx_ready(sock *s)
1698
{
1699
  fd_set rd, wr;
1700
  struct timeval timo;
1701
  int rv;
1702

    
1703
  FD_ZERO(&rd);
1704
  FD_ZERO(&wr);
1705
  FD_SET(s->fd, &rd);
1706

    
1707
  timo.tv_sec = 0;
1708
  timo.tv_usec = 0;
1709

    
1710
 redo:
1711
  rv = select(s->fd+1, &rd, &wr, NULL, &timo);
1712

    
1713
  if ((rv < 0) && (errno == EINTR || errno == EAGAIN))
1714
    goto redo;
1715

    
1716
  return rv;
1717
}
1718

    
1719
/**
1720
 * sk_send - send data to a socket
1721
 * @s: socket
1722
 * @len: number of bytes to send
1723
 *
1724
 * This function sends @len bytes of data prepared in the
1725
 * transmit buffer of the socket @s to the network connection.
1726
 * If the packet can be sent immediately, it does so and returns
1727
 * 1, else it queues the packet for later processing, returns 0
1728
 * and calls the @tx_hook of the socket when the tranmission
1729
 * takes place.
1730
 */
1731
int
1732
sk_send(sock *s, unsigned len)
1733
{
1734
  s->ttx = s->tbuf;
1735
  s->tpos = s->tbuf + len;
1736
  return sk_maybe_write(s);
1737
}
1738

    
1739
/**
1740
 * sk_send_to - send data to a specific destination
1741
 * @s: socket
1742
 * @len: number of bytes to send
1743
 * @addr: IP address to send the packet to
1744
 * @port: port to send the packet to
1745
 *
1746
 * This is a sk_send() replacement for connection-less packet sockets
1747
 * which allows destination of the packet to be chosen dynamically.
1748
 * Raw IP sockets should use 0 for @port.
1749
 */
1750
int
1751
sk_send_to(sock *s, unsigned len, ip_addr addr, unsigned port)
1752
{
1753
  s->daddr = addr;
1754
  if (port)
1755
    s->dport = port;
1756

    
1757
  s->ttx = s->tbuf;
1758
  s->tpos = s->tbuf + len;
1759
  return sk_maybe_write(s);
1760
}
1761

    
1762
/*
1763
int
1764
sk_send_full(sock *s, unsigned len, struct iface *ifa,
1765
             ip_addr saddr, ip_addr daddr, unsigned dport)
1766
{
1767
  s->iface = ifa;
1768
  s->saddr = saddr;
1769
  s->daddr = daddr;
1770
  s->dport = dport;
1771
  s->ttx = s->tbuf;
1772
  s->tpos = s->tbuf + len;
1773
  return sk_maybe_write(s);
1774
}
1775
*/
1776

    
1777
 /* sk_read() and sk_write() are called from BFD's event loop */
1778

    
1779
int
1780
sk_read(sock *s)
1781
{
1782
  switch (s->type)
1783
  {
1784
  case SK_TCP_PASSIVE:
1785
    return sk_passive_connected(s, SK_TCP);
1786

    
1787
  case SK_UNIX_PASSIVE:
1788
    return sk_passive_connected(s, SK_UNIX);
1789

    
1790
  case SK_TCP:
1791
  case SK_UNIX:
1792
    {
1793
      int c = read(s->fd, s->rpos, s->rbuf + s->rbsize - s->rpos);
1794

    
1795
      if (c < 0)
1796
      {
1797
        if (errno != EINTR && errno != EAGAIN)
1798
          s->err_hook(s, errno);
1799
      }
1800
      else if (!c)
1801
        s->err_hook(s, 0);
1802
      else
1803
      {
1804
        s->rpos += c;
1805
        if (s->rx_hook(s, s->rpos - s->rbuf))
1806
        {
1807
          /* We need to be careful since the socket could have been deleted by the hook */
1808
          if (current_sock == s)
1809
            s->rpos = s->rbuf;
1810
        }
1811
        return 1;
1812
      }
1813
      return 0;
1814
    }
1815

    
1816
  case SK_MAGIC:
1817
    return s->rx_hook(s, 0);
1818

    
1819
  default:
1820
    {
1821
      int e = sk_recvmsg(s);
1822

    
1823
      if (e < 0)
1824
      {
1825
        if (errno != EINTR && errno != EAGAIN)
1826
          s->err_hook(s, errno);
1827
        return 0;
1828
      }
1829

    
1830
      s->rpos = s->rbuf + e;
1831
      s->rx_hook(s, e);
1832
      return 1;
1833
    }
1834
  }
1835
}
1836

    
1837
int
1838
sk_write(sock *s)
1839
{
1840
  switch (s->type)
1841
  {
1842
  case SK_TCP_ACTIVE:
1843
    {
1844
      sockaddr sa;
1845
      sockaddr_fill(&sa, fam_to_af[s->fam], s->daddr, s->iface, s->dport);
1846

    
1847
      if (connect(s->fd, &sa.sa, SA_LEN(sa)) >= 0 || errno == EISCONN)
1848
        sk_tcp_connected(s);
1849
      else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS)
1850
        s->err_hook(s, errno);
1851
      return 0;
1852
    }
1853

    
1854
  default:
1855
    if (s->ttx != s->tpos && sk_maybe_write(s) > 0)
1856
    {
1857
      if (s->tx_hook)
1858
        s->tx_hook(s);
1859
      return 1;
1860
    }
1861
    return 0;
1862
  }
1863
}
1864

    
1865
int sk_is_ipv4(sock *s)
1866
{ return s->fam == SK_FAM_IPV4; }
1867

    
1868
int sk_is_ipv6(sock *s)
1869
{ return s->fam == SK_FAM_IPV6; }
1870

    
1871
void
1872
sk_dump_all(void)
1873
{
1874
  node *n;
1875
  sock *s;
1876

    
1877
  debug("Open sockets:\n");
1878
  WALK_LIST(n, sock_list)
1879
  {
1880
    s = SKIP_BACK(sock, n, n);
1881
    debug("%p ", s);
1882
    sk_dump(&s->r);
1883
  }
1884
  debug("\n");
1885
}
1886

    
1887

    
1888
/*
1889
 *        Internal event log and watchdog
1890
 */
1891

    
1892
#define EVENT_LOG_LENGTH 32
1893

    
1894
struct event_log_entry
1895
{
1896
  void *hook;
1897
  void *data;
1898
  btime timestamp;
1899
  btime duration;
1900
};
1901

    
1902
static struct event_log_entry event_log[EVENT_LOG_LENGTH];
1903
static struct event_log_entry *event_open;
1904
static int event_log_pos, event_log_num, watchdog_active;
1905
static btime last_time;
1906
static btime loop_time;
1907

    
1908
static void
1909
io_update_time(void)
1910
{
1911
  struct timespec ts;
1912
  int rv;
1913

    
1914
  if (!clock_monotonic_available)
1915
    return;
1916

    
1917
  /*
1918
   * This is third time-tracking procedure (after update_times() above and
1919
   * times_update() in BFD), dedicated to internal event log and latency
1920
   * tracking. Hopefully, we consolidate these sometimes.
1921
   */
1922

    
1923
  rv = clock_gettime(CLOCK_MONOTONIC, &ts);
1924
  if (rv < 0)
1925
    die("clock_gettime: %m");
1926

    
1927
  last_time = ((s64) ts.tv_sec S) + (ts.tv_nsec / 1000);
1928

    
1929
  if (event_open)
1930
  {
1931
    event_open->duration = last_time - event_open->timestamp;
1932

    
1933
    if (event_open->duration > config->latency_limit)
1934
      log(L_WARN "Event 0x%p 0x%p took %d ms",
1935
          event_open->hook, event_open->data, (int) (event_open->duration TO_MS));
1936

    
1937
    event_open = NULL;
1938
  }
1939
}
1940

    
1941
/**
1942
 * io_log_event - mark approaching event into event log
1943
 * @hook: event hook address
1944
 * @data: event data address
1945
 *
1946
 * Store info (hook, data, timestamp) about the following internal event into
1947
 * a circular event log (@event_log). When latency tracking is enabled, the log
1948
 * entry is kept open (in @event_open) so the duration can be filled later.
1949
 */
1950
void
1951
io_log_event(void *hook, void *data)
1952
{
1953
  if (config->latency_debug)
1954
    io_update_time();
1955

    
1956
  struct event_log_entry *en = event_log + event_log_pos;
1957

    
1958
  en->hook = hook;
1959
  en->data = data;
1960
  en->timestamp = last_time;
1961
  en->duration = 0;
1962

    
1963
  event_log_num++;
1964
  event_log_pos++;
1965
  event_log_pos %= EVENT_LOG_LENGTH;
1966

    
1967
  event_open = config->latency_debug ? en : NULL;
1968
}
1969

    
1970
static inline void
1971
io_close_event(void)
1972
{
1973
  if (event_open)
1974
    io_update_time();
1975
}
1976

    
1977
void
1978
io_log_dump(void)
1979
{
1980
  int i;
1981

    
1982
  log(L_DEBUG "Event log:");
1983
  for (i = 0; i < EVENT_LOG_LENGTH; i++)
1984
  {
1985
    struct event_log_entry *en = event_log + (event_log_pos + i) % EVENT_LOG_LENGTH;
1986
    if (en->hook)
1987
      log(L_DEBUG "  Event 0x%p 0x%p at %8d for %d ms", en->hook, en->data,
1988
          (int) ((last_time - en->timestamp) TO_MS), (int) (en->duration TO_MS));
1989
  }
1990
}
1991

    
1992
void
1993
watchdog_sigalrm(int sig UNUSED)
1994
{
1995
  /* Update last_time and duration, but skip latency check */
1996
  config->latency_limit = 0xffffffff;
1997
  io_update_time();
1998

    
1999
  /* We want core dump */
2000
  abort();
2001
}
2002

    
2003
static inline void
2004
watchdog_start1(void)
2005
{
2006
  io_update_time();
2007

    
2008
  loop_time = last_time;
2009
}
2010

    
2011
static inline void
2012
watchdog_start(void)
2013
{
2014
  io_update_time();
2015

    
2016
  loop_time = last_time;
2017
  event_log_num = 0;
2018

    
2019
  if (config->watchdog_timeout)
2020
  {
2021
    alarm(config->watchdog_timeout);
2022
    watchdog_active = 1;
2023
  }
2024
}
2025

    
2026
static inline void
2027
watchdog_stop(void)
2028
{
2029
  io_update_time();
2030

    
2031
  if (watchdog_active)
2032
  {
2033
    alarm(0);
2034
    watchdog_active = 0;
2035
  }
2036

    
2037
  btime duration = last_time - loop_time;
2038
  if (duration > config->watchdog_warning)
2039
    log(L_WARN "I/O loop cycle took %d ms for %d events",
2040
        (int) (duration TO_MS), event_log_num);
2041
}
2042

    
2043

    
2044
/*
2045
 *        Main I/O Loop
2046
 */
2047

    
/*
 * Asynchronous reconfiguration / dump scheduled. The flags are tested and
 * cleared by io_loop().
 */
volatile int async_config_flag;
volatile int async_dump_flag;
2050

    
2051
void
2052
io_init(void)
2053
{
2054
  init_list(&near_timers);
2055
  init_list(&far_timers);
2056
  init_list(&sock_list);
2057
  init_list(&global_event_list);
2058
  krt_io_init();
2059
  init_times();
2060
  update_times();
2061
  boot_time = now;
2062
  srandom((int) now_real);
2063
}
2064

    
/*
 * io_loop() counts in short_loops its successive dispatch rounds; while
 * events are pending and the count stays below SHORT_LOOP_MAX, the second
 * (type < SK_MAGIC) receive pass of the round is skipped.
 */
#define SHORT_LOOP_MAX 10
static int short_loops = 0;
2067

    
2068
void
2069
io_loop(void)
2070
{
2071
  fd_set rd, wr;
2072
  struct timeval timo;
2073
  time_t tout;
2074
  int hi, events;
2075
  sock *s;
2076
  node *n;
2077

    
2078
  watchdog_start1();
2079
  sock_recalc_fdsets_p = 1;
2080
  for(;;)
2081
    {
2082
      events = ev_run_list(&global_event_list);
2083
      update_times();
2084
      tout = tm_first_shot();
2085
      if (tout <= now)
2086
        {
2087
          tm_shot();
2088
          continue;
2089
        }
2090
      timo.tv_sec = events ? 0 : MIN(tout - now, 3);
2091
      timo.tv_usec = 0;
2092

    
2093
      io_close_event();
2094

    
2095
      if (sock_recalc_fdsets_p)
2096
        {
2097
          sock_recalc_fdsets_p = 0;
2098
          FD_ZERO(&rd);
2099
          FD_ZERO(&wr);
2100
        }
2101

    
2102
      hi = 0;
2103
      WALK_LIST(n, sock_list)
2104
        {
2105
          s = SKIP_BACK(sock, n, n);
2106
          if (s->rx_hook)
2107
            {
2108
              FD_SET(s->fd, &rd);
2109
              if (s->fd > hi)
2110
                hi = s->fd;
2111
            }
2112
          else
2113
            FD_CLR(s->fd, &rd);
2114
          if (s->tx_hook && s->ttx != s->tpos)
2115
            {
2116
              FD_SET(s->fd, &wr);
2117
              if (s->fd > hi)
2118
                hi = s->fd;
2119
            }
2120
          else
2121
            FD_CLR(s->fd, &wr);
2122
        }
2123

    
2124
      /*
2125
       * Yes, this is racy. But even if the signal comes before this test
2126
       * and entering select(), it gets caught on the next timer tick.
2127
       */
2128

    
2129
      if (async_config_flag)
2130
        {
2131
          io_log_event(async_config, NULL);
2132
          async_config();
2133
          async_config_flag = 0;
2134
          continue;
2135
        }
2136
      if (async_dump_flag)
2137
        {
2138
          io_log_event(async_dump, NULL);
2139
          async_dump();
2140
          async_dump_flag = 0;
2141
          continue;
2142
        }
2143
      if (async_shutdown_flag)
2144
        {
2145
          io_log_event(async_shutdown, NULL);
2146
          async_shutdown();
2147
          async_shutdown_flag = 0;
2148
          continue;
2149
        }
2150

    
2151
      /* And finally enter select() to find active sockets */
2152
      watchdog_stop();
2153
      hi = select(hi+1, &rd, &wr, NULL, &timo);
2154
      watchdog_start();
2155

    
2156
      if (hi < 0)
2157
        {
2158
          if (errno == EINTR || errno == EAGAIN)
2159
            continue;
2160
          die("select: %m");
2161
        }
2162
      if (hi)
2163
        {
2164
          /* guaranteed to be non-empty */
2165
          current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
2166

    
2167
          while (current_sock)
2168
            {
2169
              sock *s = current_sock;
2170
              int e;
2171
              int steps;
2172

    
2173
              steps = MAX_STEPS;
2174
              if ((s->type >= SK_MAGIC) && FD_ISSET(s->fd, &rd) && s->rx_hook)
2175
                do
2176
                  {
2177
                    steps--;
2178
                    io_log_event(s->rx_hook, s->data);
2179
                    e = sk_read(s);
2180
                    if (s != current_sock)
2181
                      goto next;
2182
                  }
2183
                while (e && s->rx_hook && steps);
2184

    
2185
              steps = MAX_STEPS;
2186
              if (FD_ISSET(s->fd, &wr))
2187
                do
2188
                  {
2189
                    steps--;
2190
                    io_log_event(s->tx_hook, s->data);
2191
                    e = sk_write(s);
2192
                    if (s != current_sock)
2193
                      goto next;
2194
                  }
2195
                while (e && steps);
2196
              current_sock = sk_next(s);
2197
            next: ;
2198
            }
2199

    
2200
          short_loops++;
2201
          if (events && (short_loops < SHORT_LOOP_MAX))
2202
            continue;
2203
          short_loops = 0;
2204

    
2205
          int count = 0;
2206
          current_sock = stored_sock;
2207
          if (current_sock == NULL)
2208
            current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
2209

    
2210
          while (current_sock && count < MAX_RX_STEPS)
2211
            {
2212
              sock *s = current_sock;
2213
              int e UNUSED;
2214

    
2215
              if ((s->type < SK_MAGIC) && FD_ISSET(s->fd, &rd) && s->rx_hook)
2216
                {
2217
                  count++;
2218
                  io_log_event(s->rx_hook, s->data);
2219
                  e = sk_read(s);
2220
                  if (s != current_sock)
2221
                      goto next2;
2222
                }
2223
              current_sock = sk_next(s);
2224
            next2: ;
2225
            }
2226

    
2227
          stored_sock = current_sock;
2228
        }
2229
    }
2230
}
2231

    
/*
 * test_old_bird - check whether another instance already serves @path
 * @path: filesystem path of the control socket
 *
 * Tries to connect to the UNIX-domain socket at @path. The process dies
 * when the socket cannot be created, when @path does not fit into
 * sun_path, or when the connection succeeds (another BIRD is running).
 * Otherwise the probe descriptor is closed and the function returns.
 */
void
test_old_bird(char *path)
{
  struct sockaddr_un sa;
  size_t path_len;
  int fd;

  fd = socket(AF_UNIX, SOCK_STREAM, 0);
  if (fd < 0)
    die("Cannot create socket: %m");

  path_len = strlen(path);
  if (path_len >= sizeof(sa.sun_path))
    die("Socket path too long");

  memset(&sa, 0, sizeof(sa));
  sa.sun_family = AF_UNIX;
  memcpy(sa.sun_path, path, path_len + 1);      /* includes the terminating NUL */

  if (connect(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) == 0)
    die("I found another BIRD running.");

  close(fd);
}